2 *******************************************************************************
\r
3 * Copyright (C) 2001-2004, International Business Machines Corporation and *
\r
4 * others. All Rights Reserved. *
\r
5 *******************************************************************************
\r
7 package com.ibm.icu.dev.demo.translit;
\r
8 import com.ibm.icu.lang.UScript;
\r
9 import com.ibm.icu.lang.UCharacter;
\r
10 import com.ibm.icu.text.UTF16;
\r
11 import com.ibm.icu.text.Transliterator;
\r
12 import com.ibm.icu.text.UnicodeSet;
\r
13 import com.ibm.icu.text.UnicodeSetIterator;
\r
14 import com.ibm.icu.text.Normalizer;
\r
15 import com.ibm.icu.impl.Utility;
\r
20 public class TransliterationChart {
\r
// Entry point. From the visible statements: builds per-script character sets and
// transliterators, round-trips characters between every pair of scripts, and writes
// an HTML report ("transChart.html") with a chart table, a "Failed Normalization"
// section and a "Failed Round-Trip" section.
// NOTE(review): this listing is missing lines (the embedded line numbers skip), including
// closing braces, the contents of indicScripts and at least one loop header. Comments
// below describe only the statements that are visible.
21 public static void main(String[] args) throws IOException {
\r
22 System.out.println("Start");
\r
// Code points in this set are skipped when converting (see the lengthMarks.contains check below).
23 UnicodeSet lengthMarks = new UnicodeSet("[\u09D7\u0B56-\u0B57\u0BD7\u0C56\u0CD5-\u0CD6\u0D57\u0C55\u0CD5]");
\r
// Script codes to chart; compared against UScript.LATIN further down.
// The array contents are not visible in this listing.
24 int[] indicScripts = {
\r
// Parallel arrays indexed like indicScripts: script name, character set, and "any-<name>" transliterator.
36 String[] names = new String[indicScripts.length];
\r
37 UnicodeSet[] sets = new UnicodeSet[indicScripts.length];
\r
38 Transliterator[] fallbacks = new Transliterator[indicScripts.length];
\r
39 for (int i = 0; i < indicScripts.length; ++i) {
\r
40 names[i] = UScript.getName(indicScripts[i]);
\r
// Pattern intersects the script property with [:L:] / [:M:] and [:age=3.1:].
41 sets[i] = new UnicodeSet("[[:" + names[i] + ":]&[[:L:][:M:]]&[:age=3.1:]]");
\r
42 fallbacks[i] = Transliterator.getInstance("any-" + names[i]);
\r
// Accumulates (source, target) pairs that round-trip successfully; see eq.add below.
44 EquivClass eq = new EquivClass(new ReverseComparator());
\r
// Report output; the HTML header and inline CSS are written first.
45 PrintWriter pw = openPrintWriter("transChart.html");
\r
46 pw.println("<html><head><meta http-equiv='Content-Type' content='text/html; charset=utf-8'>");
\r
47 pw.println("<title>Indic Transliteration Chart</title><style>");
\r
48 pw.println("td { text-align: Center; font-size: 200% }");
\r
49 pw.println("tt { font-size: 50% }");
\r
50 pw.println("td.miss { background-color: #CCCCFF }");
\r
51 pw.println("</style></head><body bgcolor='#FFFFFF'>");
\r
53 Transliterator anyToLatin = Transliterator.getInstance("any-latin");
\r
// Only used to print "debug" when a forward result equals this string.
55 String testString = "\u0946\u093E";
\r
// failNorm collects code points whose NFC form differs from its NFD form;
// latinFail collects "s - t - r" strings for failed round trips through Latin.
57 UnicodeSet failNorm = new UnicodeSet();
\r
58 Set latinFail = new TreeSet();
\r
// Outer loop: each non-Latin script as the source.
60 for (int i = 0; i < indicScripts.length; ++i) {
\r
61 if (indicScripts[i] == UScript.LATIN) continue;
\r
62 String source = names[i];
\r
63 System.out.println(source);
\r
64 UnicodeSet sourceChars = sets[i];
\r
// Inner loop: every other script (including Latin) as the target.
66 for (int j = 0; j < indicScripts.length; ++j) {
\r
67 if (i == j) continue;
\r
68 String target = names[j];
\r
69 Transliterator forward = Transliterator.getInstance(source + '-' + target);
\r
70 Transliterator backward = forward.getInverse();
\r
71 UnicodeSetIterator it = new UnicodeSetIterator(sourceChars);
\r
// NOTE(review): the header of the loop over `it` is not visible in this listing;
// the `continue` below implies one exists.
73 if (lengthMarks.contains(it.codepoint)) continue;
\r
74 String s = Normalizer.normalize(it.codepoint,Normalizer.NFC,0);
\r
75 //if (!Normalizer.isNormalized(s,Normalizer.NFC,0)) continue;
\r
// Record code points whose NFC and NFD forms differ.
76 if (!s.equals(Normalizer.normalize(s,Normalizer.NFD,0))) {
\r
77 failNorm.add(it.codepoint);
\r
// Forward conversion, post-processed by fix().
79 String t = fix(forward.transliterate(s));
\r
80 if (t.equals(testString)) {
\r
81 System.out.println("debug");
\r
// Convert back and compare with the original using Normalizer.compare.
84 String r = fix(backward.transliterate(t));
\r
85 if (Normalizer.compare(s,r,0) == 0) {
\r
// Successful round trip: non-Latin targets are recorded as equivalent to the source.
86 if (indicScripts[j] != UScript.LATIN) eq.add(s,t);
\r
// NOTE(review): the branch structure between these two ifs is not visible here;
// presumably this is the failure branch -- confirm against the full file.
88 if (indicScripts[j] == UScript.LATIN) {
\r
89 latinFail.add(s + " - " + t + " - " + r);
\r
95 // collect equivalents
\r
// Chart table: one header cell per script, labelled with the first three letters of its name.
96 pw.println("<table border='1' cellspacing='0'><tr>");
\r
97 for (int i = 0; i < indicScripts.length; ++i) {
\r
98 pw.print("<th width='10%'>" + names[i].substring(0,3) + "</th>");
\r
100 pw.println("</tr>");
\r
// One table row per equivalence set, iterated in MyComparator order.
102 Iterator rit = eq.getSetIterator(new MyComparator());
\r
103 while(rit.hasNext()) {
\r
104 Set equivs = (Set)rit.next();
\r
// First cell: any-latin transliteration of the set's first element.
106 Iterator sit = equivs.iterator();
\r
107 String source = (String)sit.next();
\r
108 String item = anyToLatin.transliterate(source);
\r
109 if (item.equals("") || source.equals(item)) item = " ";
\r
110 pw.print("<td>" + item + "</td>");
\r
// Remaining cells start at index 1; index 0 is not visited by this loop.
111 for (int i = 1; i < indicScripts.length; ++i) {
\r
112 sit = equivs.iterator();
\r
// Look for a member of the set made up entirely of characters from sets[i].
// NOTE(review): the lines after the containsAll check are not visible in this listing.
114 while (sit.hasNext()) {
\r
115 String trial = (String)sit.next();
\r
116 if (!sets[i].containsAll(trial)) continue;
\r
// When item is empty, mark the cell with class 'miss' and try the "any-<script>" fallback.
120 String classString = "";
\r
121 if (item.equals("")) {
\r
122 classString = " class='miss'";
\r
123 String temp = fallbacks[i].transliterate(source);
\r
124 if (!temp.equals("") && !temp.equals(source)) item = temp;
\r
// Cell content: the item (or a blank), character names in the title attribute, and hex below.
126 String backup = item.equals("") ? " " : item;
\r
127 pw.print("<td" + classString + " title='" + getName(item, "; ") + "'>"
\r
128 + backup + "<br><tt>" + Utility.hex(item) + "</tt></td>");
\r
// NOTE(review): sit and item are declared again here. The surrounding lines are missing from
// this listing, so it cannot be determined from here whether this section is active code.
131 Iterator sit = equivs.iterator();
\r
132 while (sit.hasNext()) {
\r
133 String item = (String)sit.next();
\r
134 pw.print("<td>" + item + "</td>");
\r
137 pw.println("</tr>");
\r
139 pw.println("</table>");
\r
// Section listing each code point collected in failNorm together with its NFD form.
141 pw.println("<h2>Failed Normalization</h2>");
\r
143 UnicodeSetIterator it = new UnicodeSetIterator(failNorm);
\r
144 UnicodeSet pieces = new UnicodeSet();
\r
145 while (it.next()) {
\r
146 String s = UTF16.valueOf(it.codepoint);
\r
147 String d = Normalizer.normalize(s,Normalizer.NFD,0);
\r
148 pw.println("Norm:" + s + ", " + Utility.hex(s) + " " + UCharacter.getName(it.codepoint)
\r
149 + "; " + d + ", " + Utility.hex(d) + ", ");
\r
// d.charAt(1) assumes the NFD form has at least two chars -- TODO confirm for every failNorm entry.
150 pw.println(UCharacter.getName(d.charAt(1)) + "<br>");
\r
// Collect second chars whose character name contains "LENGTH".
151 if (UCharacter.getName(d.charAt(1)).indexOf("LENGTH") >= 0) pieces.add(d.charAt(1));
\r
153 pw.println(pieces);
\r
// Section listing the recorded Latin round-trip failures, one per line.
155 pw.println("<h2>Failed Round-Trip</h2>");
\r
156 Iterator cit = latinFail.iterator();
\r
157 while (cit.hasNext()) {
\r
158 pw.println(cit.next() + "<br>");
\r
162 pw.println("</table></body></html>");
\r
// NOTE(review): no pw.close() or pw.flush() is visible in this listing -- confirm the writer is closed.
164 System.out.println("Done");
\r
// Replaces specific two-code-point strings with a single code point. The match is on
// the whole string (String.equals), not on substrings.
// @param s the string to check
// @return the single-code-point replacement when s equals one of the listed pairs
// NOTE(review): the fall-through return for unmatched input is not visible in this listing.
167 public static String fix(String s) {
\r
// Pairs ending in U+093E, U+0C3E or U+0CBE that map to U+094A, U+0C4A, U+0CCA.
168 if (s.equals("\u0946\u093E")) return "\u094A";
\r
169 if (s.equals("\u0C46\u0C3E")) return "\u0C4A";
\r
170 if (s.equals("\u0CC6\u0CBE")) return "\u0CCA";
\r
// Pairs that map to U+094B, U+0A4B, U+0ACB, U+0C4B, U+0CCB.
172 if (s.equals("\u0947\u093E")) return "\u094B";
\r
173 if (s.equals("\u0A47\u0A3E")) return "\u0A4B";
\r
174 if (s.equals("\u0AC7\u0ABE")) return "\u0ACB";
\r
175 if (s.equals("\u0C47\u0C3E")) return "\u0C4B";
\r
176 if (s.equals("\u0CC7\u0CBE")) return "\u0CCB";
\r
178 //return Normalizer.normalize(s,Normalizer.NFD,0);
\r
// Opens fileName for writing and returns a PrintWriter over it.
// Prints "Creating file: " plus the absolute file to System.out first.
// @param fileName path of the file to create
// @return a PrintWriter over a BufferedWriter (buffer size 4*1024) that encodes as "UTF8"
// @throws IOException declared by the signature; raised by the underlying stream constructors
182 public static PrintWriter openPrintWriter(String fileName) throws IOException {
\r
// lf is only used for the log message; the stream below is opened from fileName directly.
183 File lf = new File(fileName);
\r
184 System.out.println("Creating file: " + lf.getAbsoluteFile());
\r
186 return new PrintWriter(
\r
187 new BufferedWriter(
\r
188 new OutputStreamWriter(
\r
189 new FileOutputStream(fileName), "UTF8"), 4*1024));
\r
// Returns the UCharacter.getName(...) of every code point in s, joined by separator.
// @param s         string whose code points are named
// @param separator text inserted before every name except the first
// @return the joined names; empty when s is empty (the loop body never runs)
// NOTE(review): the declaration of cp is not visible in this listing.
193 public static String getName(String s, String separator) {
\r
195 StringBuffer sb = new StringBuffer();
\r
// Advance by the char count of the code point read in the body, so a code point that
// occupies two chars is handled as one unit.
196 for (int i = 0; i < s.length(); i += UTF16.getCharCount(cp)) {
\r
197 cp = UTF16.charAt(s,i);
\r
198 if (i != 0) sb.append(separator);
\r
199 sb.append(UCharacter.getName(cp));
\r
201 return sb.toString();
\r
// Comparator over TreeSets of Strings: compares the two sets element by element in
// iteration order. Used in main to order the rows returned by getSetIterator.
204 static class MyComparator implements Comparator {
\r
// Both arguments are cast to TreeSet, so any other type raises ClassCastException.
// NOTE(review): the result when both sets are exhausted together is not visible in this listing.
205 public int compare(Object o1, Object o2) {
\r
206 Iterator i1 = ((TreeSet) o1).iterator();
\r
207 Iterator i2 = ((TreeSet) o2).iterator();
\r
// Walk both sets in parallel; the first differing pair decides the result.
208 while (i1.hasNext() && i2.hasNext()) {
\r
209 String a = (String)i1.next();
\r
210 String b = (String)i2.next();
\r
211 int result = a.compareTo(b);
\r
212 if (result != 0) return result;
\r
// All shared positions were equal: the set with elements left over sorts later.
214 if (i1.hasNext()) return 1;
\r
215 if (i2.hasNext()) return -1;
\r
// Comparator over the toString() form of its arguments. A string whose first char is
// below 0x900 sorts before one whose first char is above 0x900; every other pair uses
// String.compareTo.
220 static class ReverseComparator implements Comparator {
\r
// NOTE(review): charAt(0) throws StringIndexOutOfBoundsException when either toString() is empty.
221 public int compare(Object o1, Object o2) {
\r
222 String a = o1.toString();
\r
223 char a1 = a.charAt(0);
\r
224 String b = o2.toString();
\r
225 char b1 = b.charAt(0);
\r
// Both tests are strict, so a first char equal to 0x900 matches neither and falls through.
226 if (a1 < 0x900 && b1 > 0x900) return -1;
\r
227 if (a1 > 0x900 && b1 < 0x900) return +1;
\r
228 return a.compareTo(b);
\r
// Groups objects into equivalence sets: itemToSet maps each item to the Set that
// currently holds it, and add(a, b) puts a and b in the same set.
// NOTE(review): several lines of this class are missing from this listing (the embedded
// line numbers skip); only the visible statements are described.
232 static class EquivClass {
\r
// Constructor body is not visible; presumably it stores c in the comparator field -- confirm.
233 EquivClass(Comparator c) {
\r
// Item -> the set it belongs to. Many keys can point at the same Set instance.
236 private HashMap itemToSet = new HashMap();
\r
// Passed to each TreeSet created in add().
237 private Comparator comparator;
\r
// Records that a and b are equivalent.
239 void add(Object a, Object b) {
\r
240 Set sa = (Set)itemToSet.get(a);
\r
241 Set sb = (Set)itemToSet.get(b);
\r
// Neither item is known yet: create one set and map both items to it.
// The statements that insert a and b into s are not visible in this listing.
242 if (sa == null && sb == null) { // new set!
\r
243 Set s = new TreeSet(comparator);
\r
246 itemToSet.put(a, s);
\r
247 itemToSet.put(b, s);
\r
// Only b is known; the body of this branch is not visible in this listing.
248 } else if (sa == null) {
\r
// Only a is known; the body of this branch is not visible in this listing.
250 } else if (sb == null) {
\r
// Both are known: every member of sb is re-pointed at sa.
// NOTE(review): the statement that copies sb's members into sa is not visible here.
252 } else { // merge sets, dumping sb
\r
254 Iterator it = sb.iterator();
\r
255 while (it.hasNext()) {
\r
256 itemToSet.put(it.next(), sa);
\r
// Iterator over the sets stored in itemToSet, ordered by a caller-supplied comparator.
261 private class MyIterator implements Iterator {
\r
262 private Iterator it;
\r
// Copies itemToSet.values() into a TreeSet ordered by comp. A TreeSet keeps a single
// element for values that compare equal under comp, so a set shared by many keys
// appears once.
263 MyIterator (Comparator comp) {
\r
264 TreeSet values = new TreeSet(comp);
\r
265 values.addAll(itemToSet.values());
\r
266 it = values.iterator();
\r
269 public boolean hasNext() {
\r
270 return it.hasNext();
\r
// Body not visible in this listing; presumably delegates to it.next() -- confirm.
272 public Object next() {
\r
// Removal is not supported; always throws IllegalArgumentException.
275 public void remove() {
\r
276 throw new IllegalArgumentException("can't remove");
\r
// Returns an iterator over the equivalence sets, ordered by comp.
280 public Iterator getSetIterator (Comparator comp) {
\r
281 return new MyIterator(comp);
\r