2 *******************************************************************************
3 * Copyright (C) 2001-2010, International Business Machines Corporation and *
4 * others. All Rights Reserved. *
5 *******************************************************************************
7 package com.ibm.icu.dev.demo.translit;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.util.Comparator;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Set;
import java.util.TreeSet;

import com.ibm.icu.impl.Utility;
import com.ibm.icu.lang.UCharacter;
import com.ibm.icu.lang.UScript;
import com.ibm.icu.text.Normalizer;
import com.ibm.icu.text.Transliterator;
import com.ibm.icu.text.UTF16;
import com.ibm.icu.text.UnicodeSet;
import com.ibm.icu.text.UnicodeSetIterator;

29 public class TransliterationChart {
/**
 * Writes the HTML report "transChart.html".
 *
 * For every script listed in indicScripts (except LATIN as a source) the
 * characters of that script are transliterated to each target script and
 * back again. Strings whose round trip compares equal are grouped into
 * equivalence classes and printed as one table row per class, followed by a
 * "Failed Normalization" and a "Failed Round-Trip" section.
 *
 * NOTE(review): this listing is incomplete - several loop headers, else
 * branches, closing braces and the indicScripts initializer are not visible.
 * The comments below describe only the statements that are shown.
 *
 * @param args not referenced by the visible statements
 * @throws IOException declared by openPrintWriter, which opens the report file
 */
30 public static void main(String[] args) throws IOException {
31 System.out.println("Start");
// Code points that the round-trip scan skips (see the lengthMarks.contains check below).
32 UnicodeSet lengthMarks = new UnicodeSet("[\u09D7\u0B56-\u0B57\u0BD7\u0C56\u0CD5-\u0CD6\u0D57\u0C55\u0CD5]");
// Script codes to chart. NOTE(review): the initializer entries are not visible here.
33 int[] indicScripts = {
// Parallel arrays indexed like indicScripts: script name, character set, fallback transliterator.
45 String[] names = new String[indicScripts.length];
46 UnicodeSet[] sets = new UnicodeSet[indicScripts.length];
47 Transliterator[] fallbacks = new Transliterator[indicScripts.length];
48 for (int i = 0; i < indicScripts.length; ++i) {
49 names[i] = UScript.getName(indicScripts[i]);
// Characters of the script that are letters or marks, further restricted by the age=3.1 filter.
50 sets[i] = new UnicodeSet("[[:" + names[i] + ":]&[[:L:][:M:]]&[:age=3.1:]]");
// "any-<script>" transliterator, used later to fill table cells that have no equivalent.
51 fallbacks[i] = Transliterator.getInstance("any-" + names[i]);
// Equivalence classes of strings; members of each class are ordered by ReverseComparator.
53 EquivClass eq = new EquivClass(new ReverseComparator());
54 PrintWriter pw = openPrintWriter("transChart.html");
// HTML prologue: UTF-8 content type, page title and cell styles.
55 pw.println("<html><head><meta http-equiv='Content-Type' content='text/html; charset=utf-8'>");
56 pw.println("<title>Indic Transliteration Chart</title><style>");
57 pw.println("td { text-align: Center; font-size: 200% }");
58 pw.println("tt { font-size: 50% }");
59 pw.println("td.miss { background-color: #CCCCFF }");
60 pw.println("</style></head><body bgcolor='#FFFFFF'>");
// Used for the first column of each table row.
62 Transliterator anyToLatin = Transliterator.getInstance("any-latin");
// Compared against each forward result below; a match only prints "debug".
64 String testString = "\u0946\u093E";
// Code points whose NFC form is not equal to the NFD form of that NFC string.
66 UnicodeSet failNorm = new UnicodeSet();
// "s - t - r" records collected when the target script is LATIN.
// NOTE(review): presumably only on round-trip failure; the enclosing else is not visible.
67 Set latinFail = new TreeSet();
// Outer loop: source script i (LATIN is never used as a source).
69 for (int i = 0; i < indicScripts.length; ++i) {
70 if (indicScripts[i] == UScript.LATIN) continue;
71 String source = names[i];
72 System.out.println(source);
73 UnicodeSet sourceChars = sets[i];
// Inner loop: target script j.
75 for (int j = 0; j < indicScripts.length; ++j) {
77 String target = names[j];
// "<source>-<target>" transliterator and its inverse for the way back.
78 Transliterator forward = Transliterator.getInstance(source + '-' + target);
79 Transliterator backward = forward.getInverse();
// NOTE(review): the loop header that advances this iterator is not visible.
80 UnicodeSetIterator it = new UnicodeSetIterator(sourceChars);
82 if (lengthMarks.contains(it.codepoint)) continue;
// s is the NFC form of the current code point.
83 String s = Normalizer.normalize(it.codepoint,Normalizer.NFC,0);
84 //if (!Normalizer.isNormalized(s,Normalizer.NFC,0)) continue;
85 if (!s.equals(Normalizer.normalize(s,Normalizer.NFD,0))) {
86 failNorm.add(it.codepoint);
// Forward result, post-processed by fix().
88 String t = fix(forward.transliterate(s));
89 if (t.equals(testString)) {
90 System.out.println("debug");
// Round trip: transliterate t back and compare with s using Normalizer.compare.
93 String r = fix(backward.transliterate(t));
94 if (Normalizer.compare(s,r,0) == 0) {
// A successful round trip to a non-Latin target makes s and t equivalent.
95 if (indicScripts[j] != UScript.LATIN) eq.add(s,t);
97 if (indicScripts[j] == UScript.LATIN) {
98 latinFail.add(s + " - " + t + " - " + r);
104 // collect equivalents
// Header row: the first three characters of each script name.
105 pw.println("<table border='1' cellspacing='0'><tr>");
106 for (int i = 0; i < indicScripts.length; ++i) {
107 pw.print("<th width='10%'>" + names[i].substring(0,3) + "</th>");
// One pass per equivalence class, in MyComparator order.
111 Iterator rit = eq.getSetIterator(new MyComparator());
112 while(rit.hasNext()) {
113 Set equivs = (Set)rit.next();
// First cell: the any-latin transliteration of the first member of the class,
// or a placeholder when that result is empty or unchanged.
115 Iterator sit = equivs.iterator();
116 String source = (String)sit.next();
117 String item = anyToLatin.transliterate(source);
118 if (item.equals("") || source.equals(item)) item = " ";
119 pw.print("<td>" + item + "</td>");
// Remaining cells: one per script, starting at index 1.
120 for (int i = 1; i < indicScripts.length; ++i) {
121 sit = equivs.iterator();
123 while (sit.hasNext()) {
124 String trial = (String)sit.next();
// Skip members that are not made up entirely of characters from script i.
125 if (!sets[i].containsAll(trial)) continue;
// An empty item gets the 'miss' style and is replaced by the fallback result
// when that result is non-empty and differs from the source.
129 String classString = "";
130 if (item.equals("")) {
131 classString = " class='miss'";
132 String temp = fallbacks[i].transliterate(source);
133 if (!temp.equals("") && !temp.equals(source)) item = temp;
// Cell content: the text (or a placeholder), character names as tooltip, hex below.
135 String backup = item.equals("") ? " " : item;
136 pw.print("<td" + classString + " title='" + getName(item, "; ") + "'>"
137 + backup + "<br><tt>" + Utility.hex(item) + "</tt></td>");
// NOTE(review): the next four lines re-declare sit and item; possibly they sat
// inside a block comment whose delimiters are not visible here - confirm.
140 Iterator sit = equivs.iterator();
141 while (sit.hasNext()) {
142 String item = (String)sit.next();
143 pw.print("<td>" + item + "</td>");
148 pw.println("</table>");
// Section listing the code points collected in failNorm.
150 pw.println("<h2>Failed Normalization</h2>");
// NOTE(review): the loop header that advances this iterator is not visible.
152 UnicodeSetIterator it = new UnicodeSetIterator(failNorm);
// Receives second NFD units whose name contains "LENGTH"; not read again in the visible code.
153 UnicodeSet pieces = new UnicodeSet();
155 String s = UTF16.valueOf(it.codepoint);
156 String d = Normalizer.normalize(s,Normalizer.NFD,0);
// Prints the character, its hex and name, then its NFD form and that form's hex.
157 pw.println("Norm:" + s + ", " + Utility.hex(s) + " " + UCharacter.getName(it.codepoint)
158 + "; " + d + ", " + Utility.hex(d) + ", ");
// d.charAt(1) is the second UTF-16 unit of the NFD form.
159 pw.println(UCharacter.getName(d.charAt(1)) + "<br>");
160 if (UCharacter.getName(d.charAt(1)).indexOf("LENGTH") >= 0) pieces.add(d.charAt(1));
// Section listing the records collected in latinFail, one per line.
164 pw.println("<h2>Failed Round-Trip</h2>");
165 Iterator cit = latinFail.iterator();
166 while (cit.hasNext()) {
167 pw.println(cit.next() + "<br>");
// NOTE(review): only one <table> is opened above and it is already closed before the
// "Failed Normalization" heading, so the </table> emitted here looks unbalanced.
171 pw.println("</table></body></html>");
173 System.out.println("Done");
176 public static String fix(String s) {
177 if (s.equals("\u0946\u093E")) return "\u094A";
178 if (s.equals("\u0C46\u0C3E")) return "\u0C4A";
179 if (s.equals("\u0CC6\u0CBE")) return "\u0CCA";
181 if (s.equals("\u0947\u093E")) return "\u094B";
182 if (s.equals("\u0A47\u0A3E")) return "\u0A4B";
183 if (s.equals("\u0AC7\u0ABE")) return "\u0ACB";
184 if (s.equals("\u0C47\u0C3E")) return "\u0C4B";
185 if (s.equals("\u0CC7\u0CBE")) return "\u0CCB";
187 //return Normalizer.normalize(s,Normalizer.NFD,0);
191 public static PrintWriter openPrintWriter(String fileName) throws IOException {
192 File lf = new File(fileName);
193 System.out.println("Creating file: " + lf.getAbsoluteFile());
195 return new PrintWriter(
197 new OutputStreamWriter(
198 new FileOutputStream(fileName), "UTF8"), 4*1024));
202 public static String getName(String s, String separator) {
204 StringBuffer sb = new StringBuffer();
205 for (int i = 0; i < s.length(); i += UTF16.getCharCount(cp)) {
206 cp = UTF16.charAt(s,i);
207 if (i != 0) sb.append(separator);
208 sb.append(UCharacter.getName(cp));
210 return sb.toString();
213 static class MyComparator implements Comparator {
214 public int compare(Object o1, Object o2) {
215 Iterator i1 = ((TreeSet) o1).iterator();
216 Iterator i2 = ((TreeSet) o2).iterator();
217 while (i1.hasNext() && i2.hasNext()) {
218 String a = (String)i1.next();
219 String b = (String)i2.next();
220 int result = a.compareTo(b);
221 if (result != 0) return result;
223 if (i1.hasNext()) return 1;
224 if (i2.hasNext()) return -1;
229 static class ReverseComparator implements Comparator {
230 public int compare(Object o1, Object o2) {
231 String a = o1.toString();
232 char a1 = a.charAt(0);
233 String b = o2.toString();
234 char b1 = b.charAt(0);
235 if (a1 < 0x900 && b1 > 0x900) return -1;
236 if (a1 > 0x900 && b1 < 0x900) return +1;
237 return a.compareTo(b);
241 static class EquivClass {
242 EquivClass(Comparator c) {
245 private HashMap itemToSet = new HashMap();
246 private Comparator comparator;
248 void add(Object a, Object b) {
249 Set sa = (Set)itemToSet.get(a);
250 Set sb = (Set)itemToSet.get(b);
251 if (sa == null && sb == null) { // new set!
252 Set s = new TreeSet(comparator);
257 } else if (sa == null) {
259 } else if (sb == null) {
261 } else { // merge sets, dumping sb
263 Iterator it = sb.iterator();
264 while (it.hasNext()) {
265 itemToSet.put(it.next(), sa);
270 private class MyIterator implements Iterator {
272 MyIterator (Comparator comp) {
273 TreeSet values = new TreeSet(comp);
274 values.addAll(itemToSet.values());
275 it = values.iterator();
278 public boolean hasNext() {
281 public Object next() {
284 public void remove() {
285 throw new IllegalArgumentException("can't remove");
289 public Iterator getSetIterator (Comparator comp) {
290 return new MyIterator(comp);