]> gitweb.fperrin.net Git - Dictionary.git/blob - jars/icu4j-52_1/demos/src/com/ibm/icu/dev/demo/translit/TransliterationChart.java
Clean up imports.
[Dictionary.git] / jars / icu4j-52_1 / demos / src / com / ibm / icu / dev / demo / translit / TransliterationChart.java
1 /**
2  *******************************************************************************
3  * Copyright (C) 2001-2010, International Business Machines Corporation and    *
4  * others. All Rights Reserved.                                                *
5  *******************************************************************************
6  */
7 package com.ibm.icu.dev.demo.translit;
8 import java.io.BufferedWriter;
9 import java.io.File;
10 import java.io.FileOutputStream;
11 import java.io.IOException;
12 import java.io.OutputStreamWriter;
13 import java.io.PrintWriter;
14 import java.util.Comparator;
15 import java.util.HashMap;
16 import java.util.Iterator;
17 import java.util.Set;
18 import java.util.TreeSet;
19
20 import com.ibm.icu.impl.Utility;
21 import com.ibm.icu.lang.UCharacter;
22 import com.ibm.icu.lang.UScript;
23 import com.ibm.icu.text.Normalizer;
24 import com.ibm.icu.text.Transliterator;
25 import com.ibm.icu.text.UTF16;
26 import com.ibm.icu.text.UnicodeSet;
27 import com.ibm.icu.text.UnicodeSetIterator;
28
29 public class TransliterationChart {
30     public static void main(String[] args) throws IOException {
31         System.out.println("Start");
32         UnicodeSet lengthMarks = new UnicodeSet("[\u09D7\u0B56-\u0B57\u0BD7\u0C56\u0CD5-\u0CD6\u0D57\u0C55\u0CD5]");
33         int[] indicScripts = {
34             UScript.LATIN,
35             UScript.DEVANAGARI,
36             UScript.BENGALI,
37             UScript.GURMUKHI,
38             UScript.GUJARATI,
39             UScript.ORIYA,
40             UScript.TAMIL,
41             UScript.TELUGU,
42             UScript.KANNADA,
43             UScript.MALAYALAM,
44         };
45         String[] names = new String[indicScripts.length];
46         UnicodeSet[] sets = new UnicodeSet[indicScripts.length];
47         Transliterator[] fallbacks = new Transliterator[indicScripts.length];
48         for (int i = 0; i < indicScripts.length; ++i) {
49             names[i] = UScript.getName(indicScripts[i]);
50             sets[i] = new UnicodeSet("[[:" + names[i] + ":]&[[:L:][:M:]]&[:age=3.1:]]");
51             fallbacks[i] = Transliterator.getInstance("any-" + names[i]);
52         }
53         EquivClass eq = new EquivClass(new ReverseComparator());
54         PrintWriter pw = openPrintWriter("transChart.html");
55         pw.println("<html><head><meta http-equiv='Content-Type' content='text/html; charset=utf-8'>");
56         pw.println("<title>Indic Transliteration Chart</title><style>");
57         pw.println("td { text-align: Center; font-size: 200% }");
58         pw.println("tt { font-size: 50% }");
59         pw.println("td.miss { background-color: #CCCCFF }");
60         pw.println("</style></head><body bgcolor='#FFFFFF'>");
61
62         Transliterator anyToLatin = Transliterator.getInstance("any-latin");
63         
64         String testString = "\u0946\u093E";
65         
66         UnicodeSet failNorm = new UnicodeSet();
67         Set latinFail = new TreeSet();
68         
69         for (int i = 0; i < indicScripts.length; ++i) {
70             if (indicScripts[i] == UScript.LATIN) continue;
71             String source = names[i];
72             System.out.println(source);
73             UnicodeSet sourceChars = sets[i];
74
75             for (int j = 0; j < indicScripts.length; ++j) {
76                 if (i == j) continue;
77                 String target = names[j];
78                 Transliterator forward = Transliterator.getInstance(source + '-' + target);
79                 Transliterator backward = forward.getInverse();
80                 UnicodeSetIterator it = new UnicodeSetIterator(sourceChars);
81                 while (it.next()) {
82                     if (lengthMarks.contains(it.codepoint)) continue;
83                     String s = Normalizer.normalize(it.codepoint,Normalizer.NFC,0);
84                     //if (!Normalizer.isNormalized(s,Normalizer.NFC,0)) continue;
85                     if (!s.equals(Normalizer.normalize(s,Normalizer.NFD,0))) {
86                         failNorm.add(it.codepoint);
87                     } 
88                     String t = fix(forward.transliterate(s));
89                     if (t.equals(testString)) {
90                         System.out.println("debug");
91                     }
92
93                     String r = fix(backward.transliterate(t));
94                     if (Normalizer.compare(s,r,0) == 0) {
95                         if (indicScripts[j] != UScript.LATIN) eq.add(s,t);
96                     } else {
97                         if (indicScripts[j] == UScript.LATIN) {
98                             latinFail.add(s + " - " + t + " - " + r);
99                         }
100                     }
101                 }
102             }
103         }
104         // collect equivalents
105         pw.println("<table border='1' cellspacing='0'><tr>");
106         for (int i = 0; i < indicScripts.length; ++i) {
107             pw.print("<th width='10%'>" + names[i].substring(0,3) + "</th>");
108         }
109         pw.println("</tr>");
110
111         Iterator rit = eq.getSetIterator(new MyComparator());
112         while(rit.hasNext()) {
113             Set equivs = (Set)rit.next();
114             pw.print("<tr>");
115             Iterator sit = equivs.iterator();
116             String source = (String)sit.next();
117             String item = anyToLatin.transliterate(source);
118             if (item.equals("") || source.equals(item)) item = "&nbsp;";
119             pw.print("<td>" + item + "</td>");
120             for (int i = 1; i < indicScripts.length; ++i) {
121                 sit = equivs.iterator();
122                 item = "";
123                 while (sit.hasNext()) {
124                     String trial = (String)sit.next();
125                     if (!sets[i].containsAll(trial)) continue;
126                     item = trial;
127                     break;
128                 }
129                 String classString = "";
130                 if (item.equals("")) {
131                     classString = " class='miss'";
132                     String temp = fallbacks[i].transliterate(source);
133                     if (!temp.equals("") && !temp.equals(source)) item = temp;
134                 } 
135                 String backup = item.equals("") ? "&nbsp;" : item;
136                 pw.print("<td" + classString + " title='" + getName(item, "; ") + "'>" 
137                     + backup + "<br><tt>" + Utility.hex(item) + "</tt></td>");
138             }
139             /*
140             Iterator sit = equivs.iterator();
141             while (sit.hasNext()) {
142                 String item = (String)sit.next();
143                 pw.print("<td>" + item + "</td>");
144             }
145             */
146             pw.println("</tr>");
147         }
148         pw.println("</table>");
149         if (true) {
150             pw.println("<h2>Failed Normalization</h2>");
151     
152             UnicodeSetIterator it = new UnicodeSetIterator(failNorm);
153             UnicodeSet pieces = new UnicodeSet();
154             while (it.next()) {
155                 String s = UTF16.valueOf(it.codepoint);
156                 String d = Normalizer.normalize(s,Normalizer.NFD,0);
157                 pw.println("Norm:" + s + ", " + Utility.hex(s) + " " + UCharacter.getName(it.codepoint)
158                      + "; " + d + ", " + Utility.hex(d) + ", ");
159                 pw.println(UCharacter.getName(d.charAt(1)) + "<br>");
160                 if (UCharacter.getName(d.charAt(1)).indexOf("LENGTH") >= 0) pieces.add(d.charAt(1));
161             }
162             pw.println(pieces);
163             
164             pw.println("<h2>Failed Round-Trip</h2>");
165             Iterator cit = latinFail.iterator();
166             while (cit.hasNext()) {
167                 pw.println(cit.next() + "<br>");
168             }
169         } 
170
171         pw.println("</table></body></html>");
172         pw.close();     
173         System.out.println("Done");
174     }
175     
176     public static String fix(String s) {
177         if (s.equals("\u0946\u093E")) return "\u094A";
178         if (s.equals("\u0C46\u0C3E")) return "\u0C4A";
179         if (s.equals("\u0CC6\u0CBE")) return "\u0CCA";
180
181         if (s.equals("\u0947\u093E")) return "\u094B";
182         if (s.equals("\u0A47\u0A3E")) return "\u0A4B";
183         if (s.equals("\u0AC7\u0ABE")) return "\u0ACB";
184         if (s.equals("\u0C47\u0C3E")) return "\u0C4B";
185         if (s.equals("\u0CC7\u0CBE")) return "\u0CCB";
186        
187         //return Normalizer.normalize(s,Normalizer.NFD,0);
188         return s;
189     }
190     
191     public static PrintWriter openPrintWriter(String fileName) throws IOException {
192         File lf = new File(fileName);
193         System.out.println("Creating file: " + lf.getAbsoluteFile());
194     
195         return new PrintWriter(
196                 new BufferedWriter(
197                     new OutputStreamWriter(
198                         new FileOutputStream(fileName), "UTF8"), 4*1024));
199     }
200
201     
202     public static String getName(String s, String separator) {
203         int cp;
204         StringBuffer sb = new StringBuffer();
205         for (int i = 0; i < s.length(); i += UTF16.getCharCount(cp)) {
206             cp = UTF16.charAt(s,i);
207             if (i != 0) sb.append(separator);
208             sb.append(UCharacter.getName(cp));
209         }
210         return sb.toString();
211     }
212     
213     static class MyComparator implements Comparator {
214        public int compare(Object o1, Object o2) {
215             Iterator i1 = ((TreeSet) o1).iterator();
216             Iterator i2 = ((TreeSet) o2).iterator();
217             while (i1.hasNext() && i2.hasNext()) {
218                 String a = (String)i1.next();
219                 String b = (String)i2.next();
220                 int result = a.compareTo(b);
221                 if (result != 0) return result;
222             }
223             if (i1.hasNext()) return 1;
224             if (i2.hasNext()) return -1;
225             return 0;
226         }
227         
228     }
229     static class ReverseComparator implements Comparator {
230         public int compare(Object o1, Object o2) {
231             String a = o1.toString();
232             char a1 = a.charAt(0);
233             String b = o2.toString();
234             char b1 = b.charAt(0);
235             if (a1 < 0x900 && b1 > 0x900) return -1;
236             if (a1 > 0x900 && b1 < 0x900) return +1;
237             return a.compareTo(b);
238         }       
239     }
240       
241     static class EquivClass {
242         EquivClass(Comparator c) {
243             comparator = c;
244         }
245         private HashMap itemToSet = new HashMap();
246         private Comparator comparator;
247         
248         void add(Object a, Object b) {
249             Set sa = (Set)itemToSet.get(a);
250             Set sb = (Set)itemToSet.get(b);
251             if (sa == null && sb == null) { // new set!
252                 Set s = new TreeSet(comparator);
253                 s.add(a);
254                 s.add(b);
255                 itemToSet.put(a, s);
256                 itemToSet.put(b, s);
257             } else if (sa == null) {
258                 sb.add(a);
259             } else if (sb == null) {
260                 sa.add(b);
261             } else { // merge sets, dumping sb
262                 sa.addAll(sb);
263                 Iterator it = sb.iterator();
264                 while (it.hasNext()) {
265                     itemToSet.put(it.next(), sa);
266                 }
267             }
268         }
269         
270         private class MyIterator implements Iterator {
271             private Iterator it;
272             MyIterator (Comparator comp) {
273                 TreeSet values = new TreeSet(comp);
274                 values.addAll(itemToSet.values());
275                 it = values.iterator();
276             }
277         
278             public boolean hasNext() {
279                 return it.hasNext();
280             }
281             public Object next() {
282                 return it.next();
283             }
284             public void remove() {
285                 throw new IllegalArgumentException("can't remove");
286             }        
287         }
288
289         public Iterator getSetIterator (Comparator comp) {
290             return new MyIterator(comp);
291         }
292
293     }
294 }