]> gitweb.fperrin.net Git - Dictionary.git/blob - jars/icu4j-4_4_2-src/demos/src/com/ibm/icu/dev/demo/translit/TransliterationChart.java
go
[Dictionary.git] / jars / icu4j-4_4_2-src / demos / src / com / ibm / icu / dev / demo / translit / TransliterationChart.java
1 /**\r
2  *******************************************************************************\r
3  * Copyright (C) 2001-2010, International Business Machines Corporation and    *\r
4  * others. All Rights Reserved.                                                *\r
5  *******************************************************************************\r
6  */\r
7 package com.ibm.icu.dev.demo.translit;\r
8 import java.io.BufferedWriter;\r
9 import java.io.File;\r
10 import java.io.FileOutputStream;\r
11 import java.io.IOException;\r
12 import java.io.OutputStreamWriter;\r
13 import java.io.PrintWriter;\r
14 import java.util.Comparator;\r
15 import java.util.HashMap;\r
16 import java.util.Iterator;\r
17 import java.util.Set;\r
18 import java.util.TreeSet;\r
19 \r
20 import com.ibm.icu.impl.Utility;\r
21 import com.ibm.icu.lang.UCharacter;\r
22 import com.ibm.icu.lang.UScript;\r
23 import com.ibm.icu.text.Normalizer;\r
24 import com.ibm.icu.text.Transliterator;\r
25 import com.ibm.icu.text.UTF16;\r
26 import com.ibm.icu.text.UnicodeSet;\r
27 import com.ibm.icu.text.UnicodeSetIterator;\r
28 \r
29 public class TransliterationChart {\r
30     public static void main(String[] args) throws IOException {\r
31         System.out.println("Start");\r
32         UnicodeSet lengthMarks = new UnicodeSet("[\u09D7\u0B56-\u0B57\u0BD7\u0C56\u0CD5-\u0CD6\u0D57\u0C55\u0CD5]");\r
33         int[] indicScripts = {\r
34             UScript.LATIN,\r
35             UScript.DEVANAGARI,\r
36             UScript.BENGALI,\r
37             UScript.GURMUKHI,\r
38             UScript.GUJARATI,\r
39             UScript.ORIYA,\r
40             UScript.TAMIL,\r
41             UScript.TELUGU,\r
42             UScript.KANNADA,\r
43             UScript.MALAYALAM,\r
44         };\r
45         String[] names = new String[indicScripts.length];\r
46         UnicodeSet[] sets = new UnicodeSet[indicScripts.length];\r
47         Transliterator[] fallbacks = new Transliterator[indicScripts.length];\r
48         for (int i = 0; i < indicScripts.length; ++i) {\r
49             names[i] = UScript.getName(indicScripts[i]);\r
50             sets[i] = new UnicodeSet("[[:" + names[i] + ":]&[[:L:][:M:]]&[:age=3.1:]]");\r
51             fallbacks[i] = Transliterator.getInstance("any-" + names[i]);\r
52         }\r
53         EquivClass eq = new EquivClass(new ReverseComparator());\r
54         PrintWriter pw = openPrintWriter("transChart.html");\r
55         pw.println("<html><head><meta http-equiv='Content-Type' content='text/html; charset=utf-8'>");\r
56         pw.println("<title>Indic Transliteration Chart</title><style>");\r
57         pw.println("td { text-align: Center; font-size: 200% }");\r
58         pw.println("tt { font-size: 50% }");\r
59         pw.println("td.miss { background-color: #CCCCFF }");\r
60         pw.println("</style></head><body bgcolor='#FFFFFF'>");\r
61 \r
62         Transliterator anyToLatin = Transliterator.getInstance("any-latin");\r
63         \r
64         String testString = "\u0946\u093E";\r
65         \r
66         UnicodeSet failNorm = new UnicodeSet();\r
67         Set latinFail = new TreeSet();\r
68         \r
69         for (int i = 0; i < indicScripts.length; ++i) {\r
70             if (indicScripts[i] == UScript.LATIN) continue;\r
71             String source = names[i];\r
72             System.out.println(source);\r
73             UnicodeSet sourceChars = sets[i];\r
74 \r
75             for (int j = 0; j < indicScripts.length; ++j) {\r
76                 if (i == j) continue;\r
77                 String target = names[j];\r
78                 Transliterator forward = Transliterator.getInstance(source + '-' + target);\r
79                 Transliterator backward = forward.getInverse();\r
80                 UnicodeSetIterator it = new UnicodeSetIterator(sourceChars);\r
81                 while (it.next()) {\r
82                     if (lengthMarks.contains(it.codepoint)) continue;\r
83                     String s = Normalizer.normalize(it.codepoint,Normalizer.NFC,0);\r
84                     //if (!Normalizer.isNormalized(s,Normalizer.NFC,0)) continue;\r
85                     if (!s.equals(Normalizer.normalize(s,Normalizer.NFD,0))) {\r
86                         failNorm.add(it.codepoint);\r
87                     } \r
88                     String t = fix(forward.transliterate(s));\r
89                     if (t.equals(testString)) {\r
90                         System.out.println("debug");\r
91                     }\r
92 \r
93                     String r = fix(backward.transliterate(t));\r
94                     if (Normalizer.compare(s,r,0) == 0) {\r
95                         if (indicScripts[j] != UScript.LATIN) eq.add(s,t);\r
96                     } else {\r
97                         if (indicScripts[j] == UScript.LATIN) {\r
98                             latinFail.add(s + " - " + t + " - " + r);\r
99                         }\r
100                     }\r
101                 }\r
102             }\r
103         }\r
104         // collect equivalents\r
105         pw.println("<table border='1' cellspacing='0'><tr>");\r
106         for (int i = 0; i < indicScripts.length; ++i) {\r
107             pw.print("<th width='10%'>" + names[i].substring(0,3) + "</th>");\r
108         }\r
109         pw.println("</tr>");\r
110 \r
111         Iterator rit = eq.getSetIterator(new MyComparator());\r
112         while(rit.hasNext()) {\r
113             Set equivs = (Set)rit.next();\r
114             pw.print("<tr>");\r
115             Iterator sit = equivs.iterator();\r
116             String source = (String)sit.next();\r
117             String item = anyToLatin.transliterate(source);\r
118             if (item.equals("") || source.equals(item)) item = "&nbsp;";\r
119             pw.print("<td>" + item + "</td>");\r
120             for (int i = 1; i < indicScripts.length; ++i) {\r
121                 sit = equivs.iterator();\r
122                 item = "";\r
123                 while (sit.hasNext()) {\r
124                     String trial = (String)sit.next();\r
125                     if (!sets[i].containsAll(trial)) continue;\r
126                     item = trial;\r
127                     break;\r
128                 }\r
129                 String classString = "";\r
130                 if (item.equals("")) {\r
131                     classString = " class='miss'";\r
132                     String temp = fallbacks[i].transliterate(source);\r
133                     if (!temp.equals("") && !temp.equals(source)) item = temp;\r
134                 } \r
135                 String backup = item.equals("") ? "&nbsp;" : item;\r
136                 pw.print("<td" + classString + " title='" + getName(item, "; ") + "'>" \r
137                     + backup + "<br><tt>" + Utility.hex(item) + "</tt></td>");\r
138             }\r
139             /*\r
140             Iterator sit = equivs.iterator();\r
141             while (sit.hasNext()) {\r
142                 String item = (String)sit.next();\r
143                 pw.print("<td>" + item + "</td>");\r
144             }\r
145             */\r
146             pw.println("</tr>");\r
147         }\r
148         pw.println("</table>");\r
149         if (true) {\r
150             pw.println("<h2>Failed Normalization</h2>");\r
151     \r
152             UnicodeSetIterator it = new UnicodeSetIterator(failNorm);\r
153             UnicodeSet pieces = new UnicodeSet();\r
154             while (it.next()) {\r
155                 String s = UTF16.valueOf(it.codepoint);\r
156                 String d = Normalizer.normalize(s,Normalizer.NFD,0);\r
157                 pw.println("Norm:" + s + ", " + Utility.hex(s) + " " + UCharacter.getName(it.codepoint)\r
158                      + "; " + d + ", " + Utility.hex(d) + ", ");\r
159                 pw.println(UCharacter.getName(d.charAt(1)) + "<br>");\r
160                 if (UCharacter.getName(d.charAt(1)).indexOf("LENGTH") >= 0) pieces.add(d.charAt(1));\r
161             }\r
162             pw.println(pieces);\r
163             \r
164             pw.println("<h2>Failed Round-Trip</h2>");\r
165             Iterator cit = latinFail.iterator();\r
166             while (cit.hasNext()) {\r
167                 pw.println(cit.next() + "<br>");\r
168             }\r
169         } \r
170 \r
171         pw.println("</table></body></html>");\r
172         pw.close();     \r
173         System.out.println("Done");\r
174     }\r
175     \r
176     public static String fix(String s) {\r
177         if (s.equals("\u0946\u093E")) return "\u094A";\r
178         if (s.equals("\u0C46\u0C3E")) return "\u0C4A";\r
179         if (s.equals("\u0CC6\u0CBE")) return "\u0CCA";\r
180 \r
181         if (s.equals("\u0947\u093E")) return "\u094B";\r
182         if (s.equals("\u0A47\u0A3E")) return "\u0A4B";\r
183         if (s.equals("\u0AC7\u0ABE")) return "\u0ACB";\r
184         if (s.equals("\u0C47\u0C3E")) return "\u0C4B";\r
185         if (s.equals("\u0CC7\u0CBE")) return "\u0CCB";\r
186        \r
187         //return Normalizer.normalize(s,Normalizer.NFD,0);\r
188         return s;\r
189     }\r
190     \r
191     public static PrintWriter openPrintWriter(String fileName) throws IOException {\r
192         File lf = new File(fileName);\r
193         System.out.println("Creating file: " + lf.getAbsoluteFile());\r
194     \r
195         return new PrintWriter(\r
196                 new BufferedWriter(\r
197                     new OutputStreamWriter(\r
198                         new FileOutputStream(fileName), "UTF8"), 4*1024));\r
199     }\r
200 \r
201     \r
202     public static String getName(String s, String separator) {\r
203         int cp;\r
204         StringBuffer sb = new StringBuffer();\r
205         for (int i = 0; i < s.length(); i += UTF16.getCharCount(cp)) {\r
206             cp = UTF16.charAt(s,i);\r
207             if (i != 0) sb.append(separator);\r
208             sb.append(UCharacter.getName(cp));\r
209         }\r
210         return sb.toString();\r
211     }\r
212     \r
213     static class MyComparator implements Comparator {\r
214        public int compare(Object o1, Object o2) {\r
215             Iterator i1 = ((TreeSet) o1).iterator();\r
216             Iterator i2 = ((TreeSet) o2).iterator();\r
217             while (i1.hasNext() && i2.hasNext()) {\r
218                 String a = (String)i1.next();\r
219                 String b = (String)i2.next();\r
220                 int result = a.compareTo(b);\r
221                 if (result != 0) return result;\r
222             }\r
223             if (i1.hasNext()) return 1;\r
224             if (i2.hasNext()) return -1;\r
225             return 0;\r
226         }\r
227         \r
228     }\r
229     static class ReverseComparator implements Comparator {\r
230         public int compare(Object o1, Object o2) {\r
231             String a = o1.toString();\r
232             char a1 = a.charAt(0);\r
233             String b = o2.toString();\r
234             char b1 = b.charAt(0);\r
235             if (a1 < 0x900 && b1 > 0x900) return -1;\r
236             if (a1 > 0x900 && b1 < 0x900) return +1;\r
237             return a.compareTo(b);\r
238         }       \r
239     }\r
240       \r
241     static class EquivClass {\r
242         EquivClass(Comparator c) {\r
243             comparator = c;\r
244         }\r
245         private HashMap itemToSet = new HashMap();\r
246         private Comparator comparator;\r
247         \r
248         void add(Object a, Object b) {\r
249             Set sa = (Set)itemToSet.get(a);\r
250             Set sb = (Set)itemToSet.get(b);\r
251             if (sa == null && sb == null) { // new set!\r
252                 Set s = new TreeSet(comparator);\r
253                 s.add(a);\r
254                 s.add(b);\r
255                 itemToSet.put(a, s);\r
256                 itemToSet.put(b, s);\r
257             } else if (sa == null) {\r
258                 sb.add(a);\r
259             } else if (sb == null) {\r
260                 sa.add(b);\r
261             } else { // merge sets, dumping sb\r
262                 sa.addAll(sb);\r
263                 Iterator it = sb.iterator();\r
264                 while (it.hasNext()) {\r
265                     itemToSet.put(it.next(), sa);\r
266                 }\r
267             }\r
268         }\r
269         \r
270         private class MyIterator implements Iterator {\r
271             private Iterator it;\r
272             MyIterator (Comparator comp) {\r
273                 TreeSet values = new TreeSet(comp);\r
274                 values.addAll(itemToSet.values());\r
275                 it = values.iterator();\r
276             }\r
277         \r
278             public boolean hasNext() {\r
279                 return it.hasNext();\r
280             }\r
281             public Object next() {\r
282                 return it.next();\r
283             }\r
284             public void remove() {\r
285                 throw new IllegalArgumentException("can't remove");\r
286             }        \r
287         }\r
288 \r
289         public Iterator getSetIterator (Comparator comp) {\r
290             return new MyIterator(comp);\r
291         }\r
292 \r
293     }\r
294 }