2 *******************************************************************************
3 * Copyright (C) 2002-2010, International Business Machines Corporation and *
4 * others. All Rights Reserved. *
5 *******************************************************************************
7 package com.ibm.icu.dev.tool.translit;
9 import java.io.FileOutputStream;
10 import java.io.OutputStreamWriter;
11 import java.util.Hashtable;
13 import com.ibm.icu.impl.Utility;
14 import com.ibm.icu.lang.UCharacter;
15 import com.ibm.icu.text.Transliterator;
16 import com.ibm.icu.text.UTF16;
17 import com.ibm.icu.text.UnicodeSet;
18 import com.ibm.icu.text.UnicodeSetIterator;
22 * To change this generated comment edit the template variable "typecomment":
23 * Window>Preferences>Java>Templates.
24 * To enable and disable the creation of type comments go to
25 * Window>Preferences>Java>Code Generation.
27 public class WriteIndicCharts {
// Command-line entry point of the chart tool.
// NOTE(review): the method body is not visible in this listing, so which chart writer
// it invokes (writeIICharts() or writeCharts()) cannot be confirmed from here.
29 public static void main(String[] args){
// HTML preamble of the comparison chart: <html>/<head> with a UTF-8 content-type meta tag,
// the page title text, the opening <body> and <table> tags, and one <th> per column
// (Inter-Indic, Latin, then Devanagari, Bengali, Gurmukhi, Gujarati, Oriya, Tamil, Telugu,
// Kannada and Malayalam).
// NOTE(review): the concatenation is only partly visible in this listing (it ends on a
// trailing '+'); check the remaining pieces against the complete file.
34 static String header = "<html>\n" +
35 " <head><meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\">"+
36 " Inter-Indic Transliteration Comparison chart"+
38 " <body bgcolor=#FFFFFF>\n"+
39 " <table border=1 width=100% >\n"+
41 " <th width=9%>Inter-Indic</th>\n"+
42 " <th width=9%>Latin</th>\n"+
43 " <th width=9%>Devanagari</th>\n"+
44 " <th width=9%>Bengali</th>\n"+
45 " <th width=9%>Gurmukhi</th>\n"+
46 " <th width=9%>Gujarati</th>\n"+
47 " <th width=9%>Oriya</th>\n"+
48 " <th width=9%>Tamil</th>\n"+
49 " <th width=9%>Telugu</th>\n"+
50 " <th width=9%>Kannada</th>\n"+
51 " <th width=9%>Malayalam</th>\n"+
// Closing markup of the comparison chart; it starts by closing the <table> opened in header.
// NOTE(review): the rest of this expression is not visible in this listing.
53 static String footer = " </table>\n"+
// Character sets used by the chart writers.
// deva .. mlym are built from the script property patterns [:deva:] .. [:mlym:].
// In the visible code beng..mlym are passed to writeCharts(Transliterator, UnicodeSet, int);
// deva is referenced only from commented-out lines.
57 static UnicodeSet deva = new UnicodeSet("[:deva:]");
58 static UnicodeSet beng = new UnicodeSet("[:beng:]");
59 static UnicodeSet gujr = new UnicodeSet("[:gujr:]");
60 static UnicodeSet guru = new UnicodeSet("[:guru:]");
61 static UnicodeSet orya = new UnicodeSet("[:orya:]");
62 static UnicodeSet taml = new UnicodeSet("[:taml:]");
63 static UnicodeSet telu = new UnicodeSet("[:telu:]");
64 static UnicodeSet knda = new UnicodeSet("[:knda:]");
65 static UnicodeSet mlym = new UnicodeSet("[:mlym:]");
// Explicit code point range U+E000..U+E082; writeIICharts(Transliterator, int, int) iterates
// over it and feeds each code point to an InterIndic-* transliterator.
66 static UnicodeSet inter= new UnicodeSet("[\uE000-\uE082]");
// Builds the Inter-Indic comparison chart and writes it to "comparison-chart.html" as UTF-8.
// Steps visible in this listing:
//   1. obtain the InterIndic-* transliterators (t1..t9 for the Indic scripts, t10 for Latin);
//   2. seed `table` with one 10-slot row per offset 0x00..0x80, slot 0 holding U+E000+offset;
//   3. fill slots 1..9 via writeIICharts(trans, start, index), one call per script block;
//   4. walk the rows again and emit <td> cells, coloured according to the marker suffix
//      (":FALLBACK", ":UNASSIGNED", ":UNMAPPED", ":CONSUMED") found on each slot value.
// NOTE(review): this listing omits a number of original lines (block openers, else branches,
// closing braces, the catch body), so the exact nesting of the statements below must be
// confirmed against the complete file.
68 public static void writeIICharts(){
// One transliterator per target script; t10 (Latin) is applied to the Inter-Indic cell below.
70 Transliterator t1 = Transliterator.getInstance("InterIndic-Bengali");
71 Transliterator t2 = Transliterator.getInstance("InterIndic-Gurmukhi");
72 Transliterator t3 = Transliterator.getInstance("InterIndic-Gujarati");
73 Transliterator t4 = Transliterator.getInstance("InterIndic-Oriya");
74 Transliterator t5 = Transliterator.getInstance("InterIndic-Tamil");
75 Transliterator t6 = Transliterator.getInstance("InterIndic-Telugu");
76 Transliterator t7 = Transliterator.getInstance("InterIndic-Kannada");
77 Transliterator t8 = Transliterator.getInstance("InterIndic-Malayalam");
78 Transliterator t9 = Transliterator.getInstance("InterIndic-Devanagari");
79 Transliterator t10 = Transliterator.getInstance("InterIndic-Latin");
80 //UnicodeSetIterator sIter = new UnicodeSetIterator(deva);
// Seed the rows: the key is the offset as a one-code-point String, slot 0 is U+E000+offset,
// slots 1..9 start out null.
82 for(int i=0x00;i<=0x80;i++){
83 String[] arr = new String[10];
84 arr[0]=UTF16.valueOf(i+ 0xE000);
85 table.put(UTF16.valueOf(i),arr);
// NOTE(review): no close()/flush() of this writer is visible in this listing; confirm it
// happens on the lines between the row loop and the catch clause.
88 OutputStreamWriter os = new OutputStreamWriter(new FileOutputStream("comparison-chart.html"),"UTF-8");
// Fill slots 1..9 in the same script order as the header columns (Devanagari .. Malayalam);
// the second argument is the first code point of that script's block.
92 writeIICharts(t9,0x0900,1);
93 writeIICharts(t1,0x0980,2);
94 writeIICharts(t2,0x0A00,3);
95 writeIICharts(t3,0x0A80,4);
96 writeIICharts(t4,0x0B00,5);
97 writeIICharts(t5,0x0B80,6);
98 writeIICharts(t6,0x0c00,7);
99 writeIICharts(t7,0x0C80,8);
100 writeIICharts(t8,0x0D00,9);
// Second pass: turn each row into HTML.
102 for(int i=0x00;i<=0x80;i++){
103 String[] temp = (String[])table.get(UTF16.valueOf(i));
// `write` presumably decides whether the row is emitted at all; the lines that set and test
// it are not visible here.
104 boolean write = false;
// Scans slots 1.. and stops at the first null slot. The test below is true unless the slot's
// first character has "unassigned" in its extended name AND the value carries ":UNASSIGNED".
105 for(int k=1;k<temp.length && temp[k]!=null;k++){
106 if(UCharacter.getExtendedName(UTF16.charAt(temp[k],0)).indexOf("unassigned")<0 ||
107 temp[k].indexOf(":UNASSIGNED")<0){
// One cell per slot, slot 0 (the Inter-Indic value) included.
113 for(int j=0; j<temp.length;j++){
// One flag per marker suffix; the assignments are presumably on the omitted lines inside
// the matching `if` blocks below.
115 boolean fallback=false;
116 boolean unassigned=false;
117 boolean unmapped = false;
118 boolean consumed =false;
119 String str = temp[j];
// For each marker, strip the suffix by cutting the value at its first ':'.
121 if(temp[j].indexOf(":FALLBACK")>=0){
122 str = temp[j].substring(0,temp[j].indexOf(":"));
124 // os.write(" <td bgcolor=#FFFF00 align=center title=\""++"\">"+str+"<br><tt>"+Utility.hex(str)+"</tt>"+"</td>\n");
126 if(temp[j].indexOf(":UNASSIGNED")>=0){
127 str = temp[j].substring(0,temp[j].indexOf(":"));
131 if(temp[j].indexOf(":UNMAPPED")>=0){
132 str = temp[j].substring(0,temp[j].indexOf(":"));
135 if(temp[j].indexOf(":CONSUMED")>=0){
136 str = temp[j].substring(0,temp[j].indexOf(":"));
// Tooltip text: the extended names of the characters of str, joined with '+'.
141 StringBuffer nameBuf=new StringBuffer();
142 for(int f=0; f<str.length();f++){
143 if(f>0){ nameBuf.append("+");}
144 nameBuf.append(UCharacter.getExtendedName(UTF16.charAt(str,f)));
146 name = nameBuf.toString();
// Colour-coded cells. Within a colour, the variant chosen when the first character's name
// contains "unassigned" prints only the hex value; the other variant also prints the text.
// Only the #00FFFF pair is visibly tied to a flag (`unassigned`); the conditions guarding
// the #BBBBFF, #FF9999 and #FFFF55 cells are not visible in this listing.
149 if(UCharacter.getExtendedName(UTF16.charAt(str,0)).indexOf("unassigned")>0){
150 os.write(" <td width=9% bgcolor=#BBBBFF align=center title=\""+name+"\">"+" <br><tt>"+Utility.hex(str)+"</tt>"+"</td>\n");
152 os.write(" <td width=9% bgcolor=#BBBBFF align=center title=\""+name+"\">"+ str+"<br><tt>"+Utility.hex(str)+"</tt>"+"</td>\n");
155 os.write(" <td bgcolor=#FF9999 align=center title=\""+name+"\">"+" <br><tt>"+Utility.hex(str)+"</tt>"+"</td>\n");
156 }else if(unassigned){
157 if(UCharacter.getExtendedName(UTF16.charAt(str,0)).indexOf("unassigned")>0){
158 os.write(" <td width=9% bgcolor=#00FFFF align=center title=\""+name+"\">"+" <br><tt>"+Utility.hex(str)+"</tt>"+"</td>\n");
160 os.write(" <td width=9% bgcolor=#00FFFF align=center title=\""+name+"\">"+ str+"<br><tt>"+Utility.hex(str)+"</tt>"+"</td>\n");
163 if(UCharacter.getExtendedName(UTF16.charAt(str,0)).indexOf("unassigned")>0){
164 os.write(" <td width=9% bgcolor=#FFFF55 align=center title=\""+name+"\">"+" <br><tt>"+Utility.hex(str)+"</tt>"+"</td>\n");
// NOTE(review): unlike the sibling cells, this one has an empty title and omits the text of
// str; confirm that is intentional.
166 os.write(" <td width=9% bgcolor=#FFFF55 align=center title=\""+""+"\">"+" <br><tt>"+Utility.hex(str)+"</tt>"+"</td>\n");
// A value whose name contains "private" is treated as the Inter-Indic cell: its hex value is
// written, followed by a Latin cell holding either t10's output or a blank (the condition
// choosing between the two is not visible here).
168 }else if(name.indexOf("private")!=-1){
169 String s = t10.transliterate(str);
170 os.write(" <td width=9% bgcolor=#FFBBBB align=center title=\""+name+"\">"+" <br><tt>"+Utility.hex(str)+"</tt>"+"</td>\n");
172 os.write(" <td width=9% bgcolor=#CCEEDD align=center>"+s +"</td>");
174 os.write(" <td width=9% bgcolor=#CCEEDD align=center> </td>");
// Plain cell: the text plus its hex value, no background colour.
177 os.write(" <td width=9% align=center title=\""+name+"\">"+ str+"<br><tt>"+Utility.hex(str)+"</tt>"+"</td>\n");
// Empty placeholder cell (presumably for a slot with no value; the guard is not visible).
180 os.write(" <td width=9% > </td>\n");
183 os.write(" </tr>\n");
// NOTE(review): every Exception is caught here; the handler body is not visible in this listing.
188 }catch( Exception e){
// Builds a comparison chart whose rows are keyed by the Devanagari code points
// U+0900..U+097F and writes it to "comparison-chart.html" as UTF-8.
// NOTE(review): this listing omits a number of original lines (block openers, else branches,
// closing braces, the catch body), so the exact nesting of the statements below must be
// confirmed against the complete file.
192 public static void writeCharts(){
194 Transliterator t1 = Transliterator.getInstance("InterIndic-Bengali");
195 Transliterator t2 = Transliterator.getInstance("InterIndic-Gurmukhi");
196 Transliterator t3 = Transliterator.getInstance("InterIndic-Gujarati");
197 Transliterator t4 = Transliterator.getInstance("InterIndic-Oriya");
198 Transliterator t5 = Transliterator.getInstance("InterIndic-Tamil");
199 Transliterator t6 = Transliterator.getInstance("InterIndic-Telugu");
200 Transliterator t7 = Transliterator.getInstance("InterIndic-Kannada");
201 Transliterator t8 = Transliterator.getInstance("InterIndic-Malayalam");
202 Transliterator t9 = Transliterator.getInstance("InterIndic-Devanagari");
204 //UnicodeSetIterator sIter = new UnicodeSetIterator(deva);
// Seed the rows: the key is the Devanagari code point as a String, slot 0 is U+E000 plus the
// low byte of that code point, slots 1..9 start out null.
206 for(int i=0x0900;i<=0x097F;i++){
207 String[] arr = new String[10];
208 arr[0]=UTF16.valueOf((i&0xFF) + 0xE000);
209 table.put(UTF16.valueOf(i),arr);
212 OutputStreamWriter os = new OutputStreamWriter(new FileOutputStream("comparison-chart.html"),"UTF-8");
// NOTE(review): three groups of helper calls follow and they target overlapping slots (for
// example slot 1 is written by both the t1/beng call and the t9/0x0900 call). Lines are missing
// between the groups in this listing, so some groups may originally have been commented
// out; confirm which ones are live.
// NOTE(review): the helpers look rows up by getKey(...) or (codepoint & 0xFF), i.e. by
// low-byte offsets, whereas the rows seeded above are keyed by U+0900..U+097F; confirm the
// lookups actually find these rows.
216 writeCharts(t1,beng,1);
217 writeCharts(t2,guru,2);
218 writeCharts(t3,gujr,3);
219 writeCharts(t4,orya,4);
220 writeCharts(t5,taml,5);
221 writeCharts(t6,telu,6);
222 writeCharts(t7,knda,7);
223 writeCharts(t8,mlym,8);
226 writeCharts(t9,0x0900,1);
227 writeCharts(t1,0x0980,2);
228 writeCharts(t2,0x0A00,3);
229 writeCharts(t3,0x0A80,4);
230 writeCharts(t4,0x0B00,5);
231 writeCharts(t5,0x0B80,6);
232 writeCharts(t6,0x0c00,7);
233 writeCharts(t7,0x0C80,8);
234 writeCharts(t8,0x0D00,9);
236 writeIICharts(t9,0x0900,1);
237 writeIICharts(t1,0x0980,2);
238 writeIICharts(t2,0x0A00,3);
239 writeIICharts(t3,0x0A80,4);
240 writeIICharts(t4,0x0B00,5);
241 writeIICharts(t5,0x0B80,6);
242 writeIICharts(t6,0x0c00,7);
243 writeIICharts(t7,0x0C80,8);
244 writeIICharts(t8,0x0D00,9);
// Second pass: turn each row into HTML.
245 for(int i=0x0900;i<=0x097F;i++){
246 String[] temp = (String[])table.get(UTF16.valueOf(i));
// `write` presumably decides whether the row is emitted; the lines that set and test it are
// not visible here.
247 boolean write = false;
// NOTE(review): unlike the equivalent loop in writeIICharts(), no temp[k]!=null guard is
// visible here, so a slot left null would make UTF16.charAt fail; confirm.
248 for(int k=1;k<temp.length;k++){
249 if(UCharacter.getExtendedName(UTF16.charAt(temp[k],0)).indexOf("unassigned")<0){
// One cell per slot, slot 0 included.
255 for(int j=0; j<temp.length;j++){
257 boolean fallback=false;
258 String str = temp[j];
// Strip the ":FALLBACK" marker by cutting the value at its first ':'; the flag is presumably
// set on an omitted line inside this block.
260 if(temp[j].indexOf(":FALLBACK")>=0){
261 str = temp[j].substring(0,temp[j].indexOf(":"));
263 // os.write(" <td bgcolor=#FFFF00 align=center title=\""++"\">"+str+"<br><tt>"+Utility.hex(str)+"</tt>"+"</td>\n");
// Tooltip text: the extended name of the first character only.
265 String name = UCharacter.getExtendedName(UTF16.charAt(str,0));
// Cell colours: #BBBBFF for the first branch (its condition is not visible; presumably
// `fallback`), #CCCCCC when the name contains "unassigned" (hex only), #FFBBBB when it
// contains "private" (hex only), otherwise an uncoloured cell with text and hex.
267 os.write(" <td bgcolor=#BBBBFF align=center title=\""+name+"\">"+ str+"<br><tt>"+Utility.hex(str)+"</tt>"+"</td>\n");
268 }else if(name.indexOf("unassigned")!=-1){
269 os.write(" <td bgcolor=#CCCCCC align=center title=\""+name+"\">"+" <br><tt>"+Utility.hex(str)+"</tt>"+"</td>\n");
270 }else if(name.indexOf("private")!=-1){
273 os.write(" <td bgcolor=#FFBBBB align=center title=\""+name+"\">"+" <br><tt>"+Utility.hex(str)+"</tt>"+"</td>\n");
276 os.write(" <td align=center title=\""+name+"\">"+ str+"<br><tt>"+Utility.hex(str)+"</tt>"+"</td>\n");
// Empty placeholder cell (presumably for a slot with no value; the guard is not visible).
279 os.write(" <td> </td>\n");
282 os.write(" </tr>\n");
// NOTE(review): every Exception is caught here; the handler body is not visible in this listing.
287 }catch( Exception e){
// Shared row store for the chart writers: maps a one-code-point String key to a String[] row
// (raw Hashtable, hence the (String[]) casts at every lookup). Rows are created by
// writeIICharts() / writeCharts() and their slots are filled by the three-argument helpers.
// NOTE(review): the field is static and no clearing of it is visible, so rows from one
// writer would still be present if both writers run in the same JVM.
291 static Hashtable table = new Hashtable();
// Maps a code point to the row key used in `table`: takes the low byte of cp, subtracts 0x80
// when that byte is above 0x7F, and returns the resulting offset (0x00..0x7F) as a
// one-code-point String. For example 0x0905 and 0x0985 both yield the key for 0x05.
292 static String getKey(int cp){
293 int delta = cp & 0xFF;
294 delta-= (delta>0x7f)? 0x80 : 0;
296 return UTF16.valueOf(delta);
// Fills slot `index` of the rows in `table` for the 128 code points start..start+0x7F.
//   trans - transliterator whose inverse is applied first, then trans itself (round trip)
//   start - first code point of the block to walk
//   index - slot of each row's String[] to write
// For every code point: s1 is the inverse's output, s2 is trans applied to s1, and the row
// is looked up with getKey(start+i).
// NOTE(review): the condition that selects the ":FALLBACK" store below, and whatever is
// stored otherwise, are on lines not visible in this listing (presumably a comparison of
// cp and s2). No null check on `arr` is visible either.
299 public static void writeCharts(Transliterator trans, int start, int index){
301 Transliterator inverse = trans.getInverse();
302 for(int i=0;i<=0x7f;i++){
303 String cp = UTF16.valueOf(start+i);
304 String s1 = inverse.transliterate(cp);
305 String s2 = trans.transliterate(s1);
307 String[] arr = (String[])table.get(getKey(start+i));
// Stores the inverse's output tagged with the ":FALLBACK" marker.
311 arr[index] = s1 + ":FALLBACK";
// Fills slot `index` of the rows in `table` by walking the code points of `inter`.
//   trans - transliterator applied to each code point; its inverse maps the result back
//   start - first code point of the target script block, used to build the stored
//           value for the ":UNASSIGNED" and ":CONSUMED" cases
//   index - slot of each row's String[] to write
// For every code point: s1 is trans's output, s2 is the inverse applied to s1, and the row
// is looked up by the low byte of the code point.
// NOTE(review): the loop header and several branch conditions are on lines not visible in
// this listing, so only the stored values and the two visible tests are described below.
// NOTE(review): `inter` extends to U+E082 while writeIICharts() seeds rows only for offsets
// 0x00..0x80; for U+E081 and U+E082 the lookup would return null unless a guard exists on
// the omitted lines. Confirm.
316 public static void writeIICharts(Transliterator trans,int start, int index){
318 Transliterator inverse = trans.getInverse();
319 UnicodeSetIterator iter = new UnicodeSetIterator(inter);
322 String cp =UTF16.valueOf(iter.codepoint);
323 String s1 = trans.transliterate(cp);
324 String s2 = inverse.transliterate(s1);
325 String[] arr = (String[])table.get(UTF16.valueOf(iter.codepoint&0xFF));
// Stored value is the script code point at the same low-byte offset ((byte)x & 0xFF keeps
// the low 8 bits), tagged ":UNASSIGNED".
327 arr[index] = UTF16.valueOf(start+(((byte)iter.codepoint)&0xFF))+":UNASSIGNED";
// Round trip returned the original code point; what is stored in this case is not visible.
328 }else if(cp.equals(s2)){
// trans's output was left unchanged by the inverse: stored as ":CONSUMED" or ":FALLBACK"
// depending on a condition that is not visible.
330 }else if(s1.equals(s2)){
332 arr[index] = UTF16.valueOf(start+(((byte)iter.codepoint)&0xFF))+":CONSUMED";
334 arr[index] = s1+ ":FALLBACK";
// Remaining case: the same two stores again, under a condition that is not visible.
338 arr[index] = UTF16.valueOf(start+(((byte)iter.codepoint)&0xFF))+":CONSUMED";
340 arr[index] = s1+ ":FALLBACK";
// Fills slot `index` of the rows in `table` by walking the code points of `target`.
//   trans  - transliterator whose inverse is applied first, then trans itself (round trip)
//   target - set of code points to walk
//   index  - slot of each row's String[] to write
// For every code point: s1 is the inverse's output, s2 is trans applied to s1, and the row
// is looked up with getKey(codepoint).
// NOTE(review): the loop header, the condition selecting the ":FALLBACK" store, and the end
// of the method are not visible in this listing.
345 public static void writeCharts(Transliterator trans, UnicodeSet target, int index){
346 UnicodeSetIterator tIter = new UnicodeSetIterator(target);
347 Transliterator inverse = trans.getInverse();
349 String cp = UTF16.valueOf(tIter.codepoint);
350 String s1 = inverse.transliterate(cp);
351 String s2 = trans.transliterate(s1);
353 String[] arr = (String[])table.get(getKey(tIter.codepoint));
// Stores the original code point (not s1, unlike the int-based overload) tagged ":FALLBACK".
357 arr[index] = cp + ":FALLBACK";