]> gitweb.fperrin.net Git - Dictionary.git/blob - jars/icu4j-4_8_1_1/main/tests/core/src/com/ibm/icu/dev/test/lang/TestUScript.java
Added flags.
[Dictionary.git] / jars / icu4j-4_8_1_1 / main / tests / core / src / com / ibm / icu / dev / test / lang / TestUScript.java
1 /**
2 *******************************************************************************
3 * Copyright (C) 1996-2011, International Business Machines Corporation and    *
4 * others. All Rights Reserved.                                                *
5 *******************************************************************************
6 */
7
8 package com.ibm.icu.dev.test.lang;
9
10 import java.util.BitSet;
11 import java.util.Locale;
12
13 import com.ibm.icu.dev.test.TestFmwk;
14 import com.ibm.icu.lang.UScript;
15 import com.ibm.icu.util.ULocale;
16
17 public class TestUScript extends TestFmwk {
18
19     /**
20     * Constructor
21     */
22     public TestUScript()
23     {
24     }
25
26     public static void main(String[] args) throws Exception {
27         new TestUScript().run(args);
28     }
29     public void TestLocaleGetCode(){
30         final ULocale[] testNames={
31         /* test locale */
32         new ULocale("en"), new ULocale("en_US"),
33         new ULocale("sr"), new ULocale("ta") ,
34         new ULocale("te_IN"),
35         new ULocale("hi"),
36         new ULocale("he"), new ULocale("ar"),
37         new ULocale("abcde"),
38         new ULocale("abcde_cdef"),
39         new ULocale("iw")
40         };
41         final int[] expected ={
42                 /* locales should return */
43                 UScript.LATIN, UScript.LATIN,
44                 UScript.CYRILLIC, UScript.TAMIL,
45                 UScript.TELUGU,UScript.DEVANAGARI,
46                 UScript.HEBREW, UScript.ARABIC,
47                 UScript.INVALID_CODE,UScript.INVALID_CODE,
48                 UScript.HEBREW
49         };
50         int i =0;
51         int numErrors =0;
52
53         for( ; i<testNames.length; i++){
54             int[] code = UScript.getCode(testNames[i]);
55
56             if(code==null){
57                 if(expected[i]!=UScript.INVALID_CODE){
58                     logln("Error getting script code Got: null" + " Expected: " +expected[i] +" for name "+testNames[i]);
59                     numErrors++;
60                 }
61                 // getCode returns null if the code could not be found
62                 continue;
63             }
64             if((code[0] != expected[i])){
65                 logln("Error getting script code Got: " +code[0] + " Expected: " +expected[i] +" for name "+testNames[i]);
66                 numErrors++;
67             }
68         }
69         reportDataErrors(numErrors);
70         
71         // 
72         ULocale defaultLoc = ULocale.getDefault(); 
73         ULocale esparanto = new ULocale("eo_DE");
74         ULocale.setDefault(esparanto);
75         int[] code = UScript.getCode(esparanto); 
76         if(code != null){
77             if( code[0] != UScript.LATIN){
78                 errln("Did not get the expected script code for Esparanto");
79             }
80         }else{
81             warnln("Could not load the locale data.");
82         }
83         ULocale.setDefault(defaultLoc);
84     }
85
86     private void reportDataErrors(int numErrors) {
87         if (numErrors >0) {
88             // assume missing locale data, so not an error, just a warning
89             if (isModularBuild() || noData()) {
90                 // if nodata is set don't even warn
91                 warnln("Could not find locale data");
92             } else {
93                 errln("encountered " + numErrors + " errors.");
94             }
95         }
96     }
97
98     public void TestMultipleCode(){
99         final String[] testNames = { "ja" ,"ko_KR","zh","zh_TW"};
100         final int[][] expected = {
101                                 {UScript.KATAKANA,UScript.HIRAGANA,UScript.HAN},
102                                 {UScript.HANGUL, UScript.HAN},
103                                 {UScript.HAN},
104                                 {UScript.HAN,UScript.BOPOMOFO}
105                               };
106
107         int numErrors = 0;
108         for(int i=0; i<testNames.length;i++){
109             int[] code = UScript.getCode(testNames[i]);
110             int[] expt = (int[]) expected[i];
111             if(code!=null){
112                 for(int j =0; j< code.length;j++){
113                     if(code[j]!=expt[j]){
114                         numErrors++;
115                         logln("Error getting script code Got: " +code[j] + " Expected: " +expt[j] +" for name "+testNames[i]);
116                     }
117                 }
118             }else{
119                 numErrors++;
120                 logln("Error getting script code for name "+testNames[i]);
121             }
122         }
123         reportDataErrors(numErrors);
124         
125         //cover UScript.getCode(Locale)
126         Locale[] testLocales = new Locale[] {
127             Locale.JAPANESE,
128             Locale.KOREA,
129             Locale.CHINESE,
130             Locale.TAIWAN };
131         logln("Testing UScript.getCode(Locale) ...");
132         numErrors = 0;
133         for(int i=0; i<testNames.length;i++){
134             logln("  Testing locale: " + testLocales[i].getDisplayName());
135             int[] code = UScript.getCode(testLocales[i]);
136             int[] expt = (int[]) expected[i];
137             if(code!=null){
138                 for(int j =0; j< code.length;j++){
139                     if(code[j]!=expt[j]){
140                         numErrors++;
141                         logln("  Error getting script code Got: " +code[j] + " Expected: " +expt[j] +" for name "+testNames[i]);
142                     }
143                 }
144             }else{
145                 numErrors++;
146                 logln("  Error getting script code for name "+testNames[i]);
147             }
148         }
149         reportDataErrors(numErrors);                 
150     }
151
152     public void TestGetCode(){
153
154         final String[] testNames={
155             /* test locale */
156             "en", "en_US", "sr", "ta", "gu", "te_IN", 
157             "hi", "he", "ar",
158             /* test abbr */
159             "Hani", "Hang","Hebr","Hira",
160             "Knda","Kana","Khmr","Lao",
161             "Latn",/*"Latf","Latg",*/
162             "Mlym", "Mong",
163
164             /* test names */
165             "CYRILLIC","DESERET","DEVANAGARI","ETHIOPIC","GEORGIAN",
166             "GOTHIC",  "GREEK",  "GUJARATI", "COMMON", "INHERITED",
167             /* test lower case names */
168             "malayalam", "mongolian", "myanmar", "ogham", "old-italic",
169             "oriya",     "runic",     "sinhala", "syriac","tamil",
170             "telugu",    "thaana",    "thai",    "tibetan",
171             /* test the bounds*/
172             "Cans", "arabic","Yi","Zyyy"
173         };
174         final int[] expected ={
175             /* locales should return */
176             UScript.LATIN, UScript.LATIN,
177             UScript.CYRILLIC, UScript.TAMIL, UScript.GUJARATI,
178             UScript.TELUGU,UScript.DEVANAGARI,
179             UScript.HEBREW, UScript.ARABIC,
180             /* abbr should return */
181             UScript.HAN, UScript.HANGUL, UScript.HEBREW, UScript.HIRAGANA,
182             UScript.KANNADA, UScript.KATAKANA, UScript.KHMER, UScript.LAO,
183             UScript.LATIN,/* UScript.LATIN, UScript.LATIN,*/
184             UScript.MALAYALAM, UScript.MONGOLIAN,
185             /* names should return */
186             UScript.CYRILLIC, UScript.DESERET, UScript.DEVANAGARI, UScript.ETHIOPIC, UScript.GEORGIAN,
187             UScript.GOTHIC, UScript.GREEK, UScript.GUJARATI, UScript.COMMON, UScript.INHERITED,
188             /* lower case names should return */
189             UScript.MALAYALAM, UScript.MONGOLIAN, UScript.MYANMAR, UScript.OGHAM, UScript.OLD_ITALIC,
190             UScript.ORIYA, UScript.RUNIC, UScript.SINHALA, UScript.SYRIAC, UScript.TAMIL,
191             UScript.TELUGU, UScript.THAANA, UScript.THAI, UScript.TIBETAN,
192             /* bounds */
193             UScript.CANADIAN_ABORIGINAL, UScript.ARABIC, UScript.YI, UScript.COMMON
194         };
195         int i =0;
196         int numErrors =0;
197
198         for( ; i<testNames.length; i++){
199             int[] code = UScript.getCode(testNames[i]);
200             if(code == null){
201                 if(expected[i]==UScript.INVALID_CODE){
202                     // getCode returns null if the code could not be found
203                     continue;
204                 }
205                 // currently commented out until jitterbug#2678 is fixed
206                 logln("Error getting script code Got: null" + " Expected: " +expected[i] +" for name "+testNames[i]);
207                 numErrors++;
208                 continue;
209             }
210             if((code[0] != expected[i])){
211                 logln("Error getting script code Got: " +code[0] + " Expected: " +expected[i] +" for name "+testNames[i]);
212                 numErrors++;
213             }
214         }
215         reportDataErrors(numErrors);
216     }
217
218     public void TestGetName(){
219
220         final int[] testCodes={
221             /* names should return */
222             UScript.CYRILLIC, UScript.DESERET, UScript.DEVANAGARI, UScript.ETHIOPIC, UScript.GEORGIAN,
223             UScript.GOTHIC, UScript.GREEK, UScript.GUJARATI,
224         };
225
226         final String[] expectedNames={
227
228             /* test names */
229             "Cyrillic","Deseret","Devanagari","Ethiopic","Georgian",
230             "Gothic",  "Greek",  "Gujarati",
231         };
232         int i =0;
233         int numErrors=0;
234         while(i< testCodes.length){
235             String scriptName  = UScript.getName(testCodes[i]);
236             if(!expectedNames[i].equals(scriptName)){
237                 logln("Error getting abbreviations Got: " +scriptName +" Expected: "+expectedNames[i]);
238                 numErrors++;
239             }
240             i++;
241         }
242         if(numErrors >0 ){
243             warnln("encountered " + numErrors + " errors in UScript.getName()");
244         }
245
246     }
247     public void TestGetShortName(){
248         final int[] testCodes={
249             /* abbr should return */
250             UScript.HAN, UScript.HANGUL, UScript.HEBREW, UScript.HIRAGANA,
251             UScript.KANNADA, UScript.KATAKANA, UScript.KHMER, UScript.LAO,
252             UScript.LATIN,
253             UScript.MALAYALAM, UScript.MONGOLIAN,
254         };
255
256         final String[] expectedAbbr={
257               /* test abbr */
258             "Hani", "Hang","Hebr","Hira",
259             "Knda","Kana","Khmr","Laoo",
260             "Latn",
261             "Mlym", "Mong",
262         };
263         int i=0;
264         int numErrors=0;
265         while(i<testCodes.length){
266             String  shortName = UScript.getShortName(testCodes[i]);
267             if(!expectedAbbr[i].equals(shortName)){
268                 logln("Error getting abbreviations Got: " +shortName+ " Expected: " +expectedAbbr[i]);
269                 numErrors++;
270             }
271             i++;
272         }
273         if(numErrors >0 ){
274             warnln("encountered " + numErrors + " errors in UScript.getShortName()");
275         }
276     }
277     public void TestGetScript(){
278         int codepoints[][] = new int[][] {
279                 {0x0000FF9D, UScript.KATAKANA },
280                 {0x0000FFBE, UScript.HANGUL },
281                 {0x0000FFC7, UScript.HANGUL },
282                 {0x0000FFCF, UScript.HANGUL },
283                 {0x0000FFD7, UScript.HANGUL}, 
284                 {0x0000FFDC, UScript.HANGUL},
285                 {0x00010300, UScript.OLD_ITALIC},
286                 {0x00010330, UScript.GOTHIC},
287                 {0x0001034A, UScript.GOTHIC},
288                 {0x00010400, UScript.DESERET},
289                 {0x00010428, UScript.DESERET},
290                 {0x0001D167, UScript.INHERITED},
291                 {0x0001D17B, UScript.INHERITED},
292                 {0x0001D185, UScript.INHERITED},
293                 {0x0001D1AA, UScript.INHERITED},
294                 {0x00020000, UScript.HAN},
295                 {0x00000D02, UScript.MALAYALAM},
296                 {0x00000D00, UScript.UNKNOWN},
297                 {0x00000000, UScript.COMMON},
298                 {0x0001D169, UScript.INHERITED },
299                 {0x0001D182, UScript.INHERITED },
300                 {0x0001D18B, UScript.INHERITED },
301                 {0x0001D1AD, UScript.INHERITED },
302         };
303
304         int i =0;
305         int code = UScript.INVALID_CODE;
306         boolean passed = true;
307
308         while(i< codepoints.length){
309             code = UScript.getScript(codepoints[i][0]);
310
311             if(code != codepoints[i][1]){
312                 logln("UScript.getScript for codepoint 0x"+ hex(codepoints[i][0])+" failed");
313                 passed = false;
314             }
315
316             i++;
317         }
318         if(!passed){
319            errln("UScript.getScript failed.");
320         }
321     }
322
323     public void TestGetScriptOfCharsWithScriptExtensions() {
324         /* test characters which have Script_Extensions */
325         if(!(
326             UScript.COMMON==UScript.getScript(0x0640) &&
327             UScript.INHERITED==UScript.getScript(0x0650) &&
328             UScript.ARABIC==UScript.getScript(0xfdf2))
329         ) {
330             errln("UScript.getScript(character with Script_Extensions) failed");
331         }
332     }
333
334     public void TestHasScript() {
335         if(!(
336             !UScript.hasScript(0x063f, UScript.COMMON) &&
337             UScript.hasScript(0x063f, UScript.ARABIC) &&  /* main Script value */
338             !UScript.hasScript(0x063f, UScript.SYRIAC) &&
339             !UScript.hasScript(0x063f, UScript.THAANA))
340         ) {
341             errln("UScript.hasScript(U+063F, ...) is wrong\n");
342         }
343         if(!(
344             UScript.hasScript(0x0640, UScript.COMMON) &&  /* main Script value */
345             UScript.hasScript(0x0640, UScript.ARABIC) &&
346             UScript.hasScript(0x0640, UScript.SYRIAC) &&
347             !UScript.hasScript(0x0640, UScript.THAANA))
348         ) {
349             errln("UScript.hasScript(U+0640, ...) is wrong\n");
350         }
351         if(!(
352             UScript.hasScript(0x0650, UScript.INHERITED) &&  /* main Script value */
353             UScript.hasScript(0x0650, UScript.ARABIC) &&
354             UScript.hasScript(0x0650, UScript.SYRIAC) &&
355             !UScript.hasScript(0x0650, UScript.THAANA))
356         ) {
357             errln("UScript.hasScript(U+0650, ...) is wrong\n");
358         }
359         if(!(
360             UScript.hasScript(0x0660, UScript.COMMON) &&  /* main Script value */
361             UScript.hasScript(0x0660, UScript.ARABIC) &&
362             !UScript.hasScript(0x0660, UScript.SYRIAC) &&
363             UScript.hasScript(0x0660, UScript.THAANA))
364         ) {
365             errln("UScript.hasScript(U+0660, ...) is wrong\n");
366         }
367         if(!(
368             !UScript.hasScript(0xfdf2, UScript.COMMON) &&
369             UScript.hasScript(0xfdf2, UScript.ARABIC) &&  /* main Script value */
370             !UScript.hasScript(0xfdf2, UScript.SYRIAC) &&
371             UScript.hasScript(0xfdf2, UScript.THAANA))
372         ) {
373             errln("UScript.hasScript(U+FDF2, ...) is wrong\n");
374         }
375     }
376
377     public void TestGetScriptExtensions() {
378         BitSet scripts=new BitSet(UScript.CODE_LIMIT);
379
380         /* normal usage */
381         if(!UScript.getScriptExtensions(0x063f, scripts).isEmpty()) {
382             errln("UScript.getScriptExtensions(U+063F) is not empty");
383         }
384         if(UScript.getScriptExtensions(0x0640, scripts).cardinality()!=2 || !scripts.get(UScript.ARABIC) || !scripts.get(UScript.SYRIAC)) {
385             errln("UScript.getScriptExtensions(U+0640) failed");
386         }
387         UScript.getScriptExtensions(0xfdf2, scripts);
388         if(scripts.cardinality()!=2 || !scripts.get(UScript.ARABIC) || !scripts.get(UScript.THAANA)) {
389             errln("UScript.getScriptExtensions(U+FDF2) failed");
390         }
391         UScript.getScriptExtensions(0xff65, scripts);
392         if(scripts.cardinality()!=6 || !scripts.get(UScript.BOPOMOFO) || !scripts.get(UScript.YI)) {
393             errln("UScript.getScriptExtensions(U+FF65) failed");
394         }
395     }
396
397     public void TestScriptNames(){
398         for(int i=0; i<UScript.CODE_LIMIT;i++){
399             String name = UScript.getName(i);
400             if(name.equals("") ){
401                 errln("FAILED: getName for code : "+i);
402             }
403             String shortName= UScript.getShortName(i);
404             if(shortName.equals("")){
405                 errln("FAILED: getName for code : "+i);
406             }
407         }
408     }
409     public void TestAllCodepoints(){
410         int code;
411         //String oldId="";
412         //String oldAbbrId="";
413         for( int i =0; i <= 0x10ffff; i++){
414           code =UScript.INVALID_CODE;
415           code = UScript.getScript(i);
416           if(code==UScript.INVALID_CODE){
417                 errln("UScript.getScript for codepoint 0x"+ hex(i)+" failed");
418           }
419           String id =UScript.getName(code);
420           if(id.indexOf("INVALID")>=0){
421                  errln("UScript.getScript for codepoint 0x"+ hex(i)+" failed");
422           }
423           String abbr = UScript.getShortName(code);
424           if(abbr.indexOf("INV")>=0){
425                  errln("UScript.getScript for codepoint 0x"+ hex(i)+" failed");
426           }
427         }
428     }
429     public void TestNewCode(){
430         /*
431          * These script codes were originally added to ICU pre-3.6, so that ICU would
432          * have all ISO 15924 script codes. ICU was then based on Unicode 4.1.
433          * These script codes were added with only short names because we don't
434          * want to invent long names ourselves.
435          * Unicode 5 and later encode some of these scripts and give them long names.
436          * Whenever this happens, the long script names here need to be updated.
437          */
438         String[] expectedLong = new String[]{
439             "Balinese", "Batak", "Blis", "Brahmi", "Cham", "Cirt", "Cyrs", "Egyd", "Egyh", "Egyptian_Hieroglyphs", 
440             "Geok", "Hans", "Hant", "Hmng", "Hung", "Inds", "Javanese", "Kayah_Li", "Latf", "Latg", 
441             "Lepcha", "Lina", "Mandaic", "Maya", "Mero", "Nko", "Old_Turkic", "Perm", "Phags_Pa", "Phoenician", 
442             "Plrd", "Roro", "Sara", "Syre", "Syrj", "Syrn", "Teng", "Vai", "Visp", "Cuneiform", 
443             "Zxxx", "Unknown",
444             "Carian", "Jpan", "Tai_Tham", "Lycian", "Lydian", "Ol_Chiki", "Rejang", "Saurashtra", "Sgnw", "Sundanese",
445             "Moon", "Meetei_Mayek",
446
447             // ICU 4.0
448             "Imperial_Aramaic", "Avestan", "Cakm", "Kore",
449             "Kaithi", "Mani", "Inscriptional_Pahlavi", "Phlp", "Phlv", "Inscriptional_Parthian", "Samaritan", "Tai_Viet",
450             "Zmth", "Zsym",
451             /* new in ICU 4.4 */
452             "Bamum", "Lisu", "Nkgb", "Old_South_Arabian",
453             /* new in ICU 4.6 */
454             "Bass", "Dupl", "Elba", "Gran", "Kpel", "Loma", "Mend", "Merc",
455             "Narb", "Nbat", "Palm", "Sind", "Wara",
456             /* new in ICU 4.8 */
457             "Afak", "Jurc", "Mroo", "Nshu", "Shrd", "Sora", "Takr", "Tang", "Wole",
458         };
459         String[] expectedShort = new String[]{
460             "Bali", "Batk", "Blis", "Brah", "Cham", "Cirt", "Cyrs", "Egyd", "Egyh", "Egyp", 
461             "Geok", "Hans", "Hant", "Hmng", "Hung", "Inds", "Java", "Kali", "Latf", "Latg", 
462             "Lepc", "Lina", "Mand", "Maya", "Mero", "Nkoo", "Orkh", "Perm", "Phag", "Phnx", 
463             "Plrd", "Roro", "Sara", "Syre", "Syrj", "Syrn", "Teng", "Vaii", "Visp", "Xsux", 
464             "Zxxx", "Zzzz",
465             "Cari", "Jpan", "Lana", "Lyci", "Lydi", "Olck", "Rjng", "Saur", "Sgnw", "Sund",
466             "Moon", "Mtei", 
467
468             // ICU 4.0
469             "Armi", "Avst", "Cakm", "Kore", "Kthi", "Mani", "Phli", "Phlp", "Phlv", "Prti",
470             "Samr", "Tavt", "Zmth", "Zsym",
471             /* new in ICU 4.4 */
472             "Bamu", "Lisu", "Nkgb", "Sarb", 
473             /* new in ICU 4.6 */
474             "Bass", "Dupl", "Elba", "Gran", "Kpel", "Loma", "Mend", "Merc",
475             "Narb", "Nbat", "Palm", "Sind", "Wara",
476             /* new in ICU 4.8 */
477             "Afak", "Jurc", "Mroo", "Nshu", "Shrd", "Sora", "Takr", "Tang", "Wole",
478         };
479         if(expectedLong.length!=(UScript.CODE_LIMIT-UScript.BALINESE)) {
480             errln("need to add new script codes in lang.TestUScript.java!");
481             return;
482         }
483         int j = 0;
484         int i = 0;
485         for(i=UScript.BALINESE; i<UScript.CODE_LIMIT; i++, j++){
486             String name = UScript.getName(i);
487             if(name==null || !name.equals(expectedLong[j])){
488                 errln("UScript.getName failed for code"+ i + name +"!=" +expectedLong[j]);
489             }
490             name = UScript.getShortName(i);
491             if(name==null || !name.equals(expectedShort[j])){
492                 errln("UScript.getShortName failed for code"+ i + name +"!=" +expectedShort[j]);
493             }
494         }
495         for(i=0; i<expectedLong.length; i++){
496             int[] ret = UScript.getCode(expectedShort[i]);
497             if(ret.length>1){
498                 errln("UScript.getCode did not return expected number of codes for script"+ expectedShort[i]+". EXPECTED: 1 GOT: "+ ret.length);
499             }
500             if(ret[0]!= (UScript.BALINESE+i)){
501                 errln("UScript.getCode did not return expected code for script"+ expectedShort[i]+". EXPECTED: "+ (UScript.BALINESE+i)+" GOT: %i\n"+ ret[0] );
502             }
503         }
504     }
505  }