2 *******************************************************************************
\r
3 * Copyright (C) 1996-2010, International Business Machines Corporation and *
\r
4 * others. All Rights Reserved. *
\r
5 *******************************************************************************
\r
8 package com.ibm.icu.dev.test.lang;
\r
10 import java.util.Locale;
\r
12 import com.ibm.icu.dev.test.TestFmwk;
\r
13 import com.ibm.icu.lang.UScript;
\r
14 import com.ibm.icu.util.ULocale;
\r
16 public class TestUScript extends TestFmwk {
\r
/**
 * Constructor for the UScript test class.
 * NOTE(review): the constructor body is not visible in this copy of the file.
 */
21 public TestUScript()
\r
/**
 * Command-line entry point: creates a TestUScript and passes the arguments to its
 * run method.
 *
 * @param args arguments forwarded unchanged to run(args)
 * @throws Exception declared on the signature; anything run(args) raises propagates
 */
25 public static void main(String[] args) throws Exception {
\r
26 new TestUScript().run(args);
\r
/**
 * Exercises UScript.getCode(ULocale).
 *
 * Part 1 walks the testNames table, calls UScript.getCode for each locale and compares
 * the first returned code with the matching entry of expected. Mismatches are written
 * with logln and numErrors is handed to reportDataErrors.
 *
 * Part 2 installs an "eo_DE" locale as the default ULocale, checks that its first
 * script code is UScript.LATIN, and then puts the previously saved default back.
 *
 * NOTE(review): several original lines are missing from this copy of the file (array
 * terminators, the declarations of i and numErrors, the conditions surrounding the
 * null-result branch, and apparently some table rows: DEVANAGARI has no visible
 * locale). Confirm the exact control flow and table contents against the full source.
 */
28 public void TestLocaleGetCode(){
\r
29 final ULocale[] testNames={
\r
31 new ULocale("en"), new ULocale("en_US"),
\r
32 new ULocale("sr"), new ULocale("ta") ,
\r
33 new ULocale("te_IN"),
\r
35 new ULocale("he"), new ULocale("ar"),
\r
36 new ULocale("abcde"),
\r
37 new ULocale("abcde_cdef"),
\r
40 final int[] expected ={
\r
41 /* locales should return */
\r
42 UScript.LATIN, UScript.LATIN,
\r
43 UScript.CYRILLIC, UScript.TAMIL,
\r
44 UScript.TELUGU,UScript.DEVANAGARI,
\r
45 UScript.HEBREW, UScript.ARABIC,
\r
46 UScript.INVALID_CODE,UScript.INVALID_CODE,
\r
52 for( ; i<testNames.length; i++){
\r
53 int[] code = UScript.getCode(testNames[i]);
\r
// NOTE(review): presumably guarded by a null check on code that is not visible here.
56 if(expected[i]!=UScript.INVALID_CODE){
\r
57 logln("Error getting script code Got: null" + " Expected: " +expected[i] +" for name "+testNames[i]);
\r
60 // getCode returns null if the code could not be found
\r
// Only the first returned code is compared with the expectation.
63 if((code[0] != expected[i])){
\r
64 logln("Error getting script code Got: " +code[0] + " Expected: " +expected[i] +" for name "+testNames[i]);
\r
68 reportDataErrors(numErrors);
\r
// Remember the current default locale so it can be put back after the check below.
71 ULocale defaultLoc = ULocale.getDefault();
\r
72 ULocale esparanto = new ULocale("eo_DE");
\r
73 ULocale.setDefault(esparanto);
\r
74 int[] code = UScript.getCode(esparanto);
\r
76 if( code[0] != UScript.LATIN){
\r
77 errln("Did not get the expected script code for Esparanto");
\r
// NOTE(review): presumably the branch taken when code is null; the condition is not visible here.
80 warnln("Could not load the locale data.");
\r
// Put back the default locale saved before the check.
82 ULocale.setDefault(defaultLoc);
\r
/**
 * Reports the result of a data-dependent check, given the number of errors seen.
 *
 * Visible behaviour: when isModularBuild() or noData() is true a warning is issued
 * with warnln; there is also an errln call whose message includes numErrors.
 * NOTE(review): the lines joining these two calls (likely a guard on numErrors and an
 * else) are missing from this copy of the file. Confirm against the full source.
 *
 * @param numErrors error count included in the errln message
 */
85 private void reportDataErrors(int numErrors) {
\r
87 // assume missing locale data, so not an error, just a warning
\r
88 if (isModularBuild() || noData()) {
\r
89 // if nodata is set don't even warn
\r
// NOTE(review): the comment above says not to warn, yet the next call is warnln. Confirm which is intended.
90 warnln("Could not find locale data");
\r
92 errln("encountered " + numErrors + " errors.");
\r
/**
 * Checks locales that map to more than one script code.
 *
 * The first loop calls UScript.getCode(String) for each entry of testNames and compares
 * every returned code, position by position, with the matching row of expected. The
 * second loop repeats the comparison through UScript.getCode(Locale) using testLocales.
 * Mismatches are written with logln, and numErrors is passed to reportDataErrors after
 * each loop.
 *
 * NOTE(review): this copy of the file lacks one row of expected (three rows are visible
 * for four names), the contents of testLocales, the numErrors bookkeeping and any
 * null/length guards. Two things to confirm against the full source:
 *   - the second loop is bounded by testNames.length while indexing testLocales, so the
 *     two arrays must have the same length;
 *   - the inner loops run to code.length while indexing expt[j], so a result longer
 *     than the expected row would index past the end of expt unless a guard exists.
 */
97 public void TestMultipleCode(){
\r
98 final String[] testNames = { "ja" ,"ko_KR","zh","zh_TW"};
\r
99 final int[][] expected = {
\r
100 {UScript.KATAKANA,UScript.HIRAGANA,UScript.HAN},
\r
101 {UScript.HANGUL, UScript.HAN},
\r
103 {UScript.HAN,UScript.BOPOMOFO}
\r
107 for(int i=0; i<testNames.length;i++){
\r
108 int[] code = UScript.getCode(testNames[i]);
\r
109 int[] expt = (int[]) expected[i];
\r
// Compare the returned codes with the expected row, position by position.
111 for(int j =0; j< code.length;j++){
\r
112 if(code[j]!=expt[j]){
\r
114 logln("Error getting script code Got: " +code[j] + " Expected: " +expt[j] +" for name "+testNames[i]);
\r
// NOTE(review): presumably the branch for a null result; its condition is not visible here.
119 logln("Error getting script code for name "+testNames[i]);
\r
122 reportDataErrors(numErrors);
\r
124 //cover UScript.getCode(Locale)
\r
125 Locale[] testLocales = new Locale[] {
\r
130 logln("Testing UScript.getCode(Locale) ...");
\r
// Bounded by testNames.length even though testLocales is the array being indexed.
132 for(int i=0; i<testNames.length;i++){
\r
133 logln(" Testing locale: " + testLocales[i].getDisplayName());
\r
134 int[] code = UScript.getCode(testLocales[i]);
\r
135 int[] expt = (int[]) expected[i];
\r
137 for(int j =0; j< code.length;j++){
\r
138 if(code[j]!=expt[j]){
\r
140 logln(" Error getting script code Got: " +code[j] + " Expected: " +expt[j] +" for name "+testNames[i]);
\r
145 logln(" Error getting script code for name "+testNames[i]);
\r
148 reportDataErrors(numErrors);
\r
/**
 * Exercises UScript.getCode(String) with locale names, four-letter script
 * abbreviations, full script names in upper and lower case, and a few boundary values.
 *
 * For each entry of testNames the first returned code is compared with the matching
 * entry of expected; mismatches are written with logln and numErrors is passed to
 * reportDataErrors.
 *
 * NOTE(review): several rows of both tables, the declarations of i and numErrors and
 * the conditions around the null-result handling are missing from this copy of the
 * file, so the visible testNames and expected entries do not line up one-to-one.
 * Confirm against the full source.
 */
151 public void TestGetCode(){
\r
153 final String[] testNames={
\r
155 "en", "en_US", "sr", "ta", "gu", "te_IN",
\r
158 "Hani", "Hang","Hebr","Hira",
\r
159 "Knda","Kana","Khmr","Lao",
\r
160 "Latn",/*"Latf","Latg",*/
\r
164 "CYRILLIC","DESERET","DEVANAGARI","ETHIOPIC","GEORGIAN",
\r
165 "GOTHIC", "GREEK", "GUJARATI", "COMMON", "INHERITED",
\r
166 /* test lower case names */
\r
167 "malayalam", "mongolian", "myanmar", "ogham", "old-italic",
\r
168 "oriya", "runic", "sinhala", "syriac","tamil",
\r
169 "telugu", "thaana", "thai", "tibetan",
\r
170 /* test the bounds*/
\r
171 "Cans", "arabic","Yi","Zyyy"
\r
173 final int[] expected ={
\r
174 /* locales should return */
\r
175 UScript.LATIN, UScript.LATIN,
\r
176 UScript.CYRILLIC, UScript.TAMIL, UScript.GUJARATI,
\r
177 UScript.TELUGU,UScript.DEVANAGARI,
\r
178 UScript.HEBREW, UScript.ARABIC,
\r
179 /* abbr should return */
\r
180 UScript.HAN, UScript.HANGUL, UScript.HEBREW, UScript.HIRAGANA,
\r
181 UScript.KANNADA, UScript.KATAKANA, UScript.KHMER, UScript.LAO,
\r
182 UScript.LATIN,/* UScript.LATIN, UScript.LATIN,*/
\r
183 UScript.MALAYALAM, UScript.MONGOLIAN,
\r
184 /* names should return */
\r
185 UScript.CYRILLIC, UScript.DESERET, UScript.DEVANAGARI, UScript.ETHIOPIC, UScript.GEORGIAN,
\r
186 UScript.GOTHIC, UScript.GREEK, UScript.GUJARATI, UScript.COMMON, UScript.INHERITED,
\r
187 /* lower case names should return */
\r
188 UScript.MALAYALAM, UScript.MONGOLIAN, UScript.MYANMAR, UScript.OGHAM, UScript.OLD_ITALIC,
\r
189 UScript.ORIYA, UScript.RUNIC, UScript.SINHALA, UScript.SYRIAC, UScript.TAMIL,
\r
190 UScript.TELUGU, UScript.THAANA, UScript.THAI, UScript.TIBETAN,
\r
192 UScript.CANADIAN_ABORIGINAL, UScript.ARABIC, UScript.YI, UScript.COMMON
\r
197 for( ; i<testNames.length; i++){
\r
198 int[] code = UScript.getCode(testNames[i]);
\r
// NOTE(review): presumably nested under a null check on code that is not visible here.
200 if(expected[i]==UScript.INVALID_CODE){
\r
201 // getCode returns null if the code could not be found
\r
204 // currently commented out until jitterbug#2678 is fixed
\r
205 logln("Error getting script code Got: null" + " Expected: " +expected[i] +" for name "+testNames[i]);
\r
// Only the first returned code is compared with the expectation.
209 if((code[0] != expected[i])){
\r
210 logln("Error getting script code Got: " +code[0] + " Expected: " +expected[i] +" for name "+testNames[i]);
\r
214 reportDataErrors(numErrors);
\r
/**
 * Checks UScript.getName for a table of script codes.
 *
 * Each code in testCodes is converted with UScript.getName and compared, using equals,
 * with the entry of expectedNames at the same index. Mismatches are written with logln
 * and a warnln message carrying numErrors is issued.
 *
 * NOTE(review): the declarations of i and numErrors, the loop increment, the array
 * terminators and the condition (if any) guarding the final warnln are missing from
 * this copy of the file. Confirm against the full source.
 */
217 public void TestGetName(){
\r
219 final int[] testCodes={
\r
220 /* names should return */
\r
221 UScript.CYRILLIC, UScript.DESERET, UScript.DEVANAGARI, UScript.ETHIOPIC, UScript.GEORGIAN,
\r
222 UScript.GOTHIC, UScript.GREEK, UScript.GUJARATI,
\r
225 final String[] expectedNames={
\r
228 "Cyrillic","Deseret","Devanagari","Ethiopic","Georgian",
\r
229 "Gothic", "Greek", "Gujarati",
\r
233 while(i< testCodes.length){
\r
234 String scriptName = UScript.getName(testCodes[i]);
\r
235 if(!expectedNames[i].equals(scriptName)){
\r
// NOTE(review): the message says "abbreviations" although long names are compared here.
236 logln("Error getting abbreviations Got: " +scriptName +" Expected: "+expectedNames[i]);
\r
242 warnln("encountered " + numErrors + " errors in UScript.getName()");
\r
/**
 * Checks UScript.getShortName for a table of script codes.
 *
 * Each code in testCodes is converted with UScript.getShortName and compared, using
 * equals, with the entry of expectedAbbr at the same index. Mismatches are written with
 * logln and a warnln message carrying numErrors is issued.
 *
 * NOTE(review): some table rows, the declarations of i and numErrors, the loop
 * increment and the condition (if any) guarding the final warnln are missing from this
 * copy of the file, so the visible testCodes and expectedAbbr entries do not line up
 * one-to-one. Confirm against the full source.
 */
246 public void TestGetShortName(){
\r
247 final int[] testCodes={
\r
248 /* abbr should return */
\r
249 UScript.HAN, UScript.HANGUL, UScript.HEBREW, UScript.HIRAGANA,
\r
250 UScript.KANNADA, UScript.KATAKANA, UScript.KHMER, UScript.LAO,
\r
252 UScript.MALAYALAM, UScript.MONGOLIAN,
\r
255 final String[] expectedAbbr={
\r
257 "Hani", "Hang","Hebr","Hira",
\r
258 "Knda","Kana","Khmr","Laoo",
\r
264 while(i<testCodes.length){
\r
265 String shortName = UScript.getShortName(testCodes[i]);
\r
266 if(!expectedAbbr[i].equals(shortName)){
\r
267 logln("Error getting abbreviations Got: " +shortName+ " Expected: " +expectedAbbr[i]);
\r
273 warnln("encountered " + numErrors + " errors in UScript.getShortName()");
\r
/**
 * Checks UScript.getScript against a table of code points.
 *
 * Every row of codepoints holds a code point in column 0 and the script code expected
 * for it in column 1. Each code point is passed to UScript.getScript and the result is
 * compared with the expected code. A mismatch is written with logln, and an errln
 * message "UScript.getScript failed." is present after the loop.
 *
 * NOTE(review): the declaration of i, the loop increment, the updates of the passed
 * flag and the condition guarding the final errln are missing from this copy of the
 * file. Confirm against the full source.
 */
276 public void TestGetScript(){
\r
// Each row is {code point, expected script code}.
277 int codepoints[][] = new int[][] {
\r
278 {0x0000FF9D, UScript.KATAKANA },
\r
279 {0x0000FFBE, UScript.HANGUL },
\r
280 {0x0000FFC7, UScript.HANGUL },
\r
281 {0x0000FFCF, UScript.HANGUL },
\r
282 {0x0000FFD7, UScript.HANGUL},
\r
283 {0x0000FFDC, UScript.HANGUL},
\r
284 {0x00010300, UScript.OLD_ITALIC},
\r
285 {0x00010330, UScript.GOTHIC},
\r
286 {0x0001034A, UScript.GOTHIC},
\r
287 {0x00010400, UScript.DESERET},
\r
288 {0x00010428, UScript.DESERET},
\r
289 {0x0001D167, UScript.INHERITED},
\r
290 {0x0001D17B, UScript.INHERITED},
\r
291 {0x0001D185, UScript.INHERITED},
\r
292 {0x0001D1AA, UScript.INHERITED},
\r
293 {0x00020000, UScript.HAN},
\r
294 {0x00000D02, UScript.MALAYALAM},
\r
295 {0x00000D00, UScript.UNKNOWN},
\r
296 {0x00000000, UScript.COMMON},
\r
297 {0x0001D169, UScript.INHERITED },
\r
298 {0x0001D182, UScript.INHERITED },
\r
299 {0x0001D18B, UScript.INHERITED },
\r
300 {0x0001D1AD, UScript.INHERITED },
\r
304 int code = UScript.INVALID_CODE;
\r
305 boolean passed = true;
\r
307 while(i< codepoints.length){
\r
308 code = UScript.getScript(codepoints[i][0]);
\r
310 if(code != codepoints[i][1]){
\r
311 logln("UScript.getScript for codepoint 0x"+ hex(codepoints[i][0])+" failed");
\r
// NOTE(review): presumably reported only when passed is false; the guard is not visible here.
318 errln("UScript.getScript failed.");
\r
321 public void TestScriptNames(){
\r
322 for(int i=0; i<UScript.CODE_LIMIT;i++){
\r
323 String name = UScript.getName(i);
\r
324 if(name.equals("") ){
\r
325 errln("FAILED: getName for code : "+i);
\r
327 String shortName= UScript.getShortName(i);
\r
328 if(shortName.equals("")){
\r
329 errln("FAILED: getName for code : "+i);
\r
333 public void TestAllCodepoints(){
\r
336 //String oldAbbrId="";
\r
337 for( int i =0; i <= 0x10ffff; i++){
\r
338 code =UScript.INVALID_CODE;
\r
339 code = UScript.getScript(i);
\r
340 if(code==UScript.INVALID_CODE){
\r
341 errln("UScript.getScript for codepoint 0x"+ hex(i)+" failed");
\r
343 String id =UScript.getName(code);
\r
344 if(id.indexOf("INVALID")>=0){
\r
345 errln("UScript.getScript for codepoint 0x"+ hex(i)+" failed");
\r
347 String abbr = UScript.getShortName(code);
\r
348 if(abbr.indexOf("INV")>=0){
\r
349 errln("UScript.getScript for codepoint 0x"+ hex(i)+" failed");
\r
/**
 * Checks the script codes from UScript.BALINESE up to (not including) UScript.CODE_LIMIT.
 *
 * expectedLong and expectedShort list, in code order starting at UScript.BALINESE, the
 * long and short names expected for those codes. The method:
 *   - reports an error when expectedLong.length differs from
 *     UScript.CODE_LIMIT - UScript.BALINESE, as a reminder to extend the tables;
 *   - compares UScript.getName and UScript.getShortName for each code with the tables;
 *   - feeds each short name back through UScript.getCode and expects the first
 *     returned code to be UScript.BALINESE + i.
 *
 * NOTE(review): many lines are missing from this copy of the file (the comment
 * delimiters around the explanatory text, several table rows, the declarations of i
 * and j, and the guard in front of the "expected number of codes" error), so the
 * visible rows of expectedLong and expectedShort do not line up one-to-one. Confirm
 * against the full source.
 * NOTE(review): the last errln message contains " GOT: %i\n", which looks like a
 * leftover C printf format; with string concatenation it is emitted literally in
 * front of the value.
 */
353 public void TestNewCode(){
\r
355 * These script codes were originally added to ICU pre-3.6, so that ICU would
\r
356 * have all ISO 15924 script codes. ICU was then based on Unicode 4.1.
\r
357 * These script codes were added with only short names because we don't
\r
358 * want to invent long names ourselves.
\r
359 * Unicode 5 and later encode some of these scripts and give them long names.
\r
360 * Whenever this happens, the long script names here need to be updated.
\r
362 String[] expectedLong = new String[]{
\r
363 "Balinese", "Batk", "Blis", "Brah", "Cham", "Cirt", "Cyrs", "Egyd", "Egyh", "Egyptian_Hieroglyphs",
\r
364 "Geok", "Hans", "Hant", "Hmng", "Hung", "Inds", "Javanese", "Kayah_Li", "Latf", "Latg",
\r
365 "Lepcha", "Lina", "Mand", "Maya", "Mero", "Nko", "Old_Turkic", "Perm", "Phags_Pa", "Phoenician",
\r
366 "Plrd", "Roro", "Sara", "Syre", "Syrj", "Syrn", "Teng", "Vai", "Visp", "Cuneiform",
\r
368 "Carian", "Jpan", "Tai_Tham", "Lycian", "Lydian", "Ol_Chiki", "Rejang", "Saurashtra", "Sgnw", "Sundanese",
\r
369 "Moon", "Meetei_Mayek",
\r
372 "Imperial_Aramaic", "Avestan", "Cakm", "Kore",
\r
373 "Kaithi", "Mani", "Inscriptional_Pahlavi", "Phlp", "Phlv", "Inscriptional_Parthian", "Samaritan", "Tai_Viet",
\r
375 /* new in ICU 4.4 */
\r
376 "Bamum", "Lisu", "Nkgb", "Old_South_Arabian",
\r
378 String[] expectedShort = new String[]{
\r
379 "Bali", "Batk", "Blis", "Brah", "Cham", "Cirt", "Cyrs", "Egyd", "Egyh", "Egyp",
\r
380 "Geok", "Hans", "Hant", "Hmng", "Hung", "Inds", "Java", "Kali", "Latf", "Latg",
\r
381 "Lepc", "Lina", "Mand", "Maya", "Mero", "Nkoo", "Orkh", "Perm", "Phag", "Phnx",
\r
382 "Plrd", "Roro", "Sara", "Syre", "Syrj", "Syrn", "Teng", "Vaii", "Visp", "Xsux",
\r
384 "Cari", "Jpan", "Lana", "Lyci", "Lydi", "Olck", "Rjng", "Saur", "Sgnw", "Sund",
\r
388 "Armi", "Avst", "Cakm", "Kore", "Kthi", "Mani", "Phli", "Phlp", "Phlv", "Prti",
\r
389 "Samr", "Tavt", "Zmth", "Zsym",
\r
390 /* new in ICU 4.4 */
\r
391 "Bamu", "Lisu", "Nkgb", "Sarb",
\r
393 if(expectedLong.length!=(UScript.CODE_LIMIT-UScript.BALINESE)) {
\r
394 errln("need to add new script codes in lang.TestUScript.java!");
\r
399 for(i=UScript.BALINESE; i<UScript.CODE_LIMIT; i++, j++){
\r
400 String name = UScript.getName(i);
\r
401 if(name==null || !name.equals(expectedLong[j])){
\r
402 errln("UScript.getName failed for code"+ i + name +"!=" +expectedLong[j]);
\r
404 name = UScript.getShortName(i);
\r
405 if(name==null || !name.equals(expectedShort[j])){
\r
406 errln("UScript.getShortName failed for code"+ i + name +"!=" +expectedShort[j]);
\r
409 for(i=0; i<expectedLong.length; i++){
\r
410 int[] ret = UScript.getCode(expectedShort[i]);
\r
412 errln("UScript.getCode did not return expected number of codes for script"+ expectedShort[i]+". EXPECTED: 1 GOT: "+ ret.length);
\r
414 if(ret[0]!= (UScript.BALINESE+i)){
\r
415 errln("UScript.getCode did not return expected code for script"+ expectedShort[i]+". EXPECTED: "+ (UScript.BALINESE+i)+" GOT: %i\n"+ ret[0] );
\r