]> gitweb.fperrin.net Git - Dictionary.git/blobdiff - jars/icu4j-52_1/main/tests/core/src/com/ibm/icu/dev/test/lang/TestUScript.java
Upgrade ICU4J.
[Dictionary.git] / jars / icu4j-52_1 / main / tests / core / src / com / ibm / icu / dev / test / lang / TestUScript.java
similarity index 70%
rename from jars/icu4j-4_8_1_1/main/tests/core/src/com/ibm/icu/dev/test/lang/TestUScript.java
rename to jars/icu4j-52_1/main/tests/core/src/com/ibm/icu/dev/test/lang/TestUScript.java
index a6056a13f00d96476dae1a8f6fb8618a08556eff..e3d7434869d229fc52b0db20a624500768ce585b 100644 (file)
@@ -1,7 +1,7 @@
 /**
 *******************************************************************************
-* Copyright (C) 1996-2011, International Business Machines Corporation and    *
-* others. All Rights Reserved.                                                *
+* Copyright (C) 1996-2013, International Business Machines Corporation and
+* others. All Rights Reserved.
 *******************************************************************************
 */
 
@@ -11,7 +11,10 @@ import java.util.BitSet;
 import java.util.Locale;
 
 import com.ibm.icu.dev.test.TestFmwk;
+import com.ibm.icu.lang.UProperty;
 import com.ibm.icu.lang.UScript;
+import com.ibm.icu.lang.UScript.ScriptUsage;
+import com.ibm.icu.text.UnicodeSet;
 import com.ibm.icu.util.ULocale;
 
 public class TestUScript extends TestFmwk {
@@ -338,31 +341,31 @@ public class TestUScript extends TestFmwk {
             !UScript.hasScript(0x063f, UScript.SYRIAC) &&
             !UScript.hasScript(0x063f, UScript.THAANA))
         ) {
-            errln("UScript.hasScript(U+063F, ...) is wrong\n");
+            errln("UScript.hasScript(U+063F, ...) is wrong");
         }
         if(!(
-            UScript.hasScript(0x0640, UScript.COMMON) &&  /* main Script value */
+            !UScript.hasScript(0x0640, UScript.COMMON) &&  /* main Script value */
             UScript.hasScript(0x0640, UScript.ARABIC) &&
             UScript.hasScript(0x0640, UScript.SYRIAC) &&
             !UScript.hasScript(0x0640, UScript.THAANA))
         ) {
-            errln("UScript.hasScript(U+0640, ...) is wrong\n");
+            errln("UScript.hasScript(U+0640, ...) is wrong");
         }
         if(!(
-            UScript.hasScript(0x0650, UScript.INHERITED) &&  /* main Script value */
+            !UScript.hasScript(0x0650, UScript.INHERITED) &&  /* main Script value */
             UScript.hasScript(0x0650, UScript.ARABIC) &&
             UScript.hasScript(0x0650, UScript.SYRIAC) &&
             !UScript.hasScript(0x0650, UScript.THAANA))
         ) {
-            errln("UScript.hasScript(U+0650, ...) is wrong\n");
+            errln("UScript.hasScript(U+0650, ...) is wrong");
         }
         if(!(
-            UScript.hasScript(0x0660, UScript.COMMON) &&  /* main Script value */
+            !UScript.hasScript(0x0660, UScript.COMMON) &&  /* main Script value */
             UScript.hasScript(0x0660, UScript.ARABIC) &&
             !UScript.hasScript(0x0660, UScript.SYRIAC) &&
             UScript.hasScript(0x0660, UScript.THAANA))
         ) {
-            errln("UScript.hasScript(U+0660, ...) is wrong\n");
+            errln("UScript.hasScript(U+0660, ...) is wrong");
         }
         if(!(
             !UScript.hasScript(0xfdf2, UScript.COMMON) &&
@@ -370,30 +373,149 @@ public class TestUScript extends TestFmwk {
             !UScript.hasScript(0xfdf2, UScript.SYRIAC) &&
             UScript.hasScript(0xfdf2, UScript.THAANA))
         ) {
-            errln("UScript.hasScript(U+FDF2, ...) is wrong\n");
+            errln("UScript.hasScript(U+FDF2, ...) is wrong");
+        }
+        if(UScript.hasScript(0x0640, 0xaffe)) {
+            // An unguarded implementation might go into an infinite loop.
+            errln("UScript.hasScript(U+0640, bogus 0xaffe) is wrong");
         }
     }
 
     public void TestGetScriptExtensions() {
         BitSet scripts=new BitSet(UScript.CODE_LIMIT);
 
+        /* invalid code points */
+        if(UScript.getScriptExtensions(-1, scripts)!=UScript.UNKNOWN || scripts.cardinality()!=1 ||
+                !scripts.get(UScript.UNKNOWN)) {
+            errln("UScript.getScriptExtensions(-1) is not {UNKNOWN}");
+        }
+        if(UScript.getScriptExtensions(0x110000, scripts)!=UScript.UNKNOWN || scripts.cardinality()!=1 ||
+                !scripts.get(UScript.UNKNOWN)) {
+            errln("UScript.getScriptExtensions(0x110000) is not {UNKNOWN}");
+        }
+
         /* normal usage */
-        if(!UScript.getScriptExtensions(0x063f, scripts).isEmpty()) {
-            errln("UScript.getScriptExtensions(U+063F) is not empty");
+        if(UScript.getScriptExtensions(0x063f, scripts)!=UScript.ARABIC || scripts.cardinality()!=1 ||
+                !scripts.get(UScript.ARABIC)) {
+            errln("UScript.getScriptExtensions(U+063F) is not {ARABIC}");
         }
-        if(UScript.getScriptExtensions(0x0640, scripts).cardinality()!=2 || !scripts.get(UScript.ARABIC) || !scripts.get(UScript.SYRIAC)) {
+        if(UScript.getScriptExtensions(0x0640, scripts)!=-3 || scripts.cardinality()!=3 ||
+           !scripts.get(UScript.ARABIC) || !scripts.get(UScript.SYRIAC) || !scripts.get(UScript.MANDAIC)
+        ) {
             errln("UScript.getScriptExtensions(U+0640) failed");
         }
-        UScript.getScriptExtensions(0xfdf2, scripts);
-        if(scripts.cardinality()!=2 || !scripts.get(UScript.ARABIC) || !scripts.get(UScript.THAANA)) {
+        if(UScript.getScriptExtensions(0xfdf2, scripts)!=-2 || scripts.cardinality()!=2 ||
+                !scripts.get(UScript.ARABIC) || !scripts.get(UScript.THAANA)) {
             errln("UScript.getScriptExtensions(U+FDF2) failed");
         }
-        UScript.getScriptExtensions(0xff65, scripts);
-        if(scripts.cardinality()!=6 || !scripts.get(UScript.BOPOMOFO) || !scripts.get(UScript.YI)) {
+        if(UScript.getScriptExtensions(0xff65, scripts)!=-6 || scripts.cardinality()!=6 ||
+                !scripts.get(UScript.BOPOMOFO) || !scripts.get(UScript.YI)) {
             errln("UScript.getScriptExtensions(U+FF65) failed");
         }
     }
 
+    public void TestScriptMetadataAPI() {
+        /* API & code coverage. */
+        String sample = UScript.getSampleString(UScript.LATIN);
+        if(sample.length()!=1 || UScript.getScript(sample.charAt(0))!=UScript.LATIN) {
+            errln("UScript.getSampleString(Latn) failed");
+        }
+        sample = UScript.getSampleString(UScript.INVALID_CODE);
+        if(sample.length()!=0) {
+            errln("UScript.getSampleString(invalid) failed");
+        }
+
+        if(UScript.getUsage(UScript.LATIN)!=ScriptUsage.RECOMMENDED ||
+                UScript.getUsage(UScript.YI)!=ScriptUsage.ASPIRATIONAL ||
+                UScript.getUsage(UScript.CHEROKEE)!=ScriptUsage.LIMITED_USE ||
+                UScript.getUsage(UScript.COPTIC)!=ScriptUsage.EXCLUDED ||
+                UScript.getUsage(UScript.CIRTH)!=ScriptUsage.NOT_ENCODED ||
+                UScript.getUsage(UScript.INVALID_CODE)!=ScriptUsage.NOT_ENCODED ||
+                UScript.getUsage(UScript.CODE_LIMIT)!=ScriptUsage.NOT_ENCODED) {
+            errln("UScript.getUsage() failed");
+        }
+
+        if(UScript.isRightToLeft(UScript.LATIN) ||
+                UScript.isRightToLeft(UScript.CIRTH) ||
+                !UScript.isRightToLeft(UScript.ARABIC) ||
+                !UScript.isRightToLeft(UScript.HEBREW)) {
+            errln("UScript.isRightToLeft() failed");
+        }
+
+        if(UScript.breaksBetweenLetters(UScript.LATIN) ||
+                UScript.breaksBetweenLetters(UScript.CIRTH) ||
+                !UScript.breaksBetweenLetters(UScript.HAN) ||
+                !UScript.breaksBetweenLetters(UScript.THAI)) {
+            errln("UScript.breaksBetweenLetters() failed");
+        }
+
+        if(UScript.isCased(UScript.CIRTH) ||
+                UScript.isCased(UScript.HAN) ||
+                !UScript.isCased(UScript.LATIN) ||
+                !UScript.isCased(UScript.GREEK)) {
+            errln("UScript.isCased() failed");
+        }
+    }
+
+    /**
+     * Maps a special script code to the most common script of its encoded characters.
+     */
+    private static final int getCharScript(int script) {
+        switch(script) {
+        case UScript.SIMPLIFIED_HAN:
+        case UScript.TRADITIONAL_HAN:
+            return UScript.HAN;
+        case UScript.JAPANESE:
+            return UScript.HIRAGANA;
+        case UScript.KOREAN:
+            return UScript.HANGUL;
+        default:
+            return script;
+        }
+    }
+
+    public void TestScriptMetadata() {
+        UnicodeSet rtl = new UnicodeSet("[[:bc=R:][:bc=AL:]-[:Cn:]-[:sc=Common:]]");
+        // So far, sample characters are uppercase.
+        // Georgian is special.
+        UnicodeSet cased = new UnicodeSet("[[:Lu:]-[:sc=Common:]-[:sc=Geor:]]");
+        for(int sc = 0; sc < UScript.CODE_LIMIT; ++sc) {
+            String sn = UScript.getShortName(sc);
+            ScriptUsage usage = UScript.getUsage(sc);
+            String sample = UScript.getSampleString(sc);
+            UnicodeSet scriptSet = new UnicodeSet();
+            scriptSet.applyIntPropertyValue(UProperty.SCRIPT, sc);
+            if(usage == ScriptUsage.NOT_ENCODED) {
+                assertTrue(sn + " not encoded, no sample", sample.length() == 0);  // Java 6: sample.isEmpty()
+                assertFalse(sn + " not encoded, not RTL", UScript.isRightToLeft(sc));
+                assertFalse(sn + " not encoded, not LB letters", UScript.breaksBetweenLetters(sc));
+                assertFalse(sn + " not encoded, not cased", UScript.isCased(sc));
+                assertTrue(sn + " not encoded, no characters", scriptSet.isEmpty());
+            } else {
+                assertFalse(sn + " encoded, has a sample character", sample.length() == 0);  // Java 6: sample.isEmpty()
+                int firstChar = sample.codePointAt(0);
+                int charScript = getCharScript(sc);
+                assertEquals(sn + " script(sample(script))",
+                             charScript, UScript.getScript(firstChar));
+                assertEquals(sn + " RTL vs. set", rtl.contains(firstChar), UScript.isRightToLeft(sc));
+                assertEquals(sn + " cased vs. set", cased.contains(firstChar), UScript.isCased(sc));
+                assertEquals(sn + " encoded, has characters", sc == charScript, !scriptSet.isEmpty());
+                if(UScript.isRightToLeft(sc)) {
+                    rtl.removeAll(scriptSet);
+                }
+                if(UScript.isCased(sc)) {
+                    cased.removeAll(scriptSet);
+                }
+            }
+        }
+        assertEquals("no remaining RTL characters", "[]", rtl.toPattern(true));
+        assertEquals("no remaining cased characters", "[]", cased.toPattern(true));
+
+        assertTrue("Hani breaks between letters", UScript.breaksBetweenLetters(UScript.HAN));
+        assertTrue("Thai breaks between letters", UScript.breaksBetweenLetters(UScript.THAI));
+        assertFalse("Latn does not break between letters", UScript.breaksBetweenLetters(UScript.LATIN));
+    }
+
     public void TestScriptNames(){
         for(int i=0; i<UScript.CODE_LIMIT;i++){
             String name = UScript.getName(i);
@@ -438,23 +560,27 @@ public class TestUScript extends TestFmwk {
         String[] expectedLong = new String[]{
             "Balinese", "Batak", "Blis", "Brahmi", "Cham", "Cirt", "Cyrs", "Egyd", "Egyh", "Egyptian_Hieroglyphs", 
             "Geok", "Hans", "Hant", "Hmng", "Hung", "Inds", "Javanese", "Kayah_Li", "Latf", "Latg", 
-            "Lepcha", "Lina", "Mandaic", "Maya", "Mero", "Nko", "Old_Turkic", "Perm", "Phags_Pa", "Phoenician", 
-            "Plrd", "Roro", "Sara", "Syre", "Syrj", "Syrn", "Teng", "Vai", "Visp", "Cuneiform", 
+            "Lepcha", "Lina", "Mandaic", "Maya", "Meroitic_Hieroglyphs", "Nko", "Old_Turkic", "Perm", "Phags_Pa", "Phoenician", 
+            "Miao", "Roro", "Sara", "Syre", "Syrj", "Syrn", "Teng", "Vai", "Visp", "Cuneiform", 
             "Zxxx", "Unknown",
             "Carian", "Jpan", "Tai_Tham", "Lycian", "Lydian", "Ol_Chiki", "Rejang", "Saurashtra", "Sgnw", "Sundanese",
             "Moon", "Meetei_Mayek",
 
             // ICU 4.0
-            "Imperial_Aramaic", "Avestan", "Cakm", "Kore",
+            "Imperial_Aramaic", "Avestan", "Chakma", "Kore",
             "Kaithi", "Mani", "Inscriptional_Pahlavi", "Phlp", "Phlv", "Inscriptional_Parthian", "Samaritan", "Tai_Viet",
             "Zmth", "Zsym",
             /* new in ICU 4.4 */
             "Bamum", "Lisu", "Nkgb", "Old_South_Arabian",
             /* new in ICU 4.6 */
-            "Bass", "Dupl", "Elba", "Gran", "Kpel", "Loma", "Mend", "Merc",
+            "Bass", "Dupl", "Elba", "Gran", "Kpel", "Loma", "Mend", "Meroitic_Cursive",
             "Narb", "Nbat", "Palm", "Sind", "Wara",
             /* new in ICU 4.8 */
-            "Afak", "Jurc", "Mroo", "Nshu", "Shrd", "Sora", "Takr", "Tang", "Wole",
+            "Afak", "Jurc", "Mroo", "Nshu", "Sharada", "Sora_Sompeng", "Takri", "Tang", "Wole",
+            /* new in ICU 49 */
+            "Hluw", "Khoj", "Tirh",
+            /* new in ICU 52 */
+            "Aghb", "Mahj"
         };
         String[] expectedShort = new String[]{
             "Bali", "Batk", "Blis", "Brah", "Cham", "Cirt", "Cyrs", "Egyd", "Egyh", "Egyp", 
@@ -475,6 +601,10 @@ public class TestUScript extends TestFmwk {
             "Narb", "Nbat", "Palm", "Sind", "Wara",
             /* new in ICU 4.8 */
             "Afak", "Jurc", "Mroo", "Nshu", "Shrd", "Sora", "Takr", "Tang", "Wole",
+            /* new in ICU 49 */
+            "Hluw", "Khoj", "Tirh",
+            /* new in ICU 52 */
+            "Aghb", "Mahj"
         };
         if(expectedLong.length!=(UScript.CODE_LIMIT-UScript.BALINESE)) {
             errln("need to add new script codes in lang.TestUScript.java!");
@@ -502,4 +632,4 @@ public class TestUScript extends TestFmwk {
             }
         }
     }
- }
+}