/**
*******************************************************************************
-* Copyright (C) 1996-2011, International Business Machines Corporation and *
-* others. All Rights Reserved. *
+* Copyright (C) 1996-2013, International Business Machines Corporation and
+* others. All Rights Reserved.
*******************************************************************************
*/
import java.util.Locale;
import com.ibm.icu.dev.test.TestFmwk;
+import com.ibm.icu.lang.UProperty;
import com.ibm.icu.lang.UScript;
+import com.ibm.icu.lang.UScript.ScriptUsage;
+import com.ibm.icu.text.UnicodeSet;
import com.ibm.icu.util.ULocale;
public class TestUScript extends TestFmwk {
!UScript.hasScript(0x063f, UScript.SYRIAC) &&
!UScript.hasScript(0x063f, UScript.THAANA))
) {
- errln("UScript.hasScript(U+063F, ...) is wrong\n");
+ errln("UScript.hasScript(U+063F, ...) is wrong");
}
if(!(
- UScript.hasScript(0x0640, UScript.COMMON) && /* main Script value */
+ !UScript.hasScript(0x0640, UScript.COMMON) && /* main Script value */
UScript.hasScript(0x0640, UScript.ARABIC) &&
UScript.hasScript(0x0640, UScript.SYRIAC) &&
!UScript.hasScript(0x0640, UScript.THAANA))
) {
- errln("UScript.hasScript(U+0640, ...) is wrong\n");
+ errln("UScript.hasScript(U+0640, ...) is wrong");
}
if(!(
- UScript.hasScript(0x0650, UScript.INHERITED) && /* main Script value */
+ !UScript.hasScript(0x0650, UScript.INHERITED) && /* main Script value */
UScript.hasScript(0x0650, UScript.ARABIC) &&
UScript.hasScript(0x0650, UScript.SYRIAC) &&
!UScript.hasScript(0x0650, UScript.THAANA))
) {
- errln("UScript.hasScript(U+0650, ...) is wrong\n");
+ errln("UScript.hasScript(U+0650, ...) is wrong");
}
if(!(
- UScript.hasScript(0x0660, UScript.COMMON) && /* main Script value */
+ !UScript.hasScript(0x0660, UScript.COMMON) && /* main Script value */
UScript.hasScript(0x0660, UScript.ARABIC) &&
!UScript.hasScript(0x0660, UScript.SYRIAC) &&
UScript.hasScript(0x0660, UScript.THAANA))
) {
- errln("UScript.hasScript(U+0660, ...) is wrong\n");
+ errln("UScript.hasScript(U+0660, ...) is wrong");
}
if(!(
!UScript.hasScript(0xfdf2, UScript.COMMON) &&
!UScript.hasScript(0xfdf2, UScript.SYRIAC) &&
UScript.hasScript(0xfdf2, UScript.THAANA))
) {
- errln("UScript.hasScript(U+FDF2, ...) is wrong\n");
+ errln("UScript.hasScript(U+FDF2, ...) is wrong");
+ }
+ if(UScript.hasScript(0x0640, 0xaffe)) {
+ // An unguarded implementation might go into an infinite loop.
+ errln("UScript.hasScript(U+0640, bogus 0xaffe) is wrong");
}
}
public void TestGetScriptExtensions() {
BitSet scripts=new BitSet(UScript.CODE_LIMIT);
+ /* Each check below verifies both results of UScript.getScriptExtensions():
+ * the int return value and the contents of the BitSet passed in.
+ * The same BitSet is reused for every call, so the exact-cardinality checks
+ * presuppose that each call replaces the set's previous contents.
+ *
+ * invalid code points: -1 and 0x110000 are expected to return UNKNOWN and
+ * to leave exactly {UNKNOWN} in the set */
+ if(UScript.getScriptExtensions(-1, scripts)!=UScript.UNKNOWN || scripts.cardinality()!=1 ||
+ !scripts.get(UScript.UNKNOWN)) {
+ errln("UScript.getScriptExtensions(-1) is not {UNKNOWN}");
+ }
+ if(UScript.getScriptExtensions(0x110000, scripts)!=UScript.UNKNOWN || scripts.cardinality()!=1 ||
+ !scripts.get(UScript.UNKNOWN)) {
+ errln("UScript.getScriptExtensions(0x110000) is not {UNKNOWN}");
+ }
+
/* normal usage */
- if(!UScript.getScriptExtensions(0x063f, scripts).isEmpty()) {
- errln("UScript.getScriptExtensions(U+063F) is not empty");
+ if(UScript.getScriptExtensions(0x063f, scripts)!=UScript.ARABIC || scripts.cardinality()!=1 || // single script: the expected return value is that script code itself
+ !scripts.get(UScript.ARABIC)) {
+ errln("UScript.getScriptExtensions(U+063F) is not {ARABIC}");
}
- if(UScript.getScriptExtensions(0x0640, scripts).cardinality()!=2 || !scripts.get(UScript.ARABIC) || !scripts.get(UScript.SYRIAC)) {
+ if(UScript.getScriptExtensions(0x0640, scripts)!=-3 || scripts.cardinality()!=3 || // several scripts: the expected return value is the negated number of scripts in the set
+ !scripts.get(UScript.ARABIC) || !scripts.get(UScript.SYRIAC) || !scripts.get(UScript.MANDAIC)
+ ) {
errln("UScript.getScriptExtensions(U+0640) failed");
}
- UScript.getScriptExtensions(0xfdf2, scripts);
- if(scripts.cardinality()!=2 || !scripts.get(UScript.ARABIC) || !scripts.get(UScript.THAANA)) {
+ if(UScript.getScriptExtensions(0xfdf2, scripts)!=-2 || scripts.cardinality()!=2 || // two scripts expected, hence -2
+ !scripts.get(UScript.ARABIC) || !scripts.get(UScript.THAANA)) {
errln("UScript.getScriptExtensions(U+FDF2) failed");
}
- UScript.getScriptExtensions(0xff65, scripts);
- if(scripts.cardinality()!=6 || !scripts.get(UScript.BOPOMOFO) || !scripts.get(UScript.YI)) {
+ if(UScript.getScriptExtensions(0xff65, scripts)!=-6 || scripts.cardinality()!=6 || // six scripts expected; only two of them (BOPOMOFO, YI) are checked by name
+ !scripts.get(UScript.BOPOMOFO) || !scripts.get(UScript.YI)) {
errln("UScript.getScriptExtensions(U+FF65) failed");
}
}
+ public void TestScriptMetadataAPI() {
+ /* API & code coverage.
+ * Spot-checks UScript.getSampleString(), getUsage(), isRightToLeft(),
+ * breaksBetweenLetters() and isCased() against hard-coded expectations
+ * for a handful of script codes. Each API gets its own errln() so that
+ * a failure names the method that returned an unexpected value. */
+ String sample = UScript.getSampleString(UScript.LATIN);
+ if(sample.length()!=1 || UScript.getScript(sample.charAt(0))!=UScript.LATIN) { // expects exactly one char whose own script is LATIN
+ errln("UScript.getSampleString(Latn) failed");
+ }
+ sample = UScript.getSampleString(UScript.INVALID_CODE);
+ if(sample.length()!=0) { // an invalid code is expected to yield an empty string
+ errln("UScript.getSampleString(invalid) failed");
+ }
+
+ if(UScript.getUsage(UScript.LATIN)!=ScriptUsage.RECOMMENDED || // one representative script per ScriptUsage value ...
+ UScript.getUsage(UScript.YI)!=ScriptUsage.ASPIRATIONAL ||
+ UScript.getUsage(UScript.CHEROKEE)!=ScriptUsage.LIMITED_USE ||
+ UScript.getUsage(UScript.COPTIC)!=ScriptUsage.EXCLUDED ||
+ UScript.getUsage(UScript.CIRTH)!=ScriptUsage.NOT_ENCODED ||
+ UScript.getUsage(UScript.INVALID_CODE)!=ScriptUsage.NOT_ENCODED || // ... plus two codes outside the valid range, expected to report NOT_ENCODED
+ UScript.getUsage(UScript.CODE_LIMIT)!=ScriptUsage.NOT_ENCODED) {
+ errln("UScript.getUsage() failed");
+ }
+
+ if(UScript.isRightToLeft(UScript.LATIN) || // LATIN and CIRTH must be reported as not RTL, ARABIC and HEBREW as RTL
+ UScript.isRightToLeft(UScript.CIRTH) ||
+ !UScript.isRightToLeft(UScript.ARABIC) ||
+ !UScript.isRightToLeft(UScript.HEBREW)) {
+ errln("UScript.isRightToLeft() failed");
+ }
+
+ if(UScript.breaksBetweenLetters(UScript.LATIN) || // expected false for LATIN and CIRTH, true for HAN and THAI
+ UScript.breaksBetweenLetters(UScript.CIRTH) ||
+ !UScript.breaksBetweenLetters(UScript.HAN) ||
+ !UScript.breaksBetweenLetters(UScript.THAI)) {
+ errln("UScript.breaksBetweenLetters() failed");
+ }
+
+ if(UScript.isCased(UScript.CIRTH) || // expected false for CIRTH and HAN, true for LATIN and GREEK
+ UScript.isCased(UScript.HAN) ||
+ !UScript.isCased(UScript.LATIN) ||
+ !UScript.isCased(UScript.GREEK)) {
+ errln("UScript.isCased() failed");
+ }
+ }
+
+ /**
+ * Maps a special script code to the most common script of its encoded characters.
+ * <p>
+ * SIMPLIFIED_HAN and TRADITIONAL_HAN map to HAN, JAPANESE maps to HIRAGANA and
+ * KOREAN maps to HANGUL. Every other code is returned unchanged.
+ *
+ * @param script a UScript script code
+ * @return the script code that the sample character of {@code script} is
+ *         expected to carry as its own Script property value
+ */
+ private static final int getCharScript(int script) {
+ switch(script) {
+ case UScript.SIMPLIFIED_HAN:
+ case UScript.TRADITIONAL_HAN:
+ return UScript.HAN;
+ case UScript.JAPANESE:
+ return UScript.HIRAGANA;
+ case UScript.KOREAN:
+ return UScript.HANGUL;
+ default:
+ return script;
+ }
+ }
+
+ public void TestScriptMetadata() { // For every script code, cross-checks usage, sample string, RTL and cased flags against character-property sets.
+ UnicodeSet rtl = new UnicodeSet("[[:bc=R:][:bc=AL:]-[:Cn:]-[:sc=Common:]]"); // bidi class R or AL, minus unassigned (Cn) and Common-script characters
+ // So far, sample characters are uppercase, which is why the "cased" set below is built from Lu only.
+ // Georgian is special, so it is excluded from the set together with Common.
+ UnicodeSet cased = new UnicodeSet("[[:Lu:]-[:sc=Common:]-[:sc=Geor:]]");
+ for(int sc = 0; sc < UScript.CODE_LIMIT; ++sc) {
+ String sn = UScript.getShortName(sc); // used only as a prefix for the assertion messages
+ ScriptUsage usage = UScript.getUsage(sc);
+ String sample = UScript.getSampleString(sc);
+ UnicodeSet scriptSet = new UnicodeSet();
+ scriptSet.applyIntPropertyValue(UProperty.SCRIPT, sc); // all characters whose Script property value is sc
+ if(usage == ScriptUsage.NOT_ENCODED) { // not encoded: no sample, all flags false, no characters
+ assertTrue(sn + " not encoded, no sample", sample.length() == 0); // Java 6: sample.isEmpty()
+ assertFalse(sn + " not encoded, not RTL", UScript.isRightToLeft(sc));
+ assertFalse(sn + " not encoded, not LB letters", UScript.breaksBetweenLetters(sc));
+ assertFalse(sn + " not encoded, not cased", UScript.isCased(sc));
+ assertTrue(sn + " not encoded, no characters", scriptSet.isEmpty());
+ } else { // encoded: the flags must agree with the properties of the sample's first code point
+ assertFalse(sn + " encoded, has a sample character", sample.length() == 0); // Java 6: sample.isEmpty()
+ int firstChar = sample.codePointAt(0);
+ int charScript = getCharScript(sc); // special codes are mapped to the script their characters actually carry
+ assertEquals(sn + " script(sample(script))",
+ charScript, UScript.getScript(firstChar));
+ assertEquals(sn + " RTL vs. set", rtl.contains(firstChar), UScript.isRightToLeft(sc));
+ assertEquals(sn + " cased vs. set", cased.contains(firstChar), UScript.isCased(sc));
+ assertEquals(sn + " encoded, has characters", sc == charScript, !scriptSet.isEmpty()); // codes remapped by getCharScript() are expected to own no characters themselves
+ if(UScript.isRightToLeft(sc)) {
+ rtl.removeAll(scriptSet); // whatever remains after the loop belongs to a script not flagged RTL
+ }
+ if(UScript.isCased(sc)) {
+ cased.removeAll(scriptSet); // whatever remains after the loop belongs to a script not flagged cased
+ }
+ }
+ }
+ assertEquals("no remaining RTL characters", "[]", rtl.toPattern(true)); // toPattern() makes any leftover characters visible in the failure message
+ assertEquals("no remaining cased characters", "[]", cased.toPattern(true));
+
+ assertTrue("Hani breaks between letters", UScript.breaksBetweenLetters(UScript.HAN));
+ assertTrue("Thai breaks between letters", UScript.breaksBetweenLetters(UScript.THAI));
+ assertFalse("Latn does not break between letters", UScript.breaksBetweenLetters(UScript.LATIN));
+ }
+
public void TestScriptNames(){
for(int i=0; i<UScript.CODE_LIMIT;i++){
String name = UScript.getName(i);
String[] expectedLong = new String[]{
"Balinese", "Batak", "Blis", "Brahmi", "Cham", "Cirt", "Cyrs", "Egyd", "Egyh", "Egyptian_Hieroglyphs",
"Geok", "Hans", "Hant", "Hmng", "Hung", "Inds", "Javanese", "Kayah_Li", "Latf", "Latg",
- "Lepcha", "Lina", "Mandaic", "Maya", "Mero", "Nko", "Old_Turkic", "Perm", "Phags_Pa", "Phoenician",
- "Plrd", "Roro", "Sara", "Syre", "Syrj", "Syrn", "Teng", "Vai", "Visp", "Cuneiform",
+ "Lepcha", "Lina", "Mandaic", "Maya", "Meroitic_Hieroglyphs", "Nko", "Old_Turkic", "Perm", "Phags_Pa", "Phoenician",
+ "Miao", "Roro", "Sara", "Syre", "Syrj", "Syrn", "Teng", "Vai", "Visp", "Cuneiform",
"Zxxx", "Unknown",
"Carian", "Jpan", "Tai_Tham", "Lycian", "Lydian", "Ol_Chiki", "Rejang", "Saurashtra", "Sgnw", "Sundanese",
"Moon", "Meetei_Mayek",
// ICU 4.0
- "Imperial_Aramaic", "Avestan", "Cakm", "Kore",
+ "Imperial_Aramaic", "Avestan", "Chakma", "Kore",
"Kaithi", "Mani", "Inscriptional_Pahlavi", "Phlp", "Phlv", "Inscriptional_Parthian", "Samaritan", "Tai_Viet",
"Zmth", "Zsym",
/* new in ICU 4.4 */
"Bamum", "Lisu", "Nkgb", "Old_South_Arabian",
/* new in ICU 4.6 */
- "Bass", "Dupl", "Elba", "Gran", "Kpel", "Loma", "Mend", "Merc",
+ "Bass", "Dupl", "Elba", "Gran", "Kpel", "Loma", "Mend", "Meroitic_Cursive",
"Narb", "Nbat", "Palm", "Sind", "Wara",
/* new in ICU 4.8 */
- "Afak", "Jurc", "Mroo", "Nshu", "Shrd", "Sora", "Takr", "Tang", "Wole",
+ "Afak", "Jurc", "Mroo", "Nshu", "Sharada", "Sora_Sompeng", "Takri", "Tang", "Wole",
+ /* new in ICU 49 */
+ "Hluw", "Khoj", "Tirh",
+ /* new in ICU 52 */
+ "Aghb", "Mahj"
};
String[] expectedShort = new String[]{
"Bali", "Batk", "Blis", "Brah", "Cham", "Cirt", "Cyrs", "Egyd", "Egyh", "Egyp",
"Narb", "Nbat", "Palm", "Sind", "Wara",
/* new in ICU 4.8 */
"Afak", "Jurc", "Mroo", "Nshu", "Shrd", "Sora", "Takr", "Tang", "Wole",
+ /* new in ICU 49 */
+ "Hluw", "Khoj", "Tirh",
+ /* new in ICU 52 */
+ "Aghb", "Mahj"
};
if(expectedLong.length!=(UScript.CODE_LIMIT-UScript.BALINESE)) {
errln("need to add new script codes in lang.TestUScript.java!");
}
}
}
- }
+}