/*
 *******************************************************************************
 * Copyright (C) 2009, Google, International Business Machines Corporation and *
 * others. All Rights Reserved.                                                *
 *******************************************************************************
 */
\r
7 package com.ibm.icu.dev.test.translit;
\r
9 import com.ibm.icu.dev.test.TestFmwk;
\r
10 import com.ibm.icu.lang.UScript;
\r
11 import com.ibm.icu.text.Transliterator;
\r
12 import com.ibm.icu.text.UTF16;
\r
13 import com.ibm.icu.text.UnicodeSet;
\r
14 import com.ibm.icu.text.UnicodeSetIterator;
\r
20 public class AnyScriptTest extends TestFmwk {
\r
21 public static void main(String[] args) throws Exception {
\r
22 new AnyScriptTest().run(args);
\r
25 public void TestContext() {
\r
26 Transliterator t = Transliterator.createFromRules("foo", "::[bc]; a{b}d > B;", Transliterator.FORWARD);
\r
27 String sample = "abd abc b";
\r
28 assertEquals("context works", "aBd abc b", t.transform(sample));
\r
// Smoke test for the "any-<script>" transliterators.
// Builds one sample string from up to 20 characters of every script, then, per
// script code, looks up an "any-" transliterator by the script's name and by its
// short name and transforms the sample. Results are not inspected; the test only
// checks that the calls complete.
//
// NOTE(review): this listing appears to have lost lines: there are no closing
// braces, two `if` statements have no visible body, `t` is assigned without a
// visible declaration, and the `catch` has no visible `try`. Confirm against the
// repository copy before changing any logic here.
31 public void TestScripts(){
\r
32 // get a couple of characters of each script for testing
\r
34 StringBuffer testBuffer = new StringBuffer();
\r
35 for (int script = 0; script < UScript.CODE_LIMIT; ++script) {
\r
// All characters whose "script" property matches this script's name.
36 UnicodeSet test = new UnicodeSet().applyPropertyAlias("script", UScript.getName(script));
\r
// Take at most 20 characters; fewer if the set is smaller.
37 int count = Math.min(20, test.size());
\r
38 for (int i = 0; i < count; ++i){
\r
39 testBuffer.append(UTF16.valueOf(test.charAt(i)));
\r
42 String test = testBuffer.toString();
\r
43 logln("Test line: " + test);
\r
45 int inclusion = getInclusion();
\r
// Set once a lookup below has failed; it is one of the conditions of the
// inclusion filter further down.
46 boolean testedUnavailableScript = false;
\r
// Second pass: one transliterator lookup per script code.
48 for (int script = 0; script < UScript.CODE_LIMIT; ++script) {
\r
// NOTE(review): the body of this `if` is not visible; presumably it skips
// COMMON and INHERITED - confirm.
49 if (script == UScript.COMMON || script == UScript.INHERITED) {
\r
52 // if the inclusion rate is not 10, skip all but a small number of items.
\r
53 // Make sure, however, that we test at least one unavailable script
\r
// The condition holds only when inclusion is below 10, the script is not
// LATIN, HAN or HIRAGANA, and an unavailable script has already been seen.
// NOTE(review): the end of the condition and its body are not visible.
54 if (inclusion < 10 && script != UScript.LATIN
\r
55 && script != UScript.HAN
\r
56 && script != UScript.HIRAGANA
\r
57 && testedUnavailableScript
\r
62 String scriptName = UScript.getName(script);
\r
65 t = Transliterator.getInstance("any-" + scriptName);
\r
// A failed lookup is treated as "script not supported": record that one was
// seen, log it, and move on to the next script.
66 } catch (Exception e) {
\r
67 testedUnavailableScript = true;
\r
68 logln("Skipping unavailable: " + scriptName);
\r
69 continue; // we don't handle all scripts
\r
71 logln("Checking: " + scriptName);
\r
73 t.transform(test); // just verify we don't crash
\r
// Repeat the lookup using the script's short name. As far as this listing
// shows, this second lookup has no exception handling of its own.
75 scriptName = UScript.getShortName(script);
\r
76 t = Transliterator.getInstance("any-" + scriptName);
\r
77 t.transform(test); // just verify we don't crash
\r
82 * Check to make sure that wide characters are converted when going to narrow scripts.
\r
84 public void TestForWidth(){
\r
85 Transliterator widen = Transliterator.getInstance("halfwidth-fullwidth");
\r
86 Transliterator narrow = Transliterator.getInstance("fullwidth-halfwidth");
\r
87 UnicodeSet ASCII = new UnicodeSet("[:ascii:]");
\r
88 String lettersAndSpace = "abc def";
\r
89 final String punctOnly = "( )";
\r
91 String wideLettersAndSpace = widen.transform(lettersAndSpace);
\r
92 String widePunctOnly = widen.transform(punctOnly);
\r
93 assertContainsNone("Should be wide", ASCII, wideLettersAndSpace);
\r
94 assertContainsNone("Should be wide", ASCII, widePunctOnly);
\r
97 back = narrow.transform(wideLettersAndSpace);
\r
98 assertEquals("Should be narrow", lettersAndSpace, back);
\r
99 back = narrow.transform(widePunctOnly);
\r
100 assertEquals("Should be narrow", punctOnly, back);
\r
102 Transliterator latin = Transliterator.getInstance("any-Latn");
\r
103 back = latin.transform(wideLettersAndSpace);
\r
104 assertEquals("Should be ascii", lettersAndSpace, back);
\r
106 back = latin.transform(widePunctOnly);
\r
107 assertEquals("Should be ascii", punctOnly, back);
\r
109 Transliterator t2 = Transliterator.getInstance("any-Han");
\r
110 back = t2.transform(widePunctOnly);
\r
111 assertEquals("Should be same", widePunctOnly, back);
\r
116 public void TestCommonDigits() {
\r
117 UnicodeSet westernDigitSet = new UnicodeSet("[0-9]");
\r
118 UnicodeSet westernDigitSetAndMarks = new UnicodeSet("[[0-9][:Mn:]]");
\r
119 UnicodeSet arabicDigitSet = new UnicodeSet("[[:Nd:]&[:block=Arabic:]]");
\r
120 Transliterator latin = Transliterator.getInstance("Any-Latn");
\r
121 Transliterator arabic = Transliterator.getInstance("Any-Arabic");
\r
122 String westernDigits = getList(westernDigitSet);
\r
123 String arabicDigits = getList(arabicDigitSet);
\r
125 String fromArabic = latin.transform(arabicDigits);
\r
126 assertContainsAll("Any-Latin transforms Arabic digits", westernDigitSetAndMarks, fromArabic);
\r
127 if (false) { // we don't require conversion to Arabic digits
\r
128 String fromLatin = arabic.transform(westernDigits);
\r
129 assertContainsAll("Any-Arabic transforms Western digits", arabicDigitSet, fromLatin);
\r
133 // might want to add to TestFmwk
\r
134 private void assertContainsAll(String message, UnicodeSet set, String string) {
\r
135 handleAssert(set.containsAll(string), message, set, string, "contains all of", false);
\r
138 private void assertContainsNone(String message, UnicodeSet set, String string) {
\r
139 handleAssert(set.containsNone(string), message, set, string, "contains none of", false);
\r
142 // might want to add to UnicodeSet
\r
143 private String getList(UnicodeSet set) {
\r
144 StringBuffer result = new StringBuffer();
\r
145 for (UnicodeSetIterator it = new UnicodeSetIterator(set); it.next();) {
\r
146 result.append(it.getString());
\r
148 return result.toString();
\r