2 *******************************************************************************
3 * Copyright (C) 1996-2012, International Business Machines Corporation and *
4 * others. All Rights Reserved. *
5 *******************************************************************************
7 package com.ibm.icu.dev.test.translit;
9 import java.util.ArrayList;
10 import java.util.Enumeration;
11 import java.util.HashMap;
12 import java.util.HashSet;
13 import java.util.Iterator;
14 import java.util.List;
15 import java.util.Locale;
16 import java.util.Map.Entry;
18 import com.ibm.icu.dev.test.TestFmwk;
19 import com.ibm.icu.dev.test.TestUtil;
20 import com.ibm.icu.dev.util.UnicodeMap;
21 import com.ibm.icu.impl.Utility;
22 import com.ibm.icu.impl.UtilityExtensions;
23 import com.ibm.icu.lang.CharSequences;
24 import com.ibm.icu.lang.UCharacter;
25 import com.ibm.icu.lang.UScript;
26 import com.ibm.icu.text.CanonicalIterator;
27 import com.ibm.icu.text.Normalizer2;
28 import com.ibm.icu.text.Replaceable;
29 import com.ibm.icu.text.ReplaceableString;
30 import com.ibm.icu.text.StringTransform;
31 import com.ibm.icu.text.Transliterator;
32 import com.ibm.icu.text.UTF16;
33 import com.ibm.icu.text.UnicodeFilter;
34 import com.ibm.icu.text.UnicodeSet;
35 import com.ibm.icu.text.UnicodeSetIterator;
36 import com.ibm.icu.util.CaseInsensitiveString;
37 import com.ibm.icu.util.ULocale;
39 /***********************************************************************
41 HOW TO USE THIS TEST FILE
43 How I developed on two platforms
44 without losing (too much of) my mind
47 1. Add new tests by copying/pasting/changing existing tests. On Java,
48 any public void method named Test...() taking no parameters becomes
49 a test. On C++, you need to modify the header and add a line to
50 the runIndexedTest() dispatch method.
52 2. Make liberal use of the expect() method; it is your friend.
54 3. The tests in this file exactly match those in a sister file on the
55 other side. The two files are:
57 icu4j: src/com.ibm.icu.dev.test/translit/TransliteratorTest.java
58 icu4c: source/test/intltest/transtst.cpp
60 ==> THIS IS THE IMPORTANT PART <==
62 When you add a test in this file, add it in transtst.cpp too.
63 Give it the same name and put it in the same relative place. This
64 makes maintenance a lot simpler for any poor soul who ends up
65 trying to synchronize the tests between icu4j and icu4c.
67 4. If you MUST enter a test that is NOT paralleled in the sister file,
68 then add it in the special non-mirrored section. These are
77 Make sure you document the reason the test is here and not there.
82 ***********************************************************************/
86 * @summary General test of Transliterator
88 public class TransliteratorTest extends TestFmwk {
90 public static void main(String[] args) throws Exception {
91 new TransliteratorTest().run(args);
94 public void TestHangul() {
96 Transliterator lh = Transliterator.getInstance("Latin-Hangul");
97 Transliterator hl = lh.getInverse();
99 assertTransform("Transform", "\uCE20", lh, "ch");
101 assertTransform("Transform", "\uC544\uB530", lh, hl, "atta", "a-tta");
102 assertTransform("Transform", "\uC544\uBE60", lh, hl, "appa", "a-ppa");
103 assertTransform("Transform", "\uC544\uC9DC", lh, hl, "ajja", "a-jja");
104 assertTransform("Transform", "\uC544\uAE4C", lh, hl, "akka", "a-kka");
105 assertTransform("Transform", "\uC544\uC2F8", lh, hl, "assa", "a-ssa");
106 assertTransform("Transform", "\uC544\uCC28", lh, hl, "acha", "a-cha");
107 assertTransform("Transform", "\uC545\uC0AC", lh, hl, "agsa", "ag-sa");
108 assertTransform("Transform", "\uC548\uC790", lh, hl, "anja", "an-ja");
109 assertTransform("Transform", "\uC548\uD558", lh, hl, "anha", "an-ha");
110 assertTransform("Transform", "\uC54C\uAC00", lh, hl, "alga", "al-ga");
111 assertTransform("Transform", "\uC54C\uB9C8", lh, hl, "alma", "al-ma");
112 assertTransform("Transform", "\uC54C\uBC14", lh, hl, "alba", "al-ba");
113 assertTransform("Transform", "\uC54C\uC0AC", lh, hl, "alsa", "al-sa");
114 assertTransform("Transform", "\uC54C\uD0C0", lh, hl, "alta", "al-ta");
115 assertTransform("Transform", "\uC54C\uD30C", lh, hl, "alpa", "al-pa");
116 assertTransform("Transform", "\uC54C\uD558", lh, hl, "alha", "al-ha");
117 assertTransform("Transform", "\uC555\uC0AC", lh, hl, "absa", "ab-sa");
118 assertTransform("Transform", "\uC548\uAC00", lh, hl, "anga", "an-ga");
119 assertTransform("Transform", "\uC545\uC2F8", lh, hl, "agssa", "ag-ssa");
120 assertTransform("Transform", "\uC548\uC9DC", lh, hl, "anjja", "an-jja");
121 assertTransform("Transform", "\uC54C\uC2F8", lh, hl, "alssa", "al-ssa");
122 assertTransform("Transform", "\uC54C\uB530", lh, hl, "altta", "al-tta");
123 assertTransform("Transform", "\uC54C\uBE60", lh, hl, "alppa", "al-ppa");
124 assertTransform("Transform", "\uC555\uC2F8", lh, hl, "abssa", "ab-ssa");
125 assertTransform("Transform", "\uC546\uCE74", lh, hl, "akkka", "akk-ka");
126 assertTransform("Transform", "\uC558\uC0AC", lh, hl, "asssa", "ass-sa");
130 public void TestChinese() {
131 Transliterator hanLatin = Transliterator.getInstance("Han-Latin");
132 assertTransform("Transform", "z\u00E0o Unicode", hanLatin, "\u9020Unicode");
133 assertTransform("Transform", "z\u00E0i chu\u00E0ng z\u00E0o Unicode zh\u012B qi\u00E1n", hanLatin, "\u5728\u5275\u9020Unicode\u4E4B\u524D");
136 public void TestRegistry() {
137 checkRegistry("foo3", "::[a-z]; ::NFC; [:letter:] a > b;"); // check compound
138 checkRegistry("foo2", "::NFC; [:letter:] a > b;"); // check compound
139 checkRegistry("foo1", "[:letter:] a > b;");
140 for (Enumeration e = Transliterator.getAvailableIDs(); e.hasMoreElements(); ) {
141 String id = (String) e.nextElement();
146 private void checkRegistry (String id, String rules) {
147 Transliterator foo = Transliterator.createFromRules(id, rules, Transliterator.FORWARD);
148 Transliterator.registerInstance(foo);
152 private void checkRegistry(String id) {
153 Transliterator fie = Transliterator.getInstance(id);
154 final UnicodeSet fae = new UnicodeSet("[a-z5]");
156 Transliterator foe = Transliterator.getInstance(id);
157 UnicodeFilter fee = foe.getFilter();
158 if (fae.equals(fee)) {
159 errln("Changed what is in registry for " + id);
163 public void TestInstantiation() {
164 long ms = System.currentTimeMillis();
166 for (Enumeration e = Transliterator.getAvailableIDs(); e.hasMoreElements(); ) {
167 ID = (String) e.nextElement();
168 if (ID.equals("Latin-Han/definition")) {
169 System.out.println("\nTODO: disabling Latin-Han/definition check for now: fix later");
172 Transliterator t = null;
174 t = Transliterator.getInstance(ID);
175 // This is only true for some subclasses
176 // // We should get a new instance if we try again
177 // Transliterator t2 = Transliterator.getInstance(ID);
179 // logln("OK: " + Transliterator.getDisplayName(ID) + " (" + ID + "): " + t);
181 // errln("FAIL: " + ID + " returned identical instances");
184 } catch (IllegalArgumentException ex) {
185 errln("FAIL: " + ID);
189 // if (t.getFilter() != null) {
190 // errln("Fail: Should never have filter on transliterator unless we started with one: " + ID + ", " + t.getFilter());
197 rules = t.toRules(true);
199 Transliterator.createFromRules("x", rules, Transliterator.FORWARD);
200 } catch (IllegalArgumentException ex2) {
201 errln("FAIL: " + ID + ".toRules() => bad rules: " +
208 // Now test the failure path
210 ID = "<Not a valid Transliterator ID>";
211 Transliterator t = Transliterator.getInstance(ID);
212 errln("FAIL: " + ID + " returned " + t);
213 } catch (IllegalArgumentException ex) {
214 logln("OK: Bogus ID handled properly");
217 ms = System.currentTimeMillis() - ms;
218 logln("Elapsed time: " + ms + " ms");
221 public void TestSimpleRules() {
222 /* Example: rules 1. ab>x|y
225 * []|eabcd start - no match, copy e to translated buffer
226 * [e]|abcd match rule 1 - copy output & adjust cursor
227 * [ex|y]cd match rule 2 - copy output & adjust cursor
228 * [exz]|d no match, copy d to transliterated buffer
235 /* Another set of rules:
247 expect("ab>x|yzacw;" +
255 Transliterator t = Transliterator.createFromRules("<ID>",
257 "$vowel=[aeiouAEIOU];" +
259 "$vowel } $lu > '!';" +
264 Transliterator.FORWARD);
265 expect(t, "abcdefgABCDEFGU", "&bcd&fg!^**!^*&");
269 * Test inline set syntax and set variable syntax.
271 public void TestInlineSet() {
272 expect("{ [:Ll:] } x > y; [:Ll:] > z;", "aAbxq", "zAyzz");
273 expect("a[0-9]b > qrs", "1a7b9", "1qrs9");
275 expect("$digit = [0-9];" +
276 "$alpha = [a-zA-Z];" +
277 "$alphanumeric = [$digit $alpha];" + // ***
278 "$special = [^$alphanumeric];" + // ***
279 "$alphanumeric > '-';" +
282 "thx-1138", "---*----");
286 * Create some inverses and confirm that they work. We have to be
287 * careful how we do this, since the inverses will not be true
288 * inverses -- we can't throw any random string at the composition
289 * of the transliterators and expect the identity function. F x
290 * F' != I. However, if we are careful about the input, we will
291 * get the expected results.
293 public void TestRuleBasedInverse() {
314 // Careful here -- random strings will not work. If we keep
315 // the left side to the domain and the right side to the range
316 // we will be okay though (left, abc; right xyz).
318 "abcacab", "zyxxxyy",
322 Transliterator fwd = Transliterator.createFromRules("<ID>", RULES, Transliterator.FORWARD);
323 Transliterator rev = Transliterator.createFromRules("<ID>", RULES, Transliterator.REVERSE);
324 for (int i=0; i<DATA.length; i+=2) {
325 expect(fwd, DATA[i], DATA[i+1]);
326 expect(rev, DATA[i+1], DATA[i]);
331 * Basic test of keyboard.
333 public void TestKeyboard() {
334 Transliterator t = Transliterator.createFromRules("<ID>",
338 +"a>A;", Transliterator.FORWARD);
347 null, "AycAY", // null means finishKeyboardTransliteration
350 keyboardAux(t, DATA);
354 * Basic test of keyboard with cursor.
356 public void TestKeyboard2() {
357 Transliterator t = Transliterator.createFromRules("<ID>",
361 +"a>A;", Transliterator.FORWARD);
366 "s", "Aps", // modified for rollback - "Ay",
367 "c", "Apsc", // modified for rollback - "Ayc",
370 "s", "AycAps", // modified for rollback - "AycAy",
371 "c", "AycApsc", // modified for rollback - "AycAyc",
373 null, "AycAY", // null means finishKeyboardTransliteration
376 keyboardAux(t, DATA);
380 * Test keyboard transliteration with back-replacement.
382 public void TestKeyboard3() {
383 // We want th>z but t>y. Furthermore, during keyboard
384 // transliteration we want t>y then yh>z if t, then h are
392 // Column 1: characters to add to buffer (as if typed)
393 // Column 2: expected appearance of buffer after
394 // keyboard xliteration.
397 "t", "abt", // modified for rollback - "aby",
399 "t", "abyct", // modified for rollback - "abycy",
401 null, "abycz", // null means finishKeyboardTransliteration
404 Transliterator t = Transliterator.createFromRules("<ID>", RULES, Transliterator.FORWARD);
405 keyboardAux(t, DATA);
408 private void keyboardAux(Transliterator t, String[] DATA) {
409 Transliterator.Position index = new Transliterator.Position();
410 ReplaceableString s = new ReplaceableString();
411 for (int i=0; i<DATA.length; i+=2) {
413 if (DATA[i] != null) {
414 log = new StringBuffer(s.toString() + " + "
417 t.transliterate(s, index, DATA[i]);
419 log = new StringBuffer(s.toString() + " => ");
420 t.finishTransliteration(s, index);
422 UtilityExtensions.formatInput(log, s, index);
423 if (s.toString().equals(DATA[i+1])) {
424 logln(log.toString());
426 errln("FAIL: " + log.toString() + ", expected " + DATA[i+1]);
431 // Latin-Arabic has been temporarily removed until it can be
434 // public void TestArabic() {
437 // "\u062a\u062a\u0645\u062a\u0639 "+
438 // "\u0627\u0644\u0644\u063a\u0629 "+
439 // "\u0627\u0644\u0639\u0631\u0628\u0628\u064a\u0629 "+
440 // "\u0628\u0628\u0646\u0638\u0645 "+
441 // "\u0643\u062a\u0627\u0628\u0628\u064a\u0629 "+
442 // "\u062c\u0645\u064a\u0644\u0629"
445 // Transliterator t = Transliterator.getInstance("Latin-Arabic");
446 // for (int i=0; i<DATA.length; i+=2) {
447 // expect(t, DATA[i], DATA[i+1]);
452 * Compose the Kana transliterator forward and reverse and try
453 * some strings that should come out unchanged.
455 public void TestCompoundKana() {
456 Transliterator t = Transliterator.getInstance("Latin-Katakana;Katakana-Latin");
457 expect(t, "aaaaa", "aaaaa");
461 * Compose the hex transliterators forward and reverse.
463 public void TestCompoundHex() {
464 Transliterator a = Transliterator.getInstance("Any-Hex");
465 Transliterator b = Transliterator.getInstance("Hex-Any");
466 // Transliterator[] trans = { a, b };
467 // Transliterator ab = Transliterator.getInstance(trans);
468 Transliterator ab = Transliterator.getInstance("Any-Hex;Hex-Any");
470 // Do some basic tests of b
471 expect(b, "\\u0030\\u0031", "01");
476 // trans = new Transliterator[] { b, a };
477 // Transliterator ba = Transliterator.getInstance(trans);
478 Transliterator ba = Transliterator.getInstance("Hex-Any;Any-Hex");
479 ReplaceableString str = new ReplaceableString(s);
480 a.transliterate(str);
481 expect(ba, str.toString(), str.toString());
485 * Do some basic tests of filtering.
487 public void TestFiltering() {
489 Transliterator tempTrans = Transliterator.createFromRules("temp", "x > y; x{a} > b; ", Transliterator.FORWARD);
490 tempTrans.setFilter(new UnicodeSet("[a]"));
491 String tempResult = tempTrans.transform("xa");
492 assertEquals("context should not be filtered ", "xb", tempResult);
494 tempTrans = Transliterator.createFromRules("temp", "::[a]; x > y; x{a} > b; ", Transliterator.FORWARD);
495 tempResult = tempTrans.transform("xa");
496 assertEquals("context should not be filtered ", "xb", tempResult);
498 Transliterator hex = Transliterator.getInstance("Any-Hex");
499 hex.setFilter(new UnicodeFilter() {
500 public boolean contains(int c) {
503 public String toPattern(boolean escapeUnprintable) {
506 public boolean matchesIndexValue(int v) {
509 public void addMatchSetTo(UnicodeSet toUnionTo) {}
512 String out = hex.transliterate(s);
513 String exp = "\\u0061\\u0062c\\u0064\\u0065";
514 if (out.equals(exp)) {
515 logln("Ok: \"" + exp + "\"");
517 logln("FAIL: \"" + out + "\", wanted \"" + exp + "\"");
524 public void TestAnchors() {
525 expect("^ab > 01 ;" +
533 expect("$s = [z$] ;" +
540 "abzababbabxzabxabx",
545 * Test pattern quoting and escape mechanisms.
547 public void TestPatternQuoting() {
549 // Each item is <rules>, <input>, <expected output>
551 "\u4E01>'[male adult]'", "\u4E01", "[male adult]",
554 for (int i=0; i<DATA.length; i+=3) {
555 logln("Pattern: " + Utility.escape(DATA[i]));
556 Transliterator t = Transliterator.createFromRules("<ID>", DATA[i], Transliterator.FORWARD);
557 expect(t, DATA[i+1], DATA[i+2]);
561 public void TestVariableNames() {
562 Transliterator gl = Transliterator.createFromRules("foo5", "$\u2DC0 = qy; a>b;", Transliterator.FORWARD);
564 errln("FAIL: null Transliterator returned.");
569 * Regression test for bugs found in Greek transliteration.
571 public void TestJ277() {
572 Transliterator gl = Transliterator.getInstance("Greek-Latin; NFD; [:M:]Remove; NFC");
574 char sigma = (char)0x3C3;
575 char upsilon = (char)0x3C5;
576 char nu = (char)0x3BD;
577 // not used char PHI = (char)0x3A6;
578 char alpha = (char)0x3B1;
579 // not used char omega = (char)0x3C9;
580 // not used char omicron = (char)0x3BF;
581 // not used char epsilon = (char)0x3B5;
583 // sigma upsilon nu -> syn
584 StringBuffer buf = new StringBuffer();
585 buf.append(sigma).append(upsilon).append(nu);
586 String syn = buf.toString();
587 expect(gl, syn, "syn");
589 // sigma alpha upsilon nu -> saun
591 buf.append(sigma).append(alpha).append(upsilon).append(nu);
592 String sayn = buf.toString();
593 expect(gl, sayn, "saun");
595 // Again, using a smaller rule set
600 "$ypsilon = \u03C5;" +
601 "$vowel = [aeiouAEIOU$alpha$ypsilon];" +
604 "u <> $vowel { $ypsilon;" +
607 Transliterator mini = Transliterator.createFromRules
608 ("mini", rules, Transliterator.REVERSE);
609 expect(mini, syn, "syn");
610 expect(mini, sayn, "saun");
612 //| // Transliterate the Greek locale data
614 //| DateFormatSymbols syms(el, status);
615 //| if (U_FAILURE(status)) { errln("FAIL: Transliterator constructor failed"); return; }
616 //| int32_t i, count;
617 //| const UnicodeString* data = syms.getMonths(count);
618 //| for (i=0; i<count; ++i) {
619 //| if (data[i].length() == 0) {
622 //| UnicodeString out(data[i]);
623 //| gl->transliterate(out);
624 //| bool_t ok = TRUE;
625 //| if (data[i].length() >= 2 && out.length() >= 2 &&
626 //| u_isupper(data[i].charAt(0)) && u_islower(data[i].charAt(1))) {
627 //| if (!(u_isupper(out.charAt(0)) && u_islower(out.charAt(1)))) {
632 //| logln(prettify(data[i] + " -> " + out));
634 //| errln(UnicodeString("FAIL: ") + prettify(data[i] + " -> " + out));
640 // * Prefix, suffix support in hex transliterators
642 // public void TestJ243() {
643 // // Test default Hex-Any, which should handle
644 // // \\u, \\U, u+, and U+
645 // HexToUnicodeTransliterator hex = new HexToUnicodeTransliterator();
646 // expect(hex, "\\u0041+\\U0042,u+0043uu+0044z", "A+B,CuDz");
648 // // Try a custom Hex-Any
649 // // \\uXXXX and &#xXXXX;
650 // HexToUnicodeTransliterator hex2 = new HexToUnicodeTransliterator("\\\\u###0;&\\#x###0\\;");
651 // expect(hex2, "\\u61\\u062\\u0063\\u00645\\u66x0123",
652 // "abcd5fx0123");
654 // // Try custom Any-Hex (default is tested elsewhere)
655 // UnicodeToHexTransliterator hex3 = new UnicodeToHexTransliterator("&\\#x###0;");
656 // expect(hex3, "012", "012");
659 public void TestJ329() {
662 Boolean.FALSE, "a > b; c > d",
663 Boolean.TRUE, "a > b; no operator; c > d",
666 for (int i=0; i<DATA.length; i+=2) {
669 Transliterator.createFromRules("<ID>",
671 Transliterator.FORWARD);
672 } catch (IllegalArgumentException e) {
673 err = e.getMessage();
675 boolean gotError = (err != null);
676 String desc = (String) DATA[i+1] +
677 (gotError ? (" -> error: " + err) : " -> no error");
678 if ((err != null) == ((Boolean)DATA[i]).booleanValue()) {
679 logln("Ok: " + desc);
681 errln("FAIL: " + desc);
687 * Test segments and segment references.
689 public void TestSegments() {
691 // Each item is <rules>, <input>, <expected output>
693 "([a-z]) '.' ([0-9]) > $2 '-' $1",
698 for (int i=0; i<DATA.length; i+=3) {
699 logln("Pattern: " + Utility.escape(DATA[i]));
700 Transliterator t = Transliterator.createFromRules("<ID>", DATA[i], Transliterator.FORWARD);
701 expect(t, DATA[i+1], DATA[i+2]);
706 * Test cursor positioning outside of the key
708 public void TestCursorOffset() {
710 // Each item is <rules>, <input>, <expected output>
712 "pre {alpha} post > | @ ALPHA ;" +
714 "pre {beta} post > BETA @@ | ;" +
717 "prealphapost prebetapost",
718 "prbetaxyz preBETApost",
721 for (int i=0; i<DATA.length; i+=3) {
722 logln("Pattern: " + Utility.escape(DATA[i]));
723 Transliterator t = Transliterator.createFromRules("<ID>", DATA[i], Transliterator.FORWARD);
724 expect(t, DATA[i+1], DATA[i+2]);
729 * Test zero length and > 1 char length variable values. Test
730 * use of variable refs in UnicodeSets.
732 public void TestArbitraryVariableValues() {
734 // Each item is <rules>, <input>, <expected output>
740 "$llY = [$ll$pat];" +
754 for (int i=0; i<DATA.length; i+=3) {
755 logln("Pattern: " + Utility.escape(DATA[i]));
756 Transliterator t = Transliterator.createFromRules("<ID>", DATA[i], Transliterator.FORWARD);
757 expect(t, DATA[i+1], DATA[i+2]);
762 * Confirm that the contextStart, contextLimit, start, and limit
765 public void TestPositionHandling() {
767 // Each item is <rules>, <input>, <expected output>
769 "a{t} > SS ; {t}b > UU ; {t} > TT ;",
770 "xtat txtb", // pos 0,9,0,9
773 "a{t} > SS ; {t}b > UU ; {t} > TT ;",
774 "xtat txtb", // pos 2,9,3,8
777 "a{t} > SS ; {t}b > UU ; {t} > TT ;",
778 "xtat txtb", // pos 3,8,3,8
782 // Array of 4n positions -- these go with the DATA array
783 // They are: contextStart, contextLimit, start, limit
790 int n = DATA.length/3;
791 for (int i=0; i<n; i++) {
792 Transliterator t = Transliterator.createFromRules("<ID>", DATA[3*i], Transliterator.FORWARD);
793 Transliterator.Position pos = new Transliterator.Position(
794 POS[4*i], POS[4*i+1], POS[4*i+2], POS[4*i+3]);
795 ReplaceableString rsource = new ReplaceableString(DATA[3*i+1]);
796 t.transliterate(rsource, pos);
797 t.finishTransliteration(rsource, pos);
798 String result = rsource.toString();
799 String exp = DATA[3*i+2];
800 expectAux(Utility.escape(DATA[3*i]),
809 * Test the Hiragana-Katakana transliterator.
811 public void TestHiraganaKatakana() {
812 Transliterator hk = Transliterator.getInstance("Hiragana-Katakana");
813 Transliterator kh = Transliterator.getInstance("Katakana-Hiragana");
816 // Each item is "hk"|"kh"|"both", <Hiragana>, <Katakana>
819 "\u3042\u3090\u3099\u3092\u3050",
820 "\u30A2\u30F8\u30F2\u30B0",
823 "\u307C\u3051\u3060\u3042\u3093\u30FC",
824 "\u30DC\u30F6\u30C0\u30FC\u30F3\u30FC",
827 for (int i=0; i<DATA.length; i+=3) {
828 switch (DATA[i].charAt(0)) {
829 case 'h': // Hiragana-Katakana
830 expect(hk, DATA[i+1], DATA[i+2]);
832 case 'k': // Katakana-Hiragana
833 expect(kh, DATA[i+2], DATA[i+1]);
836 expect(hk, DATA[i+1], DATA[i+2]);
837 expect(kh, DATA[i+2], DATA[i+1]);
844 public void TestCopyJ476() {
845 // This is a C++-only copy constructor test
849 * Test inter-Indic transliterators. These are composed.
851 public void TestInterIndic() {
852 String ID = "Devanagari-Gujarati";
853 Transliterator dg = Transliterator.getInstance(ID);
855 errln("FAIL: getInstance(" + ID + ") returned null");
858 String id = dg.getID();
859 if (!id.equals(ID)) {
860 errln("FAIL: getInstance(" + ID + ").getID() => " + id);
862 String dev = "\u0901\u090B\u0925";
863 String guj = "\u0A81\u0A8B\u0AA5";
864 expect(dg, dev, guj);
868 * Test filter syntax in IDs. (J23)
870 public void TestFilterIDs() {
872 "[aeiou]Any-Hex", // ID
873 "[aeiou]Hex-Any", // expected inverse ID
875 "q\\u0075\\u0069zz\\u0069c\\u0061l", // expected ID.translit(src)
877 "[aeiou]Any-Hex;[^5]Hex-Any",
878 "[^5]Any-Hex;[aeiou]Hex-Any",
888 for (int i=0; i<DATA.length; i+=4) {
890 Transliterator t = Transliterator.getInstance(ID);
891 expect(t, DATA[i+2], DATA[i+3]);
894 if (!ID.equals(t.getID())) {
895 errln("FAIL: getInstance(" + ID + ").getID() => " +
900 String uID = DATA[i+1];
901 Transliterator u = t.getInverse();
903 errln("FAIL: " + ID + ".getInverse() returned NULL");
904 } else if (!u.getID().equals(uID)) {
905 errln("FAIL: " + ID + ".getInverse().getID() => " +
906 u.getID() + ", expected " + uID);
912 * Test the case mapping transliterators.
914 public void TestCaseMap() {
915 Transliterator toUpper =
916 Transliterator.getInstance("Any-Upper[^xyzXYZ]");
917 Transliterator toLower =
918 Transliterator.getInstance("Any-Lower[^xyzXYZ]");
919 Transliterator toTitle =
920 Transliterator.getInstance("Any-Title[^xyzXYZ]");
922 expect(toUpper, "The quick brown fox jumped over the lazy dogs.",
923 "THE QUICK BROWN FOx JUMPED OVER THE LAzy DOGS.");
924 expect(toLower, "The quIck brown fOX jUMPED OVER THE LAzY dogs.",
925 "the quick brown foX jumped over the lazY dogs.");
926 expect(toTitle, "the quick brown foX caN'T jump over the laZy dogs.",
927 "The Quick Brown FoX Can't Jump Over The LaZy Dogs.");
931 * Test the name mapping transliterators.
933 public void TestNameMap() {
934 Transliterator uni2name =
935 Transliterator.getInstance("Any-Name[^abc]");
936 Transliterator name2uni =
937 Transliterator.getInstance("Name-Any");
939 expect(uni2name, "\u00A0abc\u4E01\u00B5\u0A81\uFFFD\u0004\u0009\u0081\uFFFF",
940 "\\N{NO-BREAK SPACE}abc\\N{CJK UNIFIED IDEOGRAPH-4E01}\\N{MICRO SIGN}\\N{GUJARATI SIGN CANDRABINDU}\\N{REPLACEMENT CHARACTER}\\N{<control-0004>}\\N{<control-0009>}\\N{<control-0081>}\\N{<noncharacter-FFFF>}");
941 expect(name2uni, "{\\N { NO-BREAK SPACE}abc\\N{ CJK UNIFIED IDEOGRAPH-4E01 }\\N{x\\N{MICRO SIGN}\\N{GUJARATI SIGN CANDRABINDU}\\N{REPLACEMENT CHARACTER}\\N{<control-0004>}\\N{<control-0009>}\\N{<control-0081>}\\N{<noncharacter-FFFF>}\\N{<control-0004>}\\N{",
942 "{\u00A0abc\u4E01\\N{x\u00B5\u0A81\uFFFD\u0004\u0009\u0081\uFFFF\u0004\\N{");
945 Transliterator t = Transliterator.getInstance("Any-Name;Name-Any");
947 String s = "{\u00A0abc\u4E01\\N{x\u00B5\u0A81\uFFFD\u0004\u0009\u0081\uFFFF\u0004\\N{";
952 * Test liberalized ID syntax. 1006c
954 public void TestLiberalizedID() {
955 // Some test cases have an expected getID() value of NULL. This
956 // means I have disabled the test case for now. This stuff is
957 // still under development, and I haven't decided whether to make
958 // getID() return canonical case yet. It will all get rewritten
959 // with the move to Source-Target/Variant IDs anyway. [aliu]
961 "latin-greek", null /*"Latin-Greek"*/, "case insensitivity",
962 " Null ", "Null", "whitespace",
963 " Latin[a-z]-Greek ", "[a-z]Latin-Greek", "inline filter",
964 " null ; latin-greek ", null /*"Null;Latin-Greek"*/, "compound whitespace",
967 for (int i=0; i<DATA.length; i+=3) {
969 Transliterator t = Transliterator.getInstance(DATA[i]);
970 if (DATA[i+1] == null || DATA[i+1].equals(t.getID())) {
971 logln("Ok: " + DATA[i+2] +
972 " create ID \"" + DATA[i] + "\" => \"" +
975 errln("FAIL: " + DATA[i+2] +
976 " create ID \"" + DATA[i] + "\" => \"" +
977 t.getID() + "\", exp \"" + DATA[i+1] + "\"");
979 } catch (IllegalArgumentException e) {
980 errln("FAIL: " + DATA[i+2] +
981 " create ID \"" + DATA[i] + "\"");
986 public void TestCreateInstance() {
987 String FORWARD = "F";
988 String REVERSE = "R";
991 // Column 2: direction
992 // Column 3: expected ID, or "" if expect failure
993 "Latin-Hangul", REVERSE, "Hangul-Latin", // JB#912
995 // JB#2689: bad compound causes crash
996 "InvalidSource-InvalidTarget", FORWARD, "",
997 "InvalidSource-InvalidTarget", REVERSE, "",
998 "Hex-Any;InvalidSource-InvalidTarget", FORWARD, "",
999 "Hex-Any;InvalidSource-InvalidTarget", REVERSE, "",
1000 "InvalidSource-InvalidTarget;Hex-Any", FORWARD, "",
1001 "InvalidSource-InvalidTarget;Hex-Any", REVERSE, "",
1006 for (int i=0; DATA[i]!=null; i+=3) {
1008 int dir = (DATA[i+1]==FORWARD)?
1009 Transliterator.FORWARD:Transliterator.REVERSE;
1010 String expID=DATA[i+2];
1014 t = Transliterator.getInstance(id,dir);
1015 } catch (Exception e1) {
1019 String newID = (t!=null)?t.getID():"";
1020 boolean ok = (newID.equals(expID));
1022 newID = e.getMessage();
1025 logln("Ok: createInstance(" +
1026 id + "," + DATA[i+1] + ") => " + newID);
1028 errln("FAIL: createInstance(" +
1029 id + "," + DATA[i+1] + ") => " + newID +
1030 ", expected " + expID);
1036 * Test the normalization transliterator.
1038 public void TestNormalizationTransliterator() {
1039 // THE FOLLOWING TWO TABLES ARE COPIED FROM com.ibm.icu.dev.test.normalizer.BasicTest
1040 // PLEASE KEEP THEM IN SYNC WITH BasicTest.
1041 String[][] CANON = {
1042 // Input Decomposed Composed
1043 {"cat", "cat", "cat" },
1044 {"\u00e0ardvark", "a\u0300ardvark", "\u00e0ardvark" },
1046 {"\u1e0a", "D\u0307", "\u1e0a" }, // D-dot_above
1047 {"D\u0307", "D\u0307", "\u1e0a" }, // D dot_above
1049 {"\u1e0c\u0307", "D\u0323\u0307", "\u1e0c\u0307" }, // D-dot_below dot_above
1050 {"\u1e0a\u0323", "D\u0323\u0307", "\u1e0c\u0307" }, // D-dot_above dot_below
1051 {"D\u0307\u0323", "D\u0323\u0307", "\u1e0c\u0307" }, // D dot_below dot_above
1053 {"\u1e10\u0307\u0323", "D\u0327\u0323\u0307","\u1e10\u0323\u0307"}, // D dot_below cedilla dot_above
1054 {"D\u0307\u0328\u0323","D\u0328\u0323\u0307","\u1e0c\u0328\u0307"}, // D dot_above ogonek dot_below
1056 {"\u1E14", "E\u0304\u0300", "\u1E14" }, // E-macron-grave
1057 {"\u0112\u0300", "E\u0304\u0300", "\u1E14" }, // E-macron + grave
1058 {"\u00c8\u0304", "E\u0300\u0304", "\u00c8\u0304" }, // E-grave + macron
1060 {"\u212b", "A\u030a", "\u00c5" }, // angstrom_sign
1061 {"\u00c5", "A\u030a", "\u00c5" }, // A-ring
1063 {"\u00fdffin", "y\u0301ffin", "\u00fdffin" }, //updated with 3.0
1064 {"\u00fd\uFB03n", "y\u0301\uFB03n", "\u00fd\uFB03n" }, //updated with 3.0
1066 {"Henry IV", "Henry IV", "Henry IV" },
1067 {"Henry \u2163", "Henry \u2163", "Henry \u2163" },
1069 {"\u30AC", "\u30AB\u3099", "\u30AC" }, // ga (Katakana)
1070 {"\u30AB\u3099", "\u30AB\u3099", "\u30AC" }, // ka + ten
1071 {"\uFF76\uFF9E", "\uFF76\uFF9E", "\uFF76\uFF9E" }, // hw_ka + hw_ten
1072 {"\u30AB\uFF9E", "\u30AB\uFF9E", "\u30AB\uFF9E" }, // ka + hw_ten
1073 {"\uFF76\u3099", "\uFF76\u3099", "\uFF76\u3099" }, // hw_ka + ten
1075 {"A\u0300\u0316", "A\u0316\u0300", "\u00C0\u0316" },
1078 String[][] COMPAT = {
1079 // Input Decomposed Composed
1080 {"\uFB4f", "\u05D0\u05DC", "\u05D0\u05DC" }, // Alef-Lamed vs. Alef, Lamed
1082 {"\u00fdffin", "y\u0301ffin", "\u00fdffin" }, //updated for 3.0
1083 {"\u00fd\uFB03n", "y\u0301ffin", "\u00fdffin" }, // ffi ligature -> f + f + i
1085 {"Henry IV", "Henry IV", "Henry IV" },
1086 {"Henry \u2163", "Henry IV", "Henry IV" },
1088 {"\u30AC", "\u30AB\u3099", "\u30AC" }, // ga (Katakana)
1089 {"\u30AB\u3099", "\u30AB\u3099", "\u30AC" }, // ka + ten
1091 {"\uFF76\u3099", "\u30AB\u3099", "\u30AC" }, // hw_ka + ten
1094 Transliterator NFD = Transliterator.getInstance("NFD");
1095 Transliterator NFC = Transliterator.getInstance("NFC");
1096 for (int i=0; i<CANON.length; ++i) {
1097 String in = CANON[i][0];
1098 String expd = CANON[i][1];
1099 String expc = CANON[i][2];
1100 expect(NFD, in, expd);
1101 expect(NFC, in, expc);
1104 Transliterator NFKD = Transliterator.getInstance("NFKD");
1105 Transliterator NFKC = Transliterator.getInstance("NFKC");
1106 for (int i=0; i<COMPAT.length; ++i) {
1107 String in = COMPAT[i][0];
1108 String expkd = COMPAT[i][1];
1109 String expkc = COMPAT[i][2];
1110 expect(NFKD, in, expkd);
1111 expect(NFKC, in, expkc);
1114 Transliterator t = Transliterator.getInstance("NFD; [x]Remove");
1115 expect(t, "\u010dx", "c\u030C");
1119 * Test compound RBT rules.
1121 public void TestCompoundRBT() {
1122 // Careful with spacing and ';' here: Phrase this exactly
1123 // as toRules() is going to return it. If toRules() changes
1124 // with regard to spacing or ';', then adjust this string.
1125 String rule = "::Hex-Any;\n" +
1130 Transliterator t = Transliterator.createFromRules("Test", rule, Transliterator.FORWARD);
1132 errln("FAIL: createFromRules failed");
1135 expect(t, "\u0043at in the hat, bat on the mat",
1136 "C.A.t IN tHE H.A.t, .B..A.t ON tHE M.A.t");
1137 String r = t.toRules(true);
1138 if (r.equals(rule)) {
1139 logln("OK: toRules() => " + r);
1141 errln("FAIL: toRules() => " + r +
1142 ", expected " + rule);
1146 t = Transliterator.getInstance("Greek-Latin; Latin-Cyrillic", Transliterator.FORWARD);
1148 errln("FAIL: createInstance failed");
1151 String exp = "::Greek-Latin;\n::Latin-Cyrillic;";
1152 r = t.toRules(true);
1153 if (!r.equals(exp)) {
1154 errln("FAIL: toRules() => " + r +
1155 ", expected " + exp);
1157 logln("OK: toRules() => " + r);
1160 // Round trip the result of toRules
1161 t = Transliterator.createFromRules("Test", r, Transliterator.FORWARD);
1163 errln("FAIL: createFromRules #2 failed");
1166 logln("OK: createFromRules(" + r + ") succeeded");
1169 // Test toRules again
1170 r = t.toRules(true);
1171 if (!r.equals(exp)) {
1172 errln("FAIL: toRules() => " + r +
1173 ", expected " + exp);
1175 logln("OK: toRules() => " + r);
1178 // Test Foo(Bar) IDs. Careful with spacing in id; make it conform
1179 // to what the regenerated ID will look like.
1180 String id = "Upper(Lower);(NFKC)";
1181 t = Transliterator.getInstance(id, Transliterator.FORWARD);
1183 errln("FAIL: createInstance #2 failed");
1186 if (t.getID().equals(id)) {
1187 logln("OK: created " + id);
1189 errln("FAIL: createInstance(" + id +
1190 ").getID() => " + t.getID());
1193 Transliterator u = t.getInverse();
1195 errln("FAIL: createInverse failed");
1198 exp = "NFKC();Lower(Upper)";
1199 if (u.getID().equals(exp)) {
1200 logln("OK: createInverse(" + id + ") => " +
1203 errln("FAIL: createInverse(" + id + ") => " +
1209 * Compound filter semantics were originally not implemented
1210 * correctly. Originally, each component filter f(i) is replaced by
1211 * f'(i) = f(i) && g, where g is the filter for the compound
1216 * Suppose I have a transliterator X. Internally X is
1217 * "Greek-Latin; Latin-Cyrillic; Any-Lower". I use a filter [^A].
1219 * The compound should convert all greek characters (through latin) to
1220 * cyrillic, then lowercase the result. The filter should say "don't
1221 * touch 'A' in the original". But because an intermediate result
1222 * happens to go through "A", the Greek Alpha gets hung up.
1224 public void TestCompoundFilter() {
// Build the compound "Greek-Latin; Latin-Greek; Lower" in the forward direction.
1225 Transliterator t = Transliterator.getInstance
1226 ("Greek-Latin; Latin-Greek; Lower", Transliterator.FORWARD);
// The filter [^A] is set on the compound as a whole, not on any single component.
1227 t.setFilter(new UnicodeSet("[^A]"));
1229 // Only the 'A' at index 1 should remain unchanged
// Source is "BA" + U+039A + U+0391; expected result is U+03B2 + "A" + U+03BA + U+03B1,
// i.e. everything except the literal 'A' comes out as lowercase Greek.
1231 CharsToUnicodeString("BA\\u039A\\u0391"),
1232 CharsToUnicodeString("\\u03b2A\\u03ba\\u03b1"));
1236 * Test the "Remove" transliterator.
1238 public void TestRemove() {
// Instantiate by the ID "Remove[aeiou]" (default direction) and run it over a sample sentence.
// The expected output is the continuation of the expect(...) call below.
1239 Transliterator t = Transliterator.getInstance("Remove[aeiou]");
1240 expect(t, "The quick brown fox.",
/**
 * Table-driven check of Transliterator.toRules() and UnicodeSet.toPattern().
 * The DATA table is walked three entries at a time: a kind marker, the input
 * (rules or set pattern), and the expected escaped output.
 */
1244 public void TestToRules() {
1249 "$a=\\u4E61; [$a] > A;",
1253 "$white=[[:Zs:][:Zl:]]; $white{a} > A;",
1254 "[[:Zs:][:Zl:]]{a} > A;",
1281 "$white=[:Zs:]; $black=[^$white]; $black{a} > A;",
1282 "[^[:Zs:]]{a} > A;",
1285 "$a=[:Zs:]; $b=[[a-z]-$a]; $b{a} > A;",
1286 "[[a-z]-[:Zs:]]{a} > A;",
1289 "$a=[:Zs:]; $b=[$a&[a-z]]; $b{a} > A;",
1290 "[[:Zs:]&[a-z]]{a} > A;",
1293 "$a=[:Zs:]; $b=[x$a]; $b{a} > A;",
1294 "[x[:Zs:]]{a} > A;",
1297 "$accentMinus = [ [\\u0300-\\u0345] & [:M:] - [\\u0338]] ;"+
1298 "$macron = \\u0304 ;"+
1299 "$evowel = [aeiouyAEIOUY] ;"+
1300 "$iotasub = \\u0345 ;"+
1301 "($evowel $macron $accentMinus *) i > | $1 $iotasub ;",
1302 "([AEIOUYaeiouy]\\u0304[[\\u0300-\\u0345]&[:M:]-[\\u0338]]*)i > | $1 \\u0345;",
1305 "([AEIOUYaeiouy]\\u0304[[:M:]-[\\u0304\\u0345]]*)i > | $1 \\u0345;",
1306 "([AEIOUYaeiouy]\\u0304[[:M:]-[\\u0304\\u0345]]*)i > | $1 \\u0345;",
// Step through the table one triple at a time.
1309 for (int d=0; d < DATA.length; d+=3) {
// NOTE(review): reference comparison on a String; this only works if the table stores
// the RBT constant object itself rather than an equal copy - confirm.
1310 if (DATA[d] == RBT) {
1311 // Transliterator test
1312 Transliterator t = Transliterator.createFromRules("ID",
1313 DATA[d+1], Transliterator.FORWARD);
1315 errln("FAIL: createFromRules failed");
1318 String rules, escapedRules;
// Generate the rules twice: once unescaped, once with escaping enabled.
1319 rules = t.toRules(false);
1320 escapedRules = t.toRules(true);
// The table holds the escaped form; unescape it to get the expectation for toRules(false).
1321 String expRules = Utility.unescape(DATA[d+2]);
1322 String expEscapedRules = DATA[d+2];
1323 if (rules.equals(expRules)) {
1324 logln("Ok: " + DATA[d+1] +
1325 " => " + Utility.escape(rules));
1327 errln("FAIL: " + DATA[d+1] +
1328 " => " + Utility.escape(rules + ", exp " + expRules));
1330 if (escapedRules.equals(expEscapedRules)) {
1331 logln("Ok: " + DATA[d+1] +
1332 " => " + escapedRules);
1334 errln("FAIL: " + DATA[d+1] +
1335 " => " + escapedRules + ", exp " + expEscapedRules);
// UnicodeSet entry (presumably the alternative to the RBT branch above - confirm):
// parse the pattern and compare toPattern(true) with the expected pattern.
1340 String pat = DATA[d+1];
1341 String expToPat = DATA[d+2];
1342 UnicodeSet set = new UnicodeSet(pat);
1344 // Adjust spacing etc. as necessary.
1346 toPat = set.toPattern(true);
1347 if (expToPat.equals(toPat)) {
1348 logln("Ok: " + pat +
1351 errln("FAIL: " + pat +
1352 " => " + Utility.escape(toPat) +
// NOTE(review): the value printed after "exp" is pat, but the comparison above was made
// against expToPat - the message probably should report expToPat.
1353 ", exp " + Utility.escape(pat));
/**
 * Exercises rules that use the context markers '{' and '}'.
 */
1359 public void TestContext() {
// Per the trailing abbreviation, the four arguments are (in order) context start,
// context limit, start and limit: here 0, 2, 0, 1.
1360 Transliterator.Position pos = new Transliterator.Position(0, 2, 0, 1); // cs cl s l
// Two rules for 'd': one matching "de" outright, one matching 'd' with 'e' as post-context.
1362 expect("de > x; {d}e > y;",
// Rule whose match 'c' requires the ante-context "ab".
1367 expect("ab{c} > z;",
/**
 * Converts the backslash escape sequences in s into the characters they denote.
 * This is a thin wrapper: it returns whatever Utility.unescape(s) returns.
 */
1372 static final String CharsToUnicodeString(String s) {
1373 return Utility.unescape(s);
/**
 * Checks rule-based and built-in transliterators on input containing
 * supplementary code points (above U+FFFF), written here with 8-digit escapes
 * that CharsToUnicodeString expands.
 */
1376 public void TestSupplemental() {
// Supplementary characters used as a variable value and as a set range.
1378 expect(CharsToUnicodeString("$a=\\U00010300; $s=[\\U00010300-\\U00010323];" +
1380 CharsToUnicodeString("ab\\U0001030Fx"),
1381 CharsToUnicodeString("\\U00010300bix"));
// Segment swap ($2 $1) where each segment may match one supplementary code point.
1383 expect(CharsToUnicodeString("$a=[a-z\\U00010300-\\U00010323];" +
1384 "$b=[A-Z\\U00010400-\\U0001044D];" +
1385 "($a)($b) > $2 $1;"),
1386 CharsToUnicodeString("aB\\U00010300\\U00010400c\\U00010401\\U00010301D"),
1387 CharsToUnicodeString("Ba\\U00010400\\U00010300\\U00010401cD\\U00010301"));
// The trace below shows the expected cursor ('|') position after each step.
1389 // k|ax\\U00010300xm
1391 // k|a\\U00010400\\U00010300xm
1392 // ky|\\U00010400\\U00010300xm
1393 // ky\\U00010400|\\U00010300xm
1395 // ky\\U00010400|\\U00010300\\U00010400m
1396 // ky\\U00010400y|\\U00010400m
1397 expect(CharsToUnicodeString("$a=[a\\U00010300-\\U00010323];" +
1398 "$a {x} > | @ \\U00010400;" +
1399 "{$a} [^\\u0000-\\uFFFF] > y;"),
1400 CharsToUnicodeString("kax\\U00010300xm"),
1401 CharsToUnicodeString("ky\\U00010400y\\U00010400m"));
// Any-Name and Name-Any are checked as mirror images on the same three code points.
1403 expect(Transliterator.getInstance("Any-Name"),
1404 CharsToUnicodeString("\\U00010330\\U000E0061\\u00A0"),
1405 "\\N{GOTHIC LETTER AHSA}\\N{TAG LATIN SMALL LETTER A}\\N{NO-BREAK SPACE}");
1407 expect(Transliterator.getInstance("Name-Any"),
1408 "\\N{GOTHIC LETTER AHSA}\\N{TAG LATIN SMALL LETTER A}\\N{NO-BREAK SPACE}",
1409 CharsToUnicodeString("\\U00010330\\U000E0061\\u00A0"));
// Every Any-Hex variant below receives the same input: U+10330, U+10FF00, U+E0061, U+00A0.
1411 expect(Transliterator.getInstance("Any-Hex/Unicode"),
1412 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1413 "U+10330U+10FF00U+E0061U+00A0");
1415 expect(Transliterator.getInstance("Any-Hex/C"),
1416 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1417 "\\U00010330\\U0010FF00\\U000E0061\\u00A0");
1419 expect(Transliterator.getInstance("Any-Hex/Perl"),
1420 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1421 "\\x{10330}\\x{10FF00}\\x{E0061}\\x{A0}");
// The Java variant is expected to emit each supplementary code point as a surrogate pair.
1423 expect(Transliterator.getInstance("Any-Hex/Java"),
1424 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1425 "\\uD800\\uDF30\\uDBFF\\uDF00\\uDB40\\uDC61\\u00A0");
// NOTE(review): the expected strings for the two XML variants appear here as raw characters
// rather than character references; this looks like an artifact of how the listing was
// extracted - verify against the upstream file.
1427 expect(Transliterator.getInstance("Any-Hex/XML"),
1428 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1429 "𐌰􏼀󠁡 ");
1431 expect(Transliterator.getInstance("Any-Hex/XML10"),
1432 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1433 "𐌰􏼀󠁡 ");
// Filtered Remove: only U+E0061 lies in the filter range, so only it is expected to vanish.
1435 expect(Transliterator.getInstance("[\\U000E0000-\\U000E0FFF] Remove"),
1436 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1437 CharsToUnicodeString("\\U00010330\\U0010FF00\\u00A0"));
/**
 * Exercises the quantifiers +, * and ? in rules: in ante- and post-context,
 * around segments, sets, quoted strings and variable references, and in
 * combination with the cursor-offset marker '@'.
 */
1440 public void TestQuantifier() {
1442 // Make sure @ in a quantified anteContext works
1443 expect("a+ {b} > | @@ c; A > a; (a+ c) > '(' $1 ')';",
1447 // Make sure @ in a quantified postContext works
1448 expect("{b} a+ > c @@ |; (a+) > '(' $1 ')';",
1452 // Make sure @ in a quantified postContext with seg ref works
1453 expect("{(b)} a+ > $1 @@ |; (a+) > '(' $1 ')';",
1457 // Make sure @ past ante context doesn't enter ante context
1458 Transliterator.Position pos = new Transliterator.Position(0, 5, 3, 5);
1459 expect("a+ {b} > | @@ c; x > y; (a+ c) > '(' $1 ')';",
1464 // Make sure @ past post context doesn't pass limit
1465 Transliterator.Position pos2 = new Transliterator.Position(0, 4, 0, 2);
1466 expect("{b} a+ > c @@ |; x > y; a > A;",
1471 // Make sure @ past post context doesn't enter post context
1472 expect("{b} a+ > c @@ |; x > y; a > A;",
// Optional segment: (ab)? may match zero or one time before 'c'.
1476 expect("(ab)? c > d;",
1480 // NOTE: The (ab)+ when referenced just yields a single "ab",
1481 // not the full sequence of them. This accords with perl behavior.
1482 expect("(ab)+ {x} > '(' $1 ')';",
1484 "x ab(ab) abab(ab)y");
1487 "ac abc abbc abbbc",
// Quantified set: any run of a, b or c is replaced by a single x.
1490 expect("[abc]+ > x;",
1491 "qac abrc abbcs abtbbc",
// Quantified segment as the match, with 'q' as ante-context (so the q is kept).
1494 expect("q{(ab)+} > x;",
1495 "qa qab qaba qababc qaba",
1496 "qa qx qxa qxc qxa");
// With *, a bare "q" also matches (zero repetitions).
1498 expect("q(ab)* > x;",
1499 "qa qab qaba qababc",
1502 // NOTE: The (ab)* when referenced just yields a single "ab",
1503 // not the full sequence of them. This accords with perl behavior.
// When the segment matched zero times, $1 is empty: "qa" is expected to become "()a".
1504 expect("q(ab)* > '(' $1 ')';",
1505 "qa qab qaba qababc",
1506 "()a (ab) (ab)a (ab)c");
1508 // 'foo'+ and 'foo'* -- the quantifier should apply to the entire
1510 expect("'ab'+ > x;",
1514 // $foo+ and $foo* -- the quantifier should apply to the entire
1515 // variable reference
1516 expect("$var = ab; $var+ > x;",
/**
 * Transliterator.Factory used by the registration test. Its getInstance
 * ignores the ID it is asked for and returns a NameableNullTrans built from
 * the factory's own id (presumably stored by the constructor - the assignment
 * is not in the visible lines).
 */
1521 static class TestFact implements Transliterator.Factory {
// A transliterator that changes no text and exists only to carry a caller-chosen ID.
1522 static class NameableNullTrans extends Transliterator {
1523 public NameableNullTrans(String id) {
1526 protected void handleTransliterate(Replaceable text,
1527 Position offsets, boolean incremental) {
// Leave the text untouched; just advance start to limit.
1528 offsets.start = offsets.limit;
1532 public TestFact(String theID) {
// The requested ID is deliberately unused, as the parameter name says.
1535 public Transliterator getInstance(String ignoredID) {
1536 return new NameableNullTrans(id);
/**
 * Two-part test of the source/target/variant registry API. Part one
 * enumerates every available source, its targets and their variants, failing
 * on an empty source or target name. Part two registers three throwaway
 * factories, checks that they can be instantiated and unregistered, and
 * checks that the getAvailable* enumerations no longer list them.
 */
1540 public void TestSTV() {
1541 Enumeration es = Transliterator.getAvailableSources();
1542 for (int i=0; es.hasMoreElements(); ++i) {
1543 String source = (String) es.nextElement();
1544 logln("" + i + ": " + source);
1545 if (source.length() == 0) {
1546 errln("FAIL: empty source");
1549 Enumeration et = Transliterator.getAvailableTargets(source);
1550 for (int j=0; et.hasMoreElements(); ++j) {
1551 String target = (String) et.nextElement();
1552 logln(" " + j + ": " + target);
1553 if (target.length() == 0) {
1554 errln("FAIL: empty target");
1557 Enumeration ev = Transliterator.getAvailableVariants(source, target);
1558 for (int k=0; ev.hasMoreElements(); ++k) {
1559 String variant = (String) ev.nextElement();
// An empty variant is only logged, unlike an empty source or target, which is an error.
1560 if (variant.length() == 0) {
1561 logln(" " + k + ": <empty>");
1563 logln(" " + k + ": " + variant);
1569 // Test registration
// IDS are the IDs as registered; FULL_IDS are the forms compared against getAvailableIDs()
// (the first gains an "Any-" prefix); SOURCES are the source parts, null for the first entry.
1570 String[] IDS = { "Fieruwer", "Seoridf-Sweorie", "Oewoir-Oweri/Vsie" };
1571 String[] FULL_IDS = { "Any-Fieruwer", "Seoridf-Sweorie", "Oewoir-Oweri/Vsie" };
1572 String[] SOURCES = { null, "Seoridf", "Oewoir" };
1573 for (int i=0; i<3; ++i) {
1574 Transliterator.registerFactory(IDS[i], new TestFact(IDS[i]));
1576 Transliterator t = Transliterator.getInstance(IDS[i]);
1577 if (t.getID().equals(IDS[i])) {
1578 logln("Ok: Registration/creation succeeded for ID " +
1581 errln("FAIL: Registration of ID " +
1582 IDS[i] + " creates ID " + t.getID());
1584 Transliterator.unregister(IDS[i]);
// After unregistering, getInstance is expected to throw; reaching the errln means it did not.
1586 t = Transliterator.getInstance(IDS[i]);
1587 errln("FAIL: Unregistration failed for ID " +
1588 IDS[i] + "; still receiving ID " + t.getID());
1589 } catch (IllegalArgumentException e2) {
1590 // Good; this is what we expect
1591 logln("Ok; Unregistered " + IDS[i]);
1593 } catch (IllegalArgumentException e) {
1594 errln("FAIL: Registration/creation failed for ID " +
1597 Transliterator.unregister(IDS[i]);
1601 // Make sure getAvailable API reflects removal
1602 for (Enumeration e = Transliterator.getAvailableIDs();
1603 e.hasMoreElements(); ) {
1604 String id = (String) e.nextElement();
1605 for (int i=0; i<3; ++i) {
1606 if (id.equals(FULL_IDS[i])) {
1607 errln("FAIL: unregister(" + id + ") failed");
// The first test ID has no explicit source, so it is looked for among the targets of "Any".
1611 for (Enumeration e = Transliterator.getAvailableTargets("Any");
1612 e.hasMoreElements(); ) {
1613 String t = (String) e.nextElement();
1614 if (t.equals(IDS[0])) {
1615 errln("FAIL: unregister(Any-" + t + ") failed");
1618 for (Enumeration e = Transliterator.getAvailableSources();
1619 e.hasMoreElements(); ) {
1620 String s = (String) e.nextElement();
1621 for (int i=0; i<3; ++i) {
// Entries with a null source are skipped in this check.
1622 if (SOURCES[i] == null) continue;
1623 if (s.equals(SOURCES[i])) {
1624 errln("FAIL: unregister(" + s + "-*) failed");
1631 * Test inverse of Greek-Latin; Title()
1633 public void TestCompoundInverse() {
// Request the compound in the REVERSE direction; the ID of the result is what is tested.
1634 Transliterator t = Transliterator.getInstance
1635 ("Greek-Latin; Title()", Transliterator.REVERSE);
1637 errln("FAIL: createInstance");
// Expected reverse ID: components in reverse order, with "Title()" turned into "(Title)"
// and "Greek-Latin" into "Latin-Greek".
1640 String exp = "(Title);Latin-Greek";
1641 if (t.getID().equals(exp)) {
1642 logln("Ok: inverse of \"Greek-Latin; Title()\" is \"" +
1645 errln("FAIL: inverse of \"Greek-Latin; Title()\" is \"" +
1646 t.getID() + "\", expected \"" + exp + "\"");
1651 * Test NFD chaining with RBT
1653 public void TestNFDChainRBT() {
// The rule set opens with a "::NFD" step followed by two ordinary rules.
1654 Transliterator t = Transliterator.createFromRules(
1655 "TEST", "::NFD; aa > Q; a > q;",
1656 Transliterator.FORWARD);
1657 logln(t.toRules(true));
// "aa" must be consumed by the first rule as a unit, giving "Q" rather than "qq".
1658 expect(t, "aa", "Q");
1662 * Inverse of "Null" should be "Null". (J21)
1664 public void TestNullInverse() {
1665 Transliterator t = Transliterator.getInstance("Null");
1666 Transliterator u = t.getInverse();
// Only the ID of the inverse is checked, not its behavior.
1667 if (!u.getID().equals("Null")) {
1668 errln("FAIL: Inverse of Null should be Null");
1673 * Check ID of inverse of alias. (J22)
1675 public void TestAliasInverseID() {
1676 String ID = "Latin-Hangul"; // This should be any alias ID with an inverse
1677 Transliterator t = Transliterator.getInstance(ID);
1678 Transliterator u = t.getInverse();
// The inverse must report the swapped source-target ID.
1679 String exp = "Hangul-Latin";
1680 String got = u.getID();
1681 if (!got.equals(exp)) {
1682 errln("FAIL: Inverse of " + ID + " is " + got +
1683 ", expected " + exp);
1688 * Test IDs of inverses of compound transliterators. (J20)
1690 public void TestCompoundInverseID() {
1691 String ID = "Latin-Jamo;NFC(NFD)";
1692 Transliterator t = Transliterator.getInstance(ID);
1693 Transliterator u = t.getInverse();
// Expected inverse ID: component order reversed, "NFC(NFD)" flipped to "NFD(NFC)" and
// "Latin-Jamo" flipped to "Jamo-Latin".
1694 String exp = "NFD(NFC);Jamo-Latin";
1695 String got = u.getID();
1696 if (!got.equals(exp)) {
1697 errln("FAIL: Inverse of " + ID + " is " + got +
1698 ", expected " + exp);
1703 * Test undefined variable.
1705 public void TestUndefinedVariable() {
// The rule refers to $initial, which the rule text never defines.
1706 String rule = "$initial } a <> \u1161;";
// Compiling it is expected to throw IllegalArgumentException.
1708 Transliterator.createFromRules("<ID>", rule,Transliterator.FORWARD);
1709 } catch (IllegalArgumentException e) {
1710 logln("OK: Got exception for " + rule + ", as expected: " +
// Reported when the bogus rule compiled without throwing.
1714 errln("Fail: bogus rule " + rule + " compiled without error");
1718 * Test empty context.
1720 public void TestEmptyContext() {
// The rule has nothing outside the braces (ante- and post-context are both empty),
// so it should behave like a plain "a > b": every 'a' in the source becomes 'b'.
1721 expect(" { a } > b;", "xay a ", "xby b ");
1725 * Test compound filter ID syntax
1727 public void TestCompoundFilterID() {
// The table is read four entries at a time, as described by the column notes below.
1729 // Col. 1 = ID or rule set (latter must start with #)
1731 // = columns > 1 are null if expect col. 1 to be illegal =
1733 // Col. 2 = direction, "F..." or "R..."
1734 // Col. 3 = source string
1735 // Col. 4 = exp result
1737 "[abc]; [abc]", null, null, null, // multiple filters
1738 "Latin-Greek; [abc];", null, null, null, // misplaced filter
1739 "[b]; Latin-Greek; Upper; ([xyz])", "F", "abc", "a\u0392c",
1740 "[b]; (Lower); Latin-Greek; Upper(); ([\u0392])", "R", "\u0391\u0392\u0393", "\u0391b\u0393",
1741 "#\n::[b]; ::Latin-Greek; ::Upper; ::([xyz]);", "F", "abc", "a\u0392c",
1742 "#\n::[b]; ::(Lower); ::Latin-Greek; ::Upper(); ::([\u0392]);", "R", "\u0391\u0392\u0393", "\u0391b\u0393",
1745 for (int i=0; i<DATA.length; i+=4) {
1746 String id = DATA[i];
// A missing direction column (illegal-ID rows) falls back to FORWARD.
1747 int direction = (DATA[i+1] != null && DATA[i+1].charAt(0) == 'R') ?
1748 Transliterator.REVERSE : Transliterator.FORWARD;
1749 String source = DATA[i+2];
1750 String exp = DATA[i+3];
// A row is expected to be legal exactly when its direction column is non-null.
1751 boolean expOk = (DATA[i+1] != null);
1752 Transliterator t = null;
1753 IllegalArgumentException e = null;
// Entries starting with '#' are rule sets; everything else is a transliterator ID.
1755 if (id.charAt(0) == '#') {
1756 t = Transliterator.createFromRules("ID", id, direction);
1758 t = Transliterator.getInstance(id, direction);
1760 } catch (IllegalArgumentException ee) {
// Creation succeeded if a transliterator came back and no exception was recorded;
// presumably this is then compared with expOk in a line not visible here.
1763 boolean ok = (t != null && e == null);
1765 logln("Ok: " + id + " => " + t +
1766 (e != null ? (", " + e.getMessage()) : ""));
// Only rows that supply a source string are run through expect().
1767 if (source != null) {
1768 expect(t, source, exp);
1771 errln("FAIL: " + id + " => " + t +
1772 (e != null ? (", " + e.getMessage()) : ""));
1778 * Test new property set syntax
1780 public void TestPropertySet() {
// Rules are tried in order: 'a' goes to A, any other uppercase letter (property Lu) to x,
// and every remaining character (property Any) to y.
1781 expect("a>A; \\p{Lu}>x; \\p{Any}>y;", "abcDEF", "Ayyxxx");
// Per the expected output, ".+" matches a run that stops at \n and \r, so each line is
// bracketed separately and the line terminators stay outside the brackets.
1782 expect("(.+)>'[' $1 ']';", " a stitch \n in time \r saves 9",
1783 "[ a stitch ]\n[ in time ]\r[ saves 9]");
1787 * Test various failure points of the new 2.0 engine.
1789 public void TestNewEngine() {
1790 Transliterator t = Transliterator.getInstance("Latin-Hiragana");
1791 // Katakana should be untouched
// Source: 'a', U+3042 (Hiragana), U+30A2 (Katakana). Only the Latin 'a' is expected to change.
1792 expect(t, "a\u3042\u30A2", "\u3042\u3042\u30A2");
1795 // This test will only work if Transliterator.ROLLBACK is
1796 // true. Otherwise, this test will fail, revealing a
1797 // limitation of global filters in incremental mode.
// Two throwaway rule-based transliterators, registered below so that the compound ID can
// refer to them by name.
1800 Transliterator.createFromRules("a_to_A", "a > A;", Transliterator.FORWARD);
1802 Transliterator.createFromRules("A_to_b", "A > b;", Transliterator.FORWARD);
1804 //Transliterator array[] = new Transliterator[] {
1806 // Transliterator.getInstance("NFD"),
1808 //t = Transliterator.getInstance(array, new UnicodeSet("[:Ll:]"));
1811 Transliterator.registerInstance(a);
1812 Transliterator.registerInstance(A);
// Compound with a leading global filter [:Ll:]: lowercase 'a' goes a -> A -> b, while the
// original uppercase 'A' must stay as it is.
1814 t = Transliterator.getInstance("[:Ll:];a_to_A;NFD;A_to_b");
1815 expect(t, "aAaA", "bAbA");
// getElements() is expected to return the three component transliterators only; the
// filter is not counted as an element.
1817 Transliterator[] u = t.getElements();
1818 assertTrue("getElements().length", u.length == 3);
1819 assertEquals("getElements()[0]", u[0].getID(), "a_to_A");
1820 assertEquals("getElements()[1]", u[1].getID(), "NFD");
1821 assertEquals("getElements()[2]", u[2].getID(), "A_to_b");
// Same chain again, this time with the filter supplied through setFilter(); the result
// must be identical.
1823 t = Transliterator.getInstance("a_to_A;NFD;A_to_b");
1824 t.setFilter(new UnicodeSet("[:Ll:]"));
1825 expect(t, "aAaA", "bAbA");
// Remove the temporary registrations again.
1827 Transliterator.unregister("a_to_A");
1828 Transliterator.unregister("A_to_b");
1832 expect("$smooth = x; $macron = q; [:^L:] { ([aeiouyAEIOUY] $macron?) } [^aeiouyAEIOUY$smooth$macron] > | $1 $smooth ;",
1837 "$ddot = \u0308 ;" +
1838 "$lcgvowel = [\u03b1\u03b5\u03b7\u03b9\u03bf\u03c5\u03c9] ;" +
1839 "$rough = \u0314 ;" +
1840 "($lcgvowel+ $ddot?) $rough > h | $1 ;" +
// Alpha followed by the combining mark U+0314 ($rough above) is expected to yield "ha".
1844 expect(gr, "\u03B1\u0314", "ha");
1848 * Test quantified segment behavior. We want:
1849 * ([abc])+ > x $1 x; applied to "cba" produces "xax"
1851 public void TestQuantifiedSegment() {
// Quantifier inside the segment: $1 captures the whole run "cba".
1853 expect("([abc]+) > x $1 x;", "cba", "xcbax");
1855 // The tricky case; the quantifier is around the segment
// Here $1 holds only the last repetition, "a".
1856 expect("([abc])+ > x $1 x;", "cba", "xax");
1858 // Tricky case in reverse direction
1859 expect("([abc])+ { q > x $1 x;", "cbaq", "cbaxax");
1861 // Check post-context segment
1862 expect("{q} ([a-d])+ > '(' $1 ')';", "ddqcba", "dd(a)cba");
1864 // Test toRule/toPattern for non-quantified segment.
1865 // Careful with spacing here.
// The rule string must come back from toRules(true) character for character.
1866 String r = "([a-c]){q} > x $1 x;";
1867 Transliterator t = Transliterator.createFromRules("ID", r, Transliterator.FORWARD);
1868 String rr = t.toRules(true);
1869 if (!r.equals(rr)) {
1870 errln("FAIL: \"" + r + "\" x toRules() => \"" + rr + "\"");
1872 logln("Ok: \"" + r + "\" x toRules() => \"" + rr + "\"");
1875 // Test toRule/toPattern for quantified segment.
1876 // Careful with spacing here.
1877 r = "([a-c])+{q} > x $1 x;";
1878 t = Transliterator.createFromRules("ID", r, Transliterator.FORWARD);
1879 rr = t.toRules(true);
1880 if (!r.equals(rr)) {
1881 errln("FAIL: \"" + r + "\" x toRules() => \"" + rr + "\"");
1883 logln("Ok: \"" + r + "\" x toRules() => \"" + rr + "\"");
1887 //======================================================================
1889 //======================================================================
1890 /* this test performs test of rules in ISO 15915 */
1891 public void TestDevanagariLatinRT(){
// Round-trip test over two parallel arrays: source[i] (Latin) must transliterate to
// expected[i] (Devanagari) and back again. The trailing block comments in the expected
// table give the Latin form of each entry.
1907 //"r\u0323ya", // \u095c is not valid in Devanagari
1940 // Not roundtrippable --
1941 // \u0939\u094d\u094d\u092E - hma
1942 // \u0939\u094d\u092E - hma
1943 // CharsToUnicodeString("hma"),
1948 "san\u0304j\u012Bb s\u0113nagupta",
1949 "\u0101nand vaddir\u0101ju",
1951 String[] expected = {
1952 "\u092D\u093E\u0930\u0924", /* bha\u0304rata */
1953 "\u0915\u094D\u0930", /* kra */
1954 "\u0915\u094D\u0937", /* ks\u0323a */
1955 "\u0916\u094D\u0930", /* khra */
1956 "\u0917\u094D\u0930", /* gra */
1957 "\u0919\u094D\u0930", /* n\u0307ra */
1958 "\u091A\u094D\u0930", /* cra */
1959 "\u091B\u094D\u0930", /* chra */
1960 "\u091C\u094D\u091E", /* jn\u0303a */
1961 "\u091D\u094D\u0930", /* jhra */
1962 "\u091E\u094D\u0930", /* n\u0303ra */
1963 "\u091F\u094D\u092F", /* t\u0323ya */
1964 "\u0920\u094D\u0930", /* t\u0323hra */
1965 "\u0921\u094D\u092F", /* d\u0323ya */
1966 //"\u095C\u094D\u092F", /* r\u0323ya */ // \u095c is not valid in Devanagari
1967 "\u0922\u094D\u092F", /* d\u0323hya */
1968 "\u0922\u093C\u094D\u0930", /* r\u0323hra */
1969 "\u0923\u094D\u0930", /* n\u0323ra */
1970 "\u0924\u094D\u0924", /* tta */
1971 "\u0925\u094D\u0930", /* thra */
1972 "\u0926\u094D\u0926", /* dda */
1973 "\u0927\u094D\u0930", /* dhra */
1974 "\u0928\u094D\u0928", /* nna */
1975 "\u092A\u094D\u0930", /* pra */
1976 "\u092B\u094D\u0930", /* phra */
1977 "\u092C\u094D\u0930", /* bra */
1978 "\u092D\u094D\u0930", /* bhra */
1979 "\u092E\u094D\u0930", /* mra */
1980 "\u0929\u094D\u0930", /* n\u0331ra */
1981 //"\u0934\u094D\u0930", /* l\u0331ra */
1982 "\u092F\u094D\u0930", /* yra */
1983 "\u092F\u093C\u094D\u0930", /* y\u0307ra */
1985 "\u0935\u094D\u0930", /* vra */
1986 "\u0936\u094D\u0930", /* s\u0301ra */
1987 "\u0937\u094D\u0930", /* s\u0323ra */
1988 "\u0938\u094D\u0930", /* sra */
1989 "\u0939\u094d\u092E", /* hma */
1990 "\u091F\u094D\u091F", /* t\u0323t\u0323a */
1991 "\u091F\u094D\u0920", /* t\u0323t\u0323ha */
1992 "\u0920\u094D\u0920", /* t\u0323ht\u0323ha*/
1993 "\u0921\u094D\u0921", /* d\u0323d\u0323a */
1994 "\u0921\u094D\u0922", /* d\u0323d\u0323ha */
1995 "\u091F\u094D\u092F", /* t\u0323ya */
1996 "\u0920\u094D\u092F", /* t\u0323hya */
1997 "\u0921\u094D\u092F", /* d\u0323ya */
1998 "\u0922\u094D\u092F", /* d\u0323hya */
2000 "\u0939\u094D\u092F", /* hya */
2001 "\u0936\u0943", /* s\u0301r\u0325a */
2002 "\u0936\u094D\u091A", /* s\u0301ca */
2003 "\u090d", /* e\u0306 */
2004 "\u0938\u0902\u091C\u0940\u092C\u094D \u0938\u0947\u0928\u0917\u0941\u092A\u094D\u0924",
2005 "\u0906\u0928\u0902\u0926\u094D \u0935\u0926\u094D\u0926\u093F\u0930\u093E\u091C\u0941",
// Each direction is a separately requested FORWARD instance, not an inverse of the other.
2008 Transliterator latinToDev=Transliterator.getInstance("Latin-Devanagari", Transliterator.FORWARD );
2009 Transliterator devToLatin=Transliterator.getInstance("Devanagari-Latin", Transliterator.FORWARD);
// Index-aligned walk: the loop is bounded by source.length, so expected must have at
// least as many entries.
2011 for(int i= 0; i<source.length; i++){
2012 expect(latinToDev,(source[i]),(expected[i]));
2013 expect(devToLatin,(expected[i]),(source[i]));
/**
 * Round-trip test between Latin and Telugu over two index-aligned arrays:
 * source[i] must map to expected[i] with Latin-Telugu and back with
 * Telugu-Latin.
 */
2017 public void TestTeluguLatinRT(){
2019 "raghur\u0101m vi\u015Bvan\u0101dha", /* Raghuram Viswanadha */
2020 "\u0101nand vaddir\u0101ju", /* Anand Vaddiraju */
2021 "r\u0101j\u012Bv ka\u015Barab\u0101da", /* Rajeev Kasarabada */
2022 "san\u0304j\u012Bv ka\u015Barab\u0101da", /* sanjeev kasarabada */
2023 "san\u0304j\u012Bb sen'gupta", /* sanjib sengupata */
2024 "amar\u0113ndra hanum\u0101nula", /* Amarendra hanumanula */
2025 "ravi kum\u0101r vi\u015Bvan\u0101dha", /* Ravi Kumar Viswanadha */
2026 "\u0101ditya kandr\u0113gula", /* Aditya Kandregula */
2027 "\u015Br\u012Bdhar ka\u1E47\u1E6Dama\u015Be\u1E6D\u1E6Di", /* Shridhar Kantamsetty */
2028 "m\u0101dhav de\u015Be\u1E6D\u1E6Di" /* Madhav Desetty */
2031 String[] expected = {
2032 "\u0c30\u0c18\u0c41\u0c30\u0c3e\u0c2e\u0c4d \u0c35\u0c3f\u0c36\u0c4d\u0c35\u0c28\u0c3e\u0c27",
2033 "\u0c06\u0c28\u0c02\u0c26\u0c4d \u0C35\u0C26\u0C4D\u0C26\u0C3F\u0C30\u0C3E\u0C1C\u0C41",
2034 "\u0c30\u0c3e\u0c1c\u0c40\u0c35\u0c4d \u0c15\u0c36\u0c30\u0c2c\u0c3e\u0c26",
2035 "\u0c38\u0c02\u0c1c\u0c40\u0c35\u0c4d \u0c15\u0c36\u0c30\u0c2c\u0c3e\u0c26",
2036 "\u0c38\u0c02\u0c1c\u0c40\u0c2c\u0c4d \u0c38\u0c46\u0c28\u0c4d\u0c17\u0c41\u0c2a\u0c4d\u0c24",
2037 "\u0c05\u0c2e\u0c30\u0c47\u0c02\u0c26\u0c4d\u0c30 \u0c39\u0c28\u0c41\u0c2e\u0c3e\u0c28\u0c41\u0c32",
2038 "\u0c30\u0c35\u0c3f \u0c15\u0c41\u0c2e\u0c3e\u0c30\u0c4d \u0c35\u0c3f\u0c36\u0c4d\u0c35\u0c28\u0c3e\u0c27",
2039 "\u0c06\u0c26\u0c3f\u0c24\u0c4d\u0c2f \u0C15\u0C02\u0C26\u0C4D\u0C30\u0C47\u0C17\u0C41\u0c32",
2040 "\u0c36\u0c4d\u0c30\u0c40\u0C27\u0C30\u0C4D \u0c15\u0c02\u0c1f\u0c2e\u0c36\u0c46\u0c1f\u0c4d\u0c1f\u0c3f",
2041 "\u0c2e\u0c3e\u0c27\u0c35\u0c4d \u0c26\u0c46\u0c36\u0c46\u0c1f\u0c4d\u0c1f\u0c3f",
// Despite the "Dev" in their names, these two variables hold the Latin-Telugu and
// Telugu-Latin transliterators.
2045 Transliterator latinToDev=Transliterator.getInstance("Latin-Telugu", Transliterator.FORWARD);
2046 Transliterator devToLatin=Transliterator.getInstance("Telugu-Latin", Transliterator.FORWARD);
2048 for(int i= 0; i<source.length; i++){
2049 expect(latinToDev,(source[i]),(expected[i]));
2050 expect(devToLatin,(expected[i]),(source[i]));
/**
 * Round-trip test between Latin and Devanagari on Sanskrit words: source[i]
 * must map to expected[i] with Latin-Devanagari and back with
 * Devanagari-Latin.
 */
2054 public void TestSanskritLatinRT(){
2059 "bhagavadg\u012Bt\u0101",
2064 "dhr\u0325tar\u0101\u1E63\u1E6Dra",
2066 "dharmak\u1E63\u0113tr\u0113",
2067 "kuruk\u1E63\u0113tr\u0113",
2068 "samav\u0113t\u0101",
2070 "m\u0101mak\u0101\u1E25",
2071 // "p\u0101\u1E47\u1E0Dav\u0101\u015Bcaiva",
2075 String[] expected = {
2076 "\u0930\u094D\u092E\u094D\u0915\u094D\u0937\u0947\u0924\u094D",
2077 "\u0936\u094d\u0930\u0940\u092e\u0926\u094d",
2078 "\u092d\u0917\u0935\u0926\u094d\u0917\u0940\u0924\u093e",
2079 "\u0905\u0927\u094d\u092f\u093e\u092f",
2080 "\u0905\u0930\u094d\u091c\u0941\u0928",
2081 "\u0935\u093f\u0937\u093e\u0926",
2082 "\u092f\u094b\u0917",
2083 "\u0927\u0943\u0924\u0930\u093e\u0937\u094d\u091f\u094d\u0930",
2084 "\u0909\u0935\u093E\u091A\u0943",
2085 "\u0927\u0930\u094d\u092e\u0915\u094d\u0937\u0947\u0924\u094d\u0930\u0947",
2086 "\u0915\u0941\u0930\u0941\u0915\u094d\u0937\u0947\u0924\u094d\u0930\u0947",
2087 "\u0938\u092e\u0935\u0947\u0924\u093e",
2088 "\u092f\u0941\u092f\u0941\u0924\u094d\u0938\u0935\u0903",
2089 "\u092e\u093e\u092e\u0915\u093e\u0903",
2090 //"\u092a\u093e\u0923\u094d\u0921\u0935\u093e\u0936\u094d\u091a\u0948\u0935",
2091 "\u0915\u093f\u092e\u0915\u0941\u0930\u094d\u0935\u0924",
2092 "\u0938\u0902\u091c\u0935",
2095 Transliterator latinToDev=Transliterator.getInstance("Latin-Devanagari", Transliterator.FORWARD);
2096 Transliterator devToLatin=Transliterator.getInstance("Devanagari-Latin", Transliterator.FORWARD);
// NOTE(review): the loop is bounded by MAX_LEN (declared outside the visible lines), not
// by source.length - confirm it matches the number of active entries in both arrays.
2097 for(int i= 0; i<MAX_LEN; i++){
2098 expect(latinToDev,(source[i]),(expected[i]));
2099 expect(devToLatin,(expected[i]),(source[i]));
/**
 * Round-trip test through compound transliterators: each Latin string must
 * survive "Latin-Devanagari;Devanagari-Latin" unchanged, and each Devanagari
 * string must survive "Devanagari-Latin;Latin-Devanagari" unchanged.
 */
2103 public void TestCompoundLatinRT(){
2108 "bhagavadg\u012Bt\u0101",
2113 "dhr\u0325tar\u0101\u1E63\u1E6Dra",
2115 "dharmak\u1E63\u0113tr\u0113",
2116 "kuruk\u1E63\u0113tr\u0113",
2117 "samav\u0113t\u0101",
2119 "m\u0101mak\u0101\u1E25",
2120 // "p\u0101\u1E47\u1E0Dav\u0101\u015Bcaiva",
2124 String[] expected = {
2125 "\u0930\u094D\u092E\u094D\u0915\u094D\u0937\u0947\u0924\u094D",
2126 "\u0936\u094d\u0930\u0940\u092e\u0926\u094d",
2127 "\u092d\u0917\u0935\u0926\u094d\u0917\u0940\u0924\u093e",
2128 "\u0905\u0927\u094d\u092f\u093e\u092f",
2129 "\u0905\u0930\u094d\u091c\u0941\u0928",
2130 "\u0935\u093f\u0937\u093e\u0926",
2131 "\u092f\u094b\u0917",
2132 "\u0927\u0943\u0924\u0930\u093e\u0937\u094d\u091f\u094d\u0930",
2133 "\u0909\u0935\u093E\u091A\u0943",
2134 "\u0927\u0930\u094d\u092e\u0915\u094d\u0937\u0947\u0924\u094d\u0930\u0947",
2135 "\u0915\u0941\u0930\u0941\u0915\u094d\u0937\u0947\u0924\u094d\u0930\u0947",
2136 "\u0938\u092e\u0935\u0947\u0924\u093e",
2137 "\u092f\u0941\u092f\u0941\u0924\u094d\u0938\u0935\u0903",
2138 "\u092e\u093e\u092e\u0915\u093e\u0903",
2139 // "\u092a\u093e\u0923\u094d\u0921\u0935\u093e\u0936\u094d\u091a\u0948\u0935",
2140 "\u0915\u093f\u092e\u0915\u0941\u0930\u094d\u0935\u0924",
2141 "\u0938\u0902\u091c\u0935"
2144 Transliterator latinToDevToLatin=Transliterator.getInstance("Latin-Devanagari;Devanagari-Latin", Transliterator.FORWARD);
2145 Transliterator devToLatinToDev=Transliterator.getInstance("Devanagari-Latin;Latin-Devanagari", Transliterator.FORWARD);
// NOTE(review): bounded by MAX_LEN (declared outside the visible lines) - confirm it
// matches the array sizes.
2146 for(int i= 0; i<MAX_LEN; i++){
// Input and expected output are the same string: the compound must be an identity here.
2147 expect(latinToDevToLatin,(source[i]),(source[i]));
2148 expect(devToLatinToDev,(expected[i]),(expected[i]));
2152 * Test Gurmukhi-Devanagari Tippi and Bindi
2154 public void TestGurmukhiDevanagari(){
2156 // (\u0902) (when preceded by vowel) ---> (\u0A02)
2157 // (\u0902) (when preceded by consonant) ---> (\u0A70)
// Two sets of Devanagari characters to place in front of U+0902: the first is named for
// vowels, the second for everything tested as a non-vowel.
2159 UnicodeSet vowel =new UnicodeSet("[\u0905-\u090A \u090F\u0910\u0913\u0914 \u093e-\u0942\u0947\u0948\u094B\u094C\u094D]");
2160 UnicodeSet non_vowel =new UnicodeSet("[\u0915-\u0928\u092A-\u0930]");
2162 UnicodeSetIterator vIter = new UnicodeSetIterator(vowel);
2163 UnicodeSetIterator nvIter = new UnicodeSetIterator(non_vowel);
2164 Transliterator trans = Transliterator.getInstance("Devanagari-Gurmukhi");
// Two-character buffers: slot 0 is overwritten on every iteration, slot 1 holds the mark
// under test (U+0902 in the source; U+0A02 in the expectation for the first loop).
2165 StringBuffer src = new StringBuffer(" \u0902");
2166 StringBuffer expect = new StringBuffer(" \u0A02");
2167 while(vIter.next()){
2168 src.setCharAt(0,(char) vIter.codepoint);
// The expected leading character is the source code point plus 0x0100, i.e. the same
// offset moved from the 09xx range into the 0Axx range.
2169 expect.setCharAt(0,(char) (vIter.codepoint+0x0100));
2170 expect(trans,src.toString(),expect.toString());
// For the second loop the expected mark changes to U+0A70.
2173 expect.setCharAt(1,'\u0A70');
2174 while(nvIter.next()){
2175 //src.setCharAt(0,(char) nvIter.codepoint);
2176 src.setCharAt(0,(char)nvIter.codepoint);
2177 expect.setCharAt(0,(char) (nvIter.codepoint+0x0100));
2178 expect(trans,src.toString(),expect.toString());
2182 * Test instantiation from a locale.
2184 public void TestLocaleInstantiation() {
// Each case uses a locale ID where a script name would normally appear. A failure to
// instantiate is reported with warnln (missing locale data), not errln.
2187 t = Transliterator.getInstance("te_IN-Latin");
2188 //expect(t, "\u0430", "a");
2189 }catch(IllegalArgumentException ex){
2190 warnln("Could not load locale data for obtaining the script used in the locale te_IN. "+ex.getMessage());
// Locale as source: U+0430 is expected to become Latin 'a'.
2193 t = Transliterator.getInstance("ru_RU-Latin");
2194 expect(t, "\u0430", "a");
2195 }catch(IllegalArgumentException ex){
2196 warnln("Could not load locale data for obtaining the script used in the locale ru_RU. "+ex.getMessage());
// Locales on both sides: Latin 'a' is expected to become U+03B1.
2199 t = Transliterator.getInstance("en-el");
2200 expect(t, "a", "\u03B1");
2201 }catch(IllegalArgumentException ex){
2202 warnln("Could not load locale data for obtaining the script used in the locale el. "+ ex.getMessage());
2207 * Test title case handling of accent (should ignore accents)
2209 public void TestTitleAccents() {
2210 Transliterator t = Transliterator.getInstance("Title");
// The combining accent U+0300 after 'a' must not start a new word (the following 'b' stays
// lowercase), and neither must the apostrophe in "can't".
2211 expect(t, "a\u0300b can't abe", "A\u0300b Can't Abe");
2215 * Basic test of a locale resource based rule.
2217 public void TestLocaleResource() {
// Table of triples: transliterator ID, source, expected result. In the rows below the
// locale-based IDs ("Latin-el", "el-Latin") expect the same output as the corresponding
// /UNGEGN variants, which differs from the plain script-to-script output.
2220 "Latin-Greek/UNGEGN", "b", "\u03bc\u03c0",
2221 "Latin-el", "b", "\u03bc\u03c0",
2222 "Latin-Greek", "b", "\u03B2",
2223 "Greek-Latin/UNGEGN", "\u03B2", "v",
2224 "el-Latin", "\u03B2", "v",
2225 "Greek-Latin", "\u03B2", "b",
2227 for (int i=0; i<DATA.length; i+=3) {
2228 Transliterator t = Transliterator.getInstance(DATA[i]);
2229 expect(t, DATA[i+1], DATA[i+2]);
2234 * Make sure parse errors reference the right line.
2236 public void TestParseError() {
// The rule text is defined in lines not visible here; compiling it is expected to throw.
2242 Transliterator t = Transliterator.createFromRules("ID", rule, Transliterator.FORWARD);
2244 errln("FAIL: Did not get expected exception");
2246 } catch (IllegalArgumentException e) {
2247 String err = e.getMessage();
// The exception message must quote the offending rule text "d << b".
2248 if (err.indexOf("d << b") >= 0) {
2249 logln("Ok: " + err);
2251 errln("FAIL: " + err);
2255 errln("FAIL: no syntax error");
2259 * Make sure sets on output are disallowed.
2261 public void TestOutputSet() {
// $set is a character set and is used on the output side of "b > $set", which must be
// rejected when the rules are compiled.
2262 String rule = "$set = [a-cm-n]; b > $set;";
2263 Transliterator t = null;
2265 t = Transliterator.createFromRules("ID", rule, Transliterator.FORWARD);
2267 errln("FAIL: Did not get the expected exception");
2269 } catch (IllegalArgumentException e) {
2270 logln("Ok: " + e.getMessage());
2273 errln("FAIL: No syntax error");
2277 * Test the use variable range pragma, making sure that use of
2278 * variable range characters is detected and flagged as an error.
2280 public void TestVariableRange() {
// The pragma reserves 0x70..0x72 (the letters p, q, r) for variables, and the last rule
// uses 'q' (0x71) literally, so compilation is expected to fail.
2281 String rule = "use variable range 0x70 0x72; a > A; b > B; q > Q;";
2284 Transliterator.createFromRules("ID", rule, Transliterator.FORWARD);
2286 errln("FAIL: Did not get the expected exception");
2288 } catch (IllegalArgumentException e) {
2289 logln("Ok: " + e.getMessage());
2292 errln("FAIL: No syntax error");
2296 * Test invalid post context error handling
2298 public void TestInvalidPostContext() {
// In "a}b{c>d;" the '}' comes before the '{', which is not a valid context layout.
2301 Transliterator.createFromRules("ID", "a}b{c>d;", Transliterator.FORWARD);
2303 errln("FAIL: Did not get the expected exception");
2305 } catch (IllegalArgumentException e) {
2306 String msg = e.getMessage();
// The exception message must quote the offending rule text.
2307 if (msg.indexOf("a}b{c") >= 0) {
2308 logln("Ok: " + msg);
2310 errln("FAIL: " + msg);
2314 errln("FAIL: No syntax error");
2318 * Test ID form variants
2320 public void TestIDForms() {
// Table of triples: the ID to instantiate, the ID getID() should then report, and the ID
// the inverse should report. A null third column marks the ID as one that must be rejected.
2323 "nfd", null, "NFC", // make sure case is ignored
2324 "Any-NFKD", null, "Any-NFKC",
2325 "Null", null, "Null",
2326 "-nfkc", "nfkc", "NFKD",
2327 "-nfkc/", "nfkc", "NFKD",
2328 "Latin-Greek/UNGEGN", null, "Greek-Latin/UNGEGN",
2329 "Greek/UNGEGN-Latin", "Greek-Latin/UNGEGN", "Latin-Greek/UNGEGN",
2330 "Bengali-Devanagari/", "Bengali-Devanagari", "Devanagari-Bengali",
2331 "Source-", null, null,
2332 "Source/Variant-", null, null,
2333 "Source-/Variant", null, null,
2334 "/Variant", null, null,
2335 "/Variant-", null, null,
2336 "-/Variant", null, null,
2342 for (int i=0; i<DATA.length; i+=3) {
2343 String ID = DATA[i];
2344 String expID = DATA[i+1];
2345 String expInvID = DATA[i+2];
2346 boolean expValid = (expInvID != null);
// A null second column is handled here; the body of this branch is outside the visible
// lines (presumably it falls back to ID itself - confirm).
2347 if (expID == null) {
2352 Transliterator.getInstance(ID);
2353 Transliterator u = t.getInverse();
// Both the instance's own ID and its inverse's ID must match the table.
2354 if (t.getID().equals(expID) &&
2355 u.getID().equals(expInvID)) {
2356 logln("Ok: " + ID + ".getInverse() => " + expInvID);
2358 errln("FAIL: getInstance(" + ID + ") => " +
2359 t.getID() + " x getInverse() => " + u.getID() +
2360 ", expected " + expInvID);
2362 } catch (IllegalArgumentException e) {
// An exception is logged as Ok or reported as FAIL; which one presumably depends on
// expValid, in a condition that is not visible here.
2364 logln("Ok: getInstance(" + ID + ") => " + e.getMessage());
2366 errln("FAIL: getInstance(" + ID + ") => " + e.getMessage());
// Compares t2.toRules(true) with testRulesForward after removing spaces
// from both (and line breaks from the generated text), so that only the
// rule content has to match. The two strings are logged on a mismatch.
// NOTE(review): `label` is not used in any visible line; presumably the
// statement reporting the failure (source line 2381) uses it -- confirm.
2372 void checkRules(String label, Transliterator t2, String testRulesForward) {
2373 String rules2 = t2.toRules(true);
2374 //rules2 = TestUtility.replaceAll(rules2, new UnicodeSet("[' '\n\r]"), "");
2375 rules2 = TestUtility.replace(rules2, " ", "");
2376 rules2 = TestUtility.replace(rules2, "\n", "");
2377 rules2 = TestUtility.replace(rules2, "\r", "");
2378 testRulesForward = TestUtility.replace(testRulesForward, " ", "");
2380 if (!rules2.equals(testRulesForward)) {
2382 logln("GENERATED RULES: " + rules2);
2383 logln("SHOULD BE: " + testRulesForward);
2388 * Mark's toRules test.
// Builds one bidirectional rule set in both directions and checks
// (1) that it maps a-acute to alpha-acute and back, and
// (2) that toRules() regenerates the expected forward and backward rule
//     text, compared through checkRules().
2390 public void TestToRulesMark() {
// Rule source used for both directions (several lines of this and the
// two expected strings below are not visible in this excerpt).
2393 "::[[:Latin:][:Mark:]];"
2395 + "::Lower (Lower);"
2396 + "a <> \\u03B1;" // alpha
2398 + "::Upper (Lower);"
2400 + "::([[:Greek:][:Mark:]]);"
// Expected toRules() output of the FORWARD transliterator.
2402 String testRulesForward =
2403 "::[[:Latin:][:Mark:]];"
2408 + "::Upper (Lower);"
// Expected toRules() output of the REVERSE transliterator.
2411 String testRulesBackward =
2412 "::[[:Greek:][:Mark:]];"
2413 + "::Lower (Upper);"
2419 String source = "\u00E1"; // a-acute
2420 String target = "\u03AC"; // alpha-acute
2422 Transliterator t2 = Transliterator.createFromRules("source-target", testRules, Transliterator.FORWARD);
2423 Transliterator t3 = Transliterator.createFromRules("target-source", testRules, Transliterator.REVERSE);
// Behavioural check first, then the round trip through toRules().
2425 expect(t2, source, target);
2426 expect(t3, target, source);
2428 checkRules("Failed toRules FORWARD", t2, testRulesForward);
2429 checkRules("Failed toRules BACKWARD", t3, testRulesBackward);
2433 * Test Escape and Unescape transliterators.
// Hex-Any is given a string mixing several escape notations; the three
// Any-Hex variants (C, Java, Perl) are each given the same input and must
// produce that variant's notation.
// NOTE(review): the expected-output argument of the first expect() call is
// not visible in this excerpt.
2435 public void TestEscape() {
2436 expect(Transliterator.getInstance("Hex-Any"),
2437 "\\x{40}\\U000000312Q",
// C style: 4-digit form for BMP characters, 8-digit form for U+10BEEF.
2439 expect(Transliterator.getInstance("Any-Hex/C"),
2440 CharsToUnicodeString("A\\U0010BEEF\\uFEED"),
2441 "\\u0041\\U0010BEEF\\uFEED");
// Java style: U+10BEEF is emitted as two 4-digit surrogate escapes.
2442 expect(Transliterator.getInstance("Any-Hex/Java"),
2443 CharsToUnicodeString("A\\U0010BEEF\\uFEED"),
2444 "\\u0041\\uDBEF\\uDEEF\\uFEED");
// Perl style: braces around a variable-length hex value.
2445 expect(Transliterator.getInstance("Any-Hex/Perl"),
2446 CharsToUnicodeString("A\\U0010BEEF\\uFEED"),
2447 "\\x{41}\\x{10BEEF}\\x{FEED}");
2451 * Make sure display names of variants look reasonable.
2453 public void TestDisplayName() {
2455 // ID, forward name, reverse name
2456 // Update the text as necessary -- the important thing is
2457 // not the text itself, but how various cases are handled.
2460 "Any-Hex", "Any to Hex Escape", "Hex Escape to Any",
2463 "Any-Hex/Perl", "Any to Hex Escape/Perl", "Hex Escape to Any/Perl",
2466 "NFC", "Any to NFC", "Any to NFD",
2469 Locale US = Locale.US;
2471 for (int i=0; i<DATA.length; i+=3) {
2472 String name = Transliterator.getDisplayName(DATA[i], US);
2473 if (!name.equals(DATA[i+1])) {
2474 errln("FAIL: " + DATA[i] + ".getDisplayName() => " +
2475 name + ", expected " + DATA[i+1]);
2477 logln("Ok: " + DATA[i] + ".getDisplayName() => " + name);
2479 Transliterator t = Transliterator.getInstance(DATA[i], Transliterator.REVERSE);
2480 name = Transliterator.getDisplayName(t.getID(), US);
2481 if (!name.equals(DATA[i+2])) {
2482 errln("FAIL: " + t.getID() + ".getDisplayName() => " +
2483 name + ", expected " + DATA[i+2]);
2485 logln("Ok: " + t.getID() + ".getDisplayName() => " + name);
2488 // Cover getDisplayName(String)
2489 ULocale save = ULocale.getDefault();
2490 ULocale.setDefault(ULocale.US);
2491 String name2 = Transliterator.getDisplayName(t.getID());
2492 if (!name.equals(name2))
2493 errln("FAIL: getDisplayName with default locale failed");
2494 ULocale.setDefault(save);
2499 * Test anchor masking
// Creating a transliterator from "^a > Q; a > q;" must succeed: an
// IllegalArgumentException is reported as a failure in the catch block.
// NOTE(review): the condition guarding the first errln is not visible in
// this excerpt, and its "expected exception" wording does not match a test
// that expects success -- confirm against the full source.
2501 public void TestAnchorMasking() {
2502 String rule = "^a > Q; a > q;";
2504 Transliterator t = Transliterator.createFromRules("ID", rule, Transliterator.FORWARD);
2506 errln("FAIL: Did not get the expected exception");
2508 } catch (IllegalArgumentException e) {
2509 errln("FAIL: " + rule + " => " + e);
2514 * This test is not in trnstst.cpp. This test has been moved from com/ibm/icu/dev/test/lang/TestUScript.java
2515 * during ICU4J modularization to remove dependency of tests on Transliterator.
// Walks every code point 0..0x10FFFF, looks up its script, and for each
// script long name and short name seen for the first time tries to create
// a transliterator whose filter is that script ("[:name:];NFD").
2517 public void TestScriptAllCodepoints(){
// Remember which names were already tried so each is tested only once.
2519 HashSet scriptIdsChecked = new HashSet();
2520 HashSet scriptAbbrsChecked = new HashSet();
2521 for( int i =0; i <= 0x10ffff; i++){
2522 code = UScript.getScript(i);
2523 if(code==UScript.INVALID_CODE){
2524 errln("UScript.getScript for codepoint 0x"+ hex(i)+" failed");
2526 String id =UScript.getName(code);
2527 String abbr = UScript.getShortName(code);
// Long script name used as a property filter.
2528 if (!scriptIdsChecked.contains(id)) {
2529 scriptIdsChecked.add(id);
2530 String newId ="[:"+id+":];NFD";
2532 Transliterator t = Transliterator.getInstance(newId);
2534 errln("Failed to create transliterator for "+hex(i)+
2535 " script code: " +id);
2537 }catch(Exception e){
2538 errln("Failed to create transliterator for "+hex(i)
2539 +" script code: " +id
2540 + " Exception: "+e.getMessage());
// Same check with the short script name.
2543 if (!scriptAbbrsChecked.contains(abbr)) {
2544 scriptAbbrsChecked.add(abbr);
2545 String newAbbrId ="[:"+abbr+":];NFD";
2547 Transliterator t = Transliterator.getInstance(newAbbrId);
2549 errln("Failed to create transliterator for "+hex(i)+
2550 " script code: " +abbr);
2552 }catch(Exception e){
2553 errln("Failed to create transliterator for "+hex(i)
2554 +" script code: " +abbr
2555 + " Exception: "+e.getMessage());
// Rule-based transliterators that TestSpecialCases() registers before
// running and unregisters afterwards. Each row is { ID, rule text }.
2562 static final String[][] registerRules = {
2563 {"Any-Dev1", "x > X; y > Y;"},
2564 {"Any-Dev2", "XY > Z"},
2565 {"Greek-Latin/FAKE",
2566 "[^[:L:][:M:]] { \u03bc\u03c0 > b ; "+
2567 "\u03bc\u03c0 } [^[:L:][:M:]] > b ; "+
2568 "[^[:L:][:M:]] { [\u039c\u03bc][\u03a0\u03c0] > B ; "+
2569 "[\u039c\u03bc][\u03a0\u03c0] } [^[:L:][:M:]] > B ;"
// Supplementary-plane code points U+10414 and U+1043C as strings. The
// casing data and TestSurrogateCasing() treat them as an upper/lower pair.
2573 static final String DESERET_DEE = UTF16.valueOf(0x10414);
2574 static final String DESERET_dee = UTF16.valueOf(0x1043C);
// Input table for TestSpecialCases(). Each row is { ID, source } or
// { ID, source, expected }; when the expected string is omitted the test
// derives it from the ID (normalization forms, Lower, Upper).
2576 static final String[][] testCases = {
2579 // should add more test cases
2580 {"NFD" , "a\u0300 \u00E0 \u1100\u1161 \uFF76\uFF9E\u03D3"},
2581 {"NFC" , "a\u0300 \u00E0 \u1100\u1161 \uFF76\uFF9E\u03D3"},
2582 {"NFKD", "a\u0300 \u00E0 \u1100\u1161 \uFF76\uFF9E\u03D3"},
2583 {"NFKC", "a\u0300 \u00E0 \u1100\u1161 \uFF76\uFF9E\u03D3"},
2586 {"Greek-Latin/UNGEGN", "(\u03BC\u03C0)", "(b)"},
2587 {"Greek-Latin/FAKE", "(\u03BC\u03C0)", "(b)"},
2589 // check for devanagari bug
2590 {"nfd;Dev1;Dev2;nfc", "xy", "Z"},
2592 // ff, i, dotless-i, I, dotted-I, LJLjlj deseret deeDEE
2593 {"Title", "ab'cD ffi\u0131I\u0130 \u01C7\u01C8\u01C9 " + DESERET_dee + DESERET_DEE,
2594 "Ab'cd Ffi\u0131ii\u0307 \u01C8\u01C9\u01C9 " + DESERET_DEE + DESERET_dee},
2595 //TODO: enable this test once Titlecase works right
2596 //{"Title", "\uFB00i\u0131I\u0130 \u01C7\u01C8\u01C9 " + DESERET_dee + DESERET_DEE,
2597 // "Ffi\u0131ii \u01C8\u01C9\u01C9 " + DESERET_DEE + DESERET_dee},
2599 {"Upper", "ab'cD \uFB00i\u0131I\u0130 \u01C7\u01C8\u01C9 " + DESERET_dee + DESERET_DEE,
2600 "AB'CD FFIII\u0130 \u01C7\u01C7\u01C7 " + DESERET_DEE + DESERET_DEE},
2601 {"Lower", "ab'cD \uFB00i\u0131I\u0130 \u01C7\u01C8\u01C9 " + DESERET_dee + DESERET_DEE,
2602 "ab'cd \uFB00i\u0131ii\u0307 \u01C9\u01C9\u01C9 " + DESERET_dee + DESERET_dee},
// Same inputs without an expected value: the target is computed by the test.
2604 {"Upper", "ab'cD \uFB00i\u0131I\u0130 \u01C7\u01C8\u01C9 " + DESERET_dee + DESERET_DEE},
2605 {"Lower", "ab'cD \uFB00i\u0131I\u0130 \u01C7\u01C8\u01C9 " + DESERET_dee + DESERET_DEE},
2608 {"Greek-Latin/UNGEGN", "\u03C3 \u03C3\u03C2 \u03C2\u03C3", "s ss s\u0331s\u0331"},
2609 {"Latin-Greek/UNGEGN", "s ss s\u0331s\u0331", "\u03C3 \u03C3\u03C2 \u03C2\u03C3"},
2610 {"Greek-Latin", "\u03C3 \u03C3\u03C2 \u03C2\u03C3", "s ss s\u0331s\u0331"},
2611 {"Latin-Greek", "s ss s\u0331s\u0331", "\u03C3 \u03C3\u03C2 \u03C2\u03C3"},
2614 // Upper: TAT\u02B9\u00C2NA
2615 // Lower: tat\u02B9\u00E2na
2616 // Title: Tat\u02B9\u00E2na
2617 {"Upper", "tat\u02B9\u00E2na", "TAT\u02B9\u00C2NA"},
2618 {"Lower", "TAT\u02B9\u00C2NA", "tat\u02B9\u00E2na"},
2619 {"Title", "tat\u02B9\u00E2na", "Tat\u02B9\u00E2na"},
// Runs every row of testCases: registers the helper transliterators from
// registerRules, checks each case with expect(), then unregisters them.
2622 public void TestSpecialCases() {
// Step 1: make the rule-based helper IDs available through DummyFactory.
2624 for (int i = 0; i < registerRules.length; ++i) {
2625 Transliterator t = Transliterator.createFromRules(registerRules[i][0],
2626 registerRules[i][1], Transliterator.FORWARD);
2627 DummyFactory.add(registerRules[i][0], t);
// Step 2: run the cases.
2629 for (int i = 0; i < testCases.length; ++i) {
2630 String name = testCases[i][0];
2631 Transliterator t = Transliterator.getInstance(name);
2632 String id = t.getID();
2633 String source = testCases[i][1];
2634 String target = null;
2636 // Automatic generation of targets, to make it simpler to add test cases (and more fail-safe)
// An explicit third column wins; otherwise the expected value is computed
// with the Normalizer / UCharacter API that corresponds to the ID.
2638 if (testCases[i].length > 2) target = testCases[i][2];
2639 else if (id.equalsIgnoreCase("NFD")) target = com.ibm.icu.text.Normalizer.normalize(source, com.ibm.icu.text.Normalizer.NFD);
2640 else if (id.equalsIgnoreCase("NFC")) target = com.ibm.icu.text.Normalizer.normalize(source, com.ibm.icu.text.Normalizer.NFC);
2641 else if (id.equalsIgnoreCase("NFKD")) target = com.ibm.icu.text.Normalizer.normalize(source, com.ibm.icu.text.Normalizer.NFKD);
2642 else if (id.equalsIgnoreCase("NFKC")) target = com.ibm.icu.text.Normalizer.normalize(source, com.ibm.icu.text.Normalizer.NFKC);
2643 else if (id.equalsIgnoreCase("Lower")) target = UCharacter.toLowerCase(Locale.US, source);
2644 else if (id.equalsIgnoreCase("Upper")) target = UCharacter.toUpperCase(Locale.US, source);
2646 expect(t, source, target);
// Step 3: remove the temporary registrations again.
2648 for (int i = 0; i < registerRules.length; ++i) {
2649 Transliterator.unregister(registerRules[i][0]);
2653 // seems like there should be an easier way to just register an instance of a transliterator
// Transliterator.Factory that hands back pre-built instances from a static
// map keyed by ID. One shared singleton is registered for every ID added.
2655 static class DummyFactory implements Transliterator.Factory {
2656 static DummyFactory singleton = new DummyFactory();
2657 static HashMap m = new HashMap();
2659 // Since Transliterators are immutable, we don't have to clone on set & get
// NOTE(review): the statement that stores `t` in `m` is not visible in
// this excerpt (source line 2661); getInstance() below relies on it.
2660 static void add(String ID, Transliterator t) {
2662 //System.out.println("Registering: " + ID + ", " + t.toRules(true));
2663 Transliterator.registerFactory(ID, singleton);
// Looks the instance up by the exact ID string.
2665 public Transliterator getInstance(String ID) {
2666 return (Transliterator) m.get(ID);
// For every code point below 0x600, the lower, casefold, title and upper
// transliterators must give the same result as the corresponding
// UCharacter call (root locale for lower/title/upper).
2670 public void TestCasing() {
2671 Transliterator toLower = Transliterator.getInstance("lower");
2672 Transliterator toCasefold = Transliterator.getInstance("casefold");
2673 Transliterator toUpper = Transliterator.getInstance("upper");
2674 Transliterator toTitle = Transliterator.getInstance("title");
2675 for (int i = 0; i < 0x600; ++i) {
2676 String s = UTF16.valueOf(i);
2678 String lower = UCharacter.toLowerCase(ULocale.ROOT, s);
2679 assertEquals("Lowercase", lower, toLower.transform(s));
2681 String casefold = UCharacter.foldCase(s, true);
2682 assertEquals("Casefold", casefold, toCasefold.transform(s));
2684 String title = UCharacter.toTitleCase(ULocale.ROOT, s, null);
2685 assertEquals("Title", title, toTitle.transform(s));
2687 String upper = UCharacter.toUpperCase(ULocale.ROOT, s);
2688 assertEquals("Upper", upper, toUpper.transform(s));
// Checks UCharacter case mapping on the supplementary-plane pair
// DESERET_dee / DESERET_DEE: title-casing the single code point, and
// upper-/lower-casing a two-character string.
2692 public void TestSurrogateCasing () {
2693 // check that casing handles surrogates
2694 // titlecase is currently defective
2695 int dee = UTF16.charAt(DESERET_dee,0);
2696 int DEE = UCharacter.toTitleCase(dee);
2697 if (!UTF16.valueOf(DEE).equals(DESERET_DEE)) {
2698 errln("Fails titlecase of surrogates" + Integer.toString(dee,16) + ", " + Integer.toString(DEE,16));
2701 if (!UCharacter.toUpperCase(DESERET_dee + DESERET_DEE).equals(DESERET_DEE + DESERET_DEE)) {
2702 errln("Fails uppercase of surrogates");
2705 if (!UCharacter.toLowerCase(DESERET_dee + DESERET_DEE).equals(DESERET_dee + DESERET_dee)) {
2706 errln("Fails lowercase of surrogates");
2710 // Check to see that incremental gets at least part way through a reasonable string.
// Enumerates every registered source/target/variant combination whose
// source has sample text in `tests`, and runs CheckIncrementalAux() on the
// transliterator and on its inverse.
2712 public void TestIncrementalProgress() {
2713 String latinTest = "The Quick Brown Fox.";
// Sample text for the other scripts is produced by transliterating the
// Latin sample.
2714 String devaTest = Transliterator.getInstance("Latin-Devanagari").transliterate(latinTest);
2715 String kataTest = Transliterator.getInstance("Latin-Katakana").transliterate(latinTest);
// { source name, sample text }; looked up case-insensitively by findMatch().
2716 String[][] tests = {
2718 {"Latin", latinTest},
2719 {"Halfwidth", latinTest},
2720 {"Devanagari", devaTest},
2721 {"Katakana", kataTest},
2724 Enumeration sources = Transliterator.getAvailableSources();
2725 while(sources.hasMoreElements()) {
2726 String source = (String) sources.nextElement();
2727 String test = findMatch(source, tests);
// NOTE(review): the condition guarding this skip is not visible in this
// excerpt; presumably sources without sample text are skipped -- confirm.
2729 logln("Skipping " + source + "-X");
2732 Enumeration targets = Transliterator.getAvailableTargets(source);
2733 while(targets.hasMoreElements()) {
2734 String target = (String) targets.nextElement();
2735 Enumeration variants = Transliterator.getAvailableVariants(source, target);
2736 while(variants.hasMoreElements()) {
2737 String variant = (String) variants.nextElement();
2738 String id = source + "-" + target + "/" + variant;
// Optional substring filter to restrict the run to matching IDs.
2741 String filter = getTranslitTestFilter();
2742 if (filter != null && id.indexOf(filter) < 0) continue;
2744 Transliterator t = Transliterator.getInstance(id);
2745 CheckIncrementalAux(t, test);
// The inverse is exercised on the forward output.
2747 String rev = t.transliterate(test);
2748 Transliterator inv = t.getInverse();
2749 CheckIncrementalAux(inv, rev);
// Returns the second column of the first row in `pairs` whose first column
// equals `source`, ignoring case.
// NOTE(review): the return for "no row matched" is not visible in this
// excerpt; presumably null -- confirm.
2755 public String findMatch (String source, String[][] pairs) {
2756 for (int i = 0; i < pairs.length; ++i) {
2757 if (source.equalsIgnoreCase(pairs[i][0])) return pairs[i][1];
// Runs `t` incrementally over `input` and checks two things: that the
// incremental call advanced pos.start past 0, and that
// finishTransliteration() consumed everything up to pos.limit.
2762 public void CheckIncrementalAux(Transliterator t, String input) {
2764 Replaceable test = new ReplaceableString(input);
// Context and transliteration range both cover the whole text.
2765 Transliterator.Position pos = new Transliterator.Position(0, test.length(), 0, test.length());
2766 t.transliterate(test, pos);
2767 boolean gotError = false;
2769 // we have a few special cases. Any-Remove (pos.start = 0, but also = limit) and U+XXXXX?X?
// "No progress" means start is still 0 while text remains; Hex-Any/Unicode
// is exempted by ID.
2771 if (pos.start == 0 && pos.limit != 0 && !t.getID().equals("Hex-Any/Unicode")) {
2772 errln("No Progress, " + t.getID() + ": " + UtilityExtensions.formatInput(test, pos));
2775 logln("PASS Progress, " + t.getID() + ": " + UtilityExtensions.formatInput(test, pos));
2777 t.finishTransliteration(test, pos);
// After finishing, nothing may be left between start and limit.
2778 if (pos.start != pos.limit) {
2779 errln("Incomplete, " + t.getID() + ": " + UtilityExtensions.formatInput(test, pos));
2783 //errln("FAIL: Did not get expected error");
// Tests "&ID( ... )" function calls inside rules. For each of two rule
// strings: create the transliterator, require toRules(true) to reproduce
// the rule text exactly, then check the output on sample input.
2787 public void TestFunction() {
2788 // Careful with spacing and ';' here: Phrase this exactly
2789 // as toRules() is going to return it. If toRules() changes
2790 // with regard to spacing or ';', then adjust this string.
2792 "([:Lu:]) > $1 '(' &Lower( $1 ) '=' &Hex( &Any-Lower( $1 ) ) ')';";
2794 Transliterator t = Transliterator.createFromRules("Test", rule, Transliterator.FORWARD);
2796 errln("FAIL: createFromRules failed");
// Round trip: the generated rules must equal the source string verbatim.
2800 String r = t.toRules(true);
2801 if (r.equals(rule)) {
2802 logln("OK: toRules() => " + r);
2804 errln("FAIL: toRules() => " + r +
2805 ", expected " + rule);
2808 expect(t, "The Quick Brown Fox",
2809 "T(t=\\u0074)he Q(q=\\u0071)uick B(b=\\u0062)rown F(f=\\u0066)ox");
// Second rule: function IDs with a variant (Hex/Unicode) and Name.
2811 "([^\\ -\\u007F]) > &Hex/Unicode( $1 ) ' ' &Name( $1 ) ;";
2813 t = Transliterator.createFromRules("Test", rule, Transliterator.FORWARD);
2815 errln("FAIL: createFromRules failed");
2819 r = t.toRules(true);
2820 if (r.equals(rule)) {
2821 logln("OK: toRules() => " + r);
2823 errln("FAIL: toRules() => " + r +
2824 ", expected " + rule);
// NOTE(review): the start of this final expect() call is not visible in
// this excerpt.
2828 "U+0301 \\N{COMBINING ACUTE ACCENT}");
2831 public void TestInvalidBackRef() {
2832 String rule = ". > $1;";
2833 String rule2 ="(.) <> &hex/unicode($1) &name($1); . > $1; [{}] >\u0020;";
2835 Transliterator t = Transliterator.createFromRules("Test", rule, Transliterator.FORWARD);
2837 errln("FAIL: createFromRules should have returned NULL");
2839 errln("FAIL: Ok: . > $1; => no error");
2840 Transliterator t2= Transliterator.createFromRules("Test2", rule2, Transliterator.FORWARD);
2842 errln("FAIL: createFromRules should have returned NULL");
2844 errln("FAIL: Ok: . > $1; => no error");
2845 } catch (IllegalArgumentException e) {
2846 logln("Ok: . > $1; => " + e.getMessage());
// Tests sets containing multi-character strings ("{fg}", "{ab}", ...) in
// rules: first as plain keys and contexts, then with overlapping strings
// where the longest alternative has to win.
// NOTE(review): most lines of both rule strings are not visible in this
// excerpt; only their last fragments appear below.
2850 public void TestMulticharStringSet() {
2857 " e } [{fg}] > r;" ;
2859 Transliterator t = Transliterator.createFromRules("Test", rule, Transliterator.FORWARD);
2861 errln("FAIL: createFromRules failed");
2865 expect(t, "a aa ab bc d gd de gde gdefg ddefg",
2866 "y x yz z d gd de gdq gdqfg ddrfg");
2868 // Overlapped string test. Make sure that when multiple
2869 // strings can match that the longest one is matched.
2871 " [a {ab} {abc}] > x;" +
2874 " q [t {st} {rst}] { e > p;" ;
2876 t = Transliterator.createFromRules("Test", rule, Transliterator.FORWARD);
2878 errln("FAIL: createFromRules failed");
// "ab" and "abc" each collapse to a single x, showing the longest match.
2882 expect(t, "a ab abc qte qste qrste",
2883 "x x x qtp qstp qrstp");
2887 * Test that user-registered transliterators can be used under function syntax.
// Registers several rule-based transliterators through
// TestUserFunctionFactory and then calls them with "&Name(...)" from other
// rules, including one user function nested inside another.
2890 public void TestUserFunction() {
2893 // There's no need to register inverses if we don't use them
2894 TestUserFunctionFactory.add("Any-gif",
2895 Transliterator.createFromRules("gif",
2896 "'\\'u(..)(..) > '<img src=\"http://www.unicode.org/gifs/24/' $1 '/U' $1$2 '.gif\">';",
2897 Transliterator.FORWARD));
2898 //TestUserFunctionFactory.add("gif-Any", Transliterator.getInstance("Any-Null"));
2900 TestUserFunctionFactory.add("Any-RemoveCurly",
2901 Transliterator.createFromRules("RemoveCurly", "[\\{\\}] > ; \\\\N > ;", Transliterator.FORWARD));
2902 //TestUserFunctionFactory.add("RemoveCurly-Any", Transliterator.getInstance("Any-Null"));
// A user transliterator that calls the built-in hex function.
2904 logln("Trying &hex");
2905 t = Transliterator.createFromRules("hex2", "(.) > &hex($1);", Transliterator.FORWARD);
2906 logln("Registering");
2907 TestUserFunctionFactory.add("Any-hex2", t);
2908 t = Transliterator.getInstance("Any-hex2");
2909 expect(t, "abc", "\\u0061\\u0062\\u0063");
// User function applied to the output of another user function. The rule
// spells the IDs "Gif" and "Hex2" although they were registered in lower
// case; the factory's keys are case-insensitive.
2911 logln("Trying &gif");
2912 t = Transliterator.createFromRules("gif2", "(.) > &Gif(&Hex2($1));", Transliterator.FORWARD);
2913 logln("Registering");
2914 TestUserFunctionFactory.add("Any-gif2", t);
2915 t = Transliterator.getInstance("Any-gif2");
2916 expect(t, "ab", "<img src=\"http://www.unicode.org/gifs/24/00/U0061.gif\">" +
2917 "<img src=\"http://www.unicode.org/gifs/24/00/U0062.gif\">");
2919 // Test that filters are allowed after &
2920 t = Transliterator.createFromRules("test",
2921 "(.) > &Hex($1) ' ' &Any-RemoveCurly(&Name($1)) ' ';", Transliterator.FORWARD);
2922 expect(t, "abc", "\\u0061 LATIN SMALL LETTER A \\u0062 LATIN SMALL LETTER B \\u0063 LATIN SMALL LETTER C ");
2924 // Unregister our test stuff
2925 TestUserFunctionFactory.unregister();
// Transliterator.Factory backed by a static map from case-insensitive ID
// to a pre-built instance. add() stores and registers an ID; unregister()
// removes every ID that was added.
2928 static class TestUserFunctionFactory implements Transliterator.Factory {
2929 static TestUserFunctionFactory singleton = new TestUserFunctionFactory();
2930 static HashMap m = new HashMap();
// Stores the instance and registers the shared factory for this ID.
2932 static void add(String ID, Transliterator t) {
2933 m.put(new CaseInsensitiveString(ID), t);
2934 Transliterator.registerFactory(ID, singleton);
// Lookup ignores the case of the requested ID.
2937 public Transliterator getInstance(String ID) {
2938 return (Transliterator) m.get(new CaseInsensitiveString(ID));
// Unregisters every stored ID and empties the map. Removal goes through
// the iterator so the key set can be modified while it is being walked.
2941 static void unregister() {
2942 Iterator ids = m.keySet().iterator();
2943 while (ids.hasNext()) {
2944 CaseInsensitiveString id = (CaseInsensitiveString) ids.next();
2945 Transliterator.unregister(id.getString());
2946 ids.remove(); // removes pair from m
2952 * Test the Any-X transliterators.
// Any-Latin is applied to one string mixing Greek, Hiragana and Cyrillic
// runs; each run must be romanized while the Latin labels stay unchanged.
// NOTE(review): the start of the expect() call is not visible in this
// excerpt.
2954 public void TestAnyX() {
2955 Transliterator anyLatin =
2956 Transliterator.getInstance("Any-Latin", Transliterator.FORWARD);
2959 "greek:\u03B1\u03B2\u03BA\u0391\u0392\u039A hiragana:\u3042\u3076\u304F cyrillic:\u0430\u0431\u0446",
2960 "greek:abkABK hiragana:abuku cyrillic:abc");
2964 * Test Any-X transliterators with sample letters from all scripts.
// Builds a sample string from a few alphabetic characters of every script
// code and runs it through "any-Latn". The visible lines only log the
// input and the result; no expected value is compared.
2966 public void TestAny() {
2967 UnicodeSet alphabetic = (UnicodeSet) new UnicodeSet("[:alphabetic:]").freeze();
2968 StringBuffer testString = new StringBuffer();
2969 for (int i = 0; i < UScript.CODE_LIMIT; ++i) {
// Alphabetic characters belonging to script i.
2970 UnicodeSet sample = new UnicodeSet().applyPropertyAlias("script", UScript.getShortName(i)).retainAll(alphabetic);
// NOTE(review): the initial value of `count` is not visible in this
// excerpt; it caps how many characters are taken per script.
2972 for (UnicodeSetIterator it = new UnicodeSetIterator(sample); it.next();) {
2973 testString.append(it.getString());
2974 if (--count < 0) break;
2977 logln("Sample set for Any-Latin: " + testString);
2978 Transliterator anyLatin = Transliterator.getInstance("any-Latn");
2979 String result = anyLatin.transliterate(testString.toString());
2980 logln("Sample result for Any-Latin: " + result);
2985 * Test the source and target set API. These are only implemented
2986 * for RBT and CompoundTransliterator at this time.
// Creates a transliterator from a small rule string `r` and compares its
// getSourceSet() / getTargetSet() against hand-written expected sets.
// NOTE(review): the definition of `r` is not visible in this excerpt.
2988 public void TestSourceTargetSet() {
// Expected source set; includes the multi-character string "lu".
2995 UnicodeSet expSrc = new UnicodeSet("[arx{lu}]");
2998 UnicodeSet expTrg = new UnicodeSet("[bq]");
3000 Transliterator t = Transliterator.createFromRules("test", r, Transliterator.FORWARD);
3001 UnicodeSet src = t.getSourceSet();
3002 UnicodeSet trg = t.getTargetSet();
// Both sets must match exactly.
3004 if (src.equals(expSrc) && trg.equals(expTrg)) {
3005 logln("Ok: " + r + " => source = " + src.toPattern(true) +
3006 ", target = " + trg.toPattern(true));
3008 errln("FAIL: " + r + " => source = " + src.toPattern(true) +
3009 ", expected " + expSrc.toPattern(true) +
3010 "; target = " + trg.toPattern(true) +
3011 ", expected " + expTrg.toPattern(true));
// Cross-checks getSourceSet()/getTargetSet() of a list of "::" rule sets
// against sets gathered empirically. A pool of test strings is built first
// (every code point plus strings derived from canonical decompositions and
// mark reorderings, collected in disorderedMarks). Each rule set is then
// applied to every pool string, and the characters seen on input and
// output are compared with what the transliterator reports.
// NOTE(review): many short lines of this method are not visible in this
// excerpt; comments below describe only the visible statements.
3015 public void TestSourceTargetSet2() {
3018 Normalizer2 nfc = Normalizer2.getNFCInstance();
3019 Normalizer2 nfd = Normalizer2.getNFDInstance();
3021 // Normalizer2 nfkd = Normalizer2.getInstance(null, "nfkd", Mode.DECOMPOSE);
3022 // UnicodeSet nfkdSource = new UnicodeSet();
3023 // UnicodeSet nfkdTarget = new UnicodeSet();
3024 // for (int i = 0; i <= 0x10FFFF; ++i) {
3025 // if (nfkd.isInert(i)) {
3028 // nfkdSource.add(i);
3029 // String t = nfkd.getDecomposition(i);
3031 // nfkdTarget.addAll(t);
3033 // nfkdTarget.add(i);
3036 // nfkdSource.freeze();
3037 // nfkdTarget.freeze();
3038 // logln("NFKD Source: " + nfkdSource.toPattern(false));
3039 // logln("NFKD Target: " + nfkdTarget.toPattern(false));
// Per leading code point of a decomposition: the trailing characters that
// can follow it, and (presumably) the code points that decompose to it.
3041 UnicodeMap<UnicodeSet> leadToTrail = new UnicodeMap();
3042 UnicodeMap<UnicodeSet> leadToSources = new UnicodeMap();
// Characters whose canonical combining class is not 0.
3043 UnicodeSet nonStarters = new UnicodeSet("[:^ccc=0:]").freeze();
3044 CanonicalIterator can = new CanonicalIterator("");
// The pool of test strings.
3046 UnicodeSet disorderedMarks = new UnicodeSet();
// Pass 1: collect data from the NFD decomposition of every code point.
3048 for (int i = 0; i <= 0x10FFFF; ++i) {
3049 String s = nfd.getDecomposition(i);
// Every string produced by the CanonicalIterator goes into the pool.
3055 for (String t = can.next(); t != null; t = can.next()) {
3056 disorderedMarks.add(t);
3059 // if s has two code points, (or more), add the lead/trail information
3060 int first = s.codePointAt(0);
3061 int firstCount = Character.charCount(first);
// A decomposition that is a single code point has no trail to record.
3062 if (s.length() == firstCount) continue;
3063 String trailString = s.substring(firstCount);
3065 // add all the trail characters
3066 if (!nonStarters.containsSome(trailString)) {
3069 UnicodeSet trailSet = leadToTrail.get(first);
3070 if (trailSet == null) {
3071 leadToTrail.put(first, trailSet = new UnicodeSet());
3073 trailSet.addAll(trailString); // add remaining trails
3076 UnicodeSet sourcesSet = leadToSources.get(first);
3077 if (sourcesSet == null) {
3078 leadToSources.put(first, sourcesSet = new UnicodeSet());
// Pass 2: combine each recorded source with each trail of the same lead
// and add the canonically equivalent strings that do not end with that
// trail.
3084 for (Entry<String, UnicodeSet> x : leadToSources.entrySet()) {
3085 String lead = x.getKey();
3086 UnicodeSet sources = x.getValue();
3087 UnicodeSet trailSet = leadToTrail.get(lead);
3088 for (String source : sources) {
3089 for (String trail : trailSet) {
3090 can.setSource(source + trail);
3091 for (String t = can.next(); t != null; t = can.next()) {
3092 if (t.endsWith(trail)) continue;
3093 disorderedMarks.add(t);
// Pass 3: pair every non-starter with U+0345 before it and U+0323 after.
3100 for (String s : nonStarters) {
3101 disorderedMarks.add("\u0345" + s);
3102 disorderedMarks.add(s+"\u0323");
3103 String xx = nfc.normalize("\u01EC" + s);
3104 if (!xx.startsWith("\u01EC")) {
3109 // for (int i = 0; i <= 0x10FFFF; ++i) {
3110 // String s = nfkd.getDecomposition(i);
3112 // disorderedMarks.add(s);
3113 // disorderedMarks.add(nfc.normalize(s));
3114 // addDerivedStrings(nfc, disorderedMarks, s);
3116 // s = nfd.getDecomposition(i);
3118 // disorderedMarks.add(s);
3120 // if (!nfc.isInert(i)) {
3121 // if (i == 0x00C0) {
3124 // can.setSource(s+"\u0334");
3125 // for (String t = can.next(); t != null; t = can.next()) {
3126 // addDerivedStrings(nfc, disorderedMarks, t);
3128 // can.setSource(s+"\u0345");
3129 // for (String t = can.next(); t != null; t = can.next()) {
3130 // addDerivedStrings(nfc, disorderedMarks, t);
3132 // can.setSource(s+"\u0323");
3133 // for (String t = can.next(); t != null; t = can.next()) {
3134 // addDerivedStrings(nfc, disorderedMarks, t);
3138 logln("Test cases: " + disorderedMarks.size());
// Finally add every single code point and freeze the pool.
3139 disorderedMarks.addAll(0,0x10FFFF).freeze();
3140 logln("isInert \u0104 " + nfc.isInert('\u0104'));
// Rule sets under test. Only column 0 is read by the live code below; the
// use of column 1 is commented out.
3142 Object[][] rules = {
3143 {":: [:sc=COMMON:] any-name;", null},
3145 {":: [:Greek:] hex-any/C;", null},
3146 {":: [:Greek:] any-hex/C;", null},
3148 {":: [[:Mn:][:Me:]] remove;", null},
3149 {":: [[:Mn:][:Me:]] null;", null},
3152 {":: lower;", null},
3153 {":: upper;", null},
3154 {":: title;", null},
3155 {":: CaseFold;", null},
3162 {":: [[:Mn:][:Me:]] NFKD;", null},
3163 {":: Latin-Greek;", null},
3164 {":: [:Latin:] NFKD;", null},
3167 ":: [[:Mn:][:Me:]] remove;\n" +
3170 for (Object[] rulex : rules) {
3171 String rule = (String) rulex[0];
3172 Transliterator trans = Transliterator.createFromRules("temp", rule, Transliterator.FORWARD);
// What the transliterator claims ...
3173 UnicodeSet actualSource = trans.getSourceSet();
3174 UnicodeSet actualTarget = trans.getTargetSet();
// ... versus what is observed by running it.
3175 UnicodeSet empiricalSource = new UnicodeSet();
3176 UnicodeSet empiricalTarget = new UnicodeSet();
// Multi-line rules are flattened for the log messages.
3177 String ruleDisplay = rule.replace("\n", "\t\t");
3178 UnicodeSet toTest = disorderedMarks;
3179 // if (rulex[1] != null) {
3180 // toTest = new UnicodeSet(disorderedMarks);
3181 // toTest.addAll((UnicodeSet) rulex[1]);
// `test`, DEBUG and `count` look like debugging aids; their uses are not
// fully visible in this excerpt.
3184 String test = nfd.normalize("\u0104");
3185 boolean DEBUG = true;
3186 @SuppressWarnings("unused")
3187 int count = 0; // for debugging
3188 for (String s : toTest) {
3189 if (s.equals(test)) {
3192 String t = trans.transform(s);
// The second isAtomic() call repeats the first with the same arguments;
// presumably a spot to step into under a debugger.
3194 if (!isAtomic(s, t, trans)) {
3195 isAtomic(s, t, trans);
3199 // only keep the part that changed; so skip the front and end.
3200 // int start = findSharedStartLength(s,t);
3201 // int end = findSharedEndLength(s,t);
3202 // if (start != 0 || end != 0) {
3203 // s = s.substring(start, s.length() - end);
3204 // t = t.substring(start, t.length() - end);
3207 if (!actualSource.containsAll(s)) {
3210 if (!actualTarget.containsAll(t)) {
// Record the input and output characters of this sample.
3214 addSourceTarget(s, empiricalSource, t, empiricalTarget);
// The reported sets must cover the observed ones.
3217 assertEquals("getSource(" + ruleDisplay + ")", empiricalSource, actualSource, SetAssert.MISSING_OK);
3218 assertEquals("getTarget(" + ruleDisplay + ")", empiricalTarget, actualTarget, SetAssert.MISSING_OK);
// For every row of `tests`, builds the transliterator and its inverse and
// checks that each reports an empty source set (UnicodeSet.EMPTY) and the
// target set listed for that direction. A missing, null or empty target
// entry means the target set is expected to be empty as well.
3222 public void TestSourceTargetSetFilter() {
3223 String[][] tests = {
3224 // rules, expectedTarget-FORWARD, expectedTarget-REVERSE
3225 {"[] Latin-Greek", null, "[\']"},
3226 {"::[] ; ::NFD ; ::NFKC ; :: ([]) ;"},
3238 UnicodeSet expectedSource = UnicodeSet.EMPTY;
3239 for (String[] testPair : tests) {
3240 String test = testPair[0];
// The row text is first tried as an ID and, if that throws, as rule text.
3243 t0 = Transliterator.getInstance(test);
3244 } catch (Exception e) {
3245 t0 = Transliterator.createFromRules("temp", test, Transliterator.FORWARD);
// Same fallback for the inverse: getInverse(), else REVERSE rules.
3249 t1 = t0.getInverse();
3250 } catch (Exception e) {
3251 t1 = Transliterator.createFromRules("temp", test, Transliterator.REVERSE);
// NOTE(review): the statement advancing targetIndex is not visible in
// this excerpt; the tests-table comment implies 1 = FORWARD, 2 = REVERSE.
3253 int targetIndex = 0;
3254 for (Transliterator t : new Transliterator[]{t0, t1}) {
3256 UnicodeSet source = t.getSourceSet();
3257 String direction = t == t0 ? "FORWARD\t" : "REVERSE\t";
3259 UnicodeSet expectedTarget = testPair.length <= targetIndex ? expectedSource
3260 : testPair[targetIndex] == null ? expectedSource
3261 : testPair[targetIndex].length() == 0 ? expectedSource
3262 : new UnicodeSet(testPair[targetIndex]);
3263 ok = assertEquals(direction + "getSource\t\"" + test + '"', expectedSource, source);
// Re-running the getter on failure gives a place to set a breakpoint.
3264 if (!ok) { // for debugging
3265 source = t.getSourceSet();
3267 UnicodeSet target = t.getTargetSet();
3268 ok = assertEquals(direction + "getTarget\t\"" + test + '"', expectedTarget, target);
3269 if (!ok) { // for debugging
3270 target = t.getTargetSet();
// Given s and its transform t, tries every split point i inside s: if
// transforming the prefix and the suffix separately gives pieces q and r
// such that t starts with q, ends with r and has length q + r, the whole
// transform can be reproduced piecewise at that split.
// NOTE(review): the return statements are not visible in this excerpt;
// presumably such a split means "not atomic" -- confirm.
3276 private boolean isAtomic(String s, String t, Transliterator trans) {
3277 for (int i = 1; i < s.length(); ++i) {
// Only split on character boundaries.
3278 if (!CharSequences.onCharacterBoundary(s, i)) {
3281 String q = trans.transform(s.substring(0,i));
3282 if (t.startsWith(q)) {
3283 String r = trans.transform(s.substring(i));
3284 if (t.length() == q.length() + r.length() && t.endsWith(r)) {
3290 // // make sure that every part is different
3291 // if (s.codePointCount(0, s.length()) > 1) {
3292 // int[] codePoints = It.codePoints(s);
3293 // for (int k = 0; k < codePoints.length; ++k) {
3294 // int pos = indexOf(t,codePoints[k]);
3299 // if (s.contains("\u00C0")) {
// Adds the characters of s to expectedSource and, when t is non-empty, the
// characters of t to expectedTarget. Both sets are modified in place.
3305 private void addSourceTarget(String s, UnicodeSet expectedSource, String t, UnicodeSet expectedTarget) {
3306 expectedSource.addAll(s);
3307 if (t.length() > 0) {
3308 expectedTarget.addAll(t);
3312 // private void addDerivedStrings(Normalizer2 nfc, UnicodeSet disorderedMarks, String s) {
3313 // disorderedMarks.add(s);
3314 // for (int j = 1; j < s.length(); ++j) {
3315 // if (CharSequences.onCharacterBoundary(s, j)) {
3316 // String shorter = s.substring(0,j);
3317 // disorderedMarks.add(shorter);
3318 // disorderedMarks.add(nfc.normalize(shorter) + s.substring(j));
// Table-driven checks of findSharedStartLength() and
// findSharedEndLength(). Each row is { expected length, s, t }; the rows
// use lone and paired surrogates.
3323 public void TestCharUtils() {
3324 String[][] startTests = {
3327 {"0", "\uD800", "\uD800\uDC01"},
3328 {"1", "\uD800a", "\uD800b"},
3329 {"0", "\uD800\uDC00", "\uD800\uDC01"},
3331 for (String[] row : startTests) {
3332 int actual = findSharedStartLength(row[1], row[2]);
3333 assertEquals("findSharedStartLength(" + row[1] + "," + row[2] + ")",
3334 Integer.parseInt(row[0]),
// Same layout for the shared-suffix helper.
3337 String[][] endTests = {
3338 {"0", "\uDC00", "\uD801\uDC00"},
3341 {"1", "a\uDC00", "b\uDC00"},
3342 {"0", "\uD800\uDC00", "\uD801\uDC00"},
3344 for (String[] row : endTests) {
3345 int actual = findSharedEndLength(row[1], row[2]);
3346 assertEquals("findSharedEndLength(" + row[1] + "," + row[2] + ")",
3347 Integer.parseInt(row[0]),
3357 // TODO make generally available
// Length of the common prefix of s and t, in chars. When the first
// differing index is not on a character boundary in both strings, the
// result is reduced by one.
// NOTE(review): the loop body and the other return paths are not visible
// in this excerpt.
3358 private static int findSharedStartLength(CharSequence s, CharSequence t) {
3359 int min = Math.min(s.length(), t.length());
3362 for (i = 0; i < min; ++i) {
3369 return CharSequences.onCharacterBoundary(s,i) && CharSequences.onCharacterBoundary(t,i) ? i : i - 1;
3377 // TODO make generally available
// Length of the common suffix of s and t, in chars, comparing from the
// last char backwards. When the split point is not on a character boundary
// in both strings, the result is reduced by one.
// NOTE(review): the comparison inside the loop and the other return paths
// are not visible in this excerpt.
3378 private static int findSharedEndLength(CharSequence s, CharSequence t) {
3379 int slength = s.length();
3380 int tlength = t.length();
3381 int min = Math.min(slength, tlength);
3384 // TODO can make the calculations slightly faster... Not sure if it is worth the complication, tho'
3385 for (i = 0; i < min; ++i) {
// i-th char counted from the end of each string.
3386 sch = s.charAt(slength - i - 1);
3387 tch = t.charAt(tlength - i - 1);
3392 return CharSequences.onCharacterBoundary(s,slength - i) && CharSequences.onCharacterBoundary(t,tlength - i) ? i : i - 1;
// Comparison mode passed to assertEquals(String, UnicodeSet, UnicodeSet,
// SetAssert). Visible callers only pass MISSING_OK.
3395 enum SetAssert {EQUALS, MISSING_OK, EXTRA_OK}
// Compares the set a transliterator reports (`actual`) with the set found
// by running it (`empirical`). Elements observed but not reported are an
// error; elements reported but never observed are only logged as a
// warning.
// NOTE(review): no visible line reads `setAssert`, and the updates of
// haveError are not visible in this excerpt.
3397 void assertEquals(String message, UnicodeSet empirical, UnicodeSet actual, SetAssert setAssert) {
3398 boolean haveError = false;
3399 if (!actual.containsAll(empirical)) {
3400 UnicodeSet missing = new UnicodeSet(empirical).removeAll(actual);
3401 errln(message + " \tgetXSet < empirical (" + missing.size() + "): " + toPattern(missing));
3404 if (!empirical.containsAll(actual)) {
3405 UnicodeSet extra = new UnicodeSet(actual).removeAll(empirical);
3406 logln("WARNING: " + message + " \tgetXSet > empirical (" + extra.size() + "): " + toPattern(extra));
3410 logln("OK " + message + ' ' + toPattern(empirical));
// Returns the set's pattern for log output. Long patterns are cut to about
// 200 chars (199 when index 200 is not on a character boundary) and get an
// ellipsis (U+2026) appended.
// NOTE(review): the branch for short patterns is not visible in this
// excerpt; presumably it returns the pattern unchanged.
3414 private String toPattern(UnicodeSet missing) {
3415 String result = missing.toPattern(false);
3416 if (result.length() < 200) {
3419 return result.substring(0, CharSequences.onCharacterBoundary(result, 200) ? 200 : 199) + "\u2026";
3424 * Test handling of Pattern_White_Space, for both RBT and UnicodeSet.
3426 public void TestPatternWhitespace() {
3428 String r = "a > \u200E b;";
3430 Transliterator t = Transliterator.createFromRules("test", r, Transliterator.FORWARD);
3432 expect(t, "a", "b");
3435 UnicodeSet set = new UnicodeSet("[a \u200E]");
3437 if (set.contains(0x200E)) {
3438 errln("FAIL: U+200E not being ignored by UnicodeSet");
3442 public void TestAlternateSyntax() {
3447 expect("a \u2192 x; b \u2190 y; c \u2194 z",
3450 expect("([:^ASCII:]) \u2192 \u2206Name($1);",
3451 "<=\u2190; >=\u2192; <>=\u2194; &=\u2206",
3452 "<=\\N{LEFTWARDS ARROW}; >=\\N{RIGHTWARDS ARROW}; <>=\\N{LEFT RIGHT ARROW}; &=\\N{INCREMENT}");
3455 public void TestPositionAPI() {
3456 Transliterator.Position a = new Transliterator.Position(3,5,7,11);
3457 Transliterator.Position b = new Transliterator.Position(a);
3458 Transliterator.Position c = new Transliterator.Position();
3460 // Call the toString() API:
3461 if (a.equals(b) && a.equals(c)) {
3462 logln("Ok: " + a + " == " + b + " == " + c);
3464 errln("FAIL: " + a + " != " + b + " != " + c);
3468 //======================================================================
3469 // New tests for the ::BEGIN/::END syntax
3470 //======================================================================
3472 private static final String[] BEGIN_END_RULES = new String[] {
3486 "", // test case commented out below, this is here to keep from messing up the indexes
3495 "", // test case commented out below, this is here to keep from messing up the indexes
3504 "", // test case commented out below, this is here to keep from messing up the indexes
3523 "$ws = [[:Separator:][\\u0009-\\u000C]$];"
3524 + "$delim = [\\-$ws];"
3525 + "$ws $delim* > ' ';"
3526 + "'-' $delim* > '-';",
3530 + "$ws = [[:Separator:][\\u0009-\\u000C]$];"
3531 + "$delim = [\\-$ws];"
3532 + "$ws $delim* > ' ';"
3533 + "'-' $delim* > '-';",
3536 "$ws = [[:Separator:][\\u0009-\\u000C]$];"
3537 + "$delim = [\\-$ws];"
3538 + "$ws $delim* > ' ';"
3539 + "'-' $delim* > '-';"
3543 "$ws = [[:Separator:][\\u0009-\\u000C]$];"
3544 + "$delim = [\\-$ws];"
3546 + "$ws $delim* > ' ';"
3547 + "'-' $delim* > '-';",
3552 + "$ws = [[:Separator:][\\u0009-\\u000C]$];"
3553 + "$delim = [\\-$ws];"
3555 + "$ws $delim* > ' ';"
3556 + "'-' $delim* > '-';",
3558 "", // test case commented out below, this is here to keep from messing up the indexes
3562 "$ws = [[:Separator:][\\u0009-\\u000C]$];"
3563 + "$delim = [\\-$ws];"
3565 + "$ws $delim* > ' ';"
3566 + "'-' $delim* > '-';"
3569 "", // test case commented out below, this is here to keep from messing up the indexes
3573 "$ws = [[:Separator:][\\u0009-\\u000C]$];"
3574 + "$delim = [\\-$ws];"
3577 + "$ws $delim* > ' ';"
3578 + "'-' $delim* > '-';"
3581 + "$ab { ' ' } $ab > '-';"
3588 "", // test case commented out below, this is here to keep from messing up the indexes
3591 "$ws = [[:Separator:][\\u0009-\\u000C]$];"
3592 + "$delim = [\\-$ws];"
3595 + "$ws $delim* > ' ';"
3596 + "'-' $delim* > '-';"
3598 + "$ab { ' ' } $ab > '-';"
3601 + "'a-a' > a\\%|a;",
3614 "", // test case commented out below, this is here to keep from messing up the indexes
3635 "", // test case commented out below, this is here to keep from messing up the indexes
3647 (This entire test is commented out below and will need some heavy revision when we re-add
3648 the ::BEGIN/::END stuff)
3649 private static final String[] BOGUS_BEGIN_END_RULES = new String[] {
3670 private static final String[] BEGIN_END_TEST_CASES = new String[] {
3671 BEGIN_END_RULES[0], "abc ababc aba", "xy zbc z",
3672 // BEGIN_END_RULES[1], "abc ababc aba", "xy abxy z",
3673 // BEGIN_END_RULES[2], "abc ababc aba", "xy abxy z",
3674 // BEGIN_END_RULES[3], "abc ababc aba", "xy abxy z",
3675 BEGIN_END_RULES[4], "abc ababc aba", "xy abxy z",
3676 BEGIN_END_RULES[5], "abccabaacababcbc", "PXAARXQBR",
3678 BEGIN_END_RULES[6], "e e - e---e- e", "e e e-e-e",
3679 BEGIN_END_RULES[7], "e e - e---e- e", "e e e-e-e",
3680 BEGIN_END_RULES[8], "e e - e---e- e", "e e e-e-e",
3681 BEGIN_END_RULES[9], "e e - e---e- e", "e e e-e-e",
3682 // BEGIN_END_RULES[10], "e e - e---e- e", "e e e-e-e",
3683 // BEGIN_END_RULES[11], "e e - e---e- e", "e e e-e-e",
3684 // BEGIN_END_RULES[12], "e e - e---e- e", "e e e-e-e",
3685 // BEGIN_END_RULES[12], "a a a a", "a%a%a%a",
3686 // BEGIN_END_RULES[12], "a a-b c b a", "a%a-b cb-a",
3687 BEGIN_END_RULES[13], "e e - e---e- e", "e e e-e-e",
3688 BEGIN_END_RULES[13], "a a a a", "a%a%a%a",
3689 BEGIN_END_RULES[13], "a a-b c b a", "a%a-b cb-a",
3691 // BEGIN_END_RULES[14], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ",
3692 BEGIN_END_RULES[15], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ",
3693 // BEGIN_END_RULES[16], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ",
3694 BEGIN_END_RULES[17], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ"
3697 public void TestBeginEnd() {
3698 // run through the list of test cases above
3699 for (int i = 0; i < BEGIN_END_TEST_CASES.length; i += 3) {
3700 expect(BEGIN_END_TEST_CASES[i], BEGIN_END_TEST_CASES[i + 1], BEGIN_END_TEST_CASES[i + 2]);
3703 // instantiate the one reversible rule set in the reverse direction and make sure it does the right thing
3704 Transliterator reversed = Transliterator.createFromRules("Reversed", BEGIN_END_RULES[17],
3705 Transliterator.REVERSE);
3706 expect(reversed, "xy XY XYZ yz YZ", "xy abc xaba yz aba");
3708 // finally, run through the list of syntactically-ill-formed rule sets above and make sure
3709 // that all of them cause errors
3711 (commented out until we have the real ::BEGIN/::END stuff in place)
3712 for (int i = 0; i < BOGUS_BEGIN_END_RULES.length; i++) {
3714 Transliterator t = Transliterator.createFromRules("foo", BOGUS_BEGIN_END_RULES[i],
3715 Transliterator.FORWARD);
3716 errln("Should have gotten syntax error from " + BOGUS_BEGIN_END_RULES[i]);
3718 catch (IllegalArgumentException e) {
3719 // this is supposed to happen; do nothing here
3725 public void TestBeginEndToRules() {
3726 // run through the same list of test cases we used above, but this time, instead of just
3727 // instantiating a Transliterator from the rules and running the test against it, we instantiate
3728 // a Transliterator from the rules, do toRules() on it, instantiate a Transliterator from
3729 // the resulting set of rules, and make sure that the generated rule set is semantically equivalent
3730 // to (i.e., does the same thing as) the original rule set
3731 for (int i = 0; i < BEGIN_END_TEST_CASES.length; i += 3) {
3732 Transliterator t = Transliterator.createFromRules("--", BEGIN_END_TEST_CASES[i],
3733 Transliterator.FORWARD);
3734 String rules = t.toRules(false);
3735 Transliterator t2 = Transliterator.createFromRules("Test case #" + (i / 3), rules, Transliterator.FORWARD);
3736 expect(t2, BEGIN_END_TEST_CASES[i + 1], BEGIN_END_TEST_CASES[i + 2]);
3739 // do the same thing for the reversible test case
3740 Transliterator reversed = Transliterator.createFromRules("Reversed", BEGIN_END_RULES[17],
3741 Transliterator.REVERSE);
3742 String rules = reversed.toRules(false);
3743 Transliterator reversed2 = Transliterator.createFromRules("Reversed", rules, Transliterator.FORWARD);
3744 expect(reversed2, "xy XY XYZ yz YZ", "xy abc xaba yz aba");
3747 public void TestRegisterAlias() {
3748 String longID = "Lower;[aeiou]Upper";
3749 String shortID = "Any-CapVowels";
3750 String reallyShortID = "CapVowels";
3752 Transliterator.registerAlias(shortID, longID);
3754 Transliterator t1 = Transliterator.getInstance(longID);
3755 Transliterator t2 = Transliterator.getInstance(reallyShortID);
3757 if (!t1.getID().equals(longID))
3758 errln("Transliterator instantiated with long ID doesn't have long ID");
3759 if (!t2.getID().equals(reallyShortID))
3760 errln("Transliterator instantiated with short ID doesn't have short ID");
3762 if (!t1.toRules(true).equals(t2.toRules(true)))
3763 errln("Alias transliterators aren't the same");
3765 Transliterator.unregister(shortID);
3768 t1 = Transliterator.getInstance(shortID);
3769 errln("Instantiation with short ID succeeded after short ID was unregistered");
3771 catch (IllegalArgumentException e) {
3774 // try the same thing again, but this time with something other than
3775 // an instance of CompoundTransliterator
3776 String realID = "Latin-Greek";
3777 String fakeID = "Latin-dlgkjdflkjdl";
3778 Transliterator.registerAlias(fakeID, realID);
3780 t1 = Transliterator.getInstance(realID);
3781 t2 = Transliterator.getInstance(fakeID);
3783 if (!t1.toRules(true).equals(t2.toRules(true)))
3784 errln("Alias transliterators aren't the same");
3786 Transliterator.unregister(fakeID);
3790 * Test the Halfwidth-Fullwidth transliterator (ticket 6281).
3792 public void TestHalfwidthFullwidth() {
3793 Transliterator hf = Transliterator.getInstance("Halfwidth-Fullwidth");
3794 Transliterator fh = Transliterator.getInstance("Fullwidth-Halfwidth");
3796 // Array of 3n items
3798 // "hf"|"fh"|"both",
3803 "\uFFE9\uFFEA\uFFEB\uFFEC\u0061\uFF71\u00AF\u0020",
3804 "\u2190\u2191\u2192\u2193\uFF41\u30A2\uFFE3\u3000",
3807 for (int i=0; i<DATA.length; i+=3) {
3808 switch (DATA[i].charAt(0)) {
3809 case 'h': // Halfwidth-Fullwidth only
3810 expect(hf, DATA[i+1], DATA[i+2]);
3812 case 'f': // Fullwidth-Halfwidth only
3813 expect(fh, DATA[i+2], DATA[i+1]);
3815 case 'b': // both directions
3816 expect(hf, DATA[i+1], DATA[i+2]);
3817 expect(fh, DATA[i+2], DATA[i+1]);
3825 * Test Thai. The text is the first paragraph of "What is Unicode" from the Unicode.org web site.
3826 * TODO: confirm that the expected results are correct.
3827 * For now, test just confirms that C++ and Java give identical results.
3829 public void TestThai() {
3830 Transliterator tr = Transliterator.getInstance("Any-Latin", Transliterator.FORWARD);
3832 "\u0e42\u0e14\u0e22\u0e1e\u0e37\u0e49\u0e19\u0e10\u0e32\u0e19\u0e41\u0e25\u0e49\u0e27, \u0e04\u0e2d" +
3833 "\u0e21\u0e1e\u0e34\u0e27\u0e40\u0e15\u0e2d\u0e23\u0e4c\u0e08\u0e30\u0e40\u0e01\u0e35\u0e48\u0e22" +
3834 "\u0e27\u0e02\u0e49\u0e2d\u0e07\u0e01\u0e31\u0e1a\u0e40\u0e23\u0e37\u0e48\u0e2d\u0e07\u0e02\u0e2d" +
3835 "\u0e07\u0e15\u0e31\u0e27\u0e40\u0e25\u0e02. \u0e04\u0e2d\u0e21\u0e1e\u0e34\u0e27\u0e40\u0e15\u0e2d" +
3836 "\u0e23\u0e4c\u0e08\u0e31\u0e14\u0e40\u0e01\u0e47\u0e1a\u0e15\u0e31\u0e27\u0e2d\u0e31\u0e01\u0e29" +
3837 "\u0e23\u0e41\u0e25\u0e30\u0e2d\u0e31\u0e01\u0e02\u0e23\u0e30\u0e2d\u0e37\u0e48\u0e19\u0e46 \u0e42" +
3838 "\u0e14\u0e22\u0e01\u0e32\u0e23\u0e01\u0e33\u0e2b\u0e19\u0e14\u0e2b\u0e21\u0e32\u0e22\u0e40\u0e25" +
3839 "\u0e02\u0e43\u0e2b\u0e49\u0e2a\u0e33\u0e2b\u0e23\u0e31\u0e1a\u0e41\u0e15\u0e48\u0e25\u0e30\u0e15" +
3840 "\u0e31\u0e27. \u0e01\u0e48\u0e2d\u0e19\u0e2b\u0e19\u0e49\u0e32\u0e17\u0e35\u0e48\u0e4a Unicode \u0e08" +
3841 "\u0e30\u0e16\u0e39\u0e01\u0e2a\u0e23\u0e49\u0e32\u0e07\u0e02\u0e36\u0e49\u0e19, \u0e44\u0e14\u0e49" +
3842 "\u0e21\u0e35\u0e23\u0e30\u0e1a\u0e1a encoding \u0e2d\u0e22\u0e39\u0e48\u0e2b\u0e25\u0e32\u0e22\u0e23" +
3843 "\u0e49\u0e2d\u0e22\u0e23\u0e30\u0e1a\u0e1a\u0e2a\u0e33\u0e2b\u0e23\u0e31\u0e1a\u0e01\u0e32\u0e23" +
3844 "\u0e01\u0e33\u0e2b\u0e19\u0e14\u0e2b\u0e21\u0e32\u0e22\u0e40\u0e25\u0e02\u0e40\u0e2b\u0e25\u0e48" +
3845 "\u0e32\u0e19\u0e35\u0e49. \u0e44\u0e21\u0e48\u0e21\u0e35 encoding \u0e43\u0e14\u0e17\u0e35\u0e48" +
3846 "\u0e21\u0e35\u0e08\u0e33\u0e19\u0e27\u0e19\u0e15\u0e31\u0e27\u0e2d\u0e31\u0e01\u0e02\u0e23\u0e30" +
3847 "\u0e21\u0e32\u0e01\u0e40\u0e1e\u0e35\u0e22\u0e07\u0e1e\u0e2d: \u0e22\u0e01\u0e15\u0e31\u0e27\u0e2d" +
3848 "\u0e22\u0e48\u0e32\u0e07\u0e40\u0e0a\u0e48\u0e19, \u0e40\u0e09\u0e1e\u0e32\u0e30\u0e43\u0e19\u0e01" +
3849 "\u0e25\u0e38\u0e48\u0e21\u0e2a\u0e2b\u0e20\u0e32\u0e1e\u0e22\u0e38\u0e42\u0e23\u0e1b\u0e40\u0e1e" +
3850 "\u0e35\u0e22\u0e07\u0e41\u0e2b\u0e48\u0e07\u0e40\u0e14\u0e35\u0e22\u0e27 \u0e01\u0e47\u0e15\u0e49" +
3851 "\u0e2d\u0e07\u0e01\u0e32\u0e23\u0e2b\u0e25\u0e32\u0e22 encoding \u0e43\u0e19\u0e01\u0e32\u0e23\u0e04" +
3852 "\u0e23\u0e2d\u0e1a\u0e04\u0e25\u0e38\u0e21\u0e17\u0e38\u0e01\u0e20\u0e32\u0e29\u0e32\u0e43\u0e19" +
3853 "\u0e01\u0e25\u0e38\u0e48\u0e21. \u0e2b\u0e23\u0e37\u0e2d\u0e41\u0e21\u0e49\u0e41\u0e15\u0e48\u0e43" +
3854 "\u0e19\u0e20\u0e32\u0e29\u0e32\u0e40\u0e14\u0e35\u0e48\u0e22\u0e27 \u0e40\u0e0a\u0e48\u0e19 \u0e20" +
3855 "\u0e32\u0e29\u0e32\u0e2d\u0e31\u0e07\u0e01\u0e24\u0e29 \u0e01\u0e47\u0e44\u0e21\u0e48\u0e21\u0e35" +
3856 " encoding \u0e43\u0e14\u0e17\u0e35\u0e48\u0e40\u0e1e\u0e35\u0e22\u0e07\u0e1e\u0e2d\u0e2a\u0e33\u0e2b" +
3857 "\u0e23\u0e31\u0e1a\u0e17\u0e38\u0e01\u0e15\u0e31\u0e27\u0e2d\u0e31\u0e01\u0e29\u0e23, \u0e40\u0e04" +
3858 "\u0e23\u0e37\u0e48\u0e2d\u0e07\u0e2b\u0e21\u0e32\u0e22\u0e27\u0e23\u0e23\u0e04\u0e15\u0e2d\u0e19" +
3859 " \u0e41\u0e25\u0e30\u0e2a\u0e31\u0e0d\u0e25\u0e31\u0e01\u0e29\u0e13\u0e4c\u0e17\u0e32\u0e07\u0e40" +
3860 "\u0e17\u0e04\u0e19\u0e34\u0e04\u0e17\u0e35\u0e48\u0e43\u0e0a\u0e49\u0e01\u0e31\u0e19\u0e2d\u0e22" +
3861 "\u0e39\u0e48\u0e17\u0e31\u0e48\u0e27\u0e44\u0e1b.";
3864 "doy ph\u1ee5\u0304\u0302n \u1e6d\u0304h\u0101n l\u00e6\u0302w, khxmphiwtexr\u0312 ca ke\u012b\u0300" +
3865 "ywk\u0304\u0125xng k\u1ea1b re\u1ee5\u0304\u0300xng k\u0304hxng t\u1ea1wlek\u0304h. khxmphiwtexr" +
3866 "\u0312 c\u1ea1d k\u0115b t\u1ea1w x\u1ea1ks\u0304\u02b9r l\u00e6a x\u1ea1kk\u0304h ra x\u1ee5\u0304" +
3867 "\u0300n\u00ab doy k\u0101r k\u1ea3h\u0304nd h\u0304m\u0101ylek\u0304h h\u0304\u0131\u0302 s\u0304" +
3868 "\u1ea3h\u0304r\u1ea1b t\u00e6\u0300la t\u1ea1w. k\u0300xn h\u0304n\u0302\u0101 th\u012b\u0300\u0301" +
3869 " Unicode ca t\u0304h\u016bk s\u0304r\u0302\u0101ng k\u0304h\u1ee5\u0302n, d\u1ecb\u0302 m\u012b " +
3870 "rabb encoding xy\u016b\u0300 h\u0304l\u0101y r\u0302xy rabb s\u0304\u1ea3h\u0304r\u1ea1b k\u0101" +
3871 "r k\u1ea3h\u0304nd h\u0304m\u0101ylek\u0304h h\u0304el\u0300\u0101 n\u012b\u0302. m\u1ecb\u0300m" +
3872 "\u012b encoding d\u0131 th\u012b\u0300 m\u012b c\u1ea3nwn t\u1ea1w x\u1ea1kk\u0304hra m\u0101k p" +
3873 "he\u012byng phx: yk t\u1ea1wx\u1ef3\u0101ng ch\u00e8n, c\u0304heph\u0101a n\u0131 kl\u00f9m s\u0304" +
3874 "h\u0304p\u0323h\u0101ph yurop phe\u012byng h\u0304\u00e6\u0300ng de\u012byw k\u0306 t\u0302xngk\u0101" +
3875 "r h\u0304l\u0101y encoding n\u0131 k\u0101r khrxbkhlum thuk p\u0323h\u0101s\u0304\u02b9\u0101 n\u0131" +
3876 " kl\u00f9m. h\u0304r\u1ee5\u0304x m\u00e6\u0302t\u00e6\u0300 n\u0131 p\u0323h\u0101s\u0304\u02b9" +
3877 "\u0101 de\u012b\u0300yw ch\u00e8n p\u0323h\u0101s\u0304\u02b9\u0101 x\u1ea1ngkvs\u0304\u02b9 k\u0306" +
3878 " m\u1ecb\u0300m\u012b encoding d\u0131 th\u012b\u0300 phe\u012byng phx s\u0304\u1ea3h\u0304r\u1ea1" +
3879 "b thuk t\u1ea1w x\u1ea1ks\u0304\u02b9r, kher\u1ee5\u0304\u0300xngh\u0304m\u0101y wrrkh txn l\u00e6" +
3880 "a s\u0304\u1ea1\u1ef5l\u1ea1ks\u0304\u02b9\u1e47\u0312 th\u0101ng thekhnikh th\u012b\u0300 ch\u0131" +
3881 "\u0302 k\u1ea1n xy\u016b\u0300 th\u1ea1\u0300wp\u1ecb.";
3883 expect(tr, thaiText, latinText);
3887 //======================================================================
3888 // These tests are not mirrored (yet) in icu4c at
3889 // source/test/intltest/transtst.cpp
3890 //======================================================================
3893 * Improve code coverage.
3895 public void TestCoverage() {
3896 // NullTransliterator
3897 Transliterator t = Transliterator.getInstance("Null", Transliterator.FORWARD);
3898 expect(t, "a", "a");
3900 // Source, target set
3901 t = Transliterator.getInstance("Latin-Greek", Transliterator.FORWARD);
3902 t.setFilter(new UnicodeSet("[A-Z]"));
3903 logln("source = " + t.getSourceSet());
3904 logln("target = " + t.getTargetSet());
3906 t = Transliterator.createFromRules("x", "(.) > &Any-Hex($1);", Transliterator.FORWARD);
3907 logln("source = " + t.getSourceSet());
3908 logln("target = " + t.getTargetSet());
3911 * Test case for threading problem in NormalizationTransliterator
3912 * reported by ticket#5160
3914 public void TestT5160() {
// Builds one NormTranslitTask per testData entry, all sharing a single "NFC"
// transliterator instance, runs them through TestUtil.runUntilDone, and then
// inspects each task's recorded error message.
3915 final String[] testData = {
3921 final String[] expected = {
// One transliterator instance is deliberately shared by every task.
3927 Transliterator translit = Transliterator.getInstance("NFC");
3928 NormTranslitTask[] tasks = new NormTranslitTask[testData.length];
3929 for (int i = 0; i < tasks.length; i++) {
3930 tasks[i] = new NormTranslitTask(translit, testData[i], expected[i]);
3932 TestUtil.runUntilDone(tasks);
// A non-null error message means that task saw output different from its expected text.
3934 for (int i = 0; i < tasks.length; i++) {
3935 if (tasks[i].getErrorMessage() != null) {
// NOTE(review): the failure is only printed to System.out; nothing visible here
// reports it through errln(), so a mismatch may not mark the test as failed.
// Confirm, and consider reporting via errln() instead.
3936 System.out.println("Fail: thread#" + i + " " + tasks[i].getErrorMessage());
3942 static class NormTranslitTask implements Runnable {
3943 Transliterator translit;
3945 String expectedData;
3948 NormTranslitTask(Transliterator translit, String testData, String expectedData) {
3949 this.translit = translit;
3950 this.testData = testData;
3951 this.expectedData = expectedData;
3956 StringBuffer inBuf = new StringBuffer(testData);
3957 StringBuffer expectedBuf = new StringBuffer(expectedData);
3959 for(int i = 0; i < 1000; i++) {
3960 String in = inBuf.toString();
3961 String out = translit.transliterate(in);
3962 String expected = expectedBuf.toString();
3963 if (!out.equals(expected)) {
3964 errorMsg = "in {" + in + "} / out {" + out + "} / expected {" + expected + "}";
3967 inBuf.append(testData);
3968 expectedBuf.append(expectedData);
3972 public String getErrorMessage() {
3977 //======================================================================
3979 //======================================================================
3980 void expect(String rules,
3982 String expectedResult,
3983 Transliterator.Position pos) {
3984 Transliterator t = Transliterator.createFromRules("<ID>", rules, Transliterator.FORWARD);
3985 expect(t, source, expectedResult, pos);
3988 void expect(String rules, String source, String expectedResult) {
3989 expect(rules, source, expectedResult, null);
3992 void expect(Transliterator t, String source, String expectedResult,
3993 Transliterator reverseTransliterator) {
3994 expect(t, source, expectedResult);
3995 if (reverseTransliterator != null) {
3996 expect(reverseTransliterator, expectedResult, source);
4000 void expect(Transliterator t, String source, String expectedResult) {
4001 expect(t, source, expectedResult, (Transliterator.Position) null);
4004 void expect(Transliterator t, String source, String expectedResult,
4005 Transliterator.Position pos) {
4007 String result = t.transliterate(source);
4008 if (!expectAux(t.getID() + ":String", source, result, expectedResult)) return;
4011 Transliterator.Position index = null;
4013 index = new Transliterator.Position(0, source.length(), 0, source.length());
4015 index = new Transliterator.Position(pos.contextStart, pos.contextLimit,
4016 pos.start, pos.limit);
4019 ReplaceableString rsource = new ReplaceableString(source);
4021 t.finishTransliteration(rsource, index);
4022 // Do it all at once -- below we do it incrementally
4024 if (index.start != index.limit) {
4025 expectAux(t.getID() + ":UNFINISHED", source,
4026 "start: " + index.start + ", limit: " + index.limit, false, expectedResult);
4029 String result = rsource.toString();
4030 if (!expectAux(t.getID() + ":Replaceable", source, result, expectedResult)) return;
4034 index = new Transliterator.Position();
4036 index = new Transliterator.Position(pos.contextStart, pos.contextLimit,
4037 pos.start, pos.limit);
4040 // Test incremental transliteration -- this result
4041 // must be the same after we finalize (see below).
4042 List<String> v = new ArrayList<String>();
4044 rsource.replace(0, rsource.length(), "");
4046 rsource.replace(0, 0, source);
4047 v.add(UtilityExtensions.formatInput(rsource, index));
4048 t.transliterate(rsource, index);
4049 v.add(UtilityExtensions.formatInput(rsource, index));
4051 for (int i=0; i<source.length(); ++i) {
4052 //v.add(i == 0 ? "" : " + " + source.charAt(i) + "");
4053 //log.append(source.charAt(i)).append(" -> "));
4054 t.transliterate(rsource, index, source.charAt(i));
4055 //v.add(UtilityExtensions.formatInput(rsource, index) + source.substring(i+1));
4056 v.add(UtilityExtensions.formatInput(rsource, index) +
4057 ((i<source.length()-1)?(" + '" + source.charAt(i+1) + "' ->"):" =>"));
4061 // As a final step in keyboard transliteration, we must call
4062 // transliterate to finish off any pending partial matches that
4063 // were waiting for more input.
4064 t.finishTransliteration(rsource, index);
4065 result = rsource.toString();
4066 //log.append(" => ").append(rsource.toString());
4069 String[] results = new String[v.size()];
4071 expectAux(t.getID() + ":Incremental", results,
4072 result.equals(expectedResult),
4076 boolean expectAux(String tag, String source,
4077 String result, String expectedResult) {
4078 return expectAux(tag, new String[] {source, result},
4079 result.equals(expectedResult),
4083 boolean expectAux(String tag, String source,
4084 String result, boolean pass,
4085 String expectedResult) {
4086 return expectAux(tag, new String[] {source, result},
4091 boolean expectAux(String tag, String source,
4093 String expectedResult) {
4094 return expectAux(tag, new String[] {source},
4099 boolean expectAux(String tag, String[] results, boolean pass,
4100 String expectedResult) {
4101 msg((pass?"(":"FAIL: (")+tag+")", pass ? LOG : ERR, true, true);
4103 for (int i = 0; i < results.length; ++i) {
4107 } else if (i == results.length - 1) {
4110 if (!isVerbose() && pass) continue;
4111 label = "interm" + i + ": ";
4113 msg(" " + label + results[i], pass ? LOG : ERR, false, true);
4117 msg( " expected: " + expectedResult, ERR, false, true);
4123 private void assertTransform(String message, String expected, StringTransform t, String source) {
4124 assertEquals(message + " " + source, expected, t.transform(source));
4128 private void assertTransform(String message, String expected, StringTransform t, StringTransform back, String source, String source2) {
4129 assertEquals(message + " " +source, expected, t.transform(source));
4130 assertEquals(message + " " +source2, expected, t.transform(source2));
4131 assertEquals(message + " " + expected, source, back.transform(expected));
4135 * Tests the method public Enumeration<String> getAvailableTargets(String source)
4137 public void TestGetAvailableTargets() {
4139 // Tests when if (targets == null) is true
4140 Transliterator.getAvailableTargets("");
4141 } catch (Exception e) {
4142 errln("TransliteratorRegistry.getAvailableTargets(String) was not " + "supposed to return an exception.");
4147 * Tests the method public Enumeration<String> getAvailableVariants(String source, String target)
4149 public void TestGetAvailableVariants() {
4151 // Tests when if (targets == null) is true
4152 Transliterator.getAvailableVariants("", "");
4153 } catch (Exception e) {
4154 errln("TransliteratorRegistry.getAvailableVariants(String) was not " + "supposed to return an exception.");
4159 * Tests the method String nextLine() in RuleBody
4161 public void TestNextLine() {
4162 // Tests when "if (s != null && s.length() > 0 && s.charAt(s.length() - 1) == '\\') is true
4164 Transliterator.createFromRules("gif", "\\", Transliterator.FORWARD);
4165 } catch(Exception e){
4166 errln("TransliteratorParser.nextLine() was not suppose to return an " +
4167 "exception for a rule of '\\'");