2 *******************************************************************************
\r
3 * Copyright (C) 1996-2010, International Business Machines Corporation and *
\r
4 * others. All Rights Reserved. *
\r
5 *******************************************************************************
\r
7 package com.ibm.icu.dev.test.translit;
\r
9 import java.util.Enumeration;
\r
10 import java.util.HashMap;
\r
11 import java.util.HashSet;
\r
12 import java.util.Iterator;
\r
13 import java.util.Locale;
\r
14 import java.util.Vector;
\r
16 import com.ibm.icu.dev.test.TestFmwk;
\r
17 import com.ibm.icu.dev.test.TestUtil;
\r
18 import com.ibm.icu.impl.Utility;
\r
19 import com.ibm.icu.impl.UtilityExtensions;
\r
20 import com.ibm.icu.lang.UCharacter;
\r
21 import com.ibm.icu.lang.UScript;
\r
22 import com.ibm.icu.text.Replaceable;
\r
23 import com.ibm.icu.text.ReplaceableString;
\r
24 import com.ibm.icu.text.StringTransform;
\r
25 import com.ibm.icu.text.Transliterator;
\r
26 import com.ibm.icu.text.UTF16;
\r
27 import com.ibm.icu.text.UnicodeFilter;
\r
28 import com.ibm.icu.text.UnicodeSet;
\r
29 import com.ibm.icu.text.UnicodeSetIterator;
\r
30 import com.ibm.icu.util.CaseInsensitiveString;
\r
31 import com.ibm.icu.util.ULocale;
\r
33 /***********************************************************************
\r
35 HOW TO USE THIS TEST FILE
\r
37 How I developed on two platforms
\r
38 without losing (too much of) my mind
\r
41 1. Add new tests by copying/pasting/changing existing tests. On Java,
\r
42 any public void method named Test...() taking no parameters becomes
\r
43 a test. On C++, you need to modify the header and add a line to
\r
44 the runIndexedTest() dispatch method.
\r
46 2. Make liberal use of the expect() method; it is your friend.
\r
48 3. The tests in this file exactly match those in a sister file on the
\r
49 other side. The two files are:
\r
51 icu4j: src/com.ibm.icu.dev.test/translit/TransliteratorTest.java
\r
52 icu4c: source/test/intltest/transtst.cpp
\r
54 ==> THIS IS THE IMPORTANT PART <==
\r
56 When you add a test in this file, add it in transtst.cpp too.
\r
57 Give it the same name and put it in the same relative place. This
\r
58 makes maintenance a lot simpler for any poor soul who ends up
\r
59 trying to synchronize the tests between icu4j and icu4c.
\r
61 4. If you MUST enter a test that is NOT paralleled in the sister file,
\r
62 then add it in the special non-mirrored section. These are
\r
71 Make sure you document the reason the test is here and not there.
\r
76 ***********************************************************************/
\r
80 * @summary General test of Transliterator
\r
82 public class TransliteratorTest extends TestFmwk {
\r
84 public static void main(String[] args) throws Exception {
\r
85 new TransliteratorTest().run(args);
\r
88 public void TestHangul() {
\r
90 Transliterator lh = Transliterator.getInstance("Latin-Hangul");
\r
91 Transliterator hl = lh.getInverse();
\r
93 assertTransform("Transform", "\uCE20", lh, "ch");
\r
95 assertTransform("Transform", "\uC544\uB530", lh, hl, "atta", "a-tta");
\r
96 assertTransform("Transform", "\uC544\uBE60", lh, hl, "appa", "a-ppa");
\r
97 assertTransform("Transform", "\uC544\uC9DC", lh, hl, "ajja", "a-jja");
\r
98 assertTransform("Transform", "\uC544\uAE4C", lh, hl, "akka", "a-kka");
\r
99 assertTransform("Transform", "\uC544\uC2F8", lh, hl, "assa", "a-ssa");
\r
100 assertTransform("Transform", "\uC544\uCC28", lh, hl, "acha", "a-cha");
\r
101 assertTransform("Transform", "\uC545\uC0AC", lh, hl, "agsa", "ag-sa");
\r
102 assertTransform("Transform", "\uC548\uC790", lh, hl, "anja", "an-ja");
\r
103 assertTransform("Transform", "\uC548\uD558", lh, hl, "anha", "an-ha");
\r
104 assertTransform("Transform", "\uC54C\uAC00", lh, hl, "alga", "al-ga");
\r
105 assertTransform("Transform", "\uC54C\uB9C8", lh, hl, "alma", "al-ma");
\r
106 assertTransform("Transform", "\uC54C\uBC14", lh, hl, "alba", "al-ba");
\r
107 assertTransform("Transform", "\uC54C\uC0AC", lh, hl, "alsa", "al-sa");
\r
108 assertTransform("Transform", "\uC54C\uD0C0", lh, hl, "alta", "al-ta");
\r
109 assertTransform("Transform", "\uC54C\uD30C", lh, hl, "alpa", "al-pa");
\r
110 assertTransform("Transform", "\uC54C\uD558", lh, hl, "alha", "al-ha");
\r
111 assertTransform("Transform", "\uC555\uC0AC", lh, hl, "absa", "ab-sa");
\r
112 assertTransform("Transform", "\uC548\uAC00", lh, hl, "anga", "an-ga");
\r
113 assertTransform("Transform", "\uC545\uC2F8", lh, hl, "agssa", "ag-ssa");
\r
114 assertTransform("Transform", "\uC548\uC9DC", lh, hl, "anjja", "an-jja");
\r
115 assertTransform("Transform", "\uC54C\uC2F8", lh, hl, "alssa", "al-ssa");
\r
116 assertTransform("Transform", "\uC54C\uB530", lh, hl, "altta", "al-tta");
\r
117 assertTransform("Transform", "\uC54C\uBE60", lh, hl, "alppa", "al-ppa");
\r
118 assertTransform("Transform", "\uC555\uC2F8", lh, hl, "abssa", "ab-ssa");
\r
119 assertTransform("Transform", "\uC546\uCE74", lh, hl, "akkka", "akk-ka");
\r
120 assertTransform("Transform", "\uC558\uC0AC", lh, hl, "asssa", "ass-sa");
\r
124 public void TestChinese() {
\r
125 Transliterator hanLatin = Transliterator.getInstance("Han-Latin");
\r
126 assertTransform("Transform", "z\u00E0o Unicode", hanLatin, "\u9020Unicode");
\r
127 assertTransform("Transform", "z\u00E0i chu\u00E0ng z\u00E0o Unicode zh\u012B qi\u00E1n", hanLatin, "\u5728\u5275\u9020Unicode\u4E4B\u524D");
\r
130 public void TestRegistry() {
\r
131 checkRegistry("foo3", "::[a-z]; ::NFC; [:letter:] a > b;"); // check compound
\r
132 checkRegistry("foo2", "::NFC; [:letter:] a > b;"); // check compound
\r
133 checkRegistry("foo1", "[:letter:] a > b;");
\r
134 for (Enumeration e = Transliterator.getAvailableIDs(); e.hasMoreElements(); ) {
\r
135 String id = (String) e.nextElement();
\r
140 private void checkRegistry (String id, String rules) {
\r
141 Transliterator foo = Transliterator.createFromRules(id, rules, Transliterator.FORWARD);
\r
142 Transliterator.registerInstance(foo);
\r
146 private void checkRegistry(String id) {
\r
147 Transliterator fie = Transliterator.getInstance(id);
\r
148 final UnicodeSet fae = new UnicodeSet("[a-z5]");
\r
149 fie.setFilter(fae);
\r
150 Transliterator foe = Transliterator.getInstance(id);
\r
151 UnicodeFilter fee = foe.getFilter();
\r
152 if (fae.equals(fee)) {
\r
153 errln("Changed what is in registry for " + id);
\r
157 public void TestInstantiation() {
\r
158 long ms = System.currentTimeMillis();
\r
160 for (Enumeration e = Transliterator.getAvailableIDs(); e.hasMoreElements(); ) {
\r
161 ID = (String) e.nextElement();
\r
162 if (ID.equals("Latin-Han/definition")) {
\r
163 System.out.println("\nTODO: disabling Latin-Han/definition check for now: fix later");
\r
166 Transliterator t = null;
\r
168 t = Transliterator.getInstance(ID);
\r
169 // This is only true for some subclasses
\r
170 // // We should get a new instance if we try again
\r
171 // Transliterator t2 = Transliterator.getInstance(ID);
\r
173 // logln("OK: " + Transliterator.getDisplayName(ID) + " (" + ID + "): " + t);
\r
175 // errln("FAIL: " + ID + " returned identical instances");
\r
178 } catch (IllegalArgumentException ex) {
\r
179 errln("FAIL: " + ID);
\r
183 // if (t.getFilter() != null) {
\r
184 // errln("Fail: Should never have filter on transliterator unless we started with one: " + ID + ", " + t.getFilter());
\r
188 // Now test toRules
\r
189 String rules = null;
\r
191 rules = t.toRules(true);
\r
193 Transliterator.createFromRules("x", rules, Transliterator.FORWARD);
\r
194 } catch (IllegalArgumentException ex2) {
\r
195 errln("FAIL: " + ID + ".toRules() => bad rules: " +
\r
202 // Now test the failure path
\r
204 ID = "<Not a valid Transliterator ID>";
\r
205 Transliterator t = Transliterator.getInstance(ID);
\r
206 errln("FAIL: " + ID + " returned " + t);
\r
207 } catch (IllegalArgumentException ex) {
\r
208 logln("OK: Bogus ID handled properly");
\r
211 ms = System.currentTimeMillis() - ms;
\r
212 logln("Elapsed time: " + ms + " ms");
\r
215 public void TestSimpleRules() {
\r
216 /* Example: rules 1. ab>x|y
\r
219 * []|eabcd start - no match, copy e to transliterated buffer
\r
220 * [e]|abcd match rule 1 - copy output & adjust cursor
\r
221 * [ex|y]cd match rule 2 - copy output & adjust cursor
\r
222 * [exz]|d no match, copy d to transliterated buffer
\r
229 /* Another set of rules:
\r
236 * [x|yzacw] No match
\r
241 expect("ab>x|yzacw;" +
\r
249 Transliterator t = Transliterator.createFromRules("<ID>",
\r
251 "$vowel=[aeiouAEIOU];" +
\r
253 "$vowel } $lu > '!';" +
\r
255 "'!' { $lu > '^';" +
\r
258 Transliterator.FORWARD);
\r
259 expect(t, "abcdefgABCDEFGU", "&bcd&fg!^**!^*&");
\r
263 * Test inline set syntax and set variable syntax.
\r
265 public void TestInlineSet() {
\r
266 expect("{ [:Ll:] } x > y; [:Ll:] > z;", "aAbxq", "zAyzz");
\r
267 expect("a[0-9]b > qrs", "1a7b9", "1qrs9");
\r
269 expect("$digit = [0-9];" +
\r
270 "$alpha = [a-zA-Z];" +
\r
271 "$alphanumeric = [$digit $alpha];" + // ***
\r
272 "$special = [^$alphanumeric];" + // ***
\r
273 "$alphanumeric > '-';" +
\r
276 "thx-1138", "---*----");
\r
280 * Create some inverses and confirm that they work. We have to be
\r
281 * careful how we do this, since the inverses will not be true
\r
282 * inverses -- we can't throw any random string at the composition
\r
283 * of the transliterators and expect the identity function. F x
\r
284 * F' != I. However, if we are careful about the input, we will
\r
285 * get the expected results.
\r
287 public void TestRuleBasedInverse() {
\r
308 // Careful here -- random strings will not work. If we keep
\r
309 // the left side to the domain and the right side to the range
\r
310 // we will be okay though (left, abc; right xyz).
\r
312 "abcacab", "zyxxxyy",
\r
316 Transliterator fwd = Transliterator.createFromRules("<ID>", RULES, Transliterator.FORWARD);
\r
317 Transliterator rev = Transliterator.createFromRules("<ID>", RULES, Transliterator.REVERSE);
\r
318 for (int i=0; i<DATA.length; i+=2) {
\r
319 expect(fwd, DATA[i], DATA[i+1]);
\r
320 expect(rev, DATA[i+1], DATA[i]);
\r
325 * Basic test of keyboard.
\r
327 public void TestKeyboard() {
\r
328 Transliterator t = Transliterator.createFromRules("<ID>",
\r
332 +"a>A;", Transliterator.FORWARD);
\r
334 // insertion, buffer
\r
341 null, "AycAY", // null means finishKeyboardTransliteration
\r
344 keyboardAux(t, DATA);
\r
348 * Basic test of keyboard with cursor.
\r
350 public void TestKeyboard2() {
\r
351 Transliterator t = Transliterator.createFromRules("<ID>",
\r
355 +"a>A;", Transliterator.FORWARD);
\r
357 // insertion, buffer
\r
360 "s", "Aps", // modified for rollback - "Ay",
\r
361 "c", "Apsc", // modified for rollback - "Ayc",
\r
364 "s", "AycAps", // modified for rollback - "AycAy",
\r
365 "c", "AycApsc", // modified for rollback - "AycAyc",
\r
367 null, "AycAY", // null means finishKeyboardTransliteration
\r
370 keyboardAux(t, DATA);
\r
374 * Test keyboard transliteration with back-replacement.
\r
376 public void TestKeyboard3() {
\r
377 // We want th>z but t>y. Furthermore, during keyboard
\r
378 // transliteration we want t>y then yh>z if t, then h are
\r
386 // Column 1: characters to add to buffer (as if typed)
\r
387 // Column 2: expected appearance of buffer after
\r
388 // keyboard transliteration.
\r
391 "t", "abt", // modified for rollback - "aby",
\r
393 "t", "abyct", // modified for rollback - "abycy",
\r
395 null, "abycz", // null means finishKeyboardTransliteration
\r
398 Transliterator t = Transliterator.createFromRules("<ID>", RULES, Transliterator.FORWARD);
\r
399 keyboardAux(t, DATA);
\r
402 private void keyboardAux(Transliterator t, String[] DATA) {
\r
403 Transliterator.Position index = new Transliterator.Position();
\r
404 ReplaceableString s = new ReplaceableString();
\r
405 for (int i=0; i<DATA.length; i+=2) {
\r
407 if (DATA[i] != null) {
\r
408 log = new StringBuffer(s.toString() + " + "
\r
411 t.transliterate(s, index, DATA[i]);
\r
413 log = new StringBuffer(s.toString() + " => ");
\r
414 t.finishTransliteration(s, index);
\r
416 UtilityExtensions.formatInput(log, s, index);
\r
417 if (s.toString().equals(DATA[i+1])) {
\r
418 logln(log.toString());
\r
420 errln("FAIL: " + log.toString() + ", expected " + DATA[i+1]);
\r
425 // Latin-Arabic has been temporarily removed until it can be
\r
428 // public void TestArabic() {
\r
429 // String DATA[] = {
\r
431 // "\u062a\u062a\u0645\u062a\u0639 "+
\r
432 // "\u0627\u0644\u0644\u063a\u0629 "+
\r
433 // "\u0627\u0644\u0639\u0631\u0628\u0628\u064a\u0629 "+
\r
434 // "\u0628\u0628\u0646\u0638\u0645 "+
\r
435 // "\u0643\u062a\u0627\u0628\u0628\u064a\u0629 "+
\r
436 // "\u062c\u0645\u064a\u0644\u0629"
\r
439 // Transliterator t = Transliterator.getInstance("Latin-Arabic");
\r
440 // for (int i=0; i<DATA.length; i+=2) {
\r
441 // expect(t, DATA[i], DATA[i+1]);
\r
446 * Compose the Kana transliterator forward and reverse and try
\r
447 * some strings that should come out unchanged.
\r
449 public void TestCompoundKana() {
\r
450 Transliterator t = Transliterator.getInstance("Latin-Katakana;Katakana-Latin");
\r
451 expect(t, "aaaaa", "aaaaa");
\r
455 * Compose the hex transliterators forward and reverse.
\r
457 public void TestCompoundHex() {
\r
458 Transliterator a = Transliterator.getInstance("Any-Hex");
\r
459 Transliterator b = Transliterator.getInstance("Hex-Any");
\r
460 // Transliterator[] trans = { a, b };
\r
461 // Transliterator ab = Transliterator.getInstance(trans);
\r
462 Transliterator ab = Transliterator.getInstance("Any-Hex;Hex-Any");
\r
464 // Do some basic tests of b
\r
465 expect(b, "\\u0030\\u0031", "01");
\r
467 String s = "abcde";
\r
470 // trans = new Transliterator[] { b, a };
\r
471 // Transliterator ba = Transliterator.getInstance(trans);
\r
472 Transliterator ba = Transliterator.getInstance("Hex-Any;Any-Hex");
\r
473 ReplaceableString str = new ReplaceableString(s);
\r
474 a.transliterate(str);
\r
475 expect(ba, str.toString(), str.toString());
\r
479 * Do some basic tests of filtering.
\r
481 public void TestFiltering() {
\r
482 Transliterator hex = Transliterator.getInstance("Any-Hex");
\r
483 hex.setFilter(new UnicodeFilter() {
\r
484 public boolean contains(int c) {
\r
487 public String toPattern(boolean escapeUnprintable) {
\r
490 public boolean matchesIndexValue(int v) {
\r
493 public void addMatchSetTo(UnicodeSet toUnionTo) {}
\r
495 String s = "abcde";
\r
496 String out = hex.transliterate(s);
\r
497 String exp = "\\u0061\\u0062c\\u0064\\u0065";
\r
498 if (out.equals(exp)) {
\r
499 logln("Ok: \"" + exp + "\"");
\r
501 logln("FAIL: \"" + out + "\", wanted \"" + exp + "\"");
\r
508 public void TestAnchors() {
\r
509 expect("^ab > 01 ;" +
\r
517 expect("$s = [z$] ;" +
\r
524 "abzababbabxzabxabx",
\r
525 "01z018k45z01x45");
\r
529 * Test pattern quoting and escape mechanisms.
\r
531 public void TestPatternQuoting() {
\r
532 // Array of 3n items
\r
533 // Each item is <rules>, <input>, <expected output>
\r
535 "\u4E01>'[male adult]'", "\u4E01", "[male adult]",
\r
538 for (int i=0; i<DATA.length; i+=3) {
\r
539 logln("Pattern: " + Utility.escape(DATA[i]));
\r
540 Transliterator t = Transliterator.createFromRules("<ID>", DATA[i], Transliterator.FORWARD);
\r
541 expect(t, DATA[i+1], DATA[i+2]);
\r
545 public void TestVariableNames() {
\r
546 Transliterator gl = Transliterator.createFromRules("foo5", "$\u2DC0 = qy; a>b;", Transliterator.FORWARD);
\r
548 errln("FAIL: null Transliterator returned.");
\r
553 * Regression test for bugs found in Greek transliteration.
\r
555 public void TestJ277() {
\r
556 Transliterator gl = Transliterator.getInstance("Greek-Latin; NFD; [:M:]Remove; NFC");
\r
558 char sigma = (char)0x3C3;
\r
559 char upsilon = (char)0x3C5;
\r
560 char nu = (char)0x3BD;
\r
561 // not used char PHI = (char)0x3A6;
\r
562 char alpha = (char)0x3B1;
\r
563 // not used char omega = (char)0x3C9;
\r
564 // not used char omicron = (char)0x3BF;
\r
565 // not used char epsilon = (char)0x3B5;
\r
567 // sigma upsilon nu -> syn
\r
568 StringBuffer buf = new StringBuffer();
\r
569 buf.append(sigma).append(upsilon).append(nu);
\r
570 String syn = buf.toString();
\r
571 expect(gl, syn, "syn");
\r
573 // sigma alpha upsilon nu -> saun
\r
575 buf.append(sigma).append(alpha).append(upsilon).append(nu);
\r
576 String sayn = buf.toString();
\r
577 expect(gl, sayn, "saun");
\r
579 // Again, using a smaller rule set
\r
581 "$alpha = \u03B1;" +
\r
583 "$sigma = \u03C3;" +
\r
584 "$ypsilon = \u03C5;" +
\r
585 "$vowel = [aeiouAEIOU$alpha$ypsilon];" +
\r
588 "u <> $vowel { $ypsilon;" +
\r
591 Transliterator mini = Transliterator.createFromRules
\r
592 ("mini", rules, Transliterator.REVERSE);
\r
593 expect(mini, syn, "syn");
\r
594 expect(mini, sayn, "saun");
\r
596 //| // Transliterate the Greek locale data
\r
597 //| Locale el("el");
\r
598 //| DateFormatSymbols syms(el, status);
\r
599 //| if (U_FAILURE(status)) { errln("FAIL: Transliterator constructor failed"); return; }
\r
600 //| int32_t i, count;
\r
601 //| const UnicodeString* data = syms.getMonths(count);
\r
602 //| for (i=0; i<count; ++i) {
\r
603 //| if (data[i].length() == 0) {
\r
606 //| UnicodeString out(data[i]);
\r
607 //| gl->transliterate(out);
\r
608 //| bool_t ok = TRUE;
\r
609 //| if (data[i].length() >= 2 && out.length() >= 2 &&
\r
610 //| u_isupper(data[i].charAt(0)) && u_islower(data[i].charAt(1))) {
\r
611 //| if (!(u_isupper(out.charAt(0)) && u_islower(out.charAt(1)))) {
\r
616 //| logln(prettify(data[i] + " -> " + out));
\r
618 //| errln(UnicodeString("FAIL: ") + prettify(data[i] + " -> " + out));
\r
624 // * Prefix, suffix support in hex transliterators
\r
626 // public void TestJ243() {
\r
627 // // Test default Hex-Any, which should handle
\r
628 // // \\u, \\U, u+, and U+
\r
629 // HexToUnicodeTransliterator hex = new HexToUnicodeTransliterator();
\r
630 // expect(hex, "\\u0041+\\U0042,u+0043uu+0044z", "A+B,CuDz");
\r
632 // // Try a custom Hex-Any
\r
633 // // \\uXXXX and &#xXXXX;
\r
634 // HexToUnicodeTransliterator hex2 = new HexToUnicodeTransliterator("\\\\u###0;&\\#x###0\\;");
\r
635 // expect(hex2, "\\u61\\u062\\u0063\\u00645\\u66x0123",
\r
636 // "abcd5fx0123");
\r
638 // // Try custom Any-Hex (default is tested elsewhere)
\r
639 // UnicodeToHexTransliterator hex3 = new UnicodeToHexTransliterator("&\\#x###0;");
\r
640 // expect(hex3, "012", "012");
\r
643 public void TestJ329() {
\r
646 Boolean.FALSE, "a > b; c > d",
\r
647 Boolean.TRUE, "a > b; no operator; c > d",
\r
650 for (int i=0; i<DATA.length; i+=2) {
\r
653 Transliterator.createFromRules("<ID>",
\r
654 (String) DATA[i+1],
\r
655 Transliterator.FORWARD);
\r
656 } catch (IllegalArgumentException e) {
\r
657 err = e.getMessage();
\r
659 boolean gotError = (err != null);
\r
660 String desc = (String) DATA[i+1] +
\r
661 (gotError ? (" -> error: " + err) : " -> no error");
\r
662 if ((err != null) == ((Boolean)DATA[i]).booleanValue()) {
\r
663 logln("Ok: " + desc);
\r
665 errln("FAIL: " + desc);
\r
671 * Test segments and segment references.
\r
673 public void TestSegments() {
\r
674 // Array of 3n items
\r
675 // Each item is <rules>, <input>, <expected output>
\r
677 "([a-z]) '.' ([0-9]) > $2 '-' $1",
\r
682 for (int i=0; i<DATA.length; i+=3) {
\r
683 logln("Pattern: " + Utility.escape(DATA[i]));
\r
684 Transliterator t = Transliterator.createFromRules("<ID>", DATA[i], Transliterator.FORWARD);
\r
685 expect(t, DATA[i+1], DATA[i+2]);
\r
690 * Test cursor positioning outside of the key
\r
692 public void TestCursorOffset() {
\r
693 // Array of 3n items
\r
694 // Each item is <rules>, <input>, <expected output>
\r
696 "pre {alpha} post > | @ ALPHA ;" +
\r
697 "eALPHA > beta ;" +
\r
698 "pre {beta} post > BETA @@ | ;" +
\r
701 "prealphapost prebetapost",
\r
702 "prbetaxyz preBETApost",
\r
705 for (int i=0; i<DATA.length; i+=3) {
\r
706 logln("Pattern: " + Utility.escape(DATA[i]));
\r
707 Transliterator t = Transliterator.createFromRules("<ID>", DATA[i], Transliterator.FORWARD);
\r
708 expect(t, DATA[i+1], DATA[i+2]);
\r
713 * Test zero length and > 1 char length variable values. Test
\r
714 * use of variable refs in UnicodeSets.
\r
716 public void TestArbitraryVariableValues() {
\r
717 // Array of 3n items
\r
718 // Each item is <rules>, <input>, <expected output>
\r
724 "$llY = [$ll$pat];" +
\r
738 for (int i=0; i<DATA.length; i+=3) {
\r
739 logln("Pattern: " + Utility.escape(DATA[i]));
\r
740 Transliterator t = Transliterator.createFromRules("<ID>", DATA[i], Transliterator.FORWARD);
\r
741 expect(t, DATA[i+1], DATA[i+2]);
\r
746 * Confirm that the contextStart, contextLimit, start, and limit
\r
747 * behave correctly.
\r
749 public void TestPositionHandling() {
\r
750 // Array of 3n items
\r
751 // Each item is <rules>, <input>, <expected output>
\r
753 "a{t} > SS ; {t}b > UU ; {t} > TT ;",
\r
754 "xtat txtb", // pos 0,9,0,9
\r
757 "a{t} > SS ; {t}b > UU ; {t} > TT ;",
\r
758 "xtat txtb", // pos 2,9,3,8
\r
761 "a{t} > SS ; {t}b > UU ; {t} > TT ;",
\r
762 "xtat txtb", // pos 3,8,3,8
\r
766 // Array of 4n positions -- these go with the DATA array
\r
767 // They are: contextStart, contextLimit, start, limit
\r
774 int n = DATA.length/3;
\r
775 for (int i=0; i<n; i++) {
\r
776 Transliterator t = Transliterator.createFromRules("<ID>", DATA[3*i], Transliterator.FORWARD);
\r
777 Transliterator.Position pos = new Transliterator.Position(
\r
778 POS[4*i], POS[4*i+1], POS[4*i+2], POS[4*i+3]);
\r
779 ReplaceableString rsource = new ReplaceableString(DATA[3*i+1]);
\r
780 t.transliterate(rsource, pos);
\r
781 t.finishTransliteration(rsource, pos);
\r
782 String result = rsource.toString();
\r
783 String exp = DATA[3*i+2];
\r
784 expectAux(Utility.escape(DATA[3*i]),
\r
787 result.equals(exp),
\r
793 * Test the Hiragana-Katakana transliterator.
\r
795 public void TestHiraganaKatakana() {
\r
796 Transliterator hk = Transliterator.getInstance("Hiragana-Katakana");
\r
797 Transliterator kh = Transliterator.getInstance("Katakana-Hiragana");
\r
799 // Array of 3n items
\r
800 // Each item is "hk"|"kh"|"both", <Hiragana>, <Katakana>
\r
803 "\u3042\u3090\u3099\u3092\u3050",
\r
804 "\u30A2\u30F8\u30F2\u30B0",
\r
807 "\u307C\u3051\u3060\u3042\u3093\u30FC",
\r
808 "\u30DC\u30F6\u30C0\u30FC\u30F3\u30FC",
\r
811 for (int i=0; i<DATA.length; i+=3) {
\r
812 switch (DATA[i].charAt(0)) {
\r
813 case 'h': // Hiragana-Katakana
\r
814 expect(hk, DATA[i+1], DATA[i+2]);
\r
816 case 'k': // Katakana-Hiragana
\r
817 expect(kh, DATA[i+2], DATA[i+1]);
\r
820 expect(hk, DATA[i+1], DATA[i+2]);
\r
821 expect(kh, DATA[i+2], DATA[i+1]);
\r
828 public void TestCopyJ476() {
\r
829 // This is a C++-only copy constructor test
\r
833 * Test inter-Indic transliterators. These are composed.
\r
835 public void TestInterIndic() {
\r
836 String ID = "Devanagari-Gujarati";
\r
837 Transliterator dg = Transliterator.getInstance(ID);
\r
839 errln("FAIL: getInstance(" + ID + ") returned null");
\r
842 String id = dg.getID();
\r
843 if (!id.equals(ID)) {
\r
844 errln("FAIL: getInstance(" + ID + ").getID() => " + id);
\r
846 String dev = "\u0901\u090B\u0925";
\r
847 String guj = "\u0A81\u0A8B\u0AA5";
\r
848 expect(dg, dev, guj);
\r
852 * Test filter syntax in IDs. (J23)
\r
854 public void TestFilterIDs() {
\r
856 "[aeiou]Any-Hex", // ID
\r
857 "[aeiou]Hex-Any", // expected inverse ID
\r
858 "quizzical", // src
\r
859 "q\\u0075\\u0069zz\\u0069c\\u0061l", // expected ID.translit(src)
\r
861 "[aeiou]Any-Hex;[^5]Hex-Any",
\r
862 "[^5]Any-Hex;[aeiou]Hex-Any",
\r
872 for (int i=0; i<DATA.length; i+=4) {
\r
873 String ID = DATA[i];
\r
874 Transliterator t = Transliterator.getInstance(ID);
\r
875 expect(t, DATA[i+2], DATA[i+3]);
\r
878 if (!ID.equals(t.getID())) {
\r
879 errln("FAIL: getInstance(" + ID + ").getID() => " +
\r
883 // Check the inverse
\r
884 String uID = DATA[i+1];
\r
885 Transliterator u = t.getInverse();
\r
887 errln("FAIL: " + ID + ".getInverse() returned NULL");
\r
888 } else if (!u.getID().equals(uID)) {
\r
889 errln("FAIL: " + ID + ".getInverse().getID() => " +
\r
890 u.getID() + ", expected " + uID);
\r
896 * Test the case mapping transliterators.
\r
898 public void TestCaseMap() {
\r
899 Transliterator toUpper =
\r
900 Transliterator.getInstance("Any-Upper[^xyzXYZ]");
\r
901 Transliterator toLower =
\r
902 Transliterator.getInstance("Any-Lower[^xyzXYZ]");
\r
903 Transliterator toTitle =
\r
904 Transliterator.getInstance("Any-Title[^xyzXYZ]");
\r
906 expect(toUpper, "The quick brown fox jumped over the lazy dogs.",
\r
907 "THE QUICK BROWN FOx JUMPED OVER THE LAzy DOGS.");
\r
908 expect(toLower, "The quIck brown fOX jUMPED OVER THE LAzY dogs.",
\r
909 "the quick brown foX jumped over the lazY dogs.");
\r
910 expect(toTitle, "the quick brown foX caN'T jump over the laZy dogs.",
\r
911 "The Quick Brown FoX Can't Jump Over The LaZy Dogs.");
\r
915 * Test the name mapping transliterators.
\r
917 public void TestNameMap() {
\r
918 Transliterator uni2name =
\r
919 Transliterator.getInstance("Any-Name[^abc]");
\r
920 Transliterator name2uni =
\r
921 Transliterator.getInstance("Name-Any");
\r
923 expect(uni2name, "\u00A0abc\u4E01\u00B5\u0A81\uFFFD\u0004\u0009\u0081\uFFFF",
\r
924 "\\N{NO-BREAK SPACE}abc\\N{CJK UNIFIED IDEOGRAPH-4E01}\\N{MICRO SIGN}\\N{GUJARATI SIGN CANDRABINDU}\\N{REPLACEMENT CHARACTER}\\N{END OF TRANSMISSION}\\N{CHARACTER TABULATION}\\N{<control-0081>}\\N{<noncharacter-FFFF>}");
\r
925 expect(name2uni, "{\\N { NO-BREAK SPACE}abc\\N{ CJK UNIFIED IDEOGRAPH-4E01 }\\N{x\\N{MICRO SIGN}\\N{GUJARATI SIGN CANDRABINDU}\\N{REPLACEMENT CHARACTER}\\N{END OF TRANSMISSION}\\N{CHARACTER TABULATION}\\N{<control-0081>}\\N{<noncharacter-FFFF>}\\N{<control-0004>}\\N{",
\r
926 "{\u00A0abc\u4E01\\N{x\u00B5\u0A81\uFFFD\u0004\u0009\u0081\uFFFF\u0004\\N{");
\r
929 Transliterator t = Transliterator.getInstance("Any-Name;Name-Any");
\r
931 String s = "{\u00A0abc\u4E01\\N{x\u00B5\u0A81\uFFFD\u0004\u0009\u0081\uFFFF\u0004\\N{";
\r
936 * Test liberalized ID syntax. 1006c
\r
938 public void TestLiberalizedID() {
\r
939 // Some test cases have an expected getID() value of NULL. This
\r
940 // means I have disabled the test case for now. This stuff is
\r
941 // still under development, and I haven't decided whether to make
\r
942 // getID() return canonical case yet. It will all get rewritten
\r
943 // with the move to Source-Target/Variant IDs anyway. [aliu]
\r
945 "latin-greek", null /*"Latin-Greek"*/, "case insensitivity",
\r
946 " Null ", "Null", "whitespace",
\r
947 " Latin[a-z]-Greek ", "[a-z]Latin-Greek", "inline filter",
\r
948 " null ; latin-greek ", null /*"Null;Latin-Greek"*/, "compound whitespace",
\r
951 for (int i=0; i<DATA.length; i+=3) {
\r
953 Transliterator t = Transliterator.getInstance(DATA[i]);
\r
954 if (DATA[i+1] == null || DATA[i+1].equals(t.getID())) {
\r
955 logln("Ok: " + DATA[i+2] +
\r
956 " create ID \"" + DATA[i] + "\" => \"" +
\r
959 errln("FAIL: " + DATA[i+2] +
\r
960 " create ID \"" + DATA[i] + "\" => \"" +
\r
961 t.getID() + "\", exp \"" + DATA[i+1] + "\"");
\r
963 } catch (IllegalArgumentException e) {
\r
964 errln("FAIL: " + DATA[i+2] +
\r
965 " create ID \"" + DATA[i] + "\"");
\r
970 public void TestCreateInstance() {
\r
971 String FORWARD = "F";
\r
972 String REVERSE = "R";
\r
975 // Column 2: direction
\r
976 // Column 3: expected ID, or "" if failure is expected
\r
977 "Latin-Hangul", REVERSE, "Hangul-Latin", // JB#912
\r
979 // JB#2689: bad compound causes crash
\r
980 "InvalidSource-InvalidTarget", FORWARD, "",
\r
981 "InvalidSource-InvalidTarget", REVERSE, "",
\r
982 "Hex-Any;InvalidSource-InvalidTarget", FORWARD, "",
\r
983 "Hex-Any;InvalidSource-InvalidTarget", REVERSE, "",
\r
984 "InvalidSource-InvalidTarget;Hex-Any", FORWARD, "",
\r
985 "InvalidSource-InvalidTarget;Hex-Any", REVERSE, "",
\r
990 for (int i=0; DATA[i]!=null; i+=3) {
\r
992 int dir = (DATA[i+1]==FORWARD)?
\r
993 Transliterator.FORWARD:Transliterator.REVERSE;
\r
994 String expID=DATA[i+2];
\r
995 Exception e = null;
\r
998 t = Transliterator.getInstance(id,dir);
\r
999 } catch (Exception e1) {
\r
1003 String newID = (t!=null)?t.getID():"";
\r
1004 boolean ok = (newID.equals(expID));
\r
1006 newID = e.getMessage();
\r
1009 logln("Ok: createInstance(" +
\r
1010 id + "," + DATA[i+1] + ") => " + newID);
\r
1012 errln("FAIL: createInstance(" +
\r
1013 id + "," + DATA[i+1] + ") => " + newID +
\r
1014 ", expected " + expID);
\r
1020 * Test the normalization transliterator.
\r
1022 public void TestNormalizationTransliterator() {
\r
1023 // THE FOLLOWING TWO TABLES ARE COPIED FROM com.ibm.icu.dev.test.normalizer.BasicTest
\r
1024 // PLEASE KEEP THEM IN SYNC WITH BasicTest.
\r
1025 String[][] CANON = {
\r
1026 // Input Decomposed Composed
\r
1027 {"cat", "cat", "cat" },
\r
1028 {"\u00e0ardvark", "a\u0300ardvark", "\u00e0ardvark" },
\r
1030 {"\u1e0a", "D\u0307", "\u1e0a" }, // D-dot_above
\r
1031 {"D\u0307", "D\u0307", "\u1e0a" }, // D dot_above
\r
1033 {"\u1e0c\u0307", "D\u0323\u0307", "\u1e0c\u0307" }, // D-dot_below dot_above
\r
1034 {"\u1e0a\u0323", "D\u0323\u0307", "\u1e0c\u0307" }, // D-dot_above dot_below
\r
1035 {"D\u0307\u0323", "D\u0323\u0307", "\u1e0c\u0307" }, // D dot_below dot_above
\r
1037 {"\u1e10\u0307\u0323", "D\u0327\u0323\u0307","\u1e10\u0323\u0307"}, // D dot_below cedilla dot_above
\r
1038 {"D\u0307\u0328\u0323","D\u0328\u0323\u0307","\u1e0c\u0328\u0307"}, // D dot_above ogonek dot_below
\r
1040 {"\u1E14", "E\u0304\u0300", "\u1E14" }, // E-macron-grave
\r
1041 {"\u0112\u0300", "E\u0304\u0300", "\u1E14" }, // E-macron + grave
\r
1042 {"\u00c8\u0304", "E\u0300\u0304", "\u00c8\u0304" }, // E-grave + macron
\r
1044 {"\u212b", "A\u030a", "\u00c5" }, // angstrom_sign
\r
1045 {"\u00c5", "A\u030a", "\u00c5" }, // A-ring
\r
1047 {"\u00fdffin", "y\u0301ffin", "\u00fdffin" }, //updated with 3.0
\r
1048 {"\u00fd\uFB03n", "y\u0301\uFB03n", "\u00fd\uFB03n" }, //updated with 3.0
\r
1050 {"Henry IV", "Henry IV", "Henry IV" },
\r
1051 {"Henry \u2163", "Henry \u2163", "Henry \u2163" },
\r
1053 {"\u30AC", "\u30AB\u3099", "\u30AC" }, // ga (Katakana)
\r
1054 {"\u30AB\u3099", "\u30AB\u3099", "\u30AC" }, // ka + ten
\r
1055 {"\uFF76\uFF9E", "\uFF76\uFF9E", "\uFF76\uFF9E" }, // hw_ka + hw_ten
\r
1056 {"\u30AB\uFF9E", "\u30AB\uFF9E", "\u30AB\uFF9E" }, // ka + hw_ten
\r
1057 {"\uFF76\u3099", "\uFF76\u3099", "\uFF76\u3099" }, // hw_ka + ten
\r
1059 {"A\u0300\u0316", "A\u0316\u0300", "\u00C0\u0316" },
\r
1062 String[][] COMPAT = {
\r
1063 // Input Decomposed Composed
\r
1064 {"\uFB4f", "\u05D0\u05DC", "\u05D0\u05DC" }, // Alef-Lamed vs. Alef, Lamed
\r
1066 {"\u00fdffin", "y\u0301ffin", "\u00fdffin" }, //updated for 3.0
\r
1067 {"\u00fd\uFB03n", "y\u0301ffin", "\u00fdffin" }, // ffi ligature -> f + f + i
\r
1069 {"Henry IV", "Henry IV", "Henry IV" },
\r
1070 {"Henry \u2163", "Henry IV", "Henry IV" },
\r
1072 {"\u30AC", "\u30AB\u3099", "\u30AC" }, // ga (Katakana)
\r
1073 {"\u30AB\u3099", "\u30AB\u3099", "\u30AC" }, // ka + ten
\r
1075 {"\uFF76\u3099", "\u30AB\u3099", "\u30AC" }, // hw_ka + ten
\r
1078 Transliterator NFD = Transliterator.getInstance("NFD");
\r
1079 Transliterator NFC = Transliterator.getInstance("NFC");
\r
1080 for (int i=0; i<CANON.length; ++i) {
\r
1081 String in = CANON[i][0];
\r
1082 String expd = CANON[i][1];
\r
1083 String expc = CANON[i][2];
\r
1084 expect(NFD, in, expd);
\r
1085 expect(NFC, in, expc);
\r
1088 Transliterator NFKD = Transliterator.getInstance("NFKD");
\r
1089 Transliterator NFKC = Transliterator.getInstance("NFKC");
\r
1090 for (int i=0; i<COMPAT.length; ++i) {
\r
1091 String in = COMPAT[i][0];
\r
1092 String expkd = COMPAT[i][1];
\r
1093 String expkc = COMPAT[i][2];
\r
1094 expect(NFKD, in, expkd);
\r
1095 expect(NFKC, in, expkc);
\r
1098 Transliterator t = Transliterator.getInstance("NFD; [x]Remove");
\r
1099 expect(t, "\u010dx", "c\u030C");
\r
1103 * Test compound RBT rules.
\r
// Exercises compound transliterators: builds one from a rule string that mixes
// "::" ID blocks with ordinary rules, checks its output and that toRules(true)
// returns the rule text, round-trips a compound ID through toRules() and
// createFromRules(), and checks getID() for "Foo(Bar)"-style IDs and their inverse.
// NOTE(review): this listing skips some original line numbers (e.g. 1111-1112,
// 1115, 1117-1118), so not every statement of the method is visible here.
1105 public void TestCompoundRBT() {
\r
1106 // Careful with spacing and ';' here: Phrase this exactly
\r
1107 // as toRules() is going to return it. If toRules() changes
\r
1108 // with regard to spacing or ';', then adjust this string.
\r
1109 String rule = "::Hex-Any;\n" +
\r
1110 "::Any-Lower;\n" +
\r
1113 "::[^t]Any-Upper;";
\r
1114 Transliterator t = Transliterator.createFromRules("Test", rule, Transliterator.FORWARD);
\r
1116 errln("FAIL: createFromRules failed");
\r
// The expected text shows 'a' and 'b' rewritten to ".A." / ".B." and every
// character except 't' upper-cased (the [^t] filter on Any-Upper).
1119 expect(t, "\u0043at in the hat, bat on the mat",
\r
1120 "C.A.t IN tHE H.A.t, .B..A.t ON tHE M.A.t");
\r
// toRules(true) must give back exactly the rule string used to build t.
1121 String r = t.toRules(true);
\r
1122 if (r.equals(rule)) {
\r
1123 logln("OK: toRules() => " + r);
\r
1125 errln("FAIL: toRules() => " + r +
\r
1126 ", expected " + rule);
\r
1129 // Now test toRules
\r
1130 t = Transliterator.getInstance("Greek-Latin; Latin-Cyrillic", Transliterator.FORWARD);
\r
1132 errln("FAIL: createInstance failed");
\r
// A compound built from an ID list is expected to render as one "::ID;" line per element.
1135 String exp = "::Greek-Latin;\n::Latin-Cyrillic;";
\r
1136 r = t.toRules(true);
\r
1137 if (!r.equals(exp)) {
\r
1138 errln("FAIL: toRules() => " + r +
\r
1139 ", expected " + exp);
\r
1141 logln("OK: toRules() => " + r);
\r
1144 // Round trip the result of toRules
\r
1145 t = Transliterator.createFromRules("Test", r, Transliterator.FORWARD);
\r
1147 errln("FAIL: createFromRules #2 failed");
\r
1150 logln("OK: createFromRules(" + r + ") succeeded");
\r
1153 // Test toRules again
\r
1154 r = t.toRules(true);
\r
1155 if (!r.equals(exp)) {
\r
1156 errln("FAIL: toRules() => " + r +
\r
1157 ", expected " + exp);
\r
1159 logln("OK: toRules() => " + r);
\r
1162 // Test Foo(Bar) IDs. Careful with spacing in id; make it conform
\r
1163 // to what the regenerated ID will look like.
\r
1164 String id = "Upper(Lower);(NFKC)";
\r
1165 t = Transliterator.getInstance(id, Transliterator.FORWARD);
\r
1167 errln("FAIL: createInstance #2 failed");
\r
1170 if (t.getID().equals(id)) {
\r
1171 logln("OK: created " + id);
\r
1173 errln("FAIL: createInstance(" + id +
\r
1174 ").getID() => " + t.getID());
\r
// The inverse ID is expected to list the elements in reverse order with the
// forward and parenthesized halves of each element swapped.
1177 Transliterator u = t.getInverse();
\r
1179 errln("FAIL: createInverse failed");
\r
1182 exp = "NFKC();Lower(Upper)";
\r
1183 if (u.getID().equals(exp)) {
\r
1184 logln("OK: createInverse(" + id + ") => " +
\r
1187 errln("FAIL: createInverse(" + id + ") => " +
\r
1193 * Compound filter semantics were originally not implemented
\r
1194 * correctly. Originally, each component filter f(i) is replaced by
\r
1195 * f'(i) = f(i) && g, where g is the filter for the compound
\r
1200 * Suppose I have a transliterator X. Internally X is
\r
1201 * "Greek-Latin; Latin-Cyrillic; Any-Lower". I use a filter [^A].
\r
1203 * The compound should convert all greek characters (through latin) to
\r
1204 * cyrillic, then lowercase the result. The filter should say "don't
\r
1205 * touch 'A' in the original". But because an intermediate result
\r
1206 * happens to go through "A", the Greek Alpha gets hung up.
\r
1208 public void TestCompoundFilter() {
\r
1209 Transliterator t = Transliterator.getInstance
\r
1210 ("Greek-Latin; Latin-Greek; Lower", Transliterator.FORWARD);
\r
1211 t.setFilter(new UnicodeSet("[^A]"));
\r
1213 // Only the 'A' at index 1 should remain unchanged
\r
1215 CharsToUnicodeString("BA\\u039A\\u0391"),
\r
1216 CharsToUnicodeString("\\u03b2A\\u03ba\\u03b1"));
\r
1220 * Test the "Remove" transliterator.
\r
1222 public void TestRemove() {
\r
1223 Transliterator t = Transliterator.getInstance("Remove[aeiou]");
\r
1224 expect(t, "The quick brown fox.",
\r
1225 "Th qck brwn fx.");
\r
// Table-driven check of rule/pattern regeneration. DATA is walked three entries
// at a time: a marker (RBT or SET), an input rule or set pattern, and the
// expected regenerated text. RBT rows compare Transliterator.toRules(false) and
// toRules(true) against the expectation; the other rows compare
// UnicodeSet.toPattern(true).
// NOTE(review): the DATA declaration and many of its rows are not visible in
// this listing (original line numbers are skipped).
1228 public void TestToRules() {
\r
1229 String RBT = "rbt";
\r
1230 String SET = "set";
\r
1233 "$a=\\u4E61; [$a] > A;",
\r
1237 "$white=[[:Zs:][:Zl:]]; $white{a} > A;",
\r
1238 "[[:Zs:][:Zl:]]{a} > A;",
\r
1257 "[~[:Lu:][:Ll:]]",
\r
1258 "[~[:Lu:][:Ll:]]",
\r
1265 "$white=[:Zs:]; $black=[^$white]; $black{a} > A;",
\r
1266 "[^[:Zs:]]{a} > A;",
\r
1269 "$a=[:Zs:]; $b=[[a-z]-$a]; $b{a} > A;",
\r
1270 "[[a-z]-[:Zs:]]{a} > A;",
\r
1273 "$a=[:Zs:]; $b=[$a&[a-z]]; $b{a} > A;",
\r
1274 "[[:Zs:]&[a-z]]{a} > A;",
\r
1277 "$a=[:Zs:]; $b=[x$a]; $b{a} > A;",
\r
1278 "[x[:Zs:]]{a} > A;",
\r
1281 "$accentMinus = [ [\\u0300-\\u0345] & [:M:] - [\\u0338]] ;"+
\r
1282 "$macron = \\u0304 ;"+
\r
1283 "$evowel = [aeiouyAEIOUY] ;"+
\r
1284 "$iotasub = \\u0345 ;"+
\r
1285 "($evowel $macron $accentMinus *) i > | $1 $iotasub ;",
\r
1286 "([AEIOUYaeiouy]\\u0304[[\\u0300-\\u0345]&[:M:]-[\\u0338]]*)i > | $1 \\u0345;",
\r
1289 "([AEIOUYaeiouy]\\u0304[[:M:]-[\\u0304\\u0345]]*)i > | $1 \\u0345;",
\r
1290 "([AEIOUYaeiouy]\\u0304[[:M:]-[\\u0304\\u0345]]*)i > | $1 \\u0345;",
\r
// Each test case occupies three consecutive DATA entries, hence the step of 3.
1293 for (int d=0; d < DATA.length; d+=3) {
\r
// NOTE(review): this is a reference comparison (==) on Strings. It only
// recognizes the marker if DATA holds the very same String object as RBT;
// equals() would be the safer idiom. Confirm against the DATA rows not shown.
1294 if (DATA[d] == RBT) {
\r
1295 // Transliterator test
\r
1296 Transliterator t = Transliterator.createFromRules("ID",
\r
1297 DATA[d+1], Transliterator.FORWARD);
\r
1299 errln("FAIL: createFromRules failed");
\r
1302 String rules, escapedRules;
\r
1303 rules = t.toRules(false);
\r
1304 escapedRules = t.toRules(true);
\r
// The expectation in DATA is written in escaped form; it is unescaped for the
// toRules(false) comparison and used as-is for the toRules(true) comparison.
1305 String expRules = Utility.unescape(DATA[d+2]);
\r
1306 String expEscapedRules = DATA[d+2];
\r
1307 if (rules.equals(expRules)) {
\r
1308 logln("Ok: " + DATA[d+1] +
\r
1309 " => " + Utility.escape(rules));
\r
1311 errln("FAIL: " + DATA[d+1] +
\r
1312 " => " + Utility.escape(rules + ", exp " + expRules));
\r
1314 if (escapedRules.equals(expEscapedRules)) {
\r
1315 logln("Ok: " + DATA[d+1] +
\r
1316 " => " + escapedRules);
\r
1318 errln("FAIL: " + DATA[d+1] +
\r
1319 " => " + escapedRules + ", exp " + expEscapedRules);
\r
1323 // UnicodeSet test
\r
1324 String pat = DATA[d+1];
\r
1325 String expToPat = DATA[d+2];
\r
1326 UnicodeSet set = new UnicodeSet(pat);
\r
1328 // Adjust spacing etc. as necessary.
\r
1330 toPat = set.toPattern(true);
\r
1331 if (expToPat.equals(toPat)) {
\r
1332 logln("Ok: " + pat +
\r
1335 errln("FAIL: " + pat +
\r
1336 " => " + Utility.escape(toPat) +
\r
// NOTE(review): the "exp" part of this message prints pat (the input pattern),
// while the comparison above uses expToPat; the message presumably should show
// expToPat instead -- confirm and fix in the real file.
1337 ", exp " + Utility.escape(pat));
\r
// Checks rules whose match is delimited by context braces ("{d}e", "ab{c}")
// through the rule-string form of expect().
// NOTE(review): the source text, expected text and any further arguments of both
// expect() calls are not visible in this listing (original lines are skipped).
1343 public void TestContext() {
\r
// The trailing "cs cl s l" note presumably names the four Position arguments
// (context start, context limit, start, limit) -- confirm against Transliterator.Position.
1344 Transliterator.Position pos = new Transliterator.Position(0, 2, 0, 1); // cs cl s l
\r
1346 expect("de > x; {d}e > y;",
\r
1351 expect("ab{c} > z;",
\r
1356 static final String CharsToUnicodeString(String s) {
\r
1357 return Utility.unescape(s);
\r
1360 public void TestSupplemental() {
\r
1362 expect(CharsToUnicodeString("$a=\\U00010300; $s=[\\U00010300-\\U00010323];" +
\r
1363 "a > $a; $s > i;"),
\r
1364 CharsToUnicodeString("ab\\U0001030Fx"),
\r
1365 CharsToUnicodeString("\\U00010300bix"));
\r
1367 expect(CharsToUnicodeString("$a=[a-z\\U00010300-\\U00010323];" +
\r
1368 "$b=[A-Z\\U00010400-\\U0001044D];" +
\r
1369 "($a)($b) > $2 $1;"),
\r
1370 CharsToUnicodeString("aB\\U00010300\\U00010400c\\U00010401\\U00010301D"),
\r
1371 CharsToUnicodeString("Ba\\U00010400\\U00010300\\U00010401cD\\U00010301"));
\r
1373 // k|ax\\U00010300xm
\r
1375 // k|a\\U00010400\\U00010300xm
\r
1376 // ky|\\U00010400\\U00010300xm
\r
1377 // ky\\U00010400|\\U00010300xm
\r
1379 // ky\\U00010400|\\U00010300\\U00010400m
\r
1380 // ky\\U00010400y|\\U00010400m
\r
1381 expect(CharsToUnicodeString("$a=[a\\U00010300-\\U00010323];" +
\r
1382 "$a {x} > | @ \\U00010400;" +
\r
1383 "{$a} [^\\u0000-\\uFFFF] > y;"),
\r
1384 CharsToUnicodeString("kax\\U00010300xm"),
\r
1385 CharsToUnicodeString("ky\\U00010400y\\U00010400m"));
\r
1387 expect(Transliterator.getInstance("Any-Name"),
\r
1388 CharsToUnicodeString("\\U00010330\\U000E0061\\u00A0"),
\r
1389 "\\N{GOTHIC LETTER AHSA}\\N{TAG LATIN SMALL LETTER A}\\N{NO-BREAK SPACE}");
\r
1391 expect(Transliterator.getInstance("Name-Any"),
\r
1392 "\\N{GOTHIC LETTER AHSA}\\N{TAG LATIN SMALL LETTER A}\\N{NO-BREAK SPACE}",
\r
1393 CharsToUnicodeString("\\U00010330\\U000E0061\\u00A0"));
\r
1395 expect(Transliterator.getInstance("Any-Hex/Unicode"),
\r
1396 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
\r
1397 "U+10330U+10FF00U+E0061U+00A0");
\r
1399 expect(Transliterator.getInstance("Any-Hex/C"),
\r
1400 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
\r
1401 "\\U00010330\\U0010FF00\\U000E0061\\u00A0");
\r
1403 expect(Transliterator.getInstance("Any-Hex/Perl"),
\r
1404 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
\r
1405 "\\x{10330}\\x{10FF00}\\x{E0061}\\x{A0}");
\r
1407 expect(Transliterator.getInstance("Any-Hex/Java"),
\r
1408 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
\r
1409 "\\uD800\\uDF30\\uDBFF\\uDF00\\uDB40\\uDC61\\u00A0");
\r
1411 expect(Transliterator.getInstance("Any-Hex/XML"),
\r
1412 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
\r
1413 "𐌰􏼀󠁡 ");
\r
1415 expect(Transliterator.getInstance("Any-Hex/XML10"),
\r
1416 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
\r
1417 "𐌰􏼀󠁡 ");
\r
1419 expect(Transliterator.getInstance("[\\U000E0000-\\U000E0FFF] Remove"),
\r
1420 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
\r
1421 CharsToUnicodeString("\\U00010330\\U0010FF00\\u00A0"));
\r
// Exercises quantifiers (+, *, ?) in rules: combined with cursor offsets (@),
// around segments, inside context, on quoted literals and on variable references.
// NOTE(review): many source/expected argument lines of the expect() calls are
// not visible in this listing (original line numbers are skipped).
1424 public void TestQuantifier() {
\r
1426 // Make sure @ in a quantified anteContext works
\r
1427 expect("a+ {b} > | @@ c; A > a; (a+ c) > '(' $1 ')';",
\r
1431 // Make sure @ in a quantified postContext works
\r
1432 expect("{b} a+ > c @@ |; (a+) > '(' $1 ')';",
\r
1436 // Make sure @ in a quantified postContext with seg ref works
\r
1437 expect("{(b)} a+ > $1 @@ |; (a+) > '(' $1 ')';",
\r
1441 // Make sure @ past ante context doesn't enter ante context
\r
1442 Transliterator.Position pos = new Transliterator.Position(0, 5, 3, 5);
\r
1443 expect("a+ {b} > | @@ c; x > y; (a+ c) > '(' $1 ')';",
\r
1448 // Make sure @ past post context doesn't pass limit
\r
1449 Transliterator.Position pos2 = new Transliterator.Position(0, 4, 0, 2);
\r
1450 expect("{b} a+ > c @@ |; x > y; a > A;",
\r
1455 // Make sure @ past post context doesn't enter post context
\r
1456 expect("{b} a+ > c @@ |; x > y; a > A;",
\r
1460 expect("(ab)? c > d;",
\r
1464 // NOTE: The (ab)+ when referenced just yields a single "ab",
\r
1465 // not the full sequence of them. This accords with perl behavior.
\r
1466 expect("(ab)+ {x} > '(' $1 ')';",
\r
1468 "x ab(ab) abab(ab)y");
\r
1471 "ac abc abbc abbbc",
\r
1472 "ac axc axc axc");
\r
1474 expect("[abc]+ > x;",
\r
1475 "qac abrc abbcs abtbbc",
\r
1478 expect("q{(ab)+} > x;",
\r
1479 "qa qab qaba qababc qaba",
\r
1480 "qa qx qxa qxc qxa");
\r
1482 expect("q(ab)* > x;",
\r
1483 "qa qab qaba qababc",
\r
1486 // NOTE: The (ab)+ when referenced just yields a single "ab",
\r
1487 // not the full sequence of them. This accords with perl behavior.
\r
1488 expect("q(ab)* > '(' $1 ')';",
\r
1489 "qa qab qaba qababc",
\r
1490 "()a (ab) (ab)a (ab)c");
\r
1492 // 'foo'+ and 'foo'* -- the quantifier should apply to the entire
\r
1494 expect("'ab'+ > x;",
\r
1498 // $foo+ and $foo* -- the quantifier should apply to the entire
\r
1499 // variable reference
\r
1500 expect("$var = ab; $var+ > x;",
\r
// Transliterator.Factory used by the registration test: getInstance() ignores
// the ID it is asked for and returns a NameableNullTrans built from this
// factory's own id.
// NOTE(review): the constructor bodies and the declaration of the id field are
// not visible in this listing (original line numbers are skipped).
1505 static class TestFact implements Transliterator.Factory {
\r
// Transliterator whose handleTransliterate() changes no text.
1506 static class NameableNullTrans extends Transliterator {
\r
1507 public NameableNullTrans(String id) {
\r
1510 protected void handleTransliterate(Replaceable text,
\r
1511 Position offsets, boolean incremental) {
\r
// Mark the whole range as processed without touching the text.
1512 offsets.start = offsets.limit;
\r
1516 public TestFact(String theID) {
\r
1519 public Transliterator getInstance(String ignoredID) {
\r
1520 return new NameableNullTrans(id);
\r
// Walks the source / target / variant enumeration API and logs every entry,
// flagging empty source or target names; then registers three factories,
// checks that instances can be created and unregistered, and finally verifies
// that the getAvailable* enumerations no longer list the unregistered IDs.
// NOTE(review): some statements (e.g. after the "empty source"/"empty target"
// errors and the try/else/finally keywords) are not visible in this listing.
1524 public void TestSTV() {
\r
1525 Enumeration es = Transliterator.getAvailableSources();
\r
1526 for (int i=0; es.hasMoreElements(); ++i) {
\r
1527 String source = (String) es.nextElement();
\r
1528 logln("" + i + ": " + source);
\r
1529 if (source.length() == 0) {
\r
1530 errln("FAIL: empty source");
\r
1533 Enumeration et = Transliterator.getAvailableTargets(source);
\r
1534 for (int j=0; et.hasMoreElements(); ++j) {
\r
1535 String target = (String) et.nextElement();
\r
1536 logln(" " + j + ": " + target);
\r
1537 if (target.length() == 0) {
\r
1538 errln("FAIL: empty target");
\r
1541 Enumeration ev = Transliterator.getAvailableVariants(source, target);
\r
1542 for (int k=0; ev.hasMoreElements(); ++k) {
\r
1543 String variant = (String) ev.nextElement();
\r
// An empty variant name is logged, not reported as a failure.
1544 if (variant.length() == 0) {
\r
1545 logln(" " + k + ": <empty>");
\r
1547 logln(" " + k + ": " + variant);
\r
1553 // Test registration
\r
// IDS are the IDs passed to registerFactory; FULL_IDS are the forms compared
// against getAvailableIDs() below; SOURCES are the source parts compared
// against getAvailableSources() (null means "skip this entry").
1554 String[] IDS = { "Fieruwer", "Seoridf-Sweorie", "Oewoir-Oweri/Vsie" };
\r
1555 String[] FULL_IDS = { "Any-Fieruwer", "Seoridf-Sweorie", "Oewoir-Oweri/Vsie" };
\r
1556 String[] SOURCES = { null, "Seoridf", "Oewoir" };
\r
// NOTE(review): the bound 3 is hard-coded here and in the loops below;
// IDS.length would keep the loops in step with the tables.
1557 for (int i=0; i<3; ++i) {
\r
1558 Transliterator.registerFactory(IDS[i], new TestFact(IDS[i]));
\r
1560 Transliterator t = Transliterator.getInstance(IDS[i]);
\r
1561 if (t.getID().equals(IDS[i])) {
\r
1562 logln("Ok: Registration/creation succeeded for ID " +
\r
1565 errln("FAIL: Registration of ID " +
\r
1566 IDS[i] + " creates ID " + t.getID());
\r
1568 Transliterator.unregister(IDS[i]);
\r
// After unregistering, getInstance is expected to throw; reaching the errln
// below means the ID is still resolvable.
1570 t = Transliterator.getInstance(IDS[i]);
\r
1571 errln("FAIL: Unregistration failed for ID " +
\r
1572 IDS[i] + "; still receiving ID " + t.getID());
\r
1573 } catch (IllegalArgumentException e2) {
\r
1574 // Good; this is what we expect
\r
1575 logln("Ok; Unregistered " + IDS[i]);
\r
1577 } catch (IllegalArgumentException e) {
\r
1578 errln("FAIL: Registration/creation failed for ID " +
\r
1581 Transliterator.unregister(IDS[i]);
\r
1585 // Make sure getAvailable API reflects removal
\r
1586 for (Enumeration e = Transliterator.getAvailableIDs();
\r
1587 e.hasMoreElements(); ) {
\r
1588 String id = (String) e.nextElement();
\r
1589 for (int i=0; i<3; ++i) {
\r
1590 if (id.equals(FULL_IDS[i])) {
\r
1591 errln("FAIL: unregister(" + id + ") failed");
\r
1595 for (Enumeration e = Transliterator.getAvailableTargets("Any");
\r
1596 e.hasMoreElements(); ) {
\r
1597 String t = (String) e.nextElement();
\r
1598 if (t.equals(IDS[0])) {
\r
1599 errln("FAIL: unregister(Any-" + t + ") failed");
\r
1602 for (Enumeration e = Transliterator.getAvailableSources();
\r
1603 e.hasMoreElements(); ) {
\r
1604 String s = (String) e.nextElement();
\r
1605 for (int i=0; i<3; ++i) {
\r
1606 if (SOURCES[i] == null) continue;
\r
1607 if (s.equals(SOURCES[i])) {
\r
1608 errln("FAIL: unregister(" + s + "-*) failed");
\r
1615 * Test inverse of Greek-Latin; Title()
\r
1617 public void TestCompoundInverse() {
\r
1618 Transliterator t = Transliterator.getInstance
\r
1619 ("Greek-Latin; Title()", Transliterator.REVERSE);
\r
1621 errln("FAIL: createInstance");
\r
1624 String exp = "(Title);Latin-Greek";
\r
1625 if (t.getID().equals(exp)) {
\r
1626 logln("Ok: inverse of \"Greek-Latin; Title()\" is \"" +
\r
1629 errln("FAIL: inverse of \"Greek-Latin; Title()\" is \"" +
\r
1630 t.getID() + "\", expected \"" + exp + "\"");
\r
1635 * Test NFD chaining with RBT
\r
1637 public void TestNFDChainRBT() {
\r
1638 Transliterator t = Transliterator.createFromRules(
\r
1639 "TEST", "::NFD; aa > Q; a > q;",
\r
1640 Transliterator.FORWARD);
\r
1641 logln(t.toRules(true));
\r
1642 expect(t, "aa", "Q");
\r
1646 * Inverse of "Null" should be "Null". (J21)
\r
1648 public void TestNullInverse() {
\r
1649 Transliterator t = Transliterator.getInstance("Null");
\r
1650 Transliterator u = t.getInverse();
\r
1651 if (!u.getID().equals("Null")) {
\r
1652 errln("FAIL: Inverse of Null should be Null");
\r
1657 * Check ID of inverse of alias. (J22)
\r
1659 public void TestAliasInverseID() {
\r
1660 String ID = "Latin-Hangul"; // This should be any alias ID with an inverse
\r
1661 Transliterator t = Transliterator.getInstance(ID);
\r
1662 Transliterator u = t.getInverse();
\r
1663 String exp = "Hangul-Latin";
\r
1664 String got = u.getID();
\r
1665 if (!got.equals(exp)) {
\r
1666 errln("FAIL: Inverse of " + ID + " is " + got +
\r
1667 ", expected " + exp);
\r
1672 * Test IDs of inverses of compound transliterators. (J20)
\r
1674 public void TestCompoundInverseID() {
\r
1675 String ID = "Latin-Jamo;NFC(NFD)";
\r
1676 Transliterator t = Transliterator.getInstance(ID);
\r
1677 Transliterator u = t.getInverse();
\r
1678 String exp = "NFD(NFC);Jamo-Latin";
\r
1679 String got = u.getID();
\r
1680 if (!got.equals(exp)) {
\r
1681 errln("FAIL: Inverse of " + ID + " is " + got +
\r
1682 ", expected " + exp);
\r
1687 * Test undefined variable.
\r
1689 public void TestUndefinedVariable() {
\r
1690 String rule = "$initial } a <> \u1161;";
\r
1692 Transliterator.createFromRules("<ID>", rule,Transliterator.FORWARD);
\r
1693 } catch (IllegalArgumentException e) {
\r
1694 logln("OK: Got exception for " + rule + ", as expected: " +
\r
1698 errln("Fail: bogus rule " + rule + " compiled without error");
\r
1702 * Test empty context.
\r
1704 public void TestEmptyContext() {
\r
1705 expect(" { a } > b;", "xay a ", "xby b ");
\r
1709 * Test compound filter ID syntax
\r
1711 public void TestCompoundFilterID() {
\r
1713 // Col. 1 = ID or rule set (latter must start with #)
\r
1715 // = columns > 1 are null if expect col. 1 to be illegal =
\r
1717 // Col. 2 = direction, "F..." or "R..."
\r
1718 // Col. 3 = source string
\r
1719 // Col. 4 = exp result
\r
1721 "[abc]; [abc]", null, null, null, // multiple filters
\r
1722 "Latin-Greek; [abc];", null, null, null, // misplaced filter
\r
1723 "[b]; Latin-Greek; Upper; ([xyz])", "F", "abc", "a\u0392c",
\r
1724 "[b]; (Lower); Latin-Greek; Upper(); ([\u0392])", "R", "\u0391\u0392\u0393", "\u0391b\u0393",
\r
1725 "#\n::[b]; ::Latin-Greek; ::Upper; ::([xyz]);", "F", "abc", "a\u0392c",
\r
1726 "#\n::[b]; ::(Lower); ::Latin-Greek; ::Upper(); ::([\u0392]);", "R", "\u0391\u0392\u0393", "\u0391b\u0393",
\r
1729 for (int i=0; i<DATA.length; i+=4) {
\r
1730 String id = DATA[i];
\r
1731 int direction = (DATA[i+1] != null && DATA[i+1].charAt(0) == 'R') ?
\r
1732 Transliterator.REVERSE : Transliterator.FORWARD;
\r
1733 String source = DATA[i+2];
\r
1734 String exp = DATA[i+3];
\r
1735 boolean expOk = (DATA[i+1] != null);
\r
1736 Transliterator t = null;
\r
1737 IllegalArgumentException e = null;
\r
1739 if (id.charAt(0) == '#') {
\r
1740 t = Transliterator.createFromRules("ID", id, direction);
\r
1742 t = Transliterator.getInstance(id, direction);
\r
1744 } catch (IllegalArgumentException ee) {
\r
1747 boolean ok = (t != null && e == null);
\r
1748 if (ok == expOk) {
\r
1749 logln("Ok: " + id + " => " + t +
\r
1750 (e != null ? (", " + e.getMessage()) : ""));
\r
1751 if (source != null) {
\r
1752 expect(t, source, exp);
\r
1755 errln("FAIL: " + id + " => " + t +
\r
1756 (e != null ? (", " + e.getMessage()) : ""));
\r
1762 * Test new property set syntax
\r
1764 public void TestPropertySet() {
\r
1765 expect("a>A; \\p{Lu}>x; \\p{Any}>y;", "abcDEF", "Ayyxxx");
\r
1766 expect("(.+)>'[' $1 ']';", " a stitch \n in time \r saves 9",
\r
1767 "[ a stitch ]\n[ in time ]\r[ saves 9]");
\r
1771 * Test various failure points of the new 2.0 engine.
\r
// Regression checks for the 2.0 rule engine: a Latin-Hiragana run that must
// leave Katakana untouched, a compound with a global [:Ll:] filter built from
// two temporarily registered rule-based transliterators (specified once in the
// ID and once through setFilter), and two rule sets using quantified segments.
// NOTE(review): several lines (block delimiters, some rule text and expect()
// arguments) are not visible in this listing.
1773 public void TestNewEngine() {
\r
1774 Transliterator t = Transliterator.getInstance("Latin-Hiragana");
\r
1775 // Katakana should be untouched
\r
1776 expect(t, "a\u3042\u30A2", "\u3042\u3042\u30A2");
\r
1779 // This test will only work if Transliterator.ROLLBACK is
\r
1780 // true. Otherwise, this test will fail, revealing a
\r
1781 // limitation of global filters in incremental mode.
\r
1783 Transliterator a =
\r
1784 Transliterator.createFromRules("a_to_A", "a > A;", Transliterator.FORWARD);
\r
1785 Transliterator A =
\r
1786 Transliterator.createFromRules("A_to_b", "A > b;", Transliterator.FORWARD);
\r
1788 //Transliterator array[] = new Transliterator[] {
\r
1790 // Transliterator.getInstance("NFD"),
\r
1792 //t = Transliterator.getInstance(array, new UnicodeSet("[:Ll:]"));
\r
// Register both helpers so they can be referenced by ID in the compound below;
// they are unregistered again further down.
1795 Transliterator.registerInstance(a);
\r
1796 Transliterator.registerInstance(A);
\r
// With the [:Ll:] filter only the original lower-case letters take part:
// each 'a' ends up as 'b' while each input 'A' is left alone.
1798 t = Transliterator.getInstance("[:Ll:];a_to_A;NFD;A_to_b");
\r
1799 expect(t, "aAaA", "bAbA");
\r
// The leading filter is not counted as an element: three elements are expected.
1801 Transliterator[] u = t.getElements();
\r
1802 assertTrue("getElements().length", u.length == 3);
\r
1803 assertEquals("getElements()[0]", u[0].getID(), "a_to_A");
\r
1804 assertEquals("getElements()[1]", u[1].getID(), "NFD");
\r
1805 assertEquals("getElements()[2]", u[2].getID(), "A_to_b");
\r
// Same chain, with the filter supplied through setFilter instead of the ID.
1807 t = Transliterator.getInstance("a_to_A;NFD;A_to_b");
\r
1808 t.setFilter(new UnicodeSet("[:Ll:]"));
\r
1809 expect(t, "aAaA", "bAbA");
\r
1811 Transliterator.unregister("a_to_A");
\r
1812 Transliterator.unregister("A_to_b");
\r
1816 expect("$smooth = x; $macron = q; [:^L:] { ([aeiouyAEIOUY] $macron?) } [^aeiouyAEIOUY$smooth$macron] > | $1 $smooth ;",
\r
1821 "$ddot = \u0308 ;" +
\r
1822 "$lcgvowel = [\u03b1\u03b5\u03b7\u03b9\u03bf\u03c5\u03c9] ;" +
\r
1823 "$rough = \u0314 ;" +
\r
1824 "($lcgvowel+ $ddot?) $rough > h | $1 ;" +
\r
1828 expect(gr, "\u03B1\u0314", "ha");
\r
1832 * Test quantified segment behavior. We want:
\r
1833 * ([abc])+ > x $1 x; applied to "cba" produces "xax"
\r
1835 public void TestQuantifiedSegment() {
\r
1836 // The normal case
\r
1837 expect("([abc]+) > x $1 x;", "cba", "xcbax");
\r
1839 // The tricky case; the quantifier is around the segment
\r
1840 expect("([abc])+ > x $1 x;", "cba", "xax");
\r
1842 // Tricky case in reverse direction
\r
1843 expect("([abc])+ { q > x $1 x;", "cbaq", "cbaxax");
\r
1845 // Check post-context segment
\r
1846 expect("{q} ([a-d])+ > '(' $1 ')';", "ddqcba", "dd(a)cba");
\r
1848 // Test toRule/toPattern for non-quantified segment.
\r
1849 // Careful with spacing here.
\r
1850 String r = "([a-c]){q} > x $1 x;";
\r
1851 Transliterator t = Transliterator.createFromRules("ID", r, Transliterator.FORWARD);
\r
1852 String rr = t.toRules(true);
\r
1853 if (!r.equals(rr)) {
\r
1854 errln("FAIL: \"" + r + "\" x toRules() => \"" + rr + "\"");
\r
1856 logln("Ok: \"" + r + "\" x toRules() => \"" + rr + "\"");
\r
1859 // Test toRule/toPattern for quantified segment.
\r
1860 // Careful with spacing here.
\r
1861 r = "([a-c])+{q} > x $1 x;";
\r
1862 t = Transliterator.createFromRules("ID", r, Transliterator.FORWARD);
\r
1863 rr = t.toRules(true);
\r
1864 if (!r.equals(rr)) {
\r
1865 errln("FAIL: \"" + r + "\" x toRules() => \"" + rr + "\"");
\r
1867 logln("Ok: \"" + r + "\" x toRules() => \"" + rr + "\"");
\r
1871 //======================================================================
\r
1873 //======================================================================
\r
1874 /* this test performs test of rules in ISO 15919 */
\r
// Round-trip check between Latin and Devanagari: for every index of source,
// Latin-Devanagari must turn source[i] into expected[i] and Devanagari-Latin
// must turn expected[i] back into source[i].
// NOTE(review): most entries of the source array are not visible in this
// listing (original line numbers are skipped), so the pairing with expected
// cannot be checked from here.
1875 public void TestDevanagariLatinRT(){
\r
// Latin side of each pair.
1876 String[] source = {
\r
1891 //"r\u0323ya", // \u095c is not valid in Devanagari
\r
1917 "\u1E6Dh\u1E6Dha",
\r
1924 // Not roundtrippable --
\r
1925 // \u0939\u094d\u094d\u092E - hma
\r
1926 // \u0939\u094d\u092E - hma
\r
1927 // CharsToUnicodeString("hma"),
\r
1932 "san\u0304j\u012Bb s\u0113nagupta",
\r
1933 "\u0101nand vaddir\u0101ju",
\r
// Devanagari side of each pair; the trailing comment gives the Latin form.
1935 String[] expected = {
\r
1936 "\u092D\u093E\u0930\u0924", /* bha\u0304rata */
\r
1937 "\u0915\u094D\u0930", /* kra */
\r
1938 "\u0915\u094D\u0937", /* ks\u0323a */
\r
1939 "\u0916\u094D\u0930", /* khra */
\r
1940 "\u0917\u094D\u0930", /* gra */
\r
1941 "\u0919\u094D\u0930", /* n\u0307ra */
\r
1942 "\u091A\u094D\u0930", /* cra */
\r
1943 "\u091B\u094D\u0930", /* chra */
\r
1944 "\u091C\u094D\u091E", /* jn\u0303a */
\r
1945 "\u091D\u094D\u0930", /* jhra */
\r
1946 "\u091E\u094D\u0930", /* n\u0303ra */
\r
1947 "\u091F\u094D\u092F", /* t\u0323ya */
\r
1948 "\u0920\u094D\u0930", /* t\u0323hra */
\r
1949 "\u0921\u094D\u092F", /* d\u0323ya */
\r
1950 //"\u095C\u094D\u092F", /* r\u0323ya */ // \u095c is not valid in Devanagari
\r
1951 "\u0922\u094D\u092F", /* d\u0323hya */
\r
1952 "\u0922\u093C\u094D\u0930", /* r\u0323hra */
\r
1953 "\u0923\u094D\u0930", /* n\u0323ra */
\r
1954 "\u0924\u094D\u0924", /* tta */
\r
1955 "\u0925\u094D\u0930", /* thra */
\r
1956 "\u0926\u094D\u0926", /* dda */
\r
1957 "\u0927\u094D\u0930", /* dhra */
\r
1958 "\u0928\u094D\u0928", /* nna */
\r
1959 "\u092A\u094D\u0930", /* pra */
\r
1960 "\u092B\u094D\u0930", /* phra */
\r
1961 "\u092C\u094D\u0930", /* bra */
\r
1962 "\u092D\u094D\u0930", /* bhra */
\r
1963 "\u092E\u094D\u0930", /* mra */
\r
1964 "\u0929\u094D\u0930", /* n\u0331ra */
\r
1965 //"\u0934\u094D\u0930", /* l\u0331ra */
\r
1966 "\u092F\u094D\u0930", /* yra */
\r
1967 "\u092F\u093C\u094D\u0930", /* y\u0307ra */
\r
1969 "\u0935\u094D\u0930", /* vra */
\r
1970 "\u0936\u094D\u0930", /* s\u0301ra */
\r
1971 "\u0937\u094D\u0930", /* s\u0323ra */
\r
1972 "\u0938\u094D\u0930", /* sra */
\r
1973 "\u0939\u094d\u092E", /* hma */
\r
1974 "\u091F\u094D\u091F", /* t\u0323t\u0323a */
\r
1975 "\u091F\u094D\u0920", /* t\u0323t\u0323ha */
\r
1976 "\u0920\u094D\u0920", /* t\u0323ht\u0323ha*/
\r
1977 "\u0921\u094D\u0921", /* d\u0323d\u0323a */
\r
1978 "\u0921\u094D\u0922", /* d\u0323d\u0323ha */
\r
1979 "\u091F\u094D\u092F", /* t\u0323ya */
\r
1980 "\u0920\u094D\u092F", /* t\u0323hya */
\r
1981 "\u0921\u094D\u092F", /* d\u0323ya */
\r
1982 "\u0922\u094D\u092F", /* d\u0323hya */
\r
1983 // "hma", /* hma */
\r
1984 "\u0939\u094D\u092F", /* hya */
\r
1985 "\u0936\u0943", /* s\u0301r\u0325a */
\r
1986 "\u0936\u094D\u091A", /* s\u0301ca */
\r
1987 "\u090d", /* e\u0306 */
\r
1988 "\u0938\u0902\u091C\u0940\u092C\u094D \u0938\u0947\u0928\u0917\u0941\u092A\u094D\u0924",
\r
1989 "\u0906\u0928\u0902\u0926\u094D \u0935\u0926\u094D\u0926\u093F\u0930\u093E\u091C\u0941",
\r
// Both transliterators are obtained in the FORWARD direction under their own IDs.
1992 Transliterator latinToDev=Transliterator.getInstance("Latin-Devanagari", Transliterator.FORWARD );
\r
1993 Transliterator devToLatin=Transliterator.getInstance("Devanagari-Latin", Transliterator.FORWARD);
\r
// The loop is bounded by source.length; expected is indexed in parallel.
1995 for(int i= 0; i<source.length; i++){
\r
1996 expect(latinToDev,(source[i]),(expected[i]));
\r
1997 expect(devToLatin,(expected[i]),(source[i]));
\r
2001 public void TestTeluguLatinRT(){
\r
2002 String[] source = {
\r
2003 "raghur\u0101m vi\u015Bvan\u0101dha", /* Raghuram Viswanadha */
\r
2004 "\u0101nand vaddir\u0101ju", /* Anand Vaddiraju */
\r
2005 "r\u0101j\u012Bv ka\u015Barab\u0101da", /* Rajeev Kasarabada */
\r
2006 "san\u0304j\u012Bv ka\u015Barab\u0101da", /* sanjeev kasarabada */
\r
2007 "san\u0304j\u012Bb sen'gupta", /* sanjib sengupata */
\r
2008 "amar\u0113ndra hanum\u0101nula", /* Amarendra hanumanula */
\r
2009 "ravi kum\u0101r vi\u015Bvan\u0101dha", /* Ravi Kumar Viswanadha */
\r
2010 "\u0101ditya kandr\u0113gula", /* Aditya Kandregula */
\r
2011 "\u015Br\u012Bdhar ka\u1E47\u1E6Dama\u015Be\u1E6D\u1E6Di", /* Shridhar Kantamsetty */
\r
2012 "m\u0101dhav de\u015Be\u1E6D\u1E6Di" /* Madhav Desetty */
\r
2015 String[] expected = {
\r
2016 "\u0c30\u0c18\u0c41\u0c30\u0c3e\u0c2e\u0c4d \u0c35\u0c3f\u0c36\u0c4d\u0c35\u0c28\u0c3e\u0c27",
\r
2017 "\u0c06\u0c28\u0c02\u0c26\u0c4d \u0C35\u0C26\u0C4D\u0C26\u0C3F\u0C30\u0C3E\u0C1C\u0C41",
\r
2018 "\u0c30\u0c3e\u0c1c\u0c40\u0c35\u0c4d \u0c15\u0c36\u0c30\u0c2c\u0c3e\u0c26",
\r
2019 "\u0c38\u0c02\u0c1c\u0c40\u0c35\u0c4d \u0c15\u0c36\u0c30\u0c2c\u0c3e\u0c26",
\r
2020 "\u0c38\u0c02\u0c1c\u0c40\u0c2c\u0c4d \u0c38\u0c46\u0c28\u0c4d\u0c17\u0c41\u0c2a\u0c4d\u0c24",
\r
2021 "\u0c05\u0c2e\u0c30\u0c47\u0c02\u0c26\u0c4d\u0c30 \u0c39\u0c28\u0c41\u0c2e\u0c3e\u0c28\u0c41\u0c32",
\r
2022 "\u0c30\u0c35\u0c3f \u0c15\u0c41\u0c2e\u0c3e\u0c30\u0c4d \u0c35\u0c3f\u0c36\u0c4d\u0c35\u0c28\u0c3e\u0c27",
\r
2023 "\u0c06\u0c26\u0c3f\u0c24\u0c4d\u0c2f \u0C15\u0C02\u0C26\u0C4D\u0C30\u0C47\u0C17\u0C41\u0c32",
\r
2024 "\u0c36\u0c4d\u0c30\u0c40\u0C27\u0C30\u0C4D \u0c15\u0c02\u0c1f\u0c2e\u0c36\u0c46\u0c1f\u0c4d\u0c1f\u0c3f",
\r
2025 "\u0c2e\u0c3e\u0c27\u0c35\u0c4d \u0c26\u0c46\u0c36\u0c46\u0c1f\u0c4d\u0c1f\u0c3f",
\r
2029 Transliterator latinToDev=Transliterator.getInstance("Latin-Telugu", Transliterator.FORWARD);
\r
2030 Transliterator devToLatin=Transliterator.getInstance("Telugu-Latin", Transliterator.FORWARD);
\r
2032 for(int i= 0; i<source.length; i++){
\r
2033 expect(latinToDev,(source[i]),(expected[i]));
\r
2034 expect(devToLatin,(expected[i]),(source[i]));
\r
// Round-trip check for Sanskrit words between Latin and Devanagari: for each
// index below MAX_LEN, Latin-Devanagari must turn source[i] into expected[i]
// and Devanagari-Latin must turn it back.
// NOTE(review): the declaration of MAX_LEN and several source entries are not
// visible in this listing. The sibling tests loop to source.length; confirm
// that MAX_LEN actually covers every entry of the tables.
2038 public void TestSanskritLatinRT(){
\r
// Latin side of each pair.
2040 String[] source = {
\r
2041 "rmk\u1E63\u0113t",
\r
2042 "\u015Br\u012Bmad",
\r
2043 "bhagavadg\u012Bt\u0101",
\r
2046 "vi\u1E63\u0101da",
\r
2048 "dhr\u0325tar\u0101\u1E63\u1E6Dra",
\r
2049 "uv\u0101cr\u0325",
\r
2050 "dharmak\u1E63\u0113tr\u0113",
\r
2051 "kuruk\u1E63\u0113tr\u0113",
\r
2052 "samav\u0113t\u0101",
\r
2053 "yuyutsava\u1E25",
\r
2054 "m\u0101mak\u0101\u1E25",
\r
2055 // "p\u0101\u1E47\u1E0Dav\u0101\u015Bcaiva",
\r
// Devanagari side of each pair, index-aligned with source.
2059 String[] expected = {
\r
2060 "\u0930\u094D\u092E\u094D\u0915\u094D\u0937\u0947\u0924\u094D",
\r
2061 "\u0936\u094d\u0930\u0940\u092e\u0926\u094d",
\r
2062 "\u092d\u0917\u0935\u0926\u094d\u0917\u0940\u0924\u093e",
\r
2063 "\u0905\u0927\u094d\u092f\u093e\u092f",
\r
2064 "\u0905\u0930\u094d\u091c\u0941\u0928",
\r
2065 "\u0935\u093f\u0937\u093e\u0926",
\r
2066 "\u092f\u094b\u0917",
\r
2067 "\u0927\u0943\u0924\u0930\u093e\u0937\u094d\u091f\u094d\u0930",
\r
2068 "\u0909\u0935\u093E\u091A\u0943",
\r
2069 "\u0927\u0930\u094d\u092e\u0915\u094d\u0937\u0947\u0924\u094d\u0930\u0947",
\r
2070 "\u0915\u0941\u0930\u0941\u0915\u094d\u0937\u0947\u0924\u094d\u0930\u0947",
\r
2071 "\u0938\u092e\u0935\u0947\u0924\u093e",
\r
2072 "\u092f\u0941\u092f\u0941\u0924\u094d\u0938\u0935\u0903",
\r
2073 "\u092e\u093e\u092e\u0915\u093e\u0903",
\r
2074 //"\u092a\u093e\u0923\u094d\u0921\u0935\u093e\u0936\u094d\u091a\u0948\u0935",
\r
2075 "\u0915\u093f\u092e\u0915\u0941\u0930\u094d\u0935\u0924",
\r
2076 "\u0938\u0902\u091c\u0935",
\r
2079 Transliterator latinToDev=Transliterator.getInstance("Latin-Devanagari", Transliterator.FORWARD);
\r
2080 Transliterator devToLatin=Transliterator.getInstance("Devanagari-Latin", Transliterator.FORWARD);
\r
// Bounded by MAX_LEN rather than by the array length.
2081 for(int i= 0; i<MAX_LEN; i++){
\r
2082 expect(latinToDev,(source[i]),(expected[i]));
\r
2083 expect(devToLatin,(expected[i]),(source[i]));
\r
// Round-trip check through compound transliterators: each Latin word must
// survive "Latin-Devanagari;Devanagari-Latin" unchanged and each Devanagari
// word must survive "Devanagari-Latin;Latin-Devanagari" unchanged.
// NOTE(review): the declaration of MAX_LEN and several source entries are not
// visible in this listing. The sibling tests loop to source.length; confirm
// that MAX_LEN actually covers every entry of the tables.
2087 public void TestCompoundLatinRT(){
\r
// Latin words.
2089 String[] source = {
\r
2090 "rmk\u1E63\u0113t",
\r
2091 "\u015Br\u012Bmad",
\r
2092 "bhagavadg\u012Bt\u0101",
\r
2095 "vi\u1E63\u0101da",
\r
2097 "dhr\u0325tar\u0101\u1E63\u1E6Dra",
\r
2098 "uv\u0101cr\u0325",
\r
2099 "dharmak\u1E63\u0113tr\u0113",
\r
2100 "kuruk\u1E63\u0113tr\u0113",
\r
2101 "samav\u0113t\u0101",
\r
2102 "yuyutsava\u1E25",
\r
2103 "m\u0101mak\u0101\u1E25",
\r
2104 // "p\u0101\u1E47\u1E0Dav\u0101\u015Bcaiva",
\r
// Devanagari words, index-aligned with source.
2108 String[] expected = {
\r
2109 "\u0930\u094D\u092E\u094D\u0915\u094D\u0937\u0947\u0924\u094D",
\r
2110 "\u0936\u094d\u0930\u0940\u092e\u0926\u094d",
\r
2111 "\u092d\u0917\u0935\u0926\u094d\u0917\u0940\u0924\u093e",
\r
2112 "\u0905\u0927\u094d\u092f\u093e\u092f",
\r
2113 "\u0905\u0930\u094d\u091c\u0941\u0928",
\r
2114 "\u0935\u093f\u0937\u093e\u0926",
\r
2115 "\u092f\u094b\u0917",
\r
2116 "\u0927\u0943\u0924\u0930\u093e\u0937\u094d\u091f\u094d\u0930",
\r
2117 "\u0909\u0935\u093E\u091A\u0943",
\r
2118 "\u0927\u0930\u094d\u092e\u0915\u094d\u0937\u0947\u0924\u094d\u0930\u0947",
\r
2119 "\u0915\u0941\u0930\u0941\u0915\u094d\u0937\u0947\u0924\u094d\u0930\u0947",
\r
2120 "\u0938\u092e\u0935\u0947\u0924\u093e",
\r
2121 "\u092f\u0941\u092f\u0941\u0924\u094d\u0938\u0935\u0903",
\r
2122 "\u092e\u093e\u092e\u0915\u093e\u0903",
\r
2123 // "\u092a\u093e\u0923\u094d\u0921\u0935\u093e\u0936\u094d\u091a\u0948\u0935",
\r
2124 "\u0915\u093f\u092e\u0915\u0941\u0930\u094d\u0935\u0924",
\r
2125 "\u0938\u0902\u091c\u0935"
\r
2128 Transliterator latinToDevToLatin=Transliterator.getInstance("Latin-Devanagari;Devanagari-Latin", Transliterator.FORWARD);
\r
2129 Transliterator devToLatinToDev=Transliterator.getInstance("Devanagari-Latin;Latin-Devanagari", Transliterator.FORWARD);
\r
// Input and expected output are the same string: the compound must be an identity here.
2130 for(int i= 0; i<MAX_LEN; i++){
\r
2131 expect(latinToDevToLatin,(source[i]),(source[i]));
\r
2132 expect(devToLatinToDev,(expected[i]),(expected[i]));
\r
2136 * Test Gurmukhi-Devanagari Tippi and Bindi
\r
// Checks how "Devanagari-Gurmukhi" maps the sign U+0902 when it follows a vowel
// and when it follows a consonant. Each test string is one base character
// (slot 0) followed by U+0902 (slot 1).
2138 public void TestGurmukhiDevanagari(){
\r
2140 // (\u0902) (when preceded by vowel) ---> (\u0A02)
\r
2141 // (\u0902) (when preceded by consonant) ---> (\u0A70)
\r
2143 UnicodeSet vowel =new UnicodeSet("[\u0905-\u090A \u090F\u0910\u0913\u0914 \u093e-\u0942\u0947\u0948\u094B\u094C\u094D]");
\r
2144 UnicodeSet non_vowel =new UnicodeSet("[\u0915-\u0928\u092A-\u0930]");
\r
2146 UnicodeSetIterator vIter = new UnicodeSetIterator(vowel);
\r
2147 UnicodeSetIterator nvIter = new UnicodeSetIterator(non_vowel);
\r
2148 Transliterator trans = Transliterator.getInstance("Devanagari-Gurmukhi");
\r
// two-character buffers; slot 0 is overwritten for every character tested
2149 StringBuffer src = new StringBuffer(" \u0902");
\r
2150 StringBuffer expect = new StringBuffer(" \u0A02");
\r
2151 while(vIter.next()){
\r
2152 src.setCharAt(0,(char) vIter.codepoint);
\r
// the expected base character is the source code point shifted up by 0x0100
2153 expect.setCharAt(0,(char) (vIter.codepoint+0x0100));
\r
2154 expect(trans,src.toString(),expect.toString());
\r
// for the consonant pass the expected second character becomes U+0A70
2157 expect.setCharAt(1,'\u0A70');
\r
2158 while(nvIter.next()){
\r
2159 //src.setCharAt(0,(char) nvIter.codepoint);
\r
2160 src.setCharAt(0,(char)nvIter.codepoint);
\r
2161 expect.setCharAt(0,(char) (nvIter.codepoint+0x0100));
\r
2162 expect(trans,src.toString(),expect.toString());
\r
2166 * Test instantiation from a locale.
\r
// Requests transliterators through locale-style IDs ("te_IN-Latin",
// "ru_RU-Latin", "en-el"). An IllegalArgumentException from getInstance is
// reported with warnln instead of errln.
2168 public void TestLocaleInstantiation() {
\r
2171 t = Transliterator.getInstance("te_IN-Latin");
\r
2172 //expect(t, "\u0430", "a");
\r
2173 }catch(IllegalArgumentException ex){
\r
2174 warnln("Could not load locale data for obtaining the script used in the locale te_IN. "+ex.getMessage());
\r
// Cyrillic small a (U+0430) is expected to come out as Latin "a"
2177 t = Transliterator.getInstance("ru_RU-Latin");
\r
2178 expect(t, "\u0430", "a");
\r
2179 }catch(IllegalArgumentException ex){
\r
2180 warnln("Could not load locale data for obtaining the script used in the locale ru_RU. "+ex.getMessage());
\r
// Latin "a" is expected to come out as Greek small alpha (U+03B1)
2183 t = Transliterator.getInstance("en-el");
\r
2184 expect(t, "a", "\u03B1");
\r
2185 }catch(IllegalArgumentException ex){
\r
2186 warnln("Could not load locale data for obtaining the script used in the locale el. "+ ex.getMessage());
\r
2191 * Test title case handling of accent (should ignore accents)
\r
// Runs the "Title" transliterator over text containing a combining grave
// accent (U+0300) and an apostrophe; only the first letter of each
// space-separated word is expected to be capitalized.
2193 public void TestTitleAccents() {
\r
2194 Transliterator t = Transliterator.getInstance("Title");
\r
2195 expect(t, "a\u0300b can't abe", "A\u0300b Can't Abe");
\r
2199 * Basic test of a locale resource based rule.
\r
// Runs (ID, input, expected output) triples through
// Transliterator.getInstance and expect().
2201 public void TestLocaleResource() {
\r
// the "Latin-el" and "el-Latin" rows carry the same expectations as the
// matching "/UNGEGN" rows, and differ from the plain Latin-Greek / Greek-Latin rows
2204 "Latin-Greek/UNGEGN", "b", "\u03bc\u03c0",
\r
2205 "Latin-el", "b", "\u03bc\u03c0",
\r
2206 "Latin-Greek", "b", "\u03B2",
\r
2207 "Greek-Latin/UNGEGN", "\u03B2", "v",
\r
2208 "el-Latin", "\u03B2", "v",
\r
2209 "Greek-Latin", "\u03B2", "b",
\r
// step by 3: DATA[i] is the ID, DATA[i+1] the input, DATA[i+2] the expected output
2211 for (int i=0; i<DATA.length; i+=3) {
\r
2212 Transliterator t = Transliterator.getInstance(DATA[i]);
\r
2213 expect(t, DATA[i+1], DATA[i+2]);
\r
2218 * Make sure parse errors reference the right line.
\r
// Builds a transliterator from a multi-line rule string and checks that the
// IllegalArgumentException message quotes the text "d << b".
2220 public void TestParseError() {
\r
2223 "# more stuff\n" +
\r
2226 Transliterator t = Transliterator.createFromRules("ID", rule, Transliterator.FORWARD);
\r
2228 errln("FAIL: Did not get expected exception");
\r
2230 } catch (IllegalArgumentException e) {
\r
2231 String err = e.getMessage();
\r
// the message passes only when it contains the text "d << b"
2232 if (err.indexOf("d << b") >= 0) {
\r
2233 logln("Ok: " + err);
\r
2235 errln("FAIL: " + err);
\r
2239 errln("FAIL: no syntax error");
\r
2243 * Make sure sets on output are disallowed.
\r
// Creates a transliterator from a rule that uses a set variable on the
// output side; an IllegalArgumentException is logged as the Ok outcome.
2245 public void TestOutputSet() {
\r
// $set is a UnicodeSet pattern and appears to the right of '>'
2246 String rule = "$set = [a-cm-n]; b > $set;";
\r
2247 Transliterator t = null;
\r
2249 t = Transliterator.createFromRules("ID", rule, Transliterator.FORWARD);
\r
2251 errln("FAIL: Did not get the expected exception");
\r
2253 } catch (IllegalArgumentException e) {
\r
2254 logln("Ok: " + e.getMessage());
\r
2257 errln("FAIL: No syntax error");
\r
2261 * Test the use variable range pragma, making sure that use of
\r
2262 * variable range characters is detected and flagged as an error.
\r
// Creates a transliterator whose rules declare a variable range and then use
// a character inside that range; an IllegalArgumentException is logged as
// the Ok outcome.
2264 public void TestVariableRange() {
\r
// 'q' is U+0071, which lies inside the declared range 0x70..0x72
2265 String rule = "use variable range 0x70 0x72; a > A; b > B; q > Q;";
\r
2267 Transliterator t =
\r
2268 Transliterator.createFromRules("ID", rule, Transliterator.FORWARD);
\r
2270 errln("FAIL: Did not get the expected exception");
\r
2272 } catch (IllegalArgumentException e) {
\r
2273 logln("Ok: " + e.getMessage());
\r
2276 errln("FAIL: No syntax error");
\r
2280 * Test invalid post context error handling
\r
// Creates a transliterator from the rule "a}b{c>d;" and checks that the
// IllegalArgumentException message quotes "a}b{c".
2282 public void TestInvalidPostContext() {
\r
2284 Transliterator t =
\r
2285 Transliterator.createFromRules("ID", "a}b{c>d;", Transliterator.FORWARD);
\r
2287 errln("FAIL: Did not get the expected exception");
\r
2289 } catch (IllegalArgumentException e) {
\r
2290 String msg = e.getMessage();
\r
// the message passes only when it contains the text "a}b{c"
2291 if (msg.indexOf("a}b{c") >= 0) {
\r
2292 logln("Ok: " + msg);
\r
2294 errln("FAIL: " + msg);
\r
2298 errln("FAIL: No syntax error");
\r
2302 * Test ID form variants
\r
// Checks transliterator ID parsing: for each row the loop below reads
// (ID, expID, expInvID), instantiates ID, and compares t.getID() and
// t.getInverse().getID() with the expected values.
2304 public void TestIDForms() {
\r
// rows with a null third element are the IDs expected to be rejected
// (expValid is computed as expInvID != null)
2306 "NFC", null, "NFD",
\r
2307 "nfd", null, "NFC", // make sure case is ignored
\r
2308 "Any-NFKD", null, "Any-NFKC",
\r
2309 "Null", null, "Null",
\r
2310 "-nfkc", "nfkc", "NFKD",
\r
2311 "-nfkc/", "nfkc", "NFKD",
\r
2312 "Latin-Greek/UNGEGN", null, "Greek-Latin/UNGEGN",
\r
2313 "Greek/UNGEGN-Latin", "Greek-Latin/UNGEGN", "Latin-Greek/UNGEGN",
\r
2314 "Bengali-Devanagari/", "Bengali-Devanagari", "Devanagari-Bengali",
\r
2315 "Source-", null, null,
\r
2316 "Source/Variant-", null, null,
\r
2317 "Source-/Variant", null, null,
\r
2318 "/Variant", null, null,
\r
2319 "/Variant-", null, null,
\r
2320 "-/Variant", null, null,
\r
2326 for (int i=0; i<DATA.length; i+=3) {
\r
2327 String ID = DATA[i];
\r
2328 String expID = DATA[i+1];
\r
2329 String expInvID = DATA[i+2];
\r
2330 boolean expValid = (expInvID != null);
\r
// NOTE(review): a null expID presumably means "same as ID" -- confirm
2331 if (expID == null) {
\r
2335 Transliterator t =
\r
2336 Transliterator.getInstance(ID);
\r
2337 Transliterator u = t.getInverse();
\r
2338 if (t.getID().equals(expID) &&
\r
2339 u.getID().equals(expInvID)) {
\r
2340 logln("Ok: " + ID + ".getInverse() => " + expInvID);
\r
2342 errln("FAIL: getInstance(" + ID + ") => " +
\r
2343 t.getID() + " x getInverse() => " + u.getID() +
\r
2344 ", expected " + expInvID);
\r
2346 } catch (IllegalArgumentException e) {
\r
2348 logln("Ok: getInstance(" + ID + ") => " + e.getMessage());
\r
2350 errln("FAIL: getInstance(" + ID + ") => " + e.getMessage());
\r
// Compares t2.toRules(true) with an expected rule string.
// Spaces, "\n" and "\r" are removed from the generated rules and spaces are
// removed from the expected rules before the comparison.
//   label            - caller-supplied label (callers pass text such as "Failed toRules FORWARD")
//   t2               - transliterator whose rules are regenerated
//   testRulesForward - expected rule text
2356 void checkRules(String label, Transliterator t2, String testRulesForward) {
\r
2357 String rules2 = t2.toRules(true);
\r
2358 //rules2 = TestUtility.replaceAll(rules2, new UnicodeSet("[' '\n\r]"), "");
\r
2359 rules2 = TestUtility.replace(rules2, " ", "");
\r
2360 rules2 = TestUtility.replace(rules2, "\n", "");
\r
2361 rules2 = TestUtility.replace(rules2, "\r", "");
\r
2362 testRulesForward = TestUtility.replace(testRulesForward, " ", "");
\r
2364 if (!rules2.equals(testRulesForward)) {
\r
// on mismatch both normalized strings are logged for comparison
2366 logln("GENERATED RULES: " + rules2);
\r
2367 logln("SHOULD BE: " + testRulesForward);
\r
2372 * Mark's toRules test.
\r
// Builds a FORWARD and a REVERSE transliterator from the same rule text,
// checks that a-acute and alpha-acute map to each other, and compares each
// transliterator's toRules() output with an expected string via checkRules().
2374 public void TestToRulesMark() {
\r
2376 String testRules =
\r
2377 "::[[:Latin:][:Mark:]];"
\r
2379 + "::Lower (Lower);"
\r
2380 + "a <> \\u03B1;" // alpha
\r
2382 + "::Upper (Lower);"
\r
2384 + "::([[:Greek:][:Mark:]]);"
\r
// expected toRules() text of the FORWARD transliterator
2386 String testRulesForward =
\r
2387 "::[[:Latin:][:Mark:]];"
\r
2389 + "::Lower(Lower);"
\r
2392 + "::Upper (Lower);"
\r
// expected toRules() text of the REVERSE transliterator
2395 String testRulesBackward =
\r
2396 "::[[:Greek:][:Mark:]];"
\r
2397 + "::Lower (Upper);"
\r
2400 + "::Lower(Lower);"
\r
2403 String source = "\u00E1"; // a-acute
\r
2404 String target = "\u03AC"; // alpha-acute
\r
2406 Transliterator t2 = Transliterator.createFromRules("source-target", testRules, Transliterator.FORWARD);
\r
2407 Transliterator t3 = Transliterator.createFromRules("target-source", testRules, Transliterator.REVERSE);
\r
2409 expect(t2, source, target);
\r
2410 expect(t3, target, source);
\r
2412 checkRules("Failed toRules FORWARD", t2, testRulesForward);
\r
2413 checkRules("Failed toRules BACKWARD", t3, testRulesBackward);
\r
2417 * Test Escape and Unescape transliterators.
\r
// Exercises "Hex-Any" and three "Any-Hex" variants (C, Java, Perl) on the
// same input: "A", the supplementary code point U+10BEEF, and U+FEED.
2419 public void TestEscape() {
\r
2420 expect(Transliterator.getInstance("Hex-Any"),
\r
2421 "\\x{40}\\U000000312Q",
\r
2423 expect(Transliterator.getInstance("Any-Hex/C"),
\r
2424 CharsToUnicodeString("A\\U0010BEEF\\uFEED"),
\r
2425 "\\u0041\\U0010BEEF\\uFEED");
\r
// the Java variant is expected to write U+10BEEF as the surrogate pair DBEF DEEF
2426 expect(Transliterator.getInstance("Any-Hex/Java"),
\r
2427 CharsToUnicodeString("A\\U0010BEEF\\uFEED"),
\r
2428 "\\u0041\\uDBEF\\uDEEF\\uFEED");
\r
2429 expect(Transliterator.getInstance("Any-Hex/Perl"),
\r
2430 CharsToUnicodeString("A\\U0010BEEF\\uFEED"),
\r
2431 "\\x{41}\\x{10BEEF}\\x{FEED}");
\r
2435 * Make sure display names of variants look reasonable.
\r
// Checks Transliterator.getDisplayName for each ID and for the ID of its
// REVERSE instance, using Locale.US, and then covers the overload that uses
// the default locale.
2437 public void TestDisplayName() {
\r
2439 // ID, forward name, reverse name
\r
2440 // Update the text as necessary -- the important thing is
\r
2441 // not the text itself, but how various cases are handled.
\r
2444 "Any-Hex", "Any to Hex Escape", "Hex Escape to Any",
\r
2447 "Any-Hex/Perl", "Any to Hex Escape/Perl", "Hex Escape to Any/Perl",
\r
2449 // Target-only IDs
\r
2450 "NFC", "Any to NFC", "Any to NFD",
\r
2453 Locale US = Locale.US;
\r
2455 for (int i=0; i<DATA.length; i+=3) {
\r
// forward name: looked up directly from the ID string
2456 String name = Transliterator.getDisplayName(DATA[i], US);
\r
2457 if (!name.equals(DATA[i+1])) {
\r
2458 errln("FAIL: " + DATA[i] + ".getDisplayName() => " +
\r
2459 name + ", expected " + DATA[i+1]);
\r
2461 logln("Ok: " + DATA[i] + ".getDisplayName() => " + name);
\r
// reverse name: looked up from the ID reported by the REVERSE instance
2463 Transliterator t = Transliterator.getInstance(DATA[i], Transliterator.REVERSE);
\r
2464 name = Transliterator.getDisplayName(t.getID(), US);
\r
2465 if (!name.equals(DATA[i+2])) {
\r
2466 errln("FAIL: " + t.getID() + ".getDisplayName() => " +
\r
2467 name + ", expected " + DATA[i+2]);
\r
2469 logln("Ok: " + t.getID() + ".getDisplayName() => " + name);
\r
2472 // Cover getDisplayName(String)
\r
// the default locale is switched to US for the call and restored afterwards
2473 ULocale save = ULocale.getDefault();
\r
2474 ULocale.setDefault(ULocale.US);
\r
2475 String name2 = Transliterator.getDisplayName(t.getID());
\r
2476 if (!name.equals(name2))
\r
2477 errln("FAIL: getDisplayName with default locale failed");
\r
2478 ULocale.setDefault(save);
\r
2483 * Test anchor masking
\r
// Creates a transliterator from two rules for 'a', the first anchored with
// '^'. An IllegalArgumentException from createFromRules is reported as a
// failure.
2485 public void TestAnchorMasking() {
\r
2486 String rule = "^a > Q; a > q;";
\r
2488 Transliterator t = Transliterator.createFromRules("ID", rule, Transliterator.FORWARD);
\r
2490 errln("FAIL: Did not get the expected exception");
\r
2492 } catch (IllegalArgumentException e) {
\r
2493 errln("FAIL: " + rule + " => " + e);
\r
2498 * This test is not in transtst.cpp. This test has been moved from com/ibm/icu/dev/test/lang/TestUScript.java
\r
2499 * during ICU4J modularization to remove dependency of tests on Transliterator.
\r
// Walks every code point from 0 to 0x10FFFF, looks up its script, and for
// each script name and short name not seen before tries to instantiate a
// transliterator with the ID "[:<name>:];NFD".
2501 public void TestScriptAllCodepoints(){
\r
// names already tried, so each script name / abbreviation is instantiated once
2503 HashSet scriptIdsChecked = new HashSet();
\r
2504 HashSet scriptAbbrsChecked = new HashSet();
\r
2505 for( int i =0; i <= 0x10ffff; i++){
\r
2506 code = UScript.getScript(i);
\r
2507 if(code==UScript.INVALID_CODE){
\r
2508 errln("UScript.getScript for codepoint 0x"+ hex(i)+" failed");
\r
2510 String id =UScript.getName(code);
\r
2511 String abbr = UScript.getShortName(code);
\r
// first pass: the script name from UScript.getName
2512 if (!scriptIdsChecked.contains(id)) {
\r
2513 scriptIdsChecked.add(id);
\r
2514 String newId ="[:"+id+":];NFD";
\r
2516 Transliterator t = Transliterator.getInstance(newId);
\r
2518 errln("Failed to create transliterator for "+hex(i)+
\r
2519 " script code: " +id);
\r
2521 }catch(Exception e){
\r
2522 errln("Failed to create transliterator for "+hex(i)
\r
2523 +" script code: " +id
\r
2524 + " Exception: "+e.getMessage());
\r
// second pass: the same check with the short name from UScript.getShortName
2527 if (!scriptAbbrsChecked.contains(abbr)) {
\r
2528 scriptAbbrsChecked.add(abbr);
\r
2529 String newAbbrId ="[:"+abbr+":];NFD";
\r
2531 Transliterator t = Transliterator.getInstance(newAbbrId);
\r
2533 errln("Failed to create transliterator for "+hex(i)+
\r
2534 " script code: " +abbr);
\r
2536 }catch(Exception e){
\r
2537 errln("Failed to create transliterator for "+hex(i)
\r
2538 +" script code: " +abbr
\r
2539 + " Exception: "+e.getMessage());
\r
// {ID, rule text} pairs. TestSpecialCases() builds a transliterator from each
// pair, registers it under the ID through DummyFactory.add, and unregisters
// the ID again at the end.
2546 static final String[][] registerRules = {
\r
2547 {"Any-Dev1", "x > X; y > Y;"},
\r
2548 {"Any-Dev2", "XY > Z"},
\r
2549 {"Greek-Latin/FAKE",
\r
2550 "[^[:L:][:M:]] { \u03bc\u03c0 > b ; "+
\r
2551 "\u03bc\u03c0 } [^[:L:][:M:]] > b ; "+
\r
2552 "[^[:L:][:M:]] { [\u039c\u03bc][\u03a0\u03c0] > B ; "+
\r
2553 "[\u039c\u03bc][\u03a0\u03c0] } [^[:L:][:M:]] > B ;"
\r
// The supplementary code points U+10414 and U+1043C as strings.
// TestSurrogateCasing() expects the title case of DESERET_dee to be
// DESERET_DEE; both are also embedded in the testCases data.
2557 static final String DESERET_DEE = UTF16.valueOf(0x10414);
\r
2558 static final String DESERET_dee = UTF16.valueOf(0x1043C);
\r
// Data for TestSpecialCases(): each row is {ID, source} or
// {ID, source, expected target}. For two-element rows TestSpecialCases()
// computes the target itself for the NFD/NFC/NFKD/NFKC/Lower/Upper IDs.
2560 static final String[][] testCases = {
\r
2563 // should add more test cases
\r
2564 {"NFD" , "a\u0300 \u00E0 \u1100\u1161 \uFF76\uFF9E\u03D3"},
\r
2565 {"NFC" , "a\u0300 \u00E0 \u1100\u1161 \uFF76\uFF9E\u03D3"},
\r
2566 {"NFKD", "a\u0300 \u00E0 \u1100\u1161 \uFF76\uFF9E\u03D3"},
\r
2567 {"NFKC", "a\u0300 \u00E0 \u1100\u1161 \uFF76\uFF9E\u03D3"},
\r
// "Greek-Latin/FAKE" is one of the IDs defined in registerRules
2570 {"Greek-Latin/UNGEGN", "(\u03BC\u03C0)", "(b)"},
\r
2571 {"Greek-Latin/FAKE", "(\u03BC\u03C0)", "(b)"},
\r
2573 // check for devanagari bug
\r
2574 {"nfd;Dev1;Dev2;nfc", "xy", "Z"},
\r
2576 // ff, i, dotless-i, I, dotted-I, LJLjlj deseret deeDEE
\r
2577 {"Title", "ab'cD ffi\u0131I\u0130 \u01C7\u01C8\u01C9 " + DESERET_dee + DESERET_DEE,
\r
2578 "Ab'cd Ffi\u0131ii\u0307 \u01C8\u01C9\u01C9 " + DESERET_DEE + DESERET_dee},
\r
2579 //TODO: enable this test once Titlecase works right
\r
2580 //{"Title", "\uFB00i\u0131I\u0130 \u01C7\u01C8\u01C9 " + DESERET_dee + DESERET_DEE,
\r
2581 // "Ffi\u0131ii \u01C8\u01C9\u01C9 " + DESERET_DEE + DESERET_dee},
\r
2583 {"Upper", "ab'cD \uFB00i\u0131I\u0130 \u01C7\u01C8\u01C9 " + DESERET_dee + DESERET_DEE,
\r
2584 "AB'CD FFIII\u0130 \u01C7\u01C7\u01C7 " + DESERET_DEE + DESERET_DEE},
\r
2585 {"Lower", "ab'cD \uFB00i\u0131I\u0130 \u01C7\u01C8\u01C9 " + DESERET_dee + DESERET_DEE,
\r
2586 "ab'cd \uFB00i\u0131ii\u0307 \u01C9\u01C9\u01C9 " + DESERET_dee + DESERET_dee},
\r
// same sources as the two rows above, without an explicit expected target
2588 {"Upper", "ab'cD \uFB00i\u0131I\u0130 \u01C7\u01C8\u01C9 " + DESERET_dee + DESERET_DEE},
\r
2589 {"Lower", "ab'cD \uFB00i\u0131I\u0130 \u01C7\u01C8\u01C9 " + DESERET_dee + DESERET_DEE},
\r
2592 {"Greek-Latin/UNGEGN", "\u03C3 \u03C3\u03C2 \u03C2\u03C3", "s ss s\u0331s\u0331"},
\r
2593 {"Latin-Greek/UNGEGN", "s ss s\u0331s\u0331", "\u03C3 \u03C3\u03C2 \u03C2\u03C3"},
\r
2594 {"Greek-Latin", "\u03C3 \u03C3\u03C2 \u03C2\u03C3", "s ss s\u0331s\u0331"},
\r
2595 {"Latin-Greek", "s ss s\u0331s\u0331", "\u03C3 \u03C3\u03C2 \u03C2\u03C3"},
\r
2598 // Upper: TAT\u02B9\u00C2NA
\r
2599 // Lower: tat\u02B9\u00E2na
\r
2600 // Title: Tat\u02B9\u00E2na
\r
2601 {"Upper", "tat\u02B9\u00E2na", "TAT\u02B9\u00C2NA"},
\r
2602 {"Lower", "TAT\u02B9\u00C2NA", "tat\u02B9\u00E2na"},
\r
2603 {"Title", "tat\u02B9\u00E2na", "Tat\u02B9\u00E2na"},
\r
// Registers the rule-based transliterators listed in registerRules, runs
// every row of testCases through expect(), and unregisters the IDs again.
2606 public void TestSpecialCases() {
\r
2608 for (int i = 0; i < registerRules.length; ++i) {
\r
2609 Transliterator t = Transliterator.createFromRules(registerRules[i][0],
\r
2610 registerRules[i][1], Transliterator.FORWARD);
\r
2611 DummyFactory.add(registerRules[i][0], t);
\r
2613 for (int i = 0; i < testCases.length; ++i) {
\r
2614 String name = testCases[i][0];
\r
2615 Transliterator t = Transliterator.getInstance(name);
\r
2616 String id = t.getID();
\r
2617 String source = testCases[i][1];
\r
2618 String target = null;
\r
2620 // Automatic generation of targets, to make it simpler to add test cases (and more fail-safe)
\r
// an explicit third element wins; otherwise the target is computed with
// Normalizer or UCharacter according to the transliterator's ID
2622 if (testCases[i].length > 2) target = testCases[i][2];
\r
2623 else if (id.equalsIgnoreCase("NFD")) target = com.ibm.icu.text.Normalizer.normalize(source, com.ibm.icu.text.Normalizer.NFD);
\r
2624 else if (id.equalsIgnoreCase("NFC")) target = com.ibm.icu.text.Normalizer.normalize(source, com.ibm.icu.text.Normalizer.NFC);
\r
2625 else if (id.equalsIgnoreCase("NFKD")) target = com.ibm.icu.text.Normalizer.normalize(source, com.ibm.icu.text.Normalizer.NFKD);
\r
2626 else if (id.equalsIgnoreCase("NFKC")) target = com.ibm.icu.text.Normalizer.normalize(source, com.ibm.icu.text.Normalizer.NFKC);
\r
2627 else if (id.equalsIgnoreCase("Lower")) target = UCharacter.toLowerCase(Locale.US, source);
\r
2628 else if (id.equalsIgnoreCase("Upper")) target = UCharacter.toUpperCase(Locale.US, source);
\r
2630 expect(t, source, target);
\r
// clean up: remove the IDs registered at the top of this method
2632 for (int i = 0; i < registerRules.length; ++i) {
\r
2633 Transliterator.unregister(registerRules[i][0]);
\r
2637 // seems like there should be an easier way to just register an instance of a transliterator
\r
// Transliterator.Factory used by TestSpecialCases() to register ready-made
// transliterator instances: getInstance(ID) returns whatever the static map
// m holds for that ID.
2639 static class DummyFactory implements Transliterator.Factory {
\r
// one shared factory object is registered for every ID
2640 static DummyFactory singleton = new DummyFactory();
\r
2641 static HashMap m = new HashMap();
\r
2643 // Since Transliterators are immutable, we don't have to clone on set & get
\r
2644 static void add(String ID, Transliterator t) {
\r
2646 //System.out.println("Registering: " + ID + ", " + t.toRules(true));
\r
2647 Transliterator.registerFactory(ID, singleton);
\r
2649 public Transliterator getInstance(String ID) {
\r
2650 return (Transliterator) m.get(ID);
\r
// For every code point below 0x600, compares the "lower", "casefold",
// "title" and "upper" transliterators with the corresponding UCharacter
// case-mapping calls.
2654 public void TestCasing() {
\r
2655 Transliterator toLower = Transliterator.getInstance("lower");
\r
2656 Transliterator toCasefold = Transliterator.getInstance("casefold");
\r
2657 Transliterator toUpper = Transliterator.getInstance("upper");
\r
2658 Transliterator toTitle = Transliterator.getInstance("title");
\r
2659 for (int i = 0; i < 0x600; ++i) {
\r
2660 String s = UTF16.valueOf(i);
\r
2662 String lower = UCharacter.toLowerCase(ULocale.ROOT, s);
\r
2663 assertEquals("Lowercase", lower, toLower.transform(s));
\r
2665 String casefold = UCharacter.foldCase(s, true);
\r
2666 assertEquals("Casefold", casefold, toCasefold.transform(s));
\r
2668 String title = UCharacter.toTitleCase(ULocale.ROOT, s, null);
\r
2669 assertEquals("Title", title, toTitle.transform(s));
\r
2671 String upper = UCharacter.toUpperCase(ULocale.ROOT, s);
\r
2672 assertEquals("Upper", upper, toUpper.transform(s));
\r
// Checks UCharacter title/upper/lower casing on the supplementary-plane
// pair DESERET_dee / DESERET_DEE.
2676 public void TestSurrogateCasing () {
\r
2677 // check that casing handles surrogates
\r
2678 // titlecase is currently defective
\r
// read the full code point of DESERET_dee, then title-case it as an int
2679 int dee = UTF16.charAt(DESERET_dee,0);
\r
2680 int DEE = UCharacter.toTitleCase(dee);
\r
2681 if (!UTF16.valueOf(DEE).equals(DESERET_DEE)) {
\r
2682 errln("Fails titlecase of surrogates" + Integer.toString(dee,16) + ", " + Integer.toString(DEE,16));
\r
2685 if (!UCharacter.toUpperCase(DESERET_dee + DESERET_DEE).equals(DESERET_DEE + DESERET_DEE)) {
\r
2686 errln("Fails uppercase of surrogates");
\r
2689 if (!UCharacter.toLowerCase(DESERET_dee + DESERET_DEE).equals(DESERET_dee + DESERET_dee)) {
\r
2690 errln("Fails lowercase of surrogates");
\r
2694 // Check to see that incremental gets at least part way through a reasonable string.
\r
// Enumerates the available source / target / variant combinations and runs
// CheckIncrementalAux on each transliterator and on its inverse. Only
// sources that have sample text in the local tests table are exercised.
2696 public void TestIncrementalProgress() {
\r
2697 String latinTest = "The Quick Brown Fox.";
\r
// sample text for the other scripts is derived from the Latin sample
2698 String devaTest = Transliterator.getInstance("Latin-Devanagari").transliterate(latinTest);
\r
2699 String kataTest = Transliterator.getInstance("Latin-Katakana").transliterate(latinTest);
\r
// {source name, sample text}; looked up with findMatch()
2700 String[][] tests = {
\r
2701 {"Any", latinTest},
\r
2702 {"Latin", latinTest},
\r
2703 {"Halfwidth", latinTest},
\r
2704 {"Devanagari", devaTest},
\r
2705 {"Katakana", kataTest},
\r
2708 Enumeration sources = Transliterator.getAvailableSources();
\r
2709 while(sources.hasMoreElements()) {
\r
2710 String source = (String) sources.nextElement();
\r
2711 String test = findMatch(source, tests);
\r
2712 if (test == null) {
\r
2713 logln("Skipping " + source + "-X");
\r
2716 Enumeration targets = Transliterator.getAvailableTargets(source);
\r
2717 while(targets.hasMoreElements()) {
\r
2718 String target = (String) targets.nextElement();
\r
2719 Enumeration variants = Transliterator.getAvailableVariants(source, target);
\r
2720 while(variants.hasMoreElements()) {
\r
2721 String variant = (String) variants.nextElement();
\r
2722 String id = source + "-" + target + "/" + variant;
\r
2723 logln("id: " + id);
\r
// when a filter string is set, IDs that do not contain it are skipped
2725 String filter = getTranslitTestFilter();
\r
2726 if (filter != null && id.indexOf(filter) < 0) continue;
\r
2728 Transliterator t = Transliterator.getInstance(id);
\r
2729 CheckIncrementalAux(t, test);
\r
// the inverse is checked on the forward output
2731 String rev = t.transliterate(test);
\r
2732 Transliterator inv = t.getInverse();
\r
2733 CheckIncrementalAux(inv, rev);
\r
// Returns pairs[i][1] for the first row whose pairs[i][0] equals source,
// ignoring case. TestIncrementalProgress() treats a null result as
// "no sample text for this source".
2739 public String findMatch (String source, String[][] pairs) {
\r
2740 for (int i = 0; i < pairs.length; ++i) {
\r
2741 if (source.equalsIgnoreCase(pairs[i][0])) return pairs[i][1];
\r
// Runs t over input with the Position-based transliterate() call, reports
// an error if pos.start did not move, then calls finishTransliteration()
// and reports an error if pos.start has not reached pos.limit.
2746 public void CheckIncrementalAux(Transliterator t, String input) {
\r
2748 Replaceable test = new ReplaceableString(input);
\r
// the position spans the whole text: context and start/limit both 0..length
2749 Transliterator.Position pos = new Transliterator.Position(0, test.length(), 0, test.length());
\r
2750 t.transliterate(test, pos);
\r
2751 boolean gotError = false;
\r
2753 // we have a few special cases. Any-Remove (pos.start = 0, but also = limit) and U+XXXXX?X?
\r
// "Hex-Any/Unicode" is exempt from the no-progress check
2755 if (pos.start == 0 && pos.limit != 0 && !t.getID().equals("Hex-Any/Unicode")) {
\r
2756 errln("No Progress, " + t.getID() + ": " + UtilityExtensions.formatInput(test, pos));
\r
2759 logln("PASS Progress, " + t.getID() + ": " + UtilityExtensions.formatInput(test, pos));
\r
2761 t.finishTransliteration(test, pos);
\r
2762 if (pos.start != pos.limit) {
\r
2763 errln("Incomplete, " + t.getID() + ": " + UtilityExtensions.formatInput(test, pos));
\r
2767 //errln("FAIL: Did not get expected error");
\r
// Builds transliterators from rules that call other transliterators with the
// &Name( ... ) syntax, checks that toRules(true) reproduces the rule text
// exactly, and checks the transliterated output.
2771 public void TestFunction() {
\r
2772 // Careful with spacing and ';' here: Phrase this exactly
\r
2773 // as toRules() is going to return it. If toRules() changes
\r
2774 // with regard to spacing or ';', then adjust this string.
\r
2776 "([:Lu:]) > $1 '(' &Lower( $1 ) '=' &Hex( &Any-Lower( $1 ) ) ')';";
\r
2778 Transliterator t = Transliterator.createFromRules("Test", rule, Transliterator.FORWARD);
\r
2780 errln("FAIL: createFromRules failed");
\r
2784 String r = t.toRules(true);
\r
2785 if (r.equals(rule)) {
\r
2786 logln("OK: toRules() => " + r);
\r
2788 errln("FAIL: toRules() => " + r +
\r
2789 ", expected " + rule);
\r
// each uppercase letter is followed by "(lowercase=hex escape of lowercase)"
2792 expect(t, "The Quick Brown Fox",
\r
2793 "T(t=\\u0074)he Q(q=\\u0071)uick B(b=\\u0062)rown F(f=\\u0066)ox");
\r
// second rule: same round-trip check with &Hex/Unicode and &Name
2795 "([^\\ -\\u007F]) > &Hex/Unicode( $1 ) ' ' &Name( $1 ) ;";
\r
2797 t = Transliterator.createFromRules("Test", rule, Transliterator.FORWARD);
\r
2799 errln("FAIL: createFromRules failed");
\r
2803 r = t.toRules(true);
\r
2804 if (r.equals(rule)) {
\r
2805 logln("OK: toRules() => " + r);
\r
2807 errln("FAIL: toRules() => " + r +
\r
2808 ", expected " + rule);
\r
2811 expect(t, "\u0301",
\r
2812 "U+0301 \\N{COMBINING ACUTE ACCENT}");
\r
// Creates transliterators from rules that use $1 on the output side where
// the rule's input side (".") has no parenthesized group. An
// IllegalArgumentException is logged as the Ok outcome.
2815 public void TestInvalidBackRef() {
\r
2816 String rule = ". > $1;";
\r
// rule2 contains the same ". > $1;" rule after a valid rule that does capture a group
2817 String rule2 ="(.) <> &hex/unicode($1) &name($1); . > $1; [{}] >\u0020;";
\r
2819 Transliterator t = Transliterator.createFromRules("Test", rule, Transliterator.FORWARD);
\r
2821 errln("FAIL: createFromRules should have returned NULL");
\r
2823 errln("FAIL: Ok: . > $1; => no error");
\r
2824 Transliterator t2= Transliterator.createFromRules("Test2", rule2, Transliterator.FORWARD);
\r
2826 errln("FAIL: createFromRules should have returned NULL");
\r
2828 errln("FAIL: Ok: . > $1; => no error");
\r
2829 } catch (IllegalArgumentException e) {
\r
2830 logln("Ok: . > $1; => " + e.getMessage());
\r
// Exercises rules whose sets contain multi-character strings (the {..}
// elements), both as the matched text and as surrounding context.
2834 public void TestMulticharStringSet() {
\r
2840 "[{gd}] { e > q;" +
\r
2841 " e } [{fg}] > r;" ;
\r
2843 Transliterator t = Transliterator.createFromRules("Test", rule, Transliterator.FORWARD);
\r
2845 errln("FAIL: createFromRules failed");
\r
2849 expect(t, "a aa ab bc d gd de gde gdefg ddefg",
\r
2850 "y x yz z d gd de gdq gdqfg ddrfg");
\r
2852 // Overlapped string test. Make sure that when multiple
\r
2853 // strings can match that the longest one is matched.
\r
2855 " [a {ab} {abc}] > x;" +
\r
2858 " q [t {st} {rst}] { e > p;" ;
\r
2860 t = Transliterator.createFromRules("Test", rule, Transliterator.FORWARD);
\r
2862 errln("FAIL: createFromRules failed");
\r
// "a", "ab" and "abc" are each expected to become a single "x"
2866 expect(t, "a ab abc qte qste qrste",
\r
2867 "x x x qtp qstp qrstp");
\r
2871 * Test that user-registered transliterators can be used under function
\r
// Registers several rule-based transliterators through
// TestUserFunctionFactory, calls them from other rules with the
// &Name( ... ) syntax, and unregisters them at the end.
2874 public void TestUserFunction() {
\r
2877 // There's no need to register inverses if we don't use them
\r
2878 TestUserFunctionFactory.add("Any-gif",
\r
2879 Transliterator.createFromRules("gif",
\r
2880 "'\\'u(..)(..) > '<img src=\"http://www.unicode.org/gifs/24/' $1 '/U' $1$2 '.gif\">';",
\r
2881 Transliterator.FORWARD));
\r
2882 //TestUserFunctionFactory.add("gif-Any", Transliterator.getInstance("Any-Null"));
\r
2884 TestUserFunctionFactory.add("Any-RemoveCurly",
\r
2885 Transliterator.createFromRules("RemoveCurly", "[\\{\\}] > ; \\\\N > ;", Transliterator.FORWARD));
\r
2886 //TestUserFunctionFactory.add("RemoveCurly-Any", Transliterator.getInstance("Any-Null"));
\r
2888 logln("Trying &hex");
\r
2889 t = Transliterator.createFromRules("hex2", "(.) > &hex($1);", Transliterator.FORWARD);
\r
2890 logln("Registering");
\r
2891 TestUserFunctionFactory.add("Any-hex2", t);
\r
2892 t = Transliterator.getInstance("Any-hex2");
\r
2893 expect(t, "abc", "\\u0061\\u0062\\u0063");
\r
// gif2 chains two of the transliterators registered above: &Gif(&Hex2($1))
2895 logln("Trying &gif");
\r
2896 t = Transliterator.createFromRules("gif2", "(.) > &Gif(&Hex2($1));", Transliterator.FORWARD);
\r
2897 logln("Registering");
\r
2898 TestUserFunctionFactory.add("Any-gif2", t);
\r
2899 t = Transliterator.getInstance("Any-gif2");
\r
2900 expect(t, "ab", "<img src=\"http://www.unicode.org/gifs/24/00/U0061.gif\">" +
\r
2901 "<img src=\"http://www.unicode.org/gifs/24/00/U0062.gif\">");
\r
2903 // Test that filters are allowed after &
\r
2904 t = Transliterator.createFromRules("test",
\r
2905 "(.) > &Hex($1) ' ' &Any-RemoveCurly(&Name($1)) ' ';", Transliterator.FORWARD);
\r
2906 expect(t, "abc", "\\u0061 LATIN SMALL LETTER A \\u0062 LATIN SMALL LETTER B \\u0063 LATIN SMALL LETTER C ");
\r
2908 // Unregister our test stuff
\r
2909 TestUserFunctionFactory.unregister();
\r
// Transliterator.Factory backing TestUserFunction(): instances are stored
// in the static map m under a CaseInsensitiveString key built from the ID.
2912 static class TestUserFunctionFactory implements Transliterator.Factory {
\r
2913 static TestUserFunctionFactory singleton = new TestUserFunctionFactory();
\r
2914 static HashMap m = new HashMap();
\r
// stores t under ID and registers the shared factory object for that ID
2916 static void add(String ID, Transliterator t) {
\r
2917 m.put(new CaseInsensitiveString(ID), t);
\r
2918 Transliterator.registerFactory(ID, singleton);
\r
2921 public Transliterator getInstance(String ID) {
\r
2922 return (Transliterator) m.get(new CaseInsensitiveString(ID));
\r
// unregisters every stored ID and empties the map through the iterator
2925 static void unregister() {
\r
2926 Iterator ids = m.keySet().iterator();
\r
2927 while (ids.hasNext()) {
\r
2928 CaseInsensitiveString id = (CaseInsensitiveString) ids.next();
\r
2929 Transliterator.unregister(id.getString());
\r
2930 ids.remove(); // removes pair from m
\r
2936 * Test the Any-X transliterators.
\r
// Runs "Any-Latin" over one string that mixes Latin labels with Greek,
// Hiragana and Cyrillic text and expects every part to come out in Latin.
2938 public void TestAnyX() {
\r
2939 Transliterator anyLatin =
\r
2940 Transliterator.getInstance("Any-Latin", Transliterator.FORWARD);
\r
2943 "greek:\u03B1\u03B2\u03BA\u0391\u0392\u039A hiragana:\u3042\u3076\u304F cyrillic:\u0430\u0431\u0446",
\r
2944 "greek:abkABK hiragana:abuku cyrillic:abc");
\r
2948 * Test Any-X transliterators with sample letters from all scripts.
\r
// Builds a sample string from alphabetic characters of every script code
// below UScript.CODE_LIMIT and transliterates it with "any-Latn". In the
// lines shown here the input and result are only logged.
2950 public void TestAny() {
\r
2951 UnicodeSet alphabetic = (UnicodeSet) new UnicodeSet("[:alphabetic:]").freeze();
\r
2952 StringBuffer testString = new StringBuffer();
\r
2953 for (int i = 0; i < UScript.CODE_LIMIT; ++i) {
\r
// characters of script i that are also alphabetic
2954 UnicodeSet sample = new UnicodeSet().applyPropertyAlias("script", UScript.getShortName(i)).retainAll(alphabetic);
\r
2956 for (UnicodeSetIterator it = new UnicodeSetIterator(sample); it.next();) {
\r
2957 testString.append(it.getString());
\r
// count caps how many characters are taken from each script
2958 if (--count < 0) break;
\r
2961 logln("Sample set for Any-Latin: " + testString);
\r
2962 Transliterator anyLatin = Transliterator.getInstance("any-Latn");
\r
2963 String result = anyLatin.transliterate(testString.toString());
\r
2964 logln("Sample result for Any-Latin: " + result);
\r
2969 * Test the source and target set API. These are only implemented
\r
2970 * for RBT and CompoundTransliterator at this time.
\r
// Builds a transliterator from the rule text r and compares its
// getSourceSet() and getTargetSet() results with expected UnicodeSets.
2972 public void TestSourceTargetSet() {
\r
2978 // Expected source
\r
2979 UnicodeSet expSrc = new UnicodeSet("[arx{lu}]");
\r
2981 // Expected target
\r
2982 UnicodeSet expTrg = new UnicodeSet("[bq]");
\r
2984 Transliterator t = Transliterator.createFromRules("test", r, Transliterator.FORWARD);
\r
2985 UnicodeSet src = t.getSourceSet();
\r
2986 UnicodeSet trg = t.getTargetSet();
\r
// both sets must match; on failure actual and expected patterns are reported
2988 if (src.equals(expSrc) && trg.equals(expTrg)) {
\r
2989 logln("Ok: " + r + " => source = " + src.toPattern(true) +
\r
2990 ", target = " + trg.toPattern(true));
\r
2992 errln("FAIL: " + r + " => source = " + src.toPattern(true) +
\r
2993 ", expected " + expSrc.toPattern(true) +
\r
2994 "; target = " + trg.toPattern(true) +
\r
2995 ", expected " + expTrg.toPattern(true));
\r
3000 * Test handling of rule whitespace, for both RBT and UnicodeSet.
\r
// Places U+200E inside a transliterator rule and inside a UnicodeSet
// pattern and checks that it is ignored in both.
3002 public void TestRuleWhitespace() {
\r
// U+200E sits between '>' and the output text; "a" must still map to "b"
3004 String r = "a > \u200E b;";
\r
3006 Transliterator t = Transliterator.createFromRules("test", r, Transliterator.FORWARD);
\r
3008 expect(t, "a", "b");
\r
// U+200E inside the pattern must not become a member of the set
3011 UnicodeSet set = new UnicodeSet("[a \u200E]");
\r
3013 if (set.contains(0x200E)) {
\r
3014 errln("FAIL: U+200E not being ignored by UnicodeSet");
\r
// Exercises rules written with U+2192, U+2190 and U+2194 between the two
// sides of a rule and with U+2206 in front of a transliterator name.
3018 public void TestAlternateSyntax() {
\r
3023 expect("a \u2192 x; b \u2190 y; c \u2194 z",
\r
// each non-ASCII input character is expected to be replaced by its \N{...} name
3026 expect("([:^ASCII:]) \u2192 \u2206Name($1);",
\r
3027 "<=\u2190; >=\u2192; <>=\u2194; &=\u2206",
\r
3028 "<=\\N{LEFTWARDS ARROW}; >=\\N{RIGHTWARDS ARROW}; <>=\\N{LEFT RIGHT ARROW}; &=\\N{INCREMENT}");
\r
// Covers the Transliterator.Position constructors, equals() and the
// implicit toString() used when the positions are concatenated into the
// log message.
3031 public void TestPositionAPI() {
\r
3032 Transliterator.Position a = new Transliterator.Position(3,5,7,11);
\r
// b is copy-constructed from a
3033 Transliterator.Position b = new Transliterator.Position(a);
\r
// NOTE(review): c is default-constructed here but compared with a below --
// confirm that it is assigned from a before the comparison
3034 Transliterator.Position c = new Transliterator.Position();
\r
3036 // Call the toString() API:
\r
3037 if (a.equals(b) && a.equals(c)) {
\r
3038 logln("Ok: " + a + " == " + b + " == " + c);
\r
3040 errln("FAIL: " + a + " != " + b + " != " + c);
\r
3044 //======================================================================
\r
3045 // New tests for the ::BEGIN/::END syntax
\r
3046 //======================================================================
\r
// Rule sets referenced by index from BEGIN_END_TEST_CASES and
// TestBeginEnd(). Empty strings are placeholders for disabled cases so that
// the remaining indexes stay unchanged.
3048 private static final String[] BEGIN_END_RULES = new String[] {
\r
3062 "", // test case commented out below, this is here to keep from messing up the indexes
\r
3071 "", // test case commented out below, this is here to keep from messing up the indexes
\r
3080 "", // test case commented out below, this is here to keep from messing up the indexes
\r
3099 "$ws = [[:Separator:][\\u0009-\\u000C]$];"
\r
3100 + "$delim = [\\-$ws];"
\r
3101 + "$ws $delim* > ' ';"
\r
3102 + "'-' $delim* > '-';",
\r
3106 + "$ws = [[:Separator:][\\u0009-\\u000C]$];"
\r
3107 + "$delim = [\\-$ws];"
\r
3108 + "$ws $delim* > ' ';"
\r
3109 + "'-' $delim* > '-';",
\r
3112 "$ws = [[:Separator:][\\u0009-\\u000C]$];"
\r
3113 + "$delim = [\\-$ws];"
\r
3114 + "$ws $delim* > ' ';"
\r
3115 + "'-' $delim* > '-';"
\r
3119 "$ws = [[:Separator:][\\u0009-\\u000C]$];"
\r
3120 + "$delim = [\\-$ws];"
\r
3122 + "$ws $delim* > ' ';"
\r
3123 + "'-' $delim* > '-';",
\r
3128 + "$ws = [[:Separator:][\\u0009-\\u000C]$];"
\r
3129 + "$delim = [\\-$ws];"
\r
3131 + "$ws $delim* > ' ';"
\r
3132 + "'-' $delim* > '-';",
\r
3134 "", // test case commented out below, this is here to keep from messing up the indexes
\r
3138 "$ws = [[:Separator:][\\u0009-\\u000C]$];"
\r
3139 + "$delim = [\\-$ws];"
\r
3141 + "$ws $delim* > ' ';"
\r
3142 + "'-' $delim* > '-';"
\r
3145 "", // test case commented out below, this is here to keep from messing up the indexes
\r
3149 "$ws = [[:Separator:][\\u0009-\\u000C]$];"
\r
3150 + "$delim = [\\-$ws];"
\r
3153 + "$ws $delim* > ' ';"
\r
3154 + "'-' $delim* > '-';"
\r
3157 + "$ab { ' ' } $ab > '-';"
\r
3161 + "'a-a' > a\\%|a;"
\r
3164 "", // test case commented out below, this is here to keep from messing up the indexes
\r
3167 "$ws = [[:Separator:][\\u0009-\\u000C]$];"
\r
3168 + "$delim = [\\-$ws];"
\r
3171 + "$ws $delim* > ' ';"
\r
3172 + "'-' $delim* > '-';"
\r
3174 + "$ab { ' ' } $ab > '-';"
\r
3177 + "'a-a' > a\\%|a;",
\r
3190 "", // test case commented out below, this is here to keep from messing up the indexes
\r
3208 + "::Upper(Lower);"
\r
3211 "", // test case commented out below, this is here to keep from messing up the indexes
\r
3218 + "::Upper(Lower);"
\r
3223 (This entire test is commented out below and will need some heavy revision when we re-add
\r
3224 the ::BEGIN/::END stuff)
\r
3225 private static final String[] BOGUS_BEGIN_END_RULES = new String[] {
\r
// Flat list of (rule set, input, expected output) triples; TestBeginEnd()
// walks it in steps of three. Commented-out rows are disabled cases.
3246 private static final String[] BEGIN_END_TEST_CASES = new String[] {
\r
3247 BEGIN_END_RULES[0], "abc ababc aba", "xy zbc z",
\r
3248 // BEGIN_END_RULES[1], "abc ababc aba", "xy abxy z",
\r
3249 // BEGIN_END_RULES[2], "abc ababc aba", "xy abxy z",
\r
3250 // BEGIN_END_RULES[3], "abc ababc aba", "xy abxy z",
\r
3251 BEGIN_END_RULES[4], "abc ababc aba", "xy abxy z",
\r
3252 BEGIN_END_RULES[5], "abccabaacababcbc", "PXAARXQBR",
\r
3254 BEGIN_END_RULES[6], "e e - e---e- e", "e e e-e-e",
\r
3255 BEGIN_END_RULES[7], "e e - e---e- e", "e e e-e-e",
\r
3256 BEGIN_END_RULES[8], "e e - e---e- e", "e e e-e-e",
\r
3257 BEGIN_END_RULES[9], "e e - e---e- e", "e e e-e-e",
\r
3258 // BEGIN_END_RULES[10], "e e - e---e- e", "e e e-e-e",
\r
3259 // BEGIN_END_RULES[11], "e e - e---e- e", "e e e-e-e",
\r
3260 // BEGIN_END_RULES[12], "e e - e---e- e", "e e e-e-e",
\r
3261 // BEGIN_END_RULES[12], "a a a a", "a%a%a%a",
\r
3262 // BEGIN_END_RULES[12], "a a-b c b a", "a%a-b cb-a",
\r
3263 BEGIN_END_RULES[13], "e e - e---e- e", "e e e-e-e",
\r
3264 BEGIN_END_RULES[13], "a a a a", "a%a%a%a",
\r
3265 BEGIN_END_RULES[13], "a a-b c b a", "a%a-b cb-a",
\r
3267 // BEGIN_END_RULES[14], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ",
\r
3268 BEGIN_END_RULES[15], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ",
\r
3269 // BEGIN_END_RULES[16], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ",
\r
3270 BEGIN_END_RULES[17], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ"
\r
3273 public void TestBeginEnd() {
\r
3274 // run through the list of test cases above
\r
3275 for (int i = 0; i < BEGIN_END_TEST_CASES.length; i += 3) {
\r
3276 expect(BEGIN_END_TEST_CASES[i], BEGIN_END_TEST_CASES[i + 1], BEGIN_END_TEST_CASES[i + 2]);
\r
3279 // instantiate the one reversible rule set in the reverse direction and make sure it does the right thing
\r
3280 Transliterator reversed = Transliterator.createFromRules("Reversed", BEGIN_END_RULES[17],
\r
3281 Transliterator.REVERSE);
\r
3282 expect(reversed, "xy XY XYZ yz YZ", "xy abc xaba yz aba");
\r
3284 // finally, run through the list of syntactically-ill-formed rule sets above and make sure
\r
3285 // that all of them cause errors
\r
3287 (commented out until we have the real ::BEGIN/::END stuff in place
\r
3288 for (int i = 0; i < BOGUS_BEGIN_END_RULES.length; i++) {
\r
3290 Transliterator t = Transliterator.createFromRules("foo", BOGUS_BEGIN_END_RULES[i],
\r
3291 Transliterator.FORWARD);
\r
3292 errln("Should have gotten syntax error from " + BOGUS_BEGIN_END_RULES[i]);
\r
3294 catch (IllegalArgumentException e) {
\r
3295 // this is supposed to happen; do nothing here
\r
3301 public void TestBeginEndToRules() {
\r
3302 // run through the same list of test cases we used above, but this time, instead of just
\r
3303 // instantiating a Transliterator from the rules and running the test against it, we instantiate
\r
3304 // a Transliterator from the rules, do toRules() on it, instantiate a Transliterator from
\r
3305 // the resulting set of rules, and make sure that the generated rule set is semantically equivalent
\r
3306 // to (i.e., does the same thing as) the original rule set
\r
3307 for (int i = 0; i < BEGIN_END_TEST_CASES.length; i += 3) {
\r
3308 Transliterator t = Transliterator.createFromRules("--", BEGIN_END_TEST_CASES[i],
\r
3309 Transliterator.FORWARD);
\r
3310 String rules = t.toRules(false);
\r
3311 Transliterator t2 = Transliterator.createFromRules("Test case #" + (i / 3), rules, Transliterator.FORWARD);
\r
3312 expect(t2, BEGIN_END_TEST_CASES[i + 1], BEGIN_END_TEST_CASES[i + 2]);
\r
3315 // do the same thing for the reversible test case
\r
3316 Transliterator reversed = Transliterator.createFromRules("Reversed", BEGIN_END_RULES[17],
\r
3317 Transliterator.REVERSE);
\r
3318 String rules = reversed.toRules(false);
\r
3319 Transliterator reversed2 = Transliterator.createFromRules("Reversed", rules, Transliterator.FORWARD);
\r
3320 expect(reversed2, "xy XY XYZ yz YZ", "xy abc xaba yz aba");
\r
3323 public void TestRegisterAlias() {
\r
3324 String longID = "Lower;[aeiou]Upper";
\r
3325 String shortID = "Any-CapVowels";
\r
3326 String reallyShortID = "CapVowels";
\r
3328 Transliterator.registerAlias(shortID, longID);
\r
3330 Transliterator t1 = Transliterator.getInstance(longID);
\r
3331 Transliterator t2 = Transliterator.getInstance(reallyShortID);
\r
3333 if (!t1.getID().equals(longID))
\r
3334 errln("Transliterator instantiated with long ID doesn't have long ID");
\r
3335 if (!t2.getID().equals(reallyShortID))
\r
3336 errln("Transliterator instantiated with short ID doesn't have short ID");
\r
3338 if (!t1.toRules(true).equals(t2.toRules(true)))
\r
3339 errln("Alias transliterators aren't the same");
\r
3341 Transliterator.unregister(shortID);
\r
3344 t1 = Transliterator.getInstance(shortID);
\r
3345 errln("Instantiation with short ID succeeded after short ID was unregistered");
\r
3347 catch (IllegalArgumentException e) {
\r
3350 // try the same thing again, but this time with something other than
\r
3351 // an instance of CompoundTransliterator
\r
3352 String realID = "Latin-Greek";
\r
3353 String fakeID = "Latin-dlgkjdflkjdl";
\r
3354 Transliterator.registerAlias(fakeID, realID);
\r
3356 t1 = Transliterator.getInstance(realID);
\r
3357 t2 = Transliterator.getInstance(fakeID);
\r
3359 if (!t1.toRules(true).equals(t2.toRules(true)))
\r
3360 errln("Alias transliterators aren't the same");
\r
3362 Transliterator.unregister(fakeID);
\r
3366 * Test the Halfwidth-Fullwidth transliterator (ticket 6281).
\r
3368 public void TestHalfwidthFullwidth() {
\r
3369 Transliterator hf = Transliterator.getInstance("Halfwidth-Fullwidth");
\r
3370 Transliterator fh = Transliterator.getInstance("Fullwidth-Halfwidth");
\r
3372 // Array of 3n items
\r
3374 // "hf"|"fh"|"both",
\r
3379 "\uFFE9\uFFEA\uFFEB\uFFEC\u0061\uFF71\u00AF\u0020",
\r
3380 "\u2190\u2191\u2192\u2193\uFF41\u30A2\uFFE3\u3000",
\r
3383 for (int i=0; i<DATA.length; i+=3) {
\r
3384 switch (DATA[i].charAt(0)) {
\r
3385 case 'h': // Halfwidth-Fullwidth only
\r
3386 expect(hf, DATA[i+1], DATA[i+2]);
\r
3388 case 'f': // Fullwidth-Halfwidth only
\r
3389 expect(fh, DATA[i+2], DATA[i+1]);
\r
3391 case 'b': // both directions
\r
3392 expect(hf, DATA[i+1], DATA[i+2]);
\r
3393 expect(fh, DATA[i+2], DATA[i+1]);
\r
3401 * Test Thai. The text is the first paragraph of "What is Unicode" from the Unicode.org web site.
\r
3402 * TODO: confirm that the expected results are correct.
\r
3403 * For now, test just confirms that C++ and Java give identical results.
\r
3405 public void TestThai() {
\r
3406 Transliterator tr = Transliterator.getInstance("Any-Latin", Transliterator.FORWARD);
\r
3407 String thaiText =
\r
3408 "\u0e42\u0e14\u0e22\u0e1e\u0e37\u0e49\u0e19\u0e10\u0e32\u0e19\u0e41\u0e25\u0e49\u0e27, \u0e04\u0e2d" +
\r
3409 "\u0e21\u0e1e\u0e34\u0e27\u0e40\u0e15\u0e2d\u0e23\u0e4c\u0e08\u0e30\u0e40\u0e01\u0e35\u0e48\u0e22" +
\r
3410 "\u0e27\u0e02\u0e49\u0e2d\u0e07\u0e01\u0e31\u0e1a\u0e40\u0e23\u0e37\u0e48\u0e2d\u0e07\u0e02\u0e2d" +
\r
3411 "\u0e07\u0e15\u0e31\u0e27\u0e40\u0e25\u0e02. \u0e04\u0e2d\u0e21\u0e1e\u0e34\u0e27\u0e40\u0e15\u0e2d" +
\r
3412 "\u0e23\u0e4c\u0e08\u0e31\u0e14\u0e40\u0e01\u0e47\u0e1a\u0e15\u0e31\u0e27\u0e2d\u0e31\u0e01\u0e29" +
\r
3413 "\u0e23\u0e41\u0e25\u0e30\u0e2d\u0e31\u0e01\u0e02\u0e23\u0e30\u0e2d\u0e37\u0e48\u0e19\u0e46 \u0e42" +
\r
3414 "\u0e14\u0e22\u0e01\u0e32\u0e23\u0e01\u0e33\u0e2b\u0e19\u0e14\u0e2b\u0e21\u0e32\u0e22\u0e40\u0e25" +
\r
3415 "\u0e02\u0e43\u0e2b\u0e49\u0e2a\u0e33\u0e2b\u0e23\u0e31\u0e1a\u0e41\u0e15\u0e48\u0e25\u0e30\u0e15" +
\r
3416 "\u0e31\u0e27. \u0e01\u0e48\u0e2d\u0e19\u0e2b\u0e19\u0e49\u0e32\u0e17\u0e35\u0e48\u0e4a Unicode \u0e08" +
\r
3417 "\u0e30\u0e16\u0e39\u0e01\u0e2a\u0e23\u0e49\u0e32\u0e07\u0e02\u0e36\u0e49\u0e19, \u0e44\u0e14\u0e49" +
\r
3418 "\u0e21\u0e35\u0e23\u0e30\u0e1a\u0e1a encoding \u0e2d\u0e22\u0e39\u0e48\u0e2b\u0e25\u0e32\u0e22\u0e23" +
\r
3419 "\u0e49\u0e2d\u0e22\u0e23\u0e30\u0e1a\u0e1a\u0e2a\u0e33\u0e2b\u0e23\u0e31\u0e1a\u0e01\u0e32\u0e23" +
\r
3420 "\u0e01\u0e33\u0e2b\u0e19\u0e14\u0e2b\u0e21\u0e32\u0e22\u0e40\u0e25\u0e02\u0e40\u0e2b\u0e25\u0e48" +
\r
3421 "\u0e32\u0e19\u0e35\u0e49. \u0e44\u0e21\u0e48\u0e21\u0e35 encoding \u0e43\u0e14\u0e17\u0e35\u0e48" +
\r
3422 "\u0e21\u0e35\u0e08\u0e33\u0e19\u0e27\u0e19\u0e15\u0e31\u0e27\u0e2d\u0e31\u0e01\u0e02\u0e23\u0e30" +
\r
3423 "\u0e21\u0e32\u0e01\u0e40\u0e1e\u0e35\u0e22\u0e07\u0e1e\u0e2d: \u0e22\u0e01\u0e15\u0e31\u0e27\u0e2d" +
\r
3424 "\u0e22\u0e48\u0e32\u0e07\u0e40\u0e0a\u0e48\u0e19, \u0e40\u0e09\u0e1e\u0e32\u0e30\u0e43\u0e19\u0e01" +
\r
3425 "\u0e25\u0e38\u0e48\u0e21\u0e2a\u0e2b\u0e20\u0e32\u0e1e\u0e22\u0e38\u0e42\u0e23\u0e1b\u0e40\u0e1e" +
\r
3426 "\u0e35\u0e22\u0e07\u0e41\u0e2b\u0e48\u0e07\u0e40\u0e14\u0e35\u0e22\u0e27 \u0e01\u0e47\u0e15\u0e49" +
\r
3427 "\u0e2d\u0e07\u0e01\u0e32\u0e23\u0e2b\u0e25\u0e32\u0e22 encoding \u0e43\u0e19\u0e01\u0e32\u0e23\u0e04" +
\r
3428 "\u0e23\u0e2d\u0e1a\u0e04\u0e25\u0e38\u0e21\u0e17\u0e38\u0e01\u0e20\u0e32\u0e29\u0e32\u0e43\u0e19" +
\r
3429 "\u0e01\u0e25\u0e38\u0e48\u0e21. \u0e2b\u0e23\u0e37\u0e2d\u0e41\u0e21\u0e49\u0e41\u0e15\u0e48\u0e43" +
\r
3430 "\u0e19\u0e20\u0e32\u0e29\u0e32\u0e40\u0e14\u0e35\u0e48\u0e22\u0e27 \u0e40\u0e0a\u0e48\u0e19 \u0e20" +
\r
3431 "\u0e32\u0e29\u0e32\u0e2d\u0e31\u0e07\u0e01\u0e24\u0e29 \u0e01\u0e47\u0e44\u0e21\u0e48\u0e21\u0e35" +
\r
3432 " encoding \u0e43\u0e14\u0e17\u0e35\u0e48\u0e40\u0e1e\u0e35\u0e22\u0e07\u0e1e\u0e2d\u0e2a\u0e33\u0e2b" +
\r
3433 "\u0e23\u0e31\u0e1a\u0e17\u0e38\u0e01\u0e15\u0e31\u0e27\u0e2d\u0e31\u0e01\u0e29\u0e23, \u0e40\u0e04" +
\r
3434 "\u0e23\u0e37\u0e48\u0e2d\u0e07\u0e2b\u0e21\u0e32\u0e22\u0e27\u0e23\u0e23\u0e04\u0e15\u0e2d\u0e19" +
\r
3435 " \u0e41\u0e25\u0e30\u0e2a\u0e31\u0e0d\u0e25\u0e31\u0e01\u0e29\u0e13\u0e4c\u0e17\u0e32\u0e07\u0e40" +
\r
3436 "\u0e17\u0e04\u0e19\u0e34\u0e04\u0e17\u0e35\u0e48\u0e43\u0e0a\u0e49\u0e01\u0e31\u0e19\u0e2d\u0e22" +
\r
3437 "\u0e39\u0e48\u0e17\u0e31\u0e48\u0e27\u0e44\u0e1b.";
\r
3439 String latinText =
\r
3440 "doy ph\u1ee5\u0304\u0302n \u1e6d\u0304h\u0101n l\u00e6\u0302w, khxmphiwtexr\u0312 ca ke\u012b\u0300" +
\r
3441 "ywk\u0304\u0125xng k\u1ea1b re\u1ee5\u0304\u0300xng k\u0304hxng t\u1ea1wlek\u0304h. khxmphiwtexr" +
\r
3442 "\u0312 c\u1ea1d k\u0115b t\u1ea1w x\u1ea1ks\u0304\u02b9r l\u00e6a x\u1ea1kk\u0304h ra x\u1ee5\u0304" +
\r
3443 "\u0300n\u00ab doy k\u0101r k\u1ea3h\u0304nd h\u0304m\u0101ylek\u0304h h\u0304\u0131\u0302 s\u0304" +
\r
3444 "\u1ea3h\u0304r\u1ea1b t\u00e6\u0300la t\u1ea1w. k\u0300xn h\u0304n\u0302\u0101 th\u012b\u0300\u0301" +
\r
3445 " Unicode ca t\u0304h\u016bk s\u0304r\u0302\u0101ng k\u0304h\u1ee5\u0302n, d\u1ecb\u0302 m\u012b " +
\r
3446 "rabb encoding xy\u016b\u0300 h\u0304l\u0101y r\u0302xy rabb s\u0304\u1ea3h\u0304r\u1ea1b k\u0101" +
\r
3447 "r k\u1ea3h\u0304nd h\u0304m\u0101ylek\u0304h h\u0304el\u0300\u0101 n\u012b\u0302. m\u1ecb\u0300m" +
\r
3448 "\u012b encoding d\u0131 th\u012b\u0300 m\u012b c\u1ea3nwn t\u1ea1w x\u1ea1kk\u0304hra m\u0101k p" +
\r
3449 "he\u012byng phx: yk t\u1ea1wx\u1ef3\u0101ng ch\u00e8n, c\u0304heph\u0101a n\u0131 kl\u00f9m s\u0304" +
\r
3450 "h\u0304p\u0323h\u0101ph yurop phe\u012byng h\u0304\u00e6\u0300ng de\u012byw k\u0306 t\u0302xngk\u0101" +
\r
3451 "r h\u0304l\u0101y encoding n\u0131 k\u0101r khrxbkhlum thuk p\u0323h\u0101s\u0304\u02b9\u0101 n\u0131" +
\r
3452 " kl\u00f9m. h\u0304r\u1ee5\u0304x m\u00e6\u0302t\u00e6\u0300 n\u0131 p\u0323h\u0101s\u0304\u02b9" +
\r
3453 "\u0101 de\u012b\u0300yw ch\u00e8n p\u0323h\u0101s\u0304\u02b9\u0101 x\u1ea1ngkvs\u0304\u02b9 k\u0306" +
\r
3454 " m\u1ecb\u0300m\u012b encoding d\u0131 th\u012b\u0300 phe\u012byng phx s\u0304\u1ea3h\u0304r\u1ea1" +
\r
3455 "b thuk t\u1ea1w x\u1ea1ks\u0304\u02b9r, kher\u1ee5\u0304\u0300xngh\u0304m\u0101y wrrkh txn l\u00e6" +
\r
3456 "a s\u0304\u1ea1\u1ef5l\u1ea1ks\u0304\u02b9\u1e47\u0312 th\u0101ng thekhnikh th\u012b\u0300 ch\u0131" +
\r
3457 "\u0302 k\u1ea1n xy\u016b\u0300 th\u1ea1\u0300wp\u1ecb.";
\r
3459 expect(tr, thaiText, latinText);
\r
3463 //======================================================================
\r
3464 // These tests are not mirrored (yet) in icu4c at
\r
3465 // source/test/intltest/transtst.cpp
\r
3466 //======================================================================
\r
3469 * Improve code coverage.
\r
3471 public void TestCoverage() {
\r
3472 // NullTransliterator
\r
3473 Transliterator t = Transliterator.getInstance("Null", Transliterator.FORWARD);
\r
3474 expect(t, "a", "a");
\r
3476 // Source, target set
\r
3477 t = Transliterator.getInstance("Latin-Greek", Transliterator.FORWARD);
\r
3478 t.setFilter(new UnicodeSet("[A-Z]"));
\r
3479 logln("source = " + t.getSourceSet());
\r
3480 logln("target = " + t.getTargetSet());
\r
3482 t = Transliterator.createFromRules("x", "(.) > &Any-Hex($1);", Transliterator.FORWARD);
\r
3483 logln("source = " + t.getSourceSet());
\r
3484 logln("target = " + t.getTargetSet());
\r
3487 * Test case for threading problem in NormalizationTransliterator
\r
3488 * reported by ticket#5160
\r
3490 public void TestT5160() {
\r
3491 final String[] testData = {
\r
3497 final String[] expected = {
\r
3503 Transliterator translit = Transliterator.getInstance("NFC");
\r
3504 NormTranslitTask[] tasks = new NormTranslitTask[testData.length];
\r
3505 for (int i = 0; i < tasks.length; i++) {
\r
3506 tasks[i] = new NormTranslitTask(translit, testData[i], expected[i]);
\r
3508 TestUtil.runUntilDone(tasks);
\r
3510 for (int i = 0; i < tasks.length; i++) {
\r
3511 if (tasks[i].getErrorMessage() != null) {
\r
3512 System.out.println("Fail: thread#" + i + " " + tasks[i].getErrorMessage());
\r
3518 static class NormTranslitTask implements Runnable {
\r
3519 Transliterator translit;
\r
3521 String expectedData;
\r
3524 NormTranslitTask(Transliterator translit, String testData, String expectedData) {
\r
3525 this.translit = translit;
\r
3526 this.testData = testData;
\r
3527 this.expectedData = expectedData;
\r
3530 public void run() {
\r
3532 StringBuffer inBuf = new StringBuffer(testData);
\r
3533 StringBuffer expectedBuf = new StringBuffer(expectedData);
\r
3535 for(int i = 0; i < 1000; i++) {
\r
3536 String in = inBuf.toString();
\r
3537 String out = translit.transliterate(in);
\r
3538 String expected = expectedBuf.toString();
\r
3539 if (!out.equals(expected)) {
\r
3540 errorMsg = "in {" + in + "} / out {" + out + "} / expected {" + expected + "}";
\r
3543 inBuf.append(testData);
\r
3544 expectedBuf.append(expectedData);
\r
3548 public String getErrorMessage() {
\r
3553 //======================================================================
\r
3554 // Support methods
\r
3555 //======================================================================
\r
3556 void expect(String rules,
\r
3558 String expectedResult,
\r
3559 Transliterator.Position pos) {
\r
3560 Transliterator t = Transliterator.createFromRules("<ID>", rules, Transliterator.FORWARD);
\r
3561 expect(t, source, expectedResult, pos);
\r
3564 void expect(String rules, String source, String expectedResult) {
\r
3565 expect(rules, source, expectedResult, null);
\r
3568 void expect(Transliterator t, String source, String expectedResult,
\r
3569 Transliterator reverseTransliterator) {
\r
3570 expect(t, source, expectedResult);
\r
3571 if (reverseTransliterator != null) {
\r
3572 expect(reverseTransliterator, expectedResult, source);
\r
3576 void expect(Transliterator t, String source, String expectedResult) {
\r
3577 expect(t, source, expectedResult, (Transliterator.Position) null);
\r
3580 void expect(Transliterator t, String source, String expectedResult,
\r
3581 Transliterator.Position pos) {
\r
3582 if (pos == null) {
\r
3583 String result = t.transliterate(source);
\r
3584 if (!expectAux(t.getID() + ":String", source, result, expectedResult)) return;
\r
3587 Transliterator.Position index = null;
\r
3588 if (pos == null) {
\r
3589 index = new Transliterator.Position(0, source.length(), 0, source.length());
\r
3591 index = new Transliterator.Position(pos.contextStart, pos.contextLimit,
\r
3592 pos.start, pos.limit);
\r
3595 ReplaceableString rsource = new ReplaceableString(source);
\r
3597 t.finishTransliteration(rsource, index);
\r
3598 // Do it all at once -- below we do it incrementally
\r
3600 if (index.start != index.limit) {
\r
3601 expectAux(t.getID() + ":UNFINISHED", source,
\r
3602 "start: " + index.start + ", limit: " + index.limit, false, expectedResult);
\r
3605 String result = rsource.toString();
\r
3606 if (!expectAux(t.getID() + ":Replaceable", source, result, expectedResult)) return;
\r
3609 if (pos == null) {
\r
3610 index = new Transliterator.Position();
\r
3612 index = new Transliterator.Position(pos.contextStart, pos.contextLimit,
\r
3613 pos.start, pos.limit);
\r
3616 // Test incremental transliteration -- this result
\r
3617 // must be the same after we finalize (see below).
\r
3618 Vector v = new Vector();
\r
3620 rsource.replace(0, rsource.length(), "");
\r
3621 if (pos != null) {
\r
3622 rsource.replace(0, 0, source);
\r
3623 v.add(UtilityExtensions.formatInput(rsource, index));
\r
3624 t.transliterate(rsource, index);
\r
3625 v.add(UtilityExtensions.formatInput(rsource, index));
\r
3627 for (int i=0; i<source.length(); ++i) {
\r
3628 //v.add(i == 0 ? "" : " + " + source.charAt(i) + "");
\r
3629 //log.append(source.charAt(i)).append(" -> "));
\r
3630 t.transliterate(rsource, index, source.charAt(i));
\r
3631 //v.add(UtilityExtensions.formatInput(rsource, index) + source.substring(i+1));
\r
3632 v.add(UtilityExtensions.formatInput(rsource, index) +
\r
3633 ((i<source.length()-1)?(" + '" + source.charAt(i+1) + "' ->"):" =>"));
\r
3637 // As a final step in keyboard transliteration, we must call
\r
3638 // transliterate to finish off any pending partial matches that
\r
3639 // were waiting for more input.
\r
3640 t.finishTransliteration(rsource, index);
\r
3641 result = rsource.toString();
\r
3642 //log.append(" => ").append(rsource.toString());
\r
3645 String[] results = new String[v.size()];
\r
3646 v.copyInto(results);
\r
3647 expectAux(t.getID() + ":Incremental", results,
\r
3648 result.equals(expectedResult),
\r
3652 boolean expectAux(String tag, String source,
\r
3653 String result, String expectedResult) {
\r
3654 return expectAux(tag, new String[] {source, result},
\r
3655 result.equals(expectedResult),
\r
3659 boolean expectAux(String tag, String source,
\r
3660 String result, boolean pass,
\r
3661 String expectedResult) {
\r
3662 return expectAux(tag, new String[] {source, result},
\r
3667 boolean expectAux(String tag, String source,
\r
3669 String expectedResult) {
\r
3670 return expectAux(tag, new String[] {source},
\r
3675 boolean expectAux(String tag, String[] results, boolean pass,
\r
3676 String expectedResult) {
\r
3677 msg((pass?"(":"FAIL: (")+tag+")", pass ? LOG : ERR, true, true);
\r
3679 for (int i = 0; i < results.length; ++i) {
\r
3682 label = "source: ";
\r
3683 } else if (i == results.length - 1) {
\r
3684 label = "result: ";
\r
3686 if (!isVerbose() && pass) continue;
\r
3687 label = "interm" + i + ": ";
\r
3689 msg(" " + label + results[i], pass ? LOG : ERR, false, true);
\r
3693 msg( " expected: " + expectedResult, ERR, false, true);
\r
3699 private void assertTransform(String message, String expected, StringTransform t, String source) {
\r
3700 assertEquals(message + " " + source, expected, t.transform(source));
\r
3704 private void assertTransform(String message, String expected, StringTransform t, StringTransform back, String source, String source2) {
\r
3705 assertEquals(message + " " +source, expected, t.transform(source));
\r
3706 assertEquals(message + " " +source2, expected, t.transform(source2));
\r
3707 assertEquals(message + " " + expected, source, back.transform(expected));
\r
3711 * Tests the method public Enumeration<String> getAvailableTargets(String source)
\r
3713 public void TestGetAvailableTargets() {
\r
3715 // Tests when if (targets == null) is true
\r
3716 Transliterator.getAvailableTargets("");
\r
3717 } catch (Exception e) {
\r
3718 errln("TransliteratorRegistry.getAvailableTargets(String) was not " + "supposed to return an exception.");
\r
3723 * Tests the method public Enumeration<String> getAvailableVariants(String source, String target)
\r
3725 public void TestGetAvailableVariants() {
\r
3727 // Tests when if (targets == null) is true
\r
3728 Transliterator.getAvailableVariants("", "");
\r
3729 } catch (Exception e) {
\r
3730 errln("TransliteratorRegistry.getAvailableVariants(String) was not " + "supposed to return an exception.");
\r
3735 * Tests the method String nextLine() in RuleBody
\r
3737 public void TestNextLine() {
\r
3738 // Tests when "if (s != null && s.length() > 0 && s.charAt(s.length() - 1) == '\\')" is true
\r
3740 Transliterator.createFromRules("gif", "\\", Transliterator.FORWARD);
\r
3741 } catch(Exception e){
\r
3742 errln("TransliteratorParser.nextLine() was not suppose to return an " +
\r
3743 "exception for a rule of '\\'");
\r