]> gitweb.fperrin.net Git - Dictionary.git/blob - jars/icu4j-4_4_2-src/main/tests/translit/src/com/ibm/icu/dev/test/translit/TransliteratorTest.java
go
[Dictionary.git] / jars / icu4j-4_4_2-src / main / tests / translit / src / com / ibm / icu / dev / test / translit / TransliteratorTest.java
1 /*\r
2  *******************************************************************************\r
3  * Copyright (C) 1996-2010, International Business Machines Corporation and    *\r
4  * others. All Rights Reserved.                                                *\r
5  *******************************************************************************\r
6  */\r
7 package com.ibm.icu.dev.test.translit;\r
8 \r
9 import java.util.Enumeration;\r
10 import java.util.HashMap;\r
11 import java.util.HashSet;\r
12 import java.util.Iterator;\r
13 import java.util.Locale;\r
14 import java.util.Vector;\r
15 \r
16 import com.ibm.icu.dev.test.TestFmwk;\r
17 import com.ibm.icu.dev.test.TestUtil;\r
18 import com.ibm.icu.impl.Utility;\r
19 import com.ibm.icu.impl.UtilityExtensions;\r
20 import com.ibm.icu.lang.UCharacter;\r
21 import com.ibm.icu.lang.UScript;\r
22 import com.ibm.icu.text.Replaceable;\r
23 import com.ibm.icu.text.ReplaceableString;\r
24 import com.ibm.icu.text.StringTransform;\r
25 import com.ibm.icu.text.Transliterator;\r
26 import com.ibm.icu.text.UTF16;\r
27 import com.ibm.icu.text.UnicodeFilter;\r
28 import com.ibm.icu.text.UnicodeSet;\r
29 import com.ibm.icu.text.UnicodeSetIterator;\r
30 import com.ibm.icu.util.CaseInsensitiveString;\r
31 import com.ibm.icu.util.ULocale;\r
32 \r
33 /***********************************************************************\r
34 \r
35                      HOW TO USE THIS TEST FILE\r
36                                -or-\r
37                   How I developed on two platforms\r
38                 without losing (too much of) my mind\r
39 \r
40 \r
41 1. Add new tests by copying/pasting/changing existing tests.  On Java,\r
42    any public void method named Test...() taking no parameters becomes\r
43    a test.  On C++, you need to modify the header and add a line to\r
44    the runIndexedTest() dispatch method.\r
45 \r
46 2. Make liberal use of the expect() method; it is your friend.\r
47 \r
48 3. The tests in this file exactly match those in a sister file on the\r
49    other side.  The two files are:\r
50 \r
51    icu4j:  src/com.ibm.icu.dev.test/translit/TransliteratorTest.java\r
52    icu4c:  source/test/intltest/transtst.cpp\r
53 \r
54                   ==> THIS IS THE IMPORTANT PART <==\r
55 \r
56    When you add a test in this file, add it in transtst.cpp too.\r
57    Give it the same name and put it in the same relative place.  This\r
58    makes maintenance a lot simpler for any poor soul who ends up\r
59    trying to synchronize the tests between icu4j and icu4c.\r
60 \r
61 4. If you MUST enter a test that is NOT paralleled in the sister file,\r
62    then add it in the special non-mirrored section.  These are\r
63    labeled\r
64 \r
65      "icu4j ONLY"\r
66 \r
67    or\r
68 \r
69      "icu4c ONLY"\r
70 \r
71    Make sure you document the reason the test is here and not there.\r
72 \r
73 \r
74 Thank you.\r
75 The Management\r
76  ***********************************************************************/\r
77 \r
78 /**\r
79  * @test\r
80  * @summary General test of Transliterator\r
81  */\r
82 public class TransliteratorTest extends TestFmwk {\r
83 \r
84     public static void main(String[] args) throws Exception {\r
85         new TransliteratorTest().run(args);\r
86     }\r
87 \r
88     public void TestHangul() {\r
89 \r
90         Transliterator lh = Transliterator.getInstance("Latin-Hangul");\r
91         Transliterator hl = lh.getInverse();\r
92 \r
93         assertTransform("Transform", "\uCE20", lh, "ch");\r
94 \r
95         assertTransform("Transform", "\uC544\uB530", lh, hl, "atta", "a-tta");\r
96         assertTransform("Transform", "\uC544\uBE60", lh, hl, "appa", "a-ppa");\r
97         assertTransform("Transform", "\uC544\uC9DC", lh, hl, "ajja", "a-jja");\r
98         assertTransform("Transform", "\uC544\uAE4C", lh, hl, "akka", "a-kka");\r
99         assertTransform("Transform", "\uC544\uC2F8", lh, hl, "assa", "a-ssa");\r
100         assertTransform("Transform", "\uC544\uCC28", lh, hl, "acha", "a-cha");\r
101         assertTransform("Transform", "\uC545\uC0AC", lh, hl, "agsa", "ag-sa");\r
102         assertTransform("Transform", "\uC548\uC790", lh, hl, "anja", "an-ja");\r
103         assertTransform("Transform", "\uC548\uD558", lh, hl, "anha", "an-ha");\r
104         assertTransform("Transform", "\uC54C\uAC00", lh, hl, "alga", "al-ga");\r
105         assertTransform("Transform", "\uC54C\uB9C8", lh, hl, "alma", "al-ma");\r
106         assertTransform("Transform", "\uC54C\uBC14", lh, hl, "alba", "al-ba");\r
107         assertTransform("Transform", "\uC54C\uC0AC", lh, hl, "alsa", "al-sa");\r
108         assertTransform("Transform", "\uC54C\uD0C0", lh, hl, "alta", "al-ta");\r
109         assertTransform("Transform", "\uC54C\uD30C", lh, hl, "alpa", "al-pa");\r
110         assertTransform("Transform", "\uC54C\uD558", lh, hl, "alha", "al-ha");\r
111         assertTransform("Transform", "\uC555\uC0AC", lh, hl, "absa", "ab-sa");\r
112         assertTransform("Transform", "\uC548\uAC00", lh, hl, "anga", "an-ga");\r
113         assertTransform("Transform", "\uC545\uC2F8", lh, hl, "agssa", "ag-ssa");\r
114         assertTransform("Transform", "\uC548\uC9DC", lh, hl, "anjja", "an-jja");\r
115         assertTransform("Transform", "\uC54C\uC2F8", lh, hl, "alssa", "al-ssa");\r
116         assertTransform("Transform", "\uC54C\uB530", lh, hl, "altta", "al-tta");\r
117         assertTransform("Transform", "\uC54C\uBE60", lh, hl, "alppa", "al-ppa");\r
118         assertTransform("Transform", "\uC555\uC2F8", lh, hl, "abssa", "ab-ssa");\r
119         assertTransform("Transform", "\uC546\uCE74", lh, hl, "akkka", "akk-ka");\r
120         assertTransform("Transform", "\uC558\uC0AC", lh, hl, "asssa", "ass-sa");\r
121 \r
122     }\r
123 \r
124     public void TestChinese() {\r
125         Transliterator hanLatin = Transliterator.getInstance("Han-Latin");\r
126         assertTransform("Transform", "z\u00E0o Unicode", hanLatin, "\u9020Unicode");\r
127         assertTransform("Transform", "z\u00E0i chu\u00E0ng z\u00E0o Unicode zh\u012B qi\u00E1n", hanLatin, "\u5728\u5275\u9020Unicode\u4E4B\u524D");\r
128     } \r
129 \r
130     public void TestRegistry() {\r
131         checkRegistry("foo3", "::[a-z]; ::NFC; [:letter:] a > b;"); // check compound\r
132         checkRegistry("foo2", "::NFC; [:letter:] a > b;"); // check compound\r
133         checkRegistry("foo1", "[:letter:] a > b;");\r
134         for (Enumeration e = Transliterator.getAvailableIDs(); e.hasMoreElements(); ) {\r
135             String id = (String) e.nextElement();\r
136             checkRegistry(id);\r
137         }\r
138     }\r
139 \r
140     private void checkRegistry (String id, String rules) {\r
141         Transliterator foo = Transliterator.createFromRules(id, rules, Transliterator.FORWARD);\r
142         Transliterator.registerInstance(foo);\r
143         checkRegistry(id);\r
144     }\r
145 \r
146     private void checkRegistry(String id) {\r
147         Transliterator fie = Transliterator.getInstance(id);\r
148         final UnicodeSet fae = new UnicodeSet("[a-z5]");\r
149         fie.setFilter(fae);\r
150         Transliterator foe = Transliterator.getInstance(id);\r
151         UnicodeFilter fee = foe.getFilter();\r
152         if (fae.equals(fee)) {\r
153             errln("Changed what is in registry for " + id);\r
154         }\r
155     }\r
156 \r
157     public void TestInstantiation() {\r
158         long ms = System.currentTimeMillis();\r
159         String ID;\r
160         for (Enumeration e = Transliterator.getAvailableIDs(); e.hasMoreElements(); ) {\r
161             ID = (String) e.nextElement();\r
162             if (ID.equals("Latin-Han/definition")) {\r
163                 System.out.println("\nTODO: disabling Latin-Han/definition check for now: fix later");\r
164                 continue;\r
165             }\r
166             Transliterator t = null;\r
167             try {\r
168                 t = Transliterator.getInstance(ID);\r
169                 // This is only true for some subclasses\r
170                 //                // We should get a new instance if we try again\r
171                 //                Transliterator t2 = Transliterator.getInstance(ID);\r
172                 //                if (t != t2) {\r
173                 //                    logln("OK: " + Transliterator.getDisplayName(ID) + " (" + ID + "): " + t);\r
174                 //                } else {\r
175                 //                    errln("FAIL: " + ID + " returned identical instances");\r
176                 //                    t = null;\r
177                 //                }\r
178             } catch (IllegalArgumentException ex) {\r
179                 errln("FAIL: " + ID);\r
180                 throw ex;\r
181             }\r
182 \r
183             //            if (t.getFilter() != null) {\r
184             //                errln("Fail: Should never have filter on transliterator unless we started with one: " + ID + ", " + t.getFilter());\r
185             //            }\r
186 \r
187             if (t != null) {\r
188                 // Now test toRules\r
189                 String rules = null;\r
190                 try {\r
191                     rules = t.toRules(true);\r
192 \r
193                     Transliterator.createFromRules("x", rules, Transliterator.FORWARD);\r
194                 } catch (IllegalArgumentException ex2) {\r
195                     errln("FAIL: " + ID + ".toRules() => bad rules: " +\r
196                             rules);\r
197                     throw ex2;\r
198                 }\r
199             }\r
200         }\r
201 \r
202         // Now test the failure path\r
203         try {\r
204             ID = "<Not a valid Transliterator ID>";\r
205             Transliterator t = Transliterator.getInstance(ID);\r
206             errln("FAIL: " + ID + " returned " + t);\r
207         } catch (IllegalArgumentException ex) {\r
208             logln("OK: Bogus ID handled properly");\r
209         }\r
210 \r
211         ms = System.currentTimeMillis() - ms;\r
212         logln("Elapsed time: " + ms + " ms");\r
213     }\r
214 \r
215     public void TestSimpleRules() {\r
216         /* Example: rules 1. ab>x|y\r
217          *                2. yc>z\r
218          *\r
219          * []|eabcd  start - no match, copy e to tranlated buffer\r
220          * [e]|abcd  match rule 1 - copy output & adjust cursor\r
221          * [ex|y]cd  match rule 2 - copy output & adjust cursor\r
222          * [exz]|d   no match, copy d to transliterated buffer\r
223          * [exzd]|   done\r
224          */\r
225         expect("ab>x|y;" +\r
226                 "yc>z",\r
227                 "eabcd", "exzd");\r
228 \r
229         /* Another set of rules:\r
230          *    1. ab>x|yzacw\r
231          *    2. za>q\r
232          *    3. qc>r\r
233          *    4. cw>n\r
234          *\r
235          * []|ab       Rule 1\r
236          * [x|yzacw]   No match\r
237          * [xy|zacw]   Rule 2\r
238          * [xyq|cw]    Rule 4\r
239          * [xyqn]|     Done\r
240          */\r
241         expect("ab>x|yzacw;" +\r
242                 "za>q;" +\r
243                 "qc>r;" +\r
244                 "cw>n",\r
245                 "ab", "xyqn");\r
246 \r
247         /* Test categories\r
248          */\r
249         Transliterator t = Transliterator.createFromRules("<ID>",\r
250                 "$dummy=\uE100;" +\r
251                 "$vowel=[aeiouAEIOU];" +\r
252                 "$lu=[:Lu:];" +\r
253                 "$vowel } $lu > '!';" +\r
254                 "$vowel > '&';" +\r
255                 "'!' { $lu > '^';" +\r
256                 "$lu > '*';" +\r
257                 "a>ERROR",\r
258                 Transliterator.FORWARD);\r
259         expect(t, "abcdefgABCDEFGU", "&bcd&fg!^**!^*&");\r
260     }\r
261 \r
262     /**\r
263      * Test inline set syntax and set variable syntax.\r
264      */\r
265     public void TestInlineSet() {\r
266         expect("{ [:Ll:] } x > y; [:Ll:] > z;", "aAbxq", "zAyzz");\r
267         expect("a[0-9]b > qrs", "1a7b9", "1qrs9");\r
268 \r
269         expect("$digit = [0-9];" +\r
270                 "$alpha = [a-zA-Z];" +\r
271                 "$alphanumeric = [$digit $alpha];" + // ***\r
272                 "$special = [^$alphanumeric];" +     // ***\r
273                 "$alphanumeric > '-';" +\r
274                 "$special > '*';",\r
275 \r
276                 "thx-1138", "---*----");\r
277     }\r
278 \r
279     /**\r
280      * Create some inverses and confirm that they work.  We have to be\r
281      * careful how we do this, since the inverses will not be true\r
282      * inverses -- we can't throw any random string at the composition\r
283      * of the transliterators and expect the identity function.  F x\r
284      * F' != I.  However, if we are careful about the input, we will\r
285      * get the expected results.\r
286      */\r
287     public void TestRuleBasedInverse() {\r
288         String RULES =\r
289             "abc>zyx;" +\r
290             "ab>yz;" +\r
291             "bc>zx;" +\r
292             "ca>xy;" +\r
293             "a>x;" +\r
294             "b>y;" +\r
295             "c>z;" +\r
296 \r
297             "abc<zyx;" +\r
298             "ab<yz;" +\r
299             "bc<zx;" +\r
300             "ca<xy;" +\r
301             "a<x;" +\r
302             "b<y;" +\r
303             "c<z;" +\r
304 \r
305             "";\r
306 \r
307         String[] DATA = {\r
308                 // Careful here -- random strings will not work.  If we keep\r
309                 // the left side to the domain and the right side to the range\r
310                 // we will be okay though (left, abc; right xyz).\r
311                 "a", "x",\r
312                 "abcacab", "zyxxxyy",\r
313                 "caccb", "xyzzy",\r
314         };\r
315 \r
316         Transliterator fwd = Transliterator.createFromRules("<ID>", RULES, Transliterator.FORWARD);\r
317         Transliterator rev = Transliterator.createFromRules("<ID>", RULES, Transliterator.REVERSE);\r
318         for (int i=0; i<DATA.length; i+=2) {\r
319             expect(fwd, DATA[i], DATA[i+1]);\r
320             expect(rev, DATA[i+1], DATA[i]);\r
321         }\r
322     }\r
323 \r
324     /**\r
325      * Basic test of keyboard.\r
326      */\r
327     public void TestKeyboard() {\r
328         Transliterator t = Transliterator.createFromRules("<ID>",\r
329                 "psch>Y;"\r
330                 +"ps>y;"\r
331                 +"ch>x;"\r
332                 +"a>A;", Transliterator.FORWARD);\r
333         String DATA[] = {\r
334                 // insertion, buffer\r
335                 "a", "A",\r
336                 "p", "Ap",\r
337                 "s", "Aps",\r
338                 "c", "Apsc",\r
339                 "a", "AycA",\r
340                 "psch", "AycAY",\r
341                 null, "AycAY", // null means finishKeyboardTransliteration\r
342         };\r
343 \r
344         keyboardAux(t, DATA);\r
345     }\r
346 \r
347     /**\r
348      * Basic test of keyboard with cursor.\r
349      */\r
350     public void TestKeyboard2() {\r
351         Transliterator t = Transliterator.createFromRules("<ID>",\r
352                 "ych>Y;"\r
353                 +"ps>|y;"\r
354                 +"ch>x;"\r
355                 +"a>A;", Transliterator.FORWARD);\r
356         String DATA[] = {\r
357                 // insertion, buffer\r
358                 "a", "A",\r
359                 "p", "Ap",\r
360                 "s", "Aps", // modified for rollback - "Ay",\r
361                 "c", "Apsc", // modified for rollback - "Ayc",\r
362                 "a", "AycA",\r
363                 "p", "AycAp",\r
364                 "s", "AycAps", // modified for rollback - "AycAy",\r
365                 "c", "AycApsc", // modified for rollback - "AycAyc",\r
366                 "h", "AycAY",\r
367                 null, "AycAY", // null means finishKeyboardTransliteration\r
368         };\r
369 \r
370         keyboardAux(t, DATA);\r
371     }\r
372 \r
373     /**\r
374      * Test keyboard transliteration with back-replacement.\r
375      */\r
376     public void TestKeyboard3() {\r
377         // We want th>z but t>y.  Furthermore, during keyboard\r
378         // transliteration we want t>y then yh>z if t, then h are\r
379         // typed.\r
380         String RULES =\r
381             "t>|y;" +\r
382             "yh>z;" +\r
383             "";\r
384 \r
385         String[] DATA = {\r
386                 // Column 1: characters to add to buffer (as if typed)\r
387                 // Column 2: expected appearance of buffer after\r
388                 //           keyboard xliteration.\r
389                 "a", "a",\r
390                 "b", "ab",\r
391                 "t", "abt", // modified for rollback - "aby",\r
392                 "c", "abyc",\r
393                 "t", "abyct", // modified for rollback - "abycy",\r
394                 "h", "abycz",\r
395                 null, "abycz", // null means finishKeyboardTransliteration\r
396         };\r
397 \r
398         Transliterator t = Transliterator.createFromRules("<ID>", RULES, Transliterator.FORWARD);\r
399         keyboardAux(t, DATA);\r
400     }\r
401 \r
402     private void keyboardAux(Transliterator t, String[] DATA) {\r
403         Transliterator.Position index = new Transliterator.Position();\r
404         ReplaceableString s = new ReplaceableString();\r
405         for (int i=0; i<DATA.length; i+=2) {\r
406             StringBuffer log;\r
407             if (DATA[i] != null) {\r
408                 log = new StringBuffer(s.toString() + " + "\r
409                         + DATA[i]\r
410                                + " -> ");\r
411                 t.transliterate(s, index, DATA[i]);\r
412             } else {\r
413                 log = new StringBuffer(s.toString() + " => ");\r
414                 t.finishTransliteration(s, index);\r
415             }\r
416             UtilityExtensions.formatInput(log, s, index);\r
417             if (s.toString().equals(DATA[i+1])) {\r
418                 logln(log.toString());\r
419             } else {\r
420                 errln("FAIL: " + log.toString() + ", expected " + DATA[i+1]);\r
421             }\r
422         }\r
423     }\r
424 \r
425     // Latin-Arabic has been temporarily removed until it can be\r
426     // done correctly.\r
427 \r
428     //  public void TestArabic() {\r
429     //      String DATA[] = {\r
430     //          "Arabic",\r
431     //              "\u062a\u062a\u0645\u062a\u0639 "+\r
432     //              "\u0627\u0644\u0644\u063a\u0629 "+\r
433     //              "\u0627\u0644\u0639\u0631\u0628\u0628\u064a\u0629 "+\r
434     //              "\u0628\u0628\u0646\u0638\u0645 "+\r
435     //              "\u0643\u062a\u0627\u0628\u0628\u064a\u0629 "+\r
436     //              "\u062c\u0645\u064a\u0644\u0629"\r
437     //      };\r
438 \r
439     //      Transliterator t = Transliterator.getInstance("Latin-Arabic");\r
440     //      for (int i=0; i<DATA.length; i+=2) {\r
441     //          expect(t, DATA[i], DATA[i+1]);\r
442     //      }\r
443     //  }\r
444 \r
445     /**\r
446      * Compose the Kana transliterator forward and reverse and try\r
447      * some strings that should come out unchanged.\r
448      */\r
449     public void TestCompoundKana() {\r
450         Transliterator t = Transliterator.getInstance("Latin-Katakana;Katakana-Latin");\r
451         expect(t, "aaaaa", "aaaaa");\r
452     }\r
453 \r
454     /**\r
455      * Compose the hex transliterators forward and reverse.\r
456      */\r
457     public void TestCompoundHex() {\r
458         Transliterator a = Transliterator.getInstance("Any-Hex");\r
459         Transliterator b = Transliterator.getInstance("Hex-Any");\r
460         // Transliterator[] trans = { a, b };\r
461         // Transliterator ab = Transliterator.getInstance(trans);\r
462         Transliterator ab = Transliterator.getInstance("Any-Hex;Hex-Any");\r
463 \r
464         // Do some basic tests of b\r
465         expect(b, "\\u0030\\u0031", "01");\r
466 \r
467         String s = "abcde";\r
468         expect(ab, s, s);\r
469 \r
470         // trans = new Transliterator[] { b, a };\r
471         // Transliterator ba = Transliterator.getInstance(trans);\r
472         Transliterator ba = Transliterator.getInstance("Hex-Any;Any-Hex");\r
473         ReplaceableString str = new ReplaceableString(s);\r
474         a.transliterate(str);\r
475         expect(ba, str.toString(), str.toString());\r
476     }\r
477 \r
478     /**\r
479      * Do some basic tests of filtering.\r
480      */\r
481     public void TestFiltering() {\r
482         Transliterator hex = Transliterator.getInstance("Any-Hex");\r
483         hex.setFilter(new UnicodeFilter() {\r
484             public boolean contains(int c) {\r
485                 return c != 'c';\r
486             }\r
487             public String toPattern(boolean escapeUnprintable) {\r
488                 return "";\r
489             }\r
490             public boolean matchesIndexValue(int v) {\r
491                 return false;\r
492             }\r
493             public void addMatchSetTo(UnicodeSet toUnionTo) {}\r
494         });\r
495         String s = "abcde";\r
496         String out = hex.transliterate(s);\r
497         String exp = "\\u0061\\u0062c\\u0064\\u0065";\r
498         if (out.equals(exp)) {\r
499             logln("Ok:   \"" + exp + "\"");\r
500         } else {\r
501             logln("FAIL: \"" + out + "\", wanted \"" + exp + "\"");\r
502         }\r
503     }\r
504 \r
505     /**\r
506      * Test anchors\r
507      */\r
508     public void TestAnchors() {\r
509         expect("^ab  > 01 ;" +\r
510                 " ab  > |8 ;" +\r
511                 "  b  > k ;" +\r
512                 " 8x$ > 45 ;" +\r
513                 " 8x  > 77 ;",\r
514 \r
515                 "ababbabxabx",\r
516         "018k7745");\r
517         expect("$s = [z$] ;" +\r
518                 "$s{ab    > 01 ;" +\r
519                 "   ab    > |8 ;" +\r
520                 "    b    > k ;" +\r
521                 "   8x}$s > 45 ;" +\r
522                 "   8x    > 77 ;",\r
523 \r
524                 "abzababbabxzabxabx",\r
525         "01z018k45z01x45");\r
526     }\r
527 \r
528     /**\r
529      * Test pattern quoting and escape mechanisms.\r
530      */\r
531     public void TestPatternQuoting() {\r
532         // Array of 3n items\r
533         // Each item is <rules>, <input>, <expected output>\r
534         String[] DATA = {\r
535                 "\u4E01>'[male adult]'", "\u4E01", "[male adult]",\r
536         };\r
537 \r
538         for (int i=0; i<DATA.length; i+=3) {\r
539             logln("Pattern: " + Utility.escape(DATA[i]));\r
540             Transliterator t = Transliterator.createFromRules("<ID>", DATA[i], Transliterator.FORWARD);\r
541             expect(t, DATA[i+1], DATA[i+2]);\r
542         }\r
543     }\r
544 \r
545     public void TestVariableNames() {\r
546         Transliterator gl = Transliterator.createFromRules("foo5", "$\u2DC0 = qy; a>b;", Transliterator.FORWARD);\r
547         if (gl == null) {\r
548             errln("FAIL: null Transliterator returned.");\r
549         }\r
550     }\r
551 \r
552     /**\r
553      * Regression test for bugs found in Greek transliteration.\r
554      */\r
555     public void TestJ277() {\r
556         Transliterator gl = Transliterator.getInstance("Greek-Latin; NFD; [:M:]Remove; NFC");\r
557 \r
558         char sigma = (char)0x3C3;\r
559         char upsilon = (char)0x3C5;\r
560         char nu = (char)0x3BD;\r
561         // not used char PHI = (char)0x3A6;\r
562         char alpha = (char)0x3B1;\r
563         // not used char omega = (char)0x3C9;\r
564         // not used char omicron = (char)0x3BF;\r
565         // not used char epsilon = (char)0x3B5;\r
566 \r
567         // sigma upsilon nu -> syn\r
568         StringBuffer buf = new StringBuffer();\r
569         buf.append(sigma).append(upsilon).append(nu);\r
570         String syn = buf.toString();\r
571         expect(gl, syn, "syn");\r
572 \r
573         // sigma alpha upsilon nu -> saun\r
574         buf.setLength(0);\r
575         buf.append(sigma).append(alpha).append(upsilon).append(nu);\r
576         String sayn = buf.toString();\r
577         expect(gl, sayn, "saun");\r
578 \r
579         // Again, using a smaller rule set\r
580         String rules =\r
581             "$alpha   = \u03B1;" +\r
582             "$nu      = \u03BD;" +\r
583             "$sigma   = \u03C3;" +\r
584             "$ypsilon = \u03C5;" +\r
585             "$vowel   = [aeiouAEIOU$alpha$ypsilon];" +\r
586             "s <>           $sigma;" +\r
587             "a <>           $alpha;" +\r
588             "u <>  $vowel { $ypsilon;" +\r
589             "y <>           $ypsilon;" +\r
590             "n <>           $nu;";\r
591         Transliterator mini = Transliterator.createFromRules\r
592         ("mini", rules, Transliterator.REVERSE);\r
593         expect(mini, syn, "syn");\r
594         expect(mini, sayn, "saun");\r
595 \r
596         //|    // Transliterate the Greek locale data\r
597         //|    Locale el("el");\r
598         //|    DateFormatSymbols syms(el, status);\r
599         //|    if (U_FAILURE(status)) { errln("FAIL: Transliterator constructor failed"); return; }\r
600         //|    int32_t i, count;\r
601         //|    const UnicodeString* data = syms.getMonths(count);\r
602         //|    for (i=0; i<count; ++i) {\r
603         //|        if (data[i].length() == 0) {\r
604         //|            continue;\r
605         //|        }\r
606         //|        UnicodeString out(data[i]);\r
607         //|        gl->transliterate(out);\r
608         //|        bool_t ok = TRUE;\r
609         //|        if (data[i].length() >= 2 && out.length() >= 2 &&\r
610         //|            u_isupper(data[i].charAt(0)) && u_islower(data[i].charAt(1))) {\r
611         //|            if (!(u_isupper(out.charAt(0)) && u_islower(out.charAt(1)))) {\r
612         //|                ok = FALSE;\r
613         //|            }\r
614         //|        }\r
615         //|        if (ok) {\r
616         //|            logln(prettify(data[i] + " -> " + out));\r
617         //|        } else {\r
618         //|            errln(UnicodeString("FAIL: ") + prettify(data[i] + " -> " + out));\r
619         //|        }\r
620         //|    }\r
621     }\r
622 \r
623     //    /**\r
624     //     * Prefix, suffix support in hex transliterators\r
625     //     */\r
626     //    public void TestJ243() {\r
627     //        // Test default Hex-Any, which should handle\r
628     //        // \\u, \\U, u+, and U+\r
629     //        HexToUnicodeTransliterator hex = new HexToUnicodeTransliterator();\r
630     //        expect(hex, "\\u0041+\\U0042,u+0043uu+0044z", "A+B,CuDz");\r
631     //\r
632     //        // Try a custom Hex-Any\r
633     //        // \\uXXXX and &#xXXXX;\r
634     //        HexToUnicodeTransliterator hex2 = new HexToUnicodeTransliterator("\\\\u###0;&\\#x###0\\;");\r
635     //        expect(hex2, "\\u61\\u062\\u0063\\u00645\\u66x&#x30;&#x031;&#x0032;&#x00033;",\r
636     //               "abcd5fx012&#x00033;");\r
637     //\r
638     //        // Try custom Any-Hex (default is tested elsewhere)\r
639     //        UnicodeToHexTransliterator hex3 = new UnicodeToHexTransliterator("&\\#x###0;");\r
640     //        expect(hex3, "012", "&#x30;&#x31;&#x32;");\r
641     //    }\r
642 \r
643     public void TestJ329() {\r
644 \r
645         Object[] DATA = {\r
646                 Boolean.FALSE, "a > b; c > d",\r
647                 Boolean.TRUE,  "a > b; no operator; c > d",\r
648         };\r
649 \r
650         for (int i=0; i<DATA.length; i+=2) {\r
651             String err = null;\r
652             try {\r
653                 Transliterator.createFromRules("<ID>",\r
654                         (String) DATA[i+1],\r
655                         Transliterator.FORWARD);\r
656             } catch (IllegalArgumentException e) {\r
657                 err = e.getMessage();\r
658             }\r
659             boolean gotError = (err != null);\r
660             String desc = (String) DATA[i+1] +\r
661             (gotError ? (" -> error: " + err) : " -> no error");\r
662             if ((err != null) == ((Boolean)DATA[i]).booleanValue()) {\r
663                 logln("Ok:   " + desc);\r
664             } else {\r
665                 errln("FAIL: " + desc);\r
666             }\r
667         }\r
668     }\r
669 \r
670     /**\r
671      * Test segments and segment references.\r
672      */\r
673     public void TestSegments() {\r
674         // Array of 3n items\r
675         // Each item is <rules>, <input>, <expected output>\r
676         String[] DATA = {\r
677                 "([a-z]) '.' ([0-9]) > $2 '-' $1",\r
678                 "abc.123.xyz.456",\r
679                 "ab1-c23.xy4-z56",\r
680         };\r
681 \r
682         for (int i=0; i<DATA.length; i+=3) {\r
683             logln("Pattern: " + Utility.escape(DATA[i]));\r
684             Transliterator t = Transliterator.createFromRules("<ID>", DATA[i], Transliterator.FORWARD);\r
685             expect(t, DATA[i+1], DATA[i+2]);\r
686         }\r
687     }\r
688 \r
689     /**\r
690      * Test cursor positioning outside of the key\r
691      */\r
692     public void TestCursorOffset() {\r
693         // Array of 3n items\r
694         // Each item is <rules>, <input>, <expected output>\r
695         String[] DATA = {\r
696                 "pre {alpha} post > | @ ALPHA ;" +\r
697                 "eALPHA > beta ;" +\r
698                 "pre {beta} post > BETA @@ | ;" +\r
699                 "post > xyz",\r
700 \r
701                 "prealphapost prebetapost",\r
702                 "prbetaxyz preBETApost",\r
703         };\r
704 \r
705         for (int i=0; i<DATA.length; i+=3) {\r
706             logln("Pattern: " + Utility.escape(DATA[i]));\r
707             Transliterator t = Transliterator.createFromRules("<ID>", DATA[i], Transliterator.FORWARD);\r
708             expect(t, DATA[i+1], DATA[i+2]);\r
709         }\r
710     }\r
711 \r
712     /**\r
713      * Test zero length and > 1 char length variable values.  Test\r
714      * use of variable refs in UnicodeSets.\r
715      */\r
716     public void TestArbitraryVariableValues() {\r
717         // Array of 3n items\r
718         // Each item is <rules>, <input>, <expected output>\r
719         String[] DATA = {\r
720                 "$abe = ab;" +\r
721                 "$pat = x[yY]z;" +\r
722                 "$ll  = 'a-z';" +\r
723                 "$llZ = [$ll];" +\r
724                 "$llY = [$ll$pat];" +\r
725                 "$emp = ;" +\r
726 \r
727                 "$abe > ABE;" +\r
728                 "$pat > END;" +\r
729                 "$llZ > 1;" +\r
730                 "$llY > 2;" +\r
731                 "7$emp 8 > 9;" +\r
732                 "",\r
733 \r
734                 "ab xYzxyz stY78",\r
735                 "ABE ENDEND 1129",\r
736         };\r
737 \r
738         for (int i=0; i<DATA.length; i+=3) {\r
739             logln("Pattern: " + Utility.escape(DATA[i]));\r
740             Transliterator t = Transliterator.createFromRules("<ID>", DATA[i], Transliterator.FORWARD);\r
741             expect(t, DATA[i+1], DATA[i+2]);\r
742         }\r
743     }\r
744 \r
745     /**\r
746      * Confirm that the contextStart, contextLimit, start, and limit\r
747      * behave correctly.\r
748      */\r
749     public void TestPositionHandling() {\r
750         // Array of 3n items\r
751         // Each item is <rules>, <input>, <expected output>\r
752         String[] DATA = {\r
753                 "a{t} > SS ; {t}b > UU ; {t} > TT ;",\r
754                 "xtat txtb", // pos 0,9,0,9\r
755                 "xTTaSS TTxUUb",\r
756 \r
757                 "a{t} > SS ; {t}b > UU ; {t} > TT ;",\r
758                 "xtat txtb", // pos 2,9,3,8\r
759                 "xtaSS TTxUUb",\r
760 \r
761                 "a{t} > SS ; {t}b > UU ; {t} > TT ;",\r
762                 "xtat txtb", // pos 3,8,3,8\r
763                 "xtaTT TTxTTb",\r
764         };\r
765 \r
766         // Array of 4n positions -- these go with the DATA array\r
767         // They are: contextStart, contextLimit, start, limit\r
768         int[] POS = {\r
769                 0, 9, 0, 9,\r
770                 2, 9, 3, 8,\r
771                 3, 8, 3, 8,\r
772         };\r
773 \r
774         int n = DATA.length/3;\r
775         for (int i=0; i<n; i++) {\r
776             Transliterator t = Transliterator.createFromRules("<ID>", DATA[3*i], Transliterator.FORWARD);\r
777             Transliterator.Position pos = new Transliterator.Position(\r
778                     POS[4*i], POS[4*i+1], POS[4*i+2], POS[4*i+3]);\r
779             ReplaceableString rsource = new ReplaceableString(DATA[3*i+1]);\r
780             t.transliterate(rsource, pos);\r
781             t.finishTransliteration(rsource, pos);\r
782             String result = rsource.toString();\r
783             String exp = DATA[3*i+2];\r
784             expectAux(Utility.escape(DATA[3*i]),\r
785                     DATA[3*i+1],\r
786                     result,\r
787                     result.equals(exp),\r
788                     exp);\r
789         }\r
790     }\r
791 \r
792     /**\r
793      * Test the Hiragana-Katakana transliterator.\r
794      */\r
795     public void TestHiraganaKatakana() {\r
796         Transliterator hk = Transliterator.getInstance("Hiragana-Katakana");\r
797         Transliterator kh = Transliterator.getInstance("Katakana-Hiragana");\r
798 \r
799         // Array of 3n items\r
800         // Each item is "hk"|"kh"|"both", <Hiragana>, <Katakana>\r
801         String[] DATA = {\r
802                 "both",\r
803                 "\u3042\u3090\u3099\u3092\u3050",\r
804                 "\u30A2\u30F8\u30F2\u30B0",\r
805 \r
806                 "kh",\r
807                 "\u307C\u3051\u3060\u3042\u3093\u30FC",\r
808                 "\u30DC\u30F6\u30C0\u30FC\u30F3\u30FC",\r
809         };\r
810 \r
811         for (int i=0; i<DATA.length; i+=3) {\r
812             switch (DATA[i].charAt(0)) {\r
813             case 'h': // Hiragana-Katakana\r
814                 expect(hk, DATA[i+1], DATA[i+2]);\r
815                 break;\r
816             case 'k': // Katakana-Hiragana\r
817                 expect(kh, DATA[i+2], DATA[i+1]);\r
818                 break;\r
819             case 'b': // both\r
820                 expect(hk, DATA[i+1], DATA[i+2]);\r
821                 expect(kh, DATA[i+2], DATA[i+1]);\r
822                 break;\r
823             }\r
824         }\r
825 \r
826     }\r
827 \r
828     public void TestCopyJ476() {\r
829         // This is a C++-only copy constructor test\r
830     }\r
831 \r
832     /**\r
833      * Test inter-Indic transliterators.  These are composed.\r
834      */\r
835     public void TestInterIndic() {\r
836         String ID = "Devanagari-Gujarati";\r
837         Transliterator dg = Transliterator.getInstance(ID);\r
838         if (dg == null) {\r
839             errln("FAIL: getInstance(" + ID + ") returned null");\r
840             return;\r
841         }\r
842         String id = dg.getID();\r
843         if (!id.equals(ID)) {\r
844             errln("FAIL: getInstance(" + ID + ").getID() => " + id);\r
845         }\r
846         String dev = "\u0901\u090B\u0925";\r
847         String guj = "\u0A81\u0A8B\u0AA5";\r
848         expect(dg, dev, guj);\r
849     }\r
850 \r
851     /**\r
852      * Test filter syntax in IDs. (J23)\r
853      */\r
854     public void TestFilterIDs() {\r
855         String[] DATA = {\r
856                 "[aeiou]Any-Hex", // ID\r
857                 "[aeiou]Hex-Any", // expected inverse ID\r
858                 "quizzical",      // src\r
859                 "q\\u0075\\u0069zz\\u0069c\\u0061l", // expected ID.translit(src)\r
860 \r
861                 "[aeiou]Any-Hex;[^5]Hex-Any",\r
862                 "[^5]Any-Hex;[aeiou]Hex-Any",\r
863                 "quizzical",\r
864                 "q\\u0075izzical",\r
865 \r
866                 "[abc]Null",\r
867                 "[abc]Null",\r
868                 "xyz",\r
869                 "xyz",\r
870         };\r
871 \r
872         for (int i=0; i<DATA.length; i+=4) {\r
873             String ID = DATA[i];\r
874             Transliterator t = Transliterator.getInstance(ID);\r
875             expect(t, DATA[i+2], DATA[i+3]);\r
876 \r
877             // Check the ID\r
878             if (!ID.equals(t.getID())) {\r
879                 errln("FAIL: getInstance(" + ID + ").getID() => " +\r
880                         t.getID());\r
881             }\r
882 \r
883             // Check the inverse\r
884             String uID = DATA[i+1];\r
885             Transliterator u = t.getInverse();\r
886             if (u == null) {\r
887                 errln("FAIL: " + ID + ".getInverse() returned NULL");\r
888             } else if (!u.getID().equals(uID)) {\r
889                 errln("FAIL: " + ID + ".getInverse().getID() => " +\r
890                         u.getID() + ", expected " + uID);\r
891             }\r
892         }\r
893     }\r
894 \r
895     /**\r
896      * Test the case mapping transliterators.\r
897      */\r
898     public void TestCaseMap() {\r
899         Transliterator toUpper =\r
900             Transliterator.getInstance("Any-Upper[^xyzXYZ]");\r
901         Transliterator toLower =\r
902             Transliterator.getInstance("Any-Lower[^xyzXYZ]");\r
903         Transliterator toTitle =\r
904             Transliterator.getInstance("Any-Title[^xyzXYZ]");\r
905 \r
906         expect(toUpper, "The quick brown fox jumped over the lazy dogs.",\r
907         "THE QUICK BROWN FOx JUMPED OVER THE LAzy DOGS.");\r
908         expect(toLower, "The quIck brown fOX jUMPED OVER THE LAzY dogs.",\r
909         "the quick brown foX jumped over the lazY dogs.");\r
910         expect(toTitle, "the quick brown foX caN'T jump over the laZy dogs.",\r
911         "The Quick Brown FoX Can't Jump Over The LaZy Dogs.");\r
912     }\r
913 \r
914     /**\r
915      * Test the name mapping transliterators.\r
916      */\r
917     public void TestNameMap() {\r
918         Transliterator uni2name =\r
919             Transliterator.getInstance("Any-Name[^abc]");\r
920         Transliterator name2uni =\r
921             Transliterator.getInstance("Name-Any");\r
922 \r
923         expect(uni2name, "\u00A0abc\u4E01\u00B5\u0A81\uFFFD\u0004\u0009\u0081\uFFFF",\r
924         "\\N{NO-BREAK SPACE}abc\\N{CJK UNIFIED IDEOGRAPH-4E01}\\N{MICRO SIGN}\\N{GUJARATI SIGN CANDRABINDU}\\N{REPLACEMENT CHARACTER}\\N{END OF TRANSMISSION}\\N{CHARACTER TABULATION}\\N{<control-0081>}\\N{<noncharacter-FFFF>}");\r
925         expect(name2uni, "{\\N { NO-BREAK SPACE}abc\\N{  CJK UNIFIED  IDEOGRAPH-4E01  }\\N{x\\N{MICRO SIGN}\\N{GUJARATI SIGN CANDRABINDU}\\N{REPLACEMENT CHARACTER}\\N{END OF TRANSMISSION}\\N{CHARACTER TABULATION}\\N{<control-0081>}\\N{<noncharacter-FFFF>}\\N{<control-0004>}\\N{",\r
926         "{\u00A0abc\u4E01\\N{x\u00B5\u0A81\uFFFD\u0004\u0009\u0081\uFFFF\u0004\\N{");\r
927 \r
928         // round trip\r
929         Transliterator t = Transliterator.getInstance("Any-Name;Name-Any");\r
930 \r
931         String s = "{\u00A0abc\u4E01\\N{x\u00B5\u0A81\uFFFD\u0004\u0009\u0081\uFFFF\u0004\\N{";\r
932         expect(t, s, s);\r
933     }\r
934 \r
935     /**\r
936      * Test liberalized ID syntax.  1006c\r
937      */\r
938     public void TestLiberalizedID() {\r
939         // Some test cases have an expected getID() value of NULL.  This\r
940         // means I have disabled the test case for now.  This stuff is\r
941         // still under development, and I haven't decided whether to make\r
942         // getID() return canonical case yet.  It will all get rewritten\r
943         // with the move to Source-Target/Variant IDs anyway. [aliu]\r
944         String DATA[] = {\r
945                 "latin-greek", null /*"Latin-Greek"*/, "case insensitivity",\r
946                 "  Null  ", "Null", "whitespace",\r
947                 " Latin[a-z]-Greek  ", "[a-z]Latin-Greek", "inline filter",\r
948                 "  null  ; latin-greek  ", null /*"Null;Latin-Greek"*/, "compound whitespace",\r
949         };\r
950 \r
951         for (int i=0; i<DATA.length; i+=3) {\r
952             try {\r
953                 Transliterator t = Transliterator.getInstance(DATA[i]);\r
954                 if (DATA[i+1] == null || DATA[i+1].equals(t.getID())) {\r
955                     logln("Ok: " + DATA[i+2] +\r
956                             " create ID \"" + DATA[i] + "\" => \"" +\r
957                             t.getID() + "\"");\r
958                 } else {\r
959                     errln("FAIL: " + DATA[i+2] +\r
960                             " create ID \"" + DATA[i] + "\" => \"" +\r
961                             t.getID() + "\", exp \"" + DATA[i+1] + "\"");\r
962                 }\r
963             } catch (IllegalArgumentException e) {\r
964                 errln("FAIL: " + DATA[i+2] +\r
965                         " create ID \"" + DATA[i] + "\"");\r
966             }\r
967         }\r
968     }\r
969 \r
970     public void TestCreateInstance() {\r
971         String FORWARD = "F";\r
972         String REVERSE = "R";\r
973         String DATA[] = {\r
974                 // Column 1: id\r
975                 // Column 2: direction\r
976                 // Column 3: expected ID, or "" if expect failure\r
977                 "Latin-Hangul", REVERSE, "Hangul-Latin", // JB#912\r
978 \r
979                 // JB#2689: bad compound causes crash\r
980                 "InvalidSource-InvalidTarget", FORWARD, "",\r
981                 "InvalidSource-InvalidTarget", REVERSE, "",\r
982                 "Hex-Any;InvalidSource-InvalidTarget", FORWARD, "",\r
983                 "Hex-Any;InvalidSource-InvalidTarget", REVERSE, "",\r
984                 "InvalidSource-InvalidTarget;Hex-Any", FORWARD, "",\r
985                 "InvalidSource-InvalidTarget;Hex-Any", REVERSE, "",\r
986 \r
987                 null\r
988         };\r
989 \r
990         for (int i=0; DATA[i]!=null; i+=3) {\r
991             String id=DATA[i];\r
992             int dir = (DATA[i+1]==FORWARD)?\r
993                     Transliterator.FORWARD:Transliterator.REVERSE;\r
994             String expID=DATA[i+2];\r
995             Exception e = null;\r
996             Transliterator t;\r
997             try {\r
998                 t = Transliterator.getInstance(id,dir);\r
999             } catch (Exception e1) {\r
1000                 e = e1;\r
1001                 t = null;\r
1002             }\r
1003             String newID = (t!=null)?t.getID():"";\r
1004             boolean ok = (newID.equals(expID));\r
1005             if (t==null) {\r
1006                 newID = e.getMessage();\r
1007             }\r
1008             if (ok) {\r
1009                 logln("Ok: createInstance(" +\r
1010                         id + "," + DATA[i+1] + ") => " + newID);\r
1011             } else {\r
1012                 errln("FAIL: createInstance(" +\r
1013                         id + "," + DATA[i+1] + ") => " + newID +\r
1014                         ", expected " + expID);\r
1015             }\r
1016         }\r
1017     }\r
1018 \r
1019     /**\r
1020      * Test the normalization transliterator.\r
1021      */\r
1022     public void TestNormalizationTransliterator() {\r
1023         // THE FOLLOWING TWO TABLES ARE COPIED FROM com.ibm.icu.dev.test.normalizer.BasicTest\r
1024         // PLEASE KEEP THEM IN SYNC WITH BasicTest.\r
1025         String[][] CANON = {\r
1026                 // Input               Decomposed            Composed\r
1027                 {"cat",                "cat",                "cat"               },\r
1028                 {"\u00e0ardvark",      "a\u0300ardvark",     "\u00e0ardvark"     },\r
1029 \r
1030                 {"\u1e0a",             "D\u0307",            "\u1e0a"            }, // D-dot_above\r
1031                 {"D\u0307",            "D\u0307",            "\u1e0a"            }, // D dot_above\r
1032 \r
1033                 {"\u1e0c\u0307",       "D\u0323\u0307",      "\u1e0c\u0307"      }, // D-dot_below dot_above\r
1034                 {"\u1e0a\u0323",       "D\u0323\u0307",      "\u1e0c\u0307"      }, // D-dot_above dot_below\r
1035                 {"D\u0307\u0323",      "D\u0323\u0307",      "\u1e0c\u0307"      }, // D dot_below dot_above\r
1036 \r
1037                 {"\u1e10\u0307\u0323", "D\u0327\u0323\u0307","\u1e10\u0323\u0307"}, // D dot_below cedilla dot_above\r
1038                 {"D\u0307\u0328\u0323","D\u0328\u0323\u0307","\u1e0c\u0328\u0307"}, // D dot_above ogonek dot_below\r
1039 \r
1040                 {"\u1E14",             "E\u0304\u0300",      "\u1E14"            }, // E-macron-grave\r
1041                 {"\u0112\u0300",       "E\u0304\u0300",      "\u1E14"            }, // E-macron + grave\r
1042                 {"\u00c8\u0304",       "E\u0300\u0304",      "\u00c8\u0304"      }, // E-grave + macron\r
1043 \r
1044                 {"\u212b",             "A\u030a",            "\u00c5"            }, // angstrom_sign\r
1045                 {"\u00c5",             "A\u030a",            "\u00c5"            }, // A-ring\r
1046 \r
1047                 {"\u00fdffin",         "y\u0301ffin",        "\u00fdffin"        }, //updated with 3.0\r
1048                 {"\u00fd\uFB03n",      "y\u0301\uFB03n",     "\u00fd\uFB03n"     }, //updated with 3.0\r
1049 \r
1050                 {"Henry IV",           "Henry IV",           "Henry IV"          },\r
1051                 {"Henry \u2163",       "Henry \u2163",       "Henry \u2163"      },\r
1052 \r
1053                 {"\u30AC",             "\u30AB\u3099",       "\u30AC"            }, // ga (Katakana)\r
1054                 {"\u30AB\u3099",       "\u30AB\u3099",       "\u30AC"            }, // ka + ten\r
1055                 {"\uFF76\uFF9E",       "\uFF76\uFF9E",       "\uFF76\uFF9E"      }, // hw_ka + hw_ten\r
1056                 {"\u30AB\uFF9E",       "\u30AB\uFF9E",       "\u30AB\uFF9E"      }, // ka + hw_ten\r
1057                 {"\uFF76\u3099",       "\uFF76\u3099",       "\uFF76\u3099"      }, // hw_ka + ten\r
1058 \r
1059                 {"A\u0300\u0316",      "A\u0316\u0300",      "\u00C0\u0316"      },\r
1060         };\r
1061 \r
1062         String[][] COMPAT = {\r
1063                 // Input               Decomposed            Composed\r
1064                 {"\uFB4f",             "\u05D0\u05DC",       "\u05D0\u05DC"      }, // Alef-Lamed vs. Alef, Lamed\r
1065 \r
1066                 {"\u00fdffin",         "y\u0301ffin",        "\u00fdffin"        }, //updated for 3.0\r
1067                 {"\u00fd\uFB03n",      "y\u0301ffin",        "\u00fdffin"        }, // ffi ligature -> f + f + i\r
1068 \r
1069                 {"Henry IV",           "Henry IV",           "Henry IV"          },\r
1070                 {"Henry \u2163",       "Henry IV",           "Henry IV"          },\r
1071 \r
1072                 {"\u30AC",             "\u30AB\u3099",       "\u30AC"            }, // ga (Katakana)\r
1073                 {"\u30AB\u3099",       "\u30AB\u3099",       "\u30AC"            }, // ka + ten\r
1074 \r
1075                 {"\uFF76\u3099",       "\u30AB\u3099",       "\u30AC"            }, // hw_ka + ten\r
1076         };\r
1077 \r
1078         Transliterator NFD = Transliterator.getInstance("NFD");\r
1079         Transliterator NFC = Transliterator.getInstance("NFC");\r
1080         for (int i=0; i<CANON.length; ++i) {\r
1081             String in = CANON[i][0];\r
1082             String expd = CANON[i][1];\r
1083             String expc = CANON[i][2];\r
1084             expect(NFD, in, expd);\r
1085             expect(NFC, in, expc);\r
1086         }\r
1087 \r
1088         Transliterator NFKD = Transliterator.getInstance("NFKD");\r
1089         Transliterator NFKC = Transliterator.getInstance("NFKC");\r
1090         for (int i=0; i<COMPAT.length; ++i) {\r
1091             String in = COMPAT[i][0];\r
1092             String expkd = COMPAT[i][1];\r
1093             String expkc = COMPAT[i][2];\r
1094             expect(NFKD, in, expkd);\r
1095             expect(NFKC, in, expkc);\r
1096         }\r
1097 \r
1098         Transliterator t = Transliterator.getInstance("NFD; [x]Remove");\r
1099         expect(t, "\u010dx", "c\u030C");\r
1100     }\r
1101 \r
1102     /**\r
1103      * Test compound RBT rules.\r
1104      */\r
1105     public void TestCompoundRBT() {\r
1106         // Careful with spacing and ';' here:  Phrase this exactly\r
1107         // as toRules() is going to return it.  If toRules() changes\r
1108         // with regard to spacing or ';', then adjust this string.\r
1109         String rule = "::Hex-Any;\n" +\r
1110         "::Any-Lower;\n" +\r
1111         "a > '.A.';\n" +\r
1112         "b > '.B.';\n" +\r
1113         "::[^t]Any-Upper;";\r
1114         Transliterator t = Transliterator.createFromRules("Test", rule, Transliterator.FORWARD);\r
1115         if (t == null) {\r
1116             errln("FAIL: createFromRules failed");\r
1117             return;\r
1118         }\r
1119         expect(t, "\u0043at in the hat, bat on the mat",\r
1120         "C.A.t IN tHE H.A.t, .B..A.t ON tHE M.A.t");\r
1121         String r = t.toRules(true);\r
1122         if (r.equals(rule)) {\r
1123             logln("OK: toRules() => " + r);\r
1124         } else {\r
1125             errln("FAIL: toRules() => " + r +\r
1126                     ", expected " + rule);\r
1127         }\r
1128 \r
1129         // Now test toRules\r
1130         t = Transliterator.getInstance("Greek-Latin; Latin-Cyrillic", Transliterator.FORWARD);\r
1131         if (t == null) {\r
1132             errln("FAIL: createInstance failed");\r
1133             return;\r
1134         }\r
1135         String exp = "::Greek-Latin;\n::Latin-Cyrillic;";\r
1136         r = t.toRules(true);\r
1137         if (!r.equals(exp)) {\r
1138             errln("FAIL: toRules() => " + r +\r
1139                     ", expected " + exp);\r
1140         } else {\r
1141             logln("OK: toRules() => " + r);\r
1142         }\r
1143 \r
1144         // Round trip the result of toRules\r
1145         t = Transliterator.createFromRules("Test", r, Transliterator.FORWARD);\r
1146         if (t == null) {\r
1147             errln("FAIL: createFromRules #2 failed");\r
1148             return;\r
1149         } else {\r
1150             logln("OK: createFromRules(" + r + ") succeeded");\r
1151         }\r
1152 \r
1153         // Test toRules again\r
1154         r = t.toRules(true);\r
1155         if (!r.equals(exp)) {\r
1156             errln("FAIL: toRules() => " + r +\r
1157                     ", expected " + exp);\r
1158         } else {\r
1159             logln("OK: toRules() => " + r);\r
1160         }\r
1161 \r
1162         // Test Foo(Bar) IDs.  Careful with spacing in id; make it conform\r
1163         // to what the regenerated ID will look like.\r
1164         String id = "Upper(Lower);(NFKC)";\r
1165         t = Transliterator.getInstance(id, Transliterator.FORWARD);\r
1166         if (t == null) {\r
1167             errln("FAIL: createInstance #2 failed");\r
1168             return;\r
1169         }\r
1170         if (t.getID().equals(id)) {\r
1171             logln("OK: created " + id);\r
1172         } else {\r
1173             errln("FAIL: createInstance(" + id +\r
1174                     ").getID() => " + t.getID());\r
1175         }\r
1176 \r
1177         Transliterator u = t.getInverse();\r
1178         if (u == null) {\r
1179             errln("FAIL: createInverse failed");\r
1180             return;\r
1181         }\r
1182         exp = "NFKC();Lower(Upper)";\r
1183         if (u.getID().equals(exp)) {\r
1184             logln("OK: createInverse(" + id + ") => " +\r
1185                     u.getID());\r
1186         } else {\r
1187             errln("FAIL: createInverse(" + id + ") => " +\r
1188                     u.getID());\r
1189         }\r
1190     }\r
1191 \r
1192     /**\r
1193      * Compound filter semantics were orginially not implemented\r
1194      * correctly.  Originally, each component filter f(i) is replaced by\r
1195      * f'(i) = f(i) && g, where g is the filter for the compound\r
1196      * transliterator.\r
1197      *\r
1198      * From Mark:\r
1199      *\r
1200      * Suppose and I have a transliterator X. Internally X is\r
1201      * "Greek-Latin; Latin-Cyrillic; Any-Lower". I use a filter [^A].\r
1202      *\r
1203      * The compound should convert all greek characters (through latin) to\r
1204      * cyrillic, then lowercase the result. The filter should say "don't\r
1205      * touch 'A' in the original". But because an intermediate result\r
1206      * happens to go through "A", the Greek Alpha gets hung up.\r
1207      */\r
1208     public void TestCompoundFilter() {\r
1209         Transliterator t = Transliterator.getInstance\r
1210         ("Greek-Latin; Latin-Greek; Lower", Transliterator.FORWARD);\r
1211         t.setFilter(new UnicodeSet("[^A]"));\r
1212 \r
1213         // Only the 'A' at index 1 should remain unchanged\r
1214         expect(t,\r
1215                 CharsToUnicodeString("BA\\u039A\\u0391"),\r
1216                 CharsToUnicodeString("\\u03b2A\\u03ba\\u03b1"));\r
1217     }\r
1218 \r
1219     /**\r
1220      * Test the "Remove" transliterator.\r
1221      */\r
1222     public void TestRemove() {\r
1223         Transliterator t = Transliterator.getInstance("Remove[aeiou]");\r
1224         expect(t, "The quick brown fox.",\r
1225         "Th qck brwn fx.");\r
1226     }\r
1227 \r
1228     public void TestToRules() {\r
1229         String RBT = "rbt";\r
1230         String SET = "set";\r
1231         String[] DATA = {\r
1232                 RBT,\r
1233                 "$a=\\u4E61; [$a] > A;",\r
1234                 "[\\u4E61] > A;",\r
1235 \r
1236                 RBT,\r
1237                 "$white=[[:Zs:][:Zl:]]; $white{a} > A;",\r
1238                 "[[:Zs:][:Zl:]]{a} > A;",\r
1239 \r
1240                 SET,\r
1241                 "[[:Zs:][:Zl:]]",\r
1242                 "[[:Zs:][:Zl:]]",\r
1243 \r
1244                 SET,\r
1245                 "[:Ps:]",\r
1246                 "[:Ps:]",\r
1247 \r
1248                 SET,\r
1249                 "[:L:]",\r
1250                 "[:L:]",\r
1251 \r
1252                 SET,\r
1253                 "[[:L:]-[A]]",\r
1254                 "[[:L:]-[A]]",\r
1255 \r
1256                 SET,\r
1257                 "[~[:Lu:][:Ll:]]",\r
1258                 "[~[:Lu:][:Ll:]]",\r
1259 \r
1260                 SET,\r
1261                 "[~[a-z]]",\r
1262                 "[~[a-z]]",\r
1263 \r
1264                 RBT,\r
1265                 "$white=[:Zs:]; $black=[^$white]; $black{a} > A;",\r
1266                 "[^[:Zs:]]{a} > A;",\r
1267 \r
1268                 RBT,\r
1269                 "$a=[:Zs:]; $b=[[a-z]-$a]; $b{a} > A;",\r
1270                 "[[a-z]-[:Zs:]]{a} > A;",\r
1271 \r
1272                 RBT,\r
1273                 "$a=[:Zs:]; $b=[$a&[a-z]]; $b{a} > A;",\r
1274                 "[[:Zs:]&[a-z]]{a} > A;",\r
1275 \r
1276                 RBT,\r
1277                 "$a=[:Zs:]; $b=[x$a]; $b{a} > A;",\r
1278                 "[x[:Zs:]]{a} > A;",\r
1279 \r
1280                 RBT,\r
1281                 "$accentMinus = [ [\\u0300-\\u0345] & [:M:] - [\\u0338]] ;"+\r
1282                 "$macron = \\u0304 ;"+\r
1283                 "$evowel = [aeiouyAEIOUY] ;"+\r
1284                 "$iotasub = \\u0345 ;"+\r
1285                 "($evowel $macron $accentMinus *) i > | $1 $iotasub ;",\r
1286                 "([AEIOUYaeiouy]\\u0304[[\\u0300-\\u0345]&[:M:]-[\\u0338]]*)i > | $1 \\u0345;",\r
1287 \r
1288                 RBT,\r
1289                 "([AEIOUYaeiouy]\\u0304[[:M:]-[\\u0304\\u0345]]*)i > | $1 \\u0345;",\r
1290                 "([AEIOUYaeiouy]\\u0304[[:M:]-[\\u0304\\u0345]]*)i > | $1 \\u0345;",\r
1291         };\r
1292 \r
1293         for (int d=0; d < DATA.length; d+=3) {\r
1294             if (DATA[d] == RBT) {\r
1295                 // Transliterator test\r
1296                 Transliterator t = Transliterator.createFromRules("ID",\r
1297                         DATA[d+1], Transliterator.FORWARD);\r
1298                 if (t == null) {\r
1299                     errln("FAIL: createFromRules failed");\r
1300                     return;\r
1301                 }\r
1302                 String rules, escapedRules;\r
1303                 rules = t.toRules(false);\r
1304                 escapedRules = t.toRules(true);\r
1305                 String expRules = Utility.unescape(DATA[d+2]);\r
1306                 String expEscapedRules = DATA[d+2];\r
1307                 if (rules.equals(expRules)) {\r
1308                     logln("Ok: " + DATA[d+1] +\r
1309                             " => " + Utility.escape(rules));\r
1310                 } else {\r
1311                     errln("FAIL: " + DATA[d+1] +\r
1312                             " => " + Utility.escape(rules + ", exp " + expRules));\r
1313                 }\r
1314                 if (escapedRules.equals(expEscapedRules)) {\r
1315                     logln("Ok: " + DATA[d+1] +\r
1316                             " => " + escapedRules);\r
1317                 } else {\r
1318                     errln("FAIL: " + DATA[d+1] +\r
1319                             " => " + escapedRules + ", exp " + expEscapedRules);\r
1320                 }\r
1321 \r
1322             } else {\r
1323                 // UnicodeSet test\r
1324                 String pat = DATA[d+1];\r
1325                 String expToPat = DATA[d+2];\r
1326                 UnicodeSet set = new UnicodeSet(pat);\r
1327 \r
1328                 // Adjust spacing etc. as necessary.\r
1329                 String toPat;\r
1330                 toPat = set.toPattern(true);\r
1331                 if (expToPat.equals(toPat)) {\r
1332                     logln("Ok: " + pat +\r
1333                             " => " + toPat);\r
1334                 } else {\r
1335                     errln("FAIL: " + pat +\r
1336                             " => " + Utility.escape(toPat) +\r
1337                             ", exp " + Utility.escape(pat));\r
1338                 }\r
1339             }\r
1340         }\r
1341     }\r
1342 \r
1343     public void TestContext() {\r
1344         Transliterator.Position pos = new Transliterator.Position(0, 2, 0, 1); // cs cl s l\r
1345 \r
1346         expect("de > x; {d}e > y;",\r
1347                 "de",\r
1348                 "ye",\r
1349                 pos);\r
1350 \r
1351         expect("ab{c} > z;",\r
1352                 "xadabdabcy",\r
1353         "xadabdabzy");\r
1354     }\r
1355 \r
1356     static final String CharsToUnicodeString(String s) {\r
1357         return Utility.unescape(s);\r
1358     }\r
1359 \r
1360     public void TestSupplemental() {\r
1361 \r
1362         expect(CharsToUnicodeString("$a=\\U00010300; $s=[\\U00010300-\\U00010323];" +\r
1363         "a > $a; $s > i;"),\r
1364         CharsToUnicodeString("ab\\U0001030Fx"),\r
1365         CharsToUnicodeString("\\U00010300bix"));\r
1366 \r
1367         expect(CharsToUnicodeString("$a=[a-z\\U00010300-\\U00010323];" +\r
1368                 "$b=[A-Z\\U00010400-\\U0001044D];" +\r
1369         "($a)($b) > $2 $1;"),\r
1370         CharsToUnicodeString("aB\\U00010300\\U00010400c\\U00010401\\U00010301D"),\r
1371         CharsToUnicodeString("Ba\\U00010400\\U00010300\\U00010401cD\\U00010301"));\r
1372 \r
1373         // k|ax\\U00010300xm\r
1374 \r
1375         // k|a\\U00010400\\U00010300xm\r
1376         // ky|\\U00010400\\U00010300xm\r
1377         // ky\\U00010400|\\U00010300xm\r
1378 \r
1379         // ky\\U00010400|\\U00010300\\U00010400m\r
1380         // ky\\U00010400y|\\U00010400m\r
1381         expect(CharsToUnicodeString("$a=[a\\U00010300-\\U00010323];" +\r
1382                 "$a {x} > | @ \\U00010400;" +\r
1383         "{$a} [^\\u0000-\\uFFFF] > y;"),\r
1384         CharsToUnicodeString("kax\\U00010300xm"),\r
1385         CharsToUnicodeString("ky\\U00010400y\\U00010400m"));\r
1386 \r
1387         expect(Transliterator.getInstance("Any-Name"),\r
1388                 CharsToUnicodeString("\\U00010330\\U000E0061\\u00A0"),\r
1389         "\\N{GOTHIC LETTER AHSA}\\N{TAG LATIN SMALL LETTER A}\\N{NO-BREAK SPACE}");\r
1390 \r
1391         expect(Transliterator.getInstance("Name-Any"),\r
1392                 "\\N{GOTHIC LETTER AHSA}\\N{TAG LATIN SMALL LETTER A}\\N{NO-BREAK SPACE}",\r
1393                 CharsToUnicodeString("\\U00010330\\U000E0061\\u00A0"));\r
1394 \r
1395         expect(Transliterator.getInstance("Any-Hex/Unicode"),\r
1396                 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),\r
1397         "U+10330U+10FF00U+E0061U+00A0");\r
1398 \r
1399         expect(Transliterator.getInstance("Any-Hex/C"),\r
1400                 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),\r
1401         "\\U00010330\\U0010FF00\\U000E0061\\u00A0");\r
1402 \r
1403         expect(Transliterator.getInstance("Any-Hex/Perl"),\r
1404                 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),\r
1405         "\\x{10330}\\x{10FF00}\\x{E0061}\\x{A0}");\r
1406 \r
1407         expect(Transliterator.getInstance("Any-Hex/Java"),\r
1408                 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),\r
1409         "\\uD800\\uDF30\\uDBFF\\uDF00\\uDB40\\uDC61\\u00A0");\r
1410 \r
1411         expect(Transliterator.getInstance("Any-Hex/XML"),\r
1412                 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),\r
1413         "&#x10330;&#x10FF00;&#xE0061;&#xA0;");\r
1414 \r
1415         expect(Transliterator.getInstance("Any-Hex/XML10"),\r
1416                 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),\r
1417         "&#66352;&#1113856;&#917601;&#160;");\r
1418 \r
1419         expect(Transliterator.getInstance("[\\U000E0000-\\U000E0FFF] Remove"),\r
1420                 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),\r
1421                 CharsToUnicodeString("\\U00010330\\U0010FF00\\u00A0"));\r
1422     }\r
1423 \r
1424     public void TestQuantifier() {\r
1425 \r
1426         // Make sure @ in a quantified anteContext works\r
1427         expect("a+ {b} > | @@ c; A > a; (a+ c) > '(' $1 ')';",\r
1428                 "AAAAAb",\r
1429         "aaa(aac)");\r
1430 \r
1431         // Make sure @ in a quantified postContext works\r
1432         expect("{b} a+ > c @@ |; (a+) > '(' $1 ')';",\r
1433                 "baaaaa",\r
1434         "caa(aaa)");\r
1435 \r
1436         // Make sure @ in a quantified postContext with seg ref works\r
1437         expect("{(b)} a+ > $1 @@ |; (a+) > '(' $1 ')';",\r
1438                 "baaaaa",\r
1439         "baa(aaa)");\r
1440 \r
1441         // Make sure @ past ante context doesn't enter ante context\r
1442         Transliterator.Position pos = new Transliterator.Position(0, 5, 3, 5);\r
1443         expect("a+ {b} > | @@ c; x > y; (a+ c) > '(' $1 ')';",\r
1444                 "xxxab",\r
1445                 "xxx(ac)",\r
1446                 pos);\r
1447 \r
1448         // Make sure @ past post context doesn't pass limit\r
1449         Transliterator.Position pos2 = new Transliterator.Position(0, 4, 0, 2);\r
1450         expect("{b} a+ > c @@ |; x > y; a > A;",\r
1451                 "baxx",\r
1452                 "caxx",\r
1453                 pos2);\r
1454 \r
1455         // Make sure @ past post context doesn't enter post context\r
1456         expect("{b} a+ > c @@ |; x > y; a > A;",\r
1457                 "baxx",\r
1458         "cayy");\r
1459 \r
1460         expect("(ab)? c > d;",\r
1461                 "c abc ababc",\r
1462         "d d abd");\r
1463 \r
1464         // NOTE: The (ab)+ when referenced just yields a single "ab",\r
1465         // not the full sequence of them.  This accords with perl behavior.\r
1466         expect("(ab)+ {x} > '(' $1 ')';",\r
1467                 "x abx ababxy",\r
1468         "x ab(ab) abab(ab)y");\r
1469 \r
1470         expect("b+ > x;",\r
1471                 "ac abc abbc abbbc",\r
1472         "ac axc axc axc");\r
1473 \r
1474         expect("[abc]+ > x;",\r
1475                 "qac abrc abbcs abtbbc",\r
1476         "qx xrx xs xtx");\r
1477 \r
1478         expect("q{(ab)+} > x;",\r
1479                 "qa qab qaba qababc qaba",\r
1480         "qa qx qxa qxc qxa");\r
1481 \r
1482         expect("q(ab)* > x;",\r
1483                 "qa qab qaba qababc",\r
1484         "xa x xa xc");\r
1485 \r
1486         // NOTE: The (ab)+ when referenced just yields a single "ab",\r
1487         // not the full sequence of them.  This accords with perl behavior.\r
1488         expect("q(ab)* > '(' $1 ')';",\r
1489                 "qa qab qaba qababc",\r
1490         "()a (ab) (ab)a (ab)c");\r
1491 \r
1492         // 'foo'+ and 'foo'* -- the quantifier should apply to the entire\r
1493         // quoted string\r
1494         expect("'ab'+ > x;",\r
1495                 "bb ab ababb",\r
1496         "bb x xb");\r
1497 \r
1498         // $foo+ and $foo* -- the quantifier should apply to the entire\r
1499         // variable reference\r
1500         expect("$var = ab; $var+ > x;",\r
1501                 "bb ab ababb",\r
1502         "bb x xb");\r
1503     }\r
1504 \r
1505     static class TestFact implements Transliterator.Factory {\r
1506         static class NameableNullTrans extends Transliterator {\r
1507             public NameableNullTrans(String id) {\r
1508                 super(id, null);\r
1509             }\r
1510             protected void handleTransliterate(Replaceable text,\r
1511                     Position offsets, boolean incremental) {\r
1512                 offsets.start = offsets.limit;\r
1513             }\r
1514         }\r
1515         String id;\r
1516         public TestFact(String theID) {\r
1517             id = theID;\r
1518         }\r
1519         public Transliterator getInstance(String ignoredID) {\r
1520             return new NameableNullTrans(id);\r
1521         }\r
1522     }\r
1523 \r
1524     public void TestSTV() {\r
1525         Enumeration es = Transliterator.getAvailableSources();\r
1526         for (int i=0; es.hasMoreElements(); ++i) {\r
1527             String source = (String) es.nextElement();\r
1528             logln("" + i + ": " + source);\r
1529             if (source.length() == 0) {\r
1530                 errln("FAIL: empty source");\r
1531                 continue;\r
1532             }\r
1533             Enumeration et = Transliterator.getAvailableTargets(source);\r
1534             for (int j=0; et.hasMoreElements(); ++j) {\r
1535                 String target = (String) et.nextElement();\r
1536                 logln(" " + j + ": " + target);\r
1537                 if (target.length() == 0) {\r
1538                     errln("FAIL: empty target");\r
1539                     continue;\r
1540                 }\r
1541                 Enumeration ev = Transliterator.getAvailableVariants(source, target);\r
1542                 for (int k=0; ev.hasMoreElements(); ++k) {\r
1543                     String variant = (String) ev.nextElement();\r
1544                     if (variant.length() == 0) {\r
1545                         logln("  " + k + ": <empty>");\r
1546                     } else {\r
1547                         logln("  " + k + ": " + variant);\r
1548                     }\r
1549                 }\r
1550             }\r
1551         }\r
1552 \r
1553         // Test registration\r
1554         String[] IDS = { "Fieruwer", "Seoridf-Sweorie", "Oewoir-Oweri/Vsie" };\r
1555         String[] FULL_IDS = { "Any-Fieruwer", "Seoridf-Sweorie", "Oewoir-Oweri/Vsie" };\r
1556         String[] SOURCES = { null, "Seoridf", "Oewoir" };\r
1557         for (int i=0; i<3; ++i) {\r
1558             Transliterator.registerFactory(IDS[i], new TestFact(IDS[i]));\r
1559             try {\r
1560                 Transliterator t = Transliterator.getInstance(IDS[i]);\r
1561                 if (t.getID().equals(IDS[i])) {\r
1562                     logln("Ok: Registration/creation succeeded for ID " +\r
1563                             IDS[i]);\r
1564                 } else {\r
1565                     errln("FAIL: Registration of ID " +\r
1566                             IDS[i] + " creates ID " + t.getID());\r
1567                 }\r
1568                 Transliterator.unregister(IDS[i]);\r
1569                 try {\r
1570                     t = Transliterator.getInstance(IDS[i]);\r
1571                     errln("FAIL: Unregistration failed for ID " +\r
1572                             IDS[i] + "; still receiving ID " + t.getID());\r
1573                 } catch (IllegalArgumentException e2) {\r
1574                     // Good; this is what we expect\r
1575                     logln("Ok; Unregistered " + IDS[i]);\r
1576                 }\r
1577             } catch (IllegalArgumentException e) {\r
1578                 errln("FAIL: Registration/creation failed for ID " +\r
1579                         IDS[i]);\r
1580             } finally {\r
1581                 Transliterator.unregister(IDS[i]);\r
1582             }\r
1583         }\r
1584 \r
1585         // Make sure getAvailable API reflects removal\r
1586         for (Enumeration e = Transliterator.getAvailableIDs();\r
1587         e.hasMoreElements(); ) {\r
1588             String id = (String) e.nextElement();\r
1589             for (int i=0; i<3; ++i) {\r
1590                 if (id.equals(FULL_IDS[i])) {\r
1591                     errln("FAIL: unregister(" + id + ") failed");\r
1592                 }\r
1593             }\r
1594         }\r
1595         for (Enumeration e = Transliterator.getAvailableTargets("Any");\r
1596         e.hasMoreElements(); ) {\r
1597             String t = (String) e.nextElement();\r
1598             if (t.equals(IDS[0])) {\r
1599                 errln("FAIL: unregister(Any-" + t + ") failed");\r
1600             }\r
1601         }\r
1602         for (Enumeration e = Transliterator.getAvailableSources();\r
1603         e.hasMoreElements(); ) {\r
1604             String s = (String) e.nextElement();\r
1605             for (int i=0; i<3; ++i) {\r
1606                 if (SOURCES[i] == null) continue;\r
1607                 if (s.equals(SOURCES[i])) {\r
1608                     errln("FAIL: unregister(" + s + "-*) failed");\r
1609                 }\r
1610             }\r
1611         }\r
1612     }\r
1613 \r
1614     /**\r
1615      * Test inverse of Greek-Latin; Title()\r
1616      */\r
1617     public void TestCompoundInverse() {\r
1618         Transliterator t = Transliterator.getInstance\r
1619         ("Greek-Latin; Title()", Transliterator.REVERSE);\r
1620         if (t == null) {\r
1621             errln("FAIL: createInstance");\r
1622             return;\r
1623         }\r
1624         String exp = "(Title);Latin-Greek";\r
1625         if (t.getID().equals(exp)) {\r
1626             logln("Ok: inverse of \"Greek-Latin; Title()\" is \"" +\r
1627                     t.getID());\r
1628         } else {\r
1629             errln("FAIL: inverse of \"Greek-Latin; Title()\" is \"" +\r
1630                     t.getID() + "\", expected \"" + exp + "\"");\r
1631         }\r
1632     }\r
1633 \r
1634     /**\r
1635      * Test NFD chaining with RBT\r
1636      */\r
1637     public void TestNFDChainRBT() {\r
1638         Transliterator t = Transliterator.createFromRules(\r
1639                 "TEST", "::NFD; aa > Q; a > q;",\r
1640                 Transliterator.FORWARD);\r
1641         logln(t.toRules(true));\r
1642         expect(t, "aa", "Q");\r
1643     }\r
1644 \r
1645     /**\r
1646      * Inverse of "Null" should be "Null". (J21)\r
1647      */\r
1648     public void TestNullInverse() {\r
1649         Transliterator t = Transliterator.getInstance("Null");\r
1650         Transliterator u = t.getInverse();\r
1651         if (!u.getID().equals("Null")) {\r
1652             errln("FAIL: Inverse of Null should be Null");\r
1653         }\r
1654     }\r
1655 \r
1656     /**\r
1657      * Check ID of inverse of alias. (J22)\r
1658      */\r
1659     public void TestAliasInverseID() {\r
1660         String ID = "Latin-Hangul"; // This should be any alias ID with an inverse\r
1661         Transliterator t = Transliterator.getInstance(ID);\r
1662         Transliterator u = t.getInverse();\r
1663         String exp = "Hangul-Latin";\r
1664         String got = u.getID();\r
1665         if (!got.equals(exp)) {\r
1666             errln("FAIL: Inverse of " + ID + " is " + got +\r
1667                     ", expected " + exp);\r
1668         }\r
1669     }\r
1670 \r
1671     /**\r
1672      * Test IDs of inverses of compound transliterators. (J20)\r
1673      */\r
1674     public void TestCompoundInverseID() {\r
1675         String ID = "Latin-Jamo;NFC(NFD)";\r
1676         Transliterator t = Transliterator.getInstance(ID);\r
1677         Transliterator u = t.getInverse();\r
1678         String exp = "NFD(NFC);Jamo-Latin";\r
1679         String got = u.getID();\r
1680         if (!got.equals(exp)) {\r
1681             errln("FAIL: Inverse of " + ID + " is " + got +\r
1682                     ", expected " + exp);\r
1683         }\r
1684     }\r
1685 \r
1686     /**\r
1687      * Test undefined variable.\r
1688      */\r
1689     public void TestUndefinedVariable() {\r
1690         String rule = "$initial } a <> \u1161;";\r
1691         try {\r
1692             Transliterator.createFromRules("<ID>", rule,Transliterator.FORWARD);\r
1693         } catch (IllegalArgumentException e) {\r
1694             logln("OK: Got exception for " + rule + ", as expected: " +\r
1695                     e.getMessage());\r
1696             return;\r
1697         }\r
1698         errln("Fail: bogus rule " + rule + " compiled without error");\r
1699     }\r
1700 \r
1701     /**\r
1702      * Test empty context.\r
1703      */\r
1704     public void TestEmptyContext() {\r
1705         expect(" { a } > b;", "xay a ", "xby b ");\r
1706     }\r
1707 \r
1708     /**\r
1709      * Test compound filter ID syntax\r
1710      */\r
1711     public void TestCompoundFilterID() {\r
1712         String[] DATA = {\r
1713                 // Col. 1 = ID or rule set (latter must start with #)\r
1714 \r
1715                 // = columns > 1 are null if expect col. 1 to be illegal =\r
1716 \r
1717                 // Col. 2 = direction, "F..." or "R..."\r
1718                 // Col. 3 = source string\r
1719                 // Col. 4 = exp result\r
1720 \r
1721                 "[abc]; [abc]", null, null, null, // multiple filters\r
1722                 "Latin-Greek; [abc];", null, null, null, // misplaced filter\r
1723                 "[b]; Latin-Greek; Upper; ([xyz])", "F", "abc", "a\u0392c",\r
1724                 "[b]; (Lower); Latin-Greek; Upper(); ([\u0392])", "R", "\u0391\u0392\u0393", "\u0391b\u0393",\r
1725                 "#\n::[b]; ::Latin-Greek; ::Upper; ::([xyz]);", "F", "abc", "a\u0392c",\r
1726                 "#\n::[b]; ::(Lower); ::Latin-Greek; ::Upper(); ::([\u0392]);", "R", "\u0391\u0392\u0393", "\u0391b\u0393",\r
1727         };\r
1728 \r
1729         for (int i=0; i<DATA.length; i+=4) {\r
1730             String id = DATA[i];\r
1731             int direction = (DATA[i+1] != null && DATA[i+1].charAt(0) == 'R') ?\r
1732                     Transliterator.REVERSE : Transliterator.FORWARD;\r
1733             String source = DATA[i+2];\r
1734             String exp = DATA[i+3];\r
1735             boolean expOk = (DATA[i+1] != null);\r
1736             Transliterator t = null;\r
1737             IllegalArgumentException e = null;\r
1738             try {\r
1739                 if (id.charAt(0) == '#') {\r
1740                     t = Transliterator.createFromRules("ID", id, direction);\r
1741                 } else {\r
1742                     t = Transliterator.getInstance(id, direction);\r
1743                 }\r
1744             } catch (IllegalArgumentException ee) {\r
1745                 e = ee;\r
1746             }\r
1747             boolean ok = (t != null && e == null);\r
1748             if (ok == expOk) {\r
1749                 logln("Ok: " + id + " => " + t +\r
1750                         (e != null ? (", " + e.getMessage()) : ""));\r
1751                 if (source != null) {\r
1752                     expect(t, source, exp);\r
1753                 }\r
1754             } else {\r
1755                 errln("FAIL: " + id + " => " + t +\r
1756                         (e != null ? (", " + e.getMessage()) : ""));\r
1757             }\r
1758         }\r
1759     }\r
1760 \r
1761     /**\r
1762      * Test new property set syntax\r
1763      */\r
1764     public void TestPropertySet() {\r
1765         expect("a>A; \\p{Lu}>x; \\p{Any}>y;", "abcDEF", "Ayyxxx");\r
1766         expect("(.+)>'[' $1 ']';", " a stitch \n in time \r saves 9",\r
1767         "[ a stitch ]\n[ in time ]\r[ saves 9]");\r
1768     }\r
1769 \r
1770     /**\r
1771      * Test various failure points of the new 2.0 engine.\r
1772      */\r
1773     public void TestNewEngine() {\r
1774         Transliterator t = Transliterator.getInstance("Latin-Hiragana");\r
1775         // Katakana should be untouched\r
1776         expect(t, "a\u3042\u30A2", "\u3042\u3042\u30A2");\r
1777 \r
1778         if (true) {\r
1779             // This test will only work if Transliterator.ROLLBACK is\r
1780             // true.  Otherwise, this test will fail, revealing a\r
1781             // limitation of global filters in incremental mode.\r
1782 \r
1783             Transliterator a =\r
1784                 Transliterator.createFromRules("a_to_A", "a > A;", Transliterator.FORWARD);\r
1785             Transliterator A =\r
1786                 Transliterator.createFromRules("A_to_b", "A > b;", Transliterator.FORWARD);\r
1787 \r
1788             //Transliterator array[] = new Transliterator[] {\r
1789             //    a,\r
1790             //    Transliterator.getInstance("NFD"),\r
1791             //    A };\r
1792             //t = Transliterator.getInstance(array, new UnicodeSet("[:Ll:]"));\r
1793 \r
1794             try {\r
1795                 Transliterator.registerInstance(a);\r
1796                 Transliterator.registerInstance(A);\r
1797 \r
1798                 t = Transliterator.getInstance("[:Ll:];a_to_A;NFD;A_to_b");\r
1799                 expect(t, "aAaA", "bAbA");\r
1800 \r
1801                 Transliterator[] u = t.getElements();\r
1802                 assertTrue("getElements().length", u.length == 3);\r
1803                 assertEquals("getElements()[0]", u[0].getID(), "a_to_A");\r
1804                 assertEquals("getElements()[1]", u[1].getID(), "NFD");\r
1805                 assertEquals("getElements()[2]", u[2].getID(), "A_to_b");\r
1806 \r
1807                 t = Transliterator.getInstance("a_to_A;NFD;A_to_b");\r
1808                 t.setFilter(new UnicodeSet("[:Ll:]"));\r
1809                 expect(t, "aAaA", "bAbA");\r
1810             } finally {\r
1811                 Transliterator.unregister("a_to_A"); \r
1812                 Transliterator.unregister("A_to_b");   \r
1813             }\r
1814         }\r
1815 \r
1816         expect("$smooth = x; $macron = q; [:^L:] { ([aeiouyAEIOUY] $macron?) } [^aeiouyAEIOUY$smooth$macron] > | $1 $smooth ;",\r
1817                 "a",\r
1818         "ax");\r
1819 \r
1820         String gr =\r
1821             "$ddot = \u0308 ;" +\r
1822             "$lcgvowel = [\u03b1\u03b5\u03b7\u03b9\u03bf\u03c5\u03c9] ;" +\r
1823             "$rough = \u0314 ;" +\r
1824             "($lcgvowel+ $ddot?) $rough > h | $1 ;" +\r
1825             "\u03b1 <> a ;" +\r
1826             "$rough <> h ;";\r
1827 \r
1828         expect(gr, "\u03B1\u0314", "ha");\r
1829     }\r
1830 \r
1831     /**\r
1832      * Test quantified segment behavior.  We want:\r
1833      * ([abc])+ > x $1 x; applied to "cba" produces "xax"\r
1834      */\r
1835     public void TestQuantifiedSegment() {\r
1836         // The normal case\r
1837         expect("([abc]+) > x $1 x;", "cba", "xcbax");\r
1838 \r
1839         // The tricky case; the quantifier is around the segment\r
1840         expect("([abc])+ > x $1 x;", "cba", "xax");\r
1841 \r
1842         // Tricky case in reverse direction\r
1843         expect("([abc])+ { q > x $1 x;", "cbaq", "cbaxax");\r
1844 \r
1845         // Check post-context segment\r
1846         expect("{q} ([a-d])+ > '(' $1 ')';", "ddqcba", "dd(a)cba");\r
1847 \r
1848         // Test toRule/toPattern for non-quantified segment.\r
1849         // Careful with spacing here.\r
1850         String r = "([a-c]){q} > x $1 x;";\r
1851         Transliterator t = Transliterator.createFromRules("ID", r, Transliterator.FORWARD);\r
1852         String rr = t.toRules(true);\r
1853         if (!r.equals(rr)) {\r
1854             errln("FAIL: \"" + r + "\" x toRules() => \"" + rr + "\"");\r
1855         } else {\r
1856             logln("Ok: \"" + r + "\" x toRules() => \"" + rr + "\"");\r
1857         }\r
1858 \r
1859         // Test toRule/toPattern for quantified segment.\r
1860         // Careful with spacing here.\r
1861         r = "([a-c])+{q} > x $1 x;";\r
1862         t = Transliterator.createFromRules("ID", r, Transliterator.FORWARD);\r
1863         rr = t.toRules(true);\r
1864         if (!r.equals(rr)) {\r
1865             errln("FAIL: \"" + r + "\" x toRules() => \"" + rr + "\"");\r
1866         } else {\r
1867             logln("Ok: \"" + r + "\" x toRules() => \"" + rr + "\"");\r
1868         }\r
1869     }\r
1870 \r
1871     //======================================================================\r
1872     // Ram's tests\r
1873     //======================================================================\r
1874     /* this test performs  test of rules in ISO 15915 */\r
1875     public void  TestDevanagariLatinRT(){\r
1876         String[]  source = {\r
1877                 "bh\u0101rata",\r
1878                 "kra",\r
1879                 "k\u1E63a",\r
1880                 "khra",\r
1881                 "gra",\r
1882                 "\u1E45ra",\r
1883                 "cra",\r
1884                 "chra",\r
1885                 "j\u00F1a",\r
1886                 "jhra",\r
1887                 "\u00F1ra",\r
1888                 "\u1E6Dya",\r
1889                 "\u1E6Dhra",\r
1890                 "\u1E0Dya",\r
1891                 //"r\u0323ya", // \u095c is not valid in Devanagari\r
1892                 "\u1E0Dhya",\r
1893                 "\u1E5Bhra",\r
1894                 "\u1E47ra",\r
1895                 "tta",\r
1896                 "thra",\r
1897                 "dda",\r
1898                 "dhra",\r
1899                 "nna",\r
1900                 "pra",\r
1901                 "phra",\r
1902                 "bra",\r
1903                 "bhra",\r
1904                 "mra",\r
1905                 "\u1E49ra",\r
1906                 //"l\u0331ra",\r
1907                 "yra",\r
1908                 "\u1E8Fra",\r
1909                 //"l-",\r
1910                 "vra",\r
1911                 "\u015Bra",\r
1912                 "\u1E63ra",\r
1913                 "sra",\r
1914                 "hma",\r
1915                 "\u1E6D\u1E6Da",\r
1916                 "\u1E6D\u1E6Dha",\r
1917                 "\u1E6Dh\u1E6Dha",\r
1918                 "\u1E0D\u1E0Da",\r
1919                 "\u1E0D\u1E0Dha",\r
1920                 "\u1E6Dya",\r
1921                 "\u1E6Dhya",\r
1922                 "\u1E0Dya",\r
1923                 "\u1E0Dhya",\r
1924                 // Not roundtrippable --\r
1925                 // \u0939\u094d\u094d\u092E  - hma\r
1926                 // \u0939\u094d\u092E         - hma\r
1927                 // CharsToUnicodeString("hma"),\r
1928                 "hya",\r
1929                 "\u015Br\u0325",\r
1930                 "\u015Bca",\r
1931                 "\u0115",\r
1932                 "san\u0304j\u012Bb s\u0113nagupta",\r
1933                 "\u0101nand vaddir\u0101ju",\r
1934         };\r
1935         String[]  expected = {\r
1936                 "\u092D\u093E\u0930\u0924",    /* bha\u0304rata */\r
1937                 "\u0915\u094D\u0930",          /* kra         */\r
1938                 "\u0915\u094D\u0937",          /* ks\u0323a  */\r
1939                 "\u0916\u094D\u0930",          /* khra        */\r
1940                 "\u0917\u094D\u0930",          /* gra         */\r
1941                 "\u0919\u094D\u0930",          /* n\u0307ra  */\r
1942                 "\u091A\u094D\u0930",          /* cra         */\r
1943                 "\u091B\u094D\u0930",          /* chra        */\r
1944                 "\u091C\u094D\u091E",          /* jn\u0303a  */\r
1945                 "\u091D\u094D\u0930",          /* jhra        */\r
1946                 "\u091E\u094D\u0930",          /* n\u0303ra  */\r
1947                 "\u091F\u094D\u092F",          /* t\u0323ya  */\r
1948                 "\u0920\u094D\u0930",          /* t\u0323hra */\r
1949                 "\u0921\u094D\u092F",          /* d\u0323ya  */\r
1950                 //"\u095C\u094D\u092F",          /* r\u0323ya  */ // \u095c is not valid in Devanagari\r
1951                 "\u0922\u094D\u092F",          /* d\u0323hya */\r
1952                 "\u0922\u093C\u094D\u0930",    /* r\u0323hra */\r
1953                 "\u0923\u094D\u0930",          /* n\u0323ra  */\r
1954                 "\u0924\u094D\u0924",          /* tta         */\r
1955                 "\u0925\u094D\u0930",          /* thra        */\r
1956                 "\u0926\u094D\u0926",          /* dda         */\r
1957                 "\u0927\u094D\u0930",          /* dhra        */\r
1958                 "\u0928\u094D\u0928",          /* nna         */\r
1959                 "\u092A\u094D\u0930",          /* pra         */\r
1960                 "\u092B\u094D\u0930",          /* phra        */\r
1961                 "\u092C\u094D\u0930",          /* bra         */\r
1962                 "\u092D\u094D\u0930",          /* bhra        */\r
1963                 "\u092E\u094D\u0930",          /* mra         */\r
1964                 "\u0929\u094D\u0930",          /* n\u0331ra  */\r
1965                 //"\u0934\u094D\u0930",          /* l\u0331ra  */\r
1966                 "\u092F\u094D\u0930",          /* yra         */\r
1967                 "\u092F\u093C\u094D\u0930",    /* y\u0307ra  */\r
1968                 //"l-",\r
1969                 "\u0935\u094D\u0930",          /* vra         */\r
1970                 "\u0936\u094D\u0930",          /* s\u0301ra  */\r
1971                 "\u0937\u094D\u0930",          /* s\u0323ra  */\r
1972                 "\u0938\u094D\u0930",          /* sra         */\r
1973                 "\u0939\u094d\u092E",          /* hma         */\r
1974                 "\u091F\u094D\u091F",          /* t\u0323t\u0323a  */\r
1975                 "\u091F\u094D\u0920",          /* t\u0323t\u0323ha */\r
1976                 "\u0920\u094D\u0920",          /* t\u0323ht\u0323ha*/\r
1977                 "\u0921\u094D\u0921",          /* d\u0323d\u0323a  */\r
1978                 "\u0921\u094D\u0922",          /* d\u0323d\u0323ha */\r
1979                 "\u091F\u094D\u092F",          /* t\u0323ya  */\r
1980                 "\u0920\u094D\u092F",          /* t\u0323hya */\r
1981                 "\u0921\u094D\u092F",          /* d\u0323ya  */\r
1982                 "\u0922\u094D\u092F",          /* d\u0323hya */\r
1983                 // "hma",                         /* hma         */\r
1984                 "\u0939\u094D\u092F",          /* hya         */\r
1985                 "\u0936\u0943",                /* s\u0301r\u0325a  */\r
1986                 "\u0936\u094D\u091A",          /* s\u0301ca  */\r
1987                 "\u090d",                      /* e\u0306    */\r
1988                 "\u0938\u0902\u091C\u0940\u092C\u094D \u0938\u0947\u0928\u0917\u0941\u092A\u094D\u0924",\r
1989                 "\u0906\u0928\u0902\u0926\u094D \u0935\u0926\u094D\u0926\u093F\u0930\u093E\u091C\u0941",\r
1990         };\r
1991 \r
1992         Transliterator latinToDev=Transliterator.getInstance("Latin-Devanagari", Transliterator.FORWARD );\r
1993         Transliterator devToLatin=Transliterator.getInstance("Devanagari-Latin", Transliterator.FORWARD);\r
1994 \r
1995         for(int i= 0; i<source.length; i++){\r
1996             expect(latinToDev,(source[i]),(expected[i]));\r
1997             expect(devToLatin,(expected[i]),(source[i]));\r
1998         }\r
1999 \r
2000     }\r
2001     public void  TestTeluguLatinRT(){\r
2002         String[]  source = {\r
2003                 "raghur\u0101m vi\u015Bvan\u0101dha",                           /* Raghuram Viswanadha    */\r
2004                 "\u0101nand vaddir\u0101ju",                                    /* Anand Vaddiraju        */\r
2005                 "r\u0101j\u012Bv ka\u015Barab\u0101da",                         /* Rajeev Kasarabada      */\r
2006                 "san\u0304j\u012Bv ka\u015Barab\u0101da",                       /* sanjeev kasarabada     */\r
2007                 "san\u0304j\u012Bb sen'gupta",                                  /* sanjib sengupata       */\r
2008                 "amar\u0113ndra hanum\u0101nula",                               /* Amarendra hanumanula   */\r
2009                 "ravi kum\u0101r vi\u015Bvan\u0101dha",                         /* Ravi Kumar Viswanadha  */\r
2010                 "\u0101ditya kandr\u0113gula",                                  /* Aditya Kandregula      */\r
2011                 "\u015Br\u012Bdhar ka\u1E47\u1E6Dama\u015Be\u1E6D\u1E6Di",      /* Shridhar Kantamsetty   */\r
2012                 "m\u0101dhav de\u015Be\u1E6D\u1E6Di"                            /* Madhav Desetty         */\r
2013         };\r
2014 \r
2015         String[]  expected = {\r
2016                 "\u0c30\u0c18\u0c41\u0c30\u0c3e\u0c2e\u0c4d \u0c35\u0c3f\u0c36\u0c4d\u0c35\u0c28\u0c3e\u0c27",\r
2017                 "\u0c06\u0c28\u0c02\u0c26\u0c4d \u0C35\u0C26\u0C4D\u0C26\u0C3F\u0C30\u0C3E\u0C1C\u0C41",\r
2018                 "\u0c30\u0c3e\u0c1c\u0c40\u0c35\u0c4d \u0c15\u0c36\u0c30\u0c2c\u0c3e\u0c26",\r
2019                 "\u0c38\u0c02\u0c1c\u0c40\u0c35\u0c4d \u0c15\u0c36\u0c30\u0c2c\u0c3e\u0c26",\r
2020                 "\u0c38\u0c02\u0c1c\u0c40\u0c2c\u0c4d \u0c38\u0c46\u0c28\u0c4d\u0c17\u0c41\u0c2a\u0c4d\u0c24",\r
2021                 "\u0c05\u0c2e\u0c30\u0c47\u0c02\u0c26\u0c4d\u0c30 \u0c39\u0c28\u0c41\u0c2e\u0c3e\u0c28\u0c41\u0c32",\r
2022                 "\u0c30\u0c35\u0c3f \u0c15\u0c41\u0c2e\u0c3e\u0c30\u0c4d \u0c35\u0c3f\u0c36\u0c4d\u0c35\u0c28\u0c3e\u0c27",\r
2023                 "\u0c06\u0c26\u0c3f\u0c24\u0c4d\u0c2f \u0C15\u0C02\u0C26\u0C4D\u0C30\u0C47\u0C17\u0C41\u0c32",\r
2024                 "\u0c36\u0c4d\u0c30\u0c40\u0C27\u0C30\u0C4D \u0c15\u0c02\u0c1f\u0c2e\u0c36\u0c46\u0c1f\u0c4d\u0c1f\u0c3f",\r
2025                 "\u0c2e\u0c3e\u0c27\u0c35\u0c4d \u0c26\u0c46\u0c36\u0c46\u0c1f\u0c4d\u0c1f\u0c3f",\r
2026         };\r
2027 \r
2028 \r
2029         Transliterator latinToDev=Transliterator.getInstance("Latin-Telugu", Transliterator.FORWARD);\r
2030         Transliterator devToLatin=Transliterator.getInstance("Telugu-Latin", Transliterator.FORWARD);\r
2031 \r
2032         for(int i= 0; i<source.length; i++){\r
2033             expect(latinToDev,(source[i]),(expected[i]));\r
2034             expect(devToLatin,(expected[i]),(source[i]));\r
2035         }\r
2036     }\r
2037 \r
2038     public void  TestSanskritLatinRT(){\r
2039         int MAX_LEN =15;\r
2040         String[]  source = {\r
2041                 "rmk\u1E63\u0113t",\r
2042                 "\u015Br\u012Bmad",\r
2043                 "bhagavadg\u012Bt\u0101",\r
2044                 "adhy\u0101ya",\r
2045                 "arjuna",\r
2046                 "vi\u1E63\u0101da",\r
2047                 "y\u014Dga",\r
2048                 "dhr\u0325tar\u0101\u1E63\u1E6Dra",\r
2049                 "uv\u0101cr\u0325",\r
2050                 "dharmak\u1E63\u0113tr\u0113",\r
2051                 "kuruk\u1E63\u0113tr\u0113",\r
2052                 "samav\u0113t\u0101",\r
2053                 "yuyutsava\u1E25",\r
2054                 "m\u0101mak\u0101\u1E25",\r
2055                 // "p\u0101\u1E47\u1E0Dav\u0101\u015Bcaiva",\r
2056                 "kimakurvata",\r
2057                 "san\u0304java",\r
2058         };\r
2059         String[]  expected = {\r
2060                 "\u0930\u094D\u092E\u094D\u0915\u094D\u0937\u0947\u0924\u094D",\r
2061                 "\u0936\u094d\u0930\u0940\u092e\u0926\u094d",\r
2062                 "\u092d\u0917\u0935\u0926\u094d\u0917\u0940\u0924\u093e",\r
2063                 "\u0905\u0927\u094d\u092f\u093e\u092f",\r
2064                 "\u0905\u0930\u094d\u091c\u0941\u0928",\r
2065                 "\u0935\u093f\u0937\u093e\u0926",\r
2066                 "\u092f\u094b\u0917",\r
2067                 "\u0927\u0943\u0924\u0930\u093e\u0937\u094d\u091f\u094d\u0930",\r
2068                 "\u0909\u0935\u093E\u091A\u0943",\r
2069                 "\u0927\u0930\u094d\u092e\u0915\u094d\u0937\u0947\u0924\u094d\u0930\u0947",\r
2070                 "\u0915\u0941\u0930\u0941\u0915\u094d\u0937\u0947\u0924\u094d\u0930\u0947",\r
2071                 "\u0938\u092e\u0935\u0947\u0924\u093e",\r
2072                 "\u092f\u0941\u092f\u0941\u0924\u094d\u0938\u0935\u0903",\r
2073                 "\u092e\u093e\u092e\u0915\u093e\u0903",\r
2074                 //"\u092a\u093e\u0923\u094d\u0921\u0935\u093e\u0936\u094d\u091a\u0948\u0935",\r
2075                 "\u0915\u093f\u092e\u0915\u0941\u0930\u094d\u0935\u0924",\r
2076                 "\u0938\u0902\u091c\u0935",\r
2077         };\r
2078 \r
2079         Transliterator latinToDev=Transliterator.getInstance("Latin-Devanagari", Transliterator.FORWARD);\r
2080         Transliterator devToLatin=Transliterator.getInstance("Devanagari-Latin", Transliterator.FORWARD);\r
2081         for(int i= 0; i<MAX_LEN; i++){\r
2082             expect(latinToDev,(source[i]),(expected[i]));\r
2083             expect(devToLatin,(expected[i]),(source[i]));\r
2084         }\r
2085     }\r
2086 \r
2087     public void  TestCompoundLatinRT(){\r
2088         int MAX_LEN =15;\r
2089         String[]  source = {\r
2090                 "rmk\u1E63\u0113t",\r
2091                 "\u015Br\u012Bmad",\r
2092                 "bhagavadg\u012Bt\u0101",\r
2093                 "adhy\u0101ya",\r
2094                 "arjuna",\r
2095                 "vi\u1E63\u0101da",\r
2096                 "y\u014Dga",\r
2097                 "dhr\u0325tar\u0101\u1E63\u1E6Dra",\r
2098                 "uv\u0101cr\u0325",\r
2099                 "dharmak\u1E63\u0113tr\u0113",\r
2100                 "kuruk\u1E63\u0113tr\u0113",\r
2101                 "samav\u0113t\u0101",\r
2102                 "yuyutsava\u1E25",\r
2103                 "m\u0101mak\u0101\u1E25",\r
2104                 // "p\u0101\u1E47\u1E0Dav\u0101\u015Bcaiva",\r
2105                 "kimakurvata",\r
2106                 "san\u0304java"\r
2107         };\r
2108         String[]  expected = {\r
2109                 "\u0930\u094D\u092E\u094D\u0915\u094D\u0937\u0947\u0924\u094D",\r
2110                 "\u0936\u094d\u0930\u0940\u092e\u0926\u094d",\r
2111                 "\u092d\u0917\u0935\u0926\u094d\u0917\u0940\u0924\u093e",\r
2112                 "\u0905\u0927\u094d\u092f\u093e\u092f",\r
2113                 "\u0905\u0930\u094d\u091c\u0941\u0928",\r
2114                 "\u0935\u093f\u0937\u093e\u0926",\r
2115                 "\u092f\u094b\u0917",\r
2116                 "\u0927\u0943\u0924\u0930\u093e\u0937\u094d\u091f\u094d\u0930",\r
2117                 "\u0909\u0935\u093E\u091A\u0943",\r
2118                 "\u0927\u0930\u094d\u092e\u0915\u094d\u0937\u0947\u0924\u094d\u0930\u0947",\r
2119                 "\u0915\u0941\u0930\u0941\u0915\u094d\u0937\u0947\u0924\u094d\u0930\u0947",\r
2120                 "\u0938\u092e\u0935\u0947\u0924\u093e",\r
2121                 "\u092f\u0941\u092f\u0941\u0924\u094d\u0938\u0935\u0903",\r
2122                 "\u092e\u093e\u092e\u0915\u093e\u0903",\r
2123                 //  "\u092a\u093e\u0923\u094d\u0921\u0935\u093e\u0936\u094d\u091a\u0948\u0935",\r
2124                 "\u0915\u093f\u092e\u0915\u0941\u0930\u094d\u0935\u0924",\r
2125                 "\u0938\u0902\u091c\u0935"\r
2126         };\r
2127 \r
2128         Transliterator latinToDevToLatin=Transliterator.getInstance("Latin-Devanagari;Devanagari-Latin", Transliterator.FORWARD);\r
2129         Transliterator devToLatinToDev=Transliterator.getInstance("Devanagari-Latin;Latin-Devanagari", Transliterator.FORWARD);\r
2130         for(int i= 0; i<MAX_LEN; i++){\r
2131             expect(latinToDevToLatin,(source[i]),(source[i]));\r
2132             expect(devToLatinToDev,(expected[i]),(expected[i]));\r
2133         }\r
2134     }\r
2135     /**\r
2136      * Test Gurmukhi-Devanagari Tippi and Bindi\r
2137      */\r
2138     public void TestGurmukhiDevanagari(){\r
2139         // the rule says:\r
2140         // (\u0902) (when preceded by vowel)      --->  (\u0A02)\r
2141         // (\u0902) (when preceded by consonant)  --->  (\u0A70)\r
2142 \r
2143         UnicodeSet vowel =new UnicodeSet("[\u0905-\u090A \u090F\u0910\u0913\u0914 \u093e-\u0942\u0947\u0948\u094B\u094C\u094D]");\r
2144         UnicodeSet non_vowel =new UnicodeSet("[\u0915-\u0928\u092A-\u0930]");\r
2145 \r
2146         UnicodeSetIterator vIter = new UnicodeSetIterator(vowel);\r
2147         UnicodeSetIterator nvIter = new UnicodeSetIterator(non_vowel);\r
2148         Transliterator trans = Transliterator.getInstance("Devanagari-Gurmukhi");\r
2149         StringBuffer src = new StringBuffer(" \u0902");\r
2150         StringBuffer expect = new StringBuffer(" \u0A02");\r
2151         while(vIter.next()){\r
2152             src.setCharAt(0,(char) vIter.codepoint);\r
2153             expect.setCharAt(0,(char) (vIter.codepoint+0x0100));\r
2154             expect(trans,src.toString(),expect.toString());\r
2155         }\r
2156 \r
2157         expect.setCharAt(1,'\u0A70');\r
2158         while(nvIter.next()){\r
2159             //src.setCharAt(0,(char) nvIter.codepoint);\r
2160             src.setCharAt(0,(char)nvIter.codepoint);\r
2161             expect.setCharAt(0,(char) (nvIter.codepoint+0x0100));\r
2162             expect(trans,src.toString(),expect.toString());\r
2163         }\r
2164     }\r
2165     /**\r
2166      * Test instantiation from a locale.\r
2167      */\r
2168     public void TestLocaleInstantiation() {\r
2169         Transliterator t;\r
2170         try{\r
2171             t = Transliterator.getInstance("te_IN-Latin");\r
2172             //expect(t, "\u0430", "a");\r
2173         }catch(IllegalArgumentException ex){\r
2174             warnln("Could not load locale data for obtaining the script used in the locale te_IN. "+ex.getMessage());\r
2175         }\r
2176         try{\r
2177             t = Transliterator.getInstance("ru_RU-Latin");\r
2178             expect(t, "\u0430", "a");\r
2179         }catch(IllegalArgumentException ex){\r
2180             warnln("Could not load locale data for obtaining the script used in the locale ru_RU. "+ex.getMessage());\r
2181         }\r
2182         try{\r
2183             t = Transliterator.getInstance("en-el");\r
2184             expect(t, "a", "\u03B1");\r
2185         }catch(IllegalArgumentException ex){\r
2186             warnln("Could not load locale data for obtaining the script used in the locale el. "+ ex.getMessage());\r
2187         }\r
2188     }\r
2189 \r
2190     /**\r
2191      * Test title case handling of accent (should ignore accents)\r
2192      */\r
2193     public void TestTitleAccents() {\r
2194         Transliterator t = Transliterator.getInstance("Title");\r
2195         expect(t, "a\u0300b can't abe", "A\u0300b Can't Abe");\r
2196     }\r
2197 \r
2198     /**\r
2199      * Basic test of a locale resource based rule.\r
2200      */\r
2201     public void TestLocaleResource() {\r
2202         String DATA[] = {\r
2203                 // id                    from             to\r
2204                 "Latin-Greek/UNGEGN",    "b",             "\u03bc\u03c0",\r
2205                 "Latin-el",              "b",             "\u03bc\u03c0",\r
2206                 "Latin-Greek",           "b",             "\u03B2",\r
2207                 "Greek-Latin/UNGEGN",    "\u03B2",        "v",\r
2208                 "el-Latin",              "\u03B2",        "v",\r
2209                 "Greek-Latin",           "\u03B2",        "b",\r
2210         };\r
2211         for (int i=0; i<DATA.length; i+=3) {\r
2212             Transliterator t = Transliterator.getInstance(DATA[i]);\r
2213             expect(t, DATA[i+1], DATA[i+2]);\r
2214         }\r
2215     }\r
2216 \r
2217     /**\r
2218      * Make sure parse errors reference the right line.\r
2219      */\r
2220     public void TestParseError() {\r
2221         String rule =\r
2222             "a > b;\n" +\r
2223             "# more stuff\n" +\r
2224             "d << b;";\r
2225         try {\r
2226             Transliterator t = Transliterator.createFromRules("ID", rule, Transliterator.FORWARD);\r
2227             if(t!=null){\r
2228                 errln("FAIL: Did not get expected exception");\r
2229             }\r
2230         } catch (IllegalArgumentException e) {\r
2231             String err = e.getMessage();\r
2232             if (err.indexOf("d << b") >= 0) {\r
2233                 logln("Ok: " + err);\r
2234             } else {\r
2235                 errln("FAIL: " + err);\r
2236             }\r
2237             return;\r
2238         }\r
2239         errln("FAIL: no syntax error");\r
2240     }\r
2241 \r
2242     /**\r
2243      * Make sure sets on output are disallowed.\r
2244      */\r
2245     public void TestOutputSet() {\r
2246         String rule = "$set = [a-cm-n]; b > $set;";\r
2247         Transliterator t = null;\r
2248         try {\r
2249             t = Transliterator.createFromRules("ID", rule, Transliterator.FORWARD);\r
2250             if(t!=null){\r
2251                 errln("FAIL: Did not get the expected exception");\r
2252             }\r
2253         } catch (IllegalArgumentException e) {\r
2254             logln("Ok: " + e.getMessage());\r
2255             return;\r
2256         }\r
2257         errln("FAIL: No syntax error");\r
2258     }\r
2259 \r
2260     /**\r
2261      * Test the use variable range pragma, making sure that use of\r
2262      * variable range characters is detected and flagged as an error.\r
2263      */\r
2264     public void TestVariableRange() {\r
2265         String rule = "use variable range 0x70 0x72; a > A; b > B; q > Q;";\r
2266         try {\r
2267             Transliterator t =\r
2268                 Transliterator.createFromRules("ID", rule, Transliterator.FORWARD);\r
2269             if(t!=null){\r
2270                 errln("FAIL: Did not get the expected exception");\r
2271             }\r
2272         } catch (IllegalArgumentException e) {\r
2273             logln("Ok: " + e.getMessage());\r
2274             return;\r
2275         }\r
2276         errln("FAIL: No syntax error");\r
2277     }\r
2278 \r
2279     /**\r
2280      * Test invalid post context error handling\r
2281      */\r
2282     public void TestInvalidPostContext() {\r
2283         try {\r
2284             Transliterator t =\r
2285                 Transliterator.createFromRules("ID", "a}b{c>d;", Transliterator.FORWARD);\r
2286             if(t!=null){\r
2287                 errln("FAIL: Did not get the expected exception");\r
2288             }\r
2289         } catch (IllegalArgumentException e) {\r
2290             String msg = e.getMessage();\r
2291             if (msg.indexOf("a}b{c") >= 0) {\r
2292                 logln("Ok: " + msg);\r
2293             } else {\r
2294                 errln("FAIL: " + msg);\r
2295             }\r
2296             return;\r
2297         }\r
2298         errln("FAIL: No syntax error");\r
2299     }\r
2300 \r
2301     /**\r
2302      * Test ID form variants\r
2303      */\r
2304     public void TestIDForms() {\r
2305         String DATA[] = {\r
2306                 "NFC", null, "NFD",\r
2307                 "nfd", null, "NFC", // make sure case is ignored\r
2308                 "Any-NFKD", null, "Any-NFKC",\r
2309                 "Null", null, "Null",\r
2310                 "-nfkc", "nfkc", "NFKD",\r
2311                 "-nfkc/", "nfkc", "NFKD",\r
2312                 "Latin-Greek/UNGEGN", null, "Greek-Latin/UNGEGN",\r
2313                 "Greek/UNGEGN-Latin", "Greek-Latin/UNGEGN", "Latin-Greek/UNGEGN",\r
2314                 "Bengali-Devanagari/", "Bengali-Devanagari", "Devanagari-Bengali",\r
2315                 "Source-", null, null,\r
2316                 "Source/Variant-", null, null,\r
2317                 "Source-/Variant", null, null,\r
2318                 "/Variant", null, null,\r
2319                 "/Variant-", null, null,\r
2320                 "-/Variant", null, null,\r
2321                 "-/", null, null,\r
2322                 "-", null, null,\r
2323                 "/", null, null,\r
2324         };\r
2325 \r
2326         for (int i=0; i<DATA.length; i+=3) {\r
2327             String ID = DATA[i];\r
2328             String expID = DATA[i+1];\r
2329             String expInvID = DATA[i+2];\r
2330             boolean expValid = (expInvID != null);\r
2331             if (expID == null) {\r
2332                 expID = ID;\r
2333             }\r
2334             try {\r
2335                 Transliterator t =\r
2336                     Transliterator.getInstance(ID);\r
2337                 Transliterator u = t.getInverse();\r
2338                 if (t.getID().equals(expID) &&\r
2339                         u.getID().equals(expInvID)) {\r
2340                     logln("Ok: " + ID + ".getInverse() => " + expInvID);\r
2341                 } else {\r
2342                     errln("FAIL: getInstance(" + ID + ") => " +\r
2343                             t.getID() + " x getInverse() => " + u.getID() +\r
2344                             ", expected " + expInvID);\r
2345                 }\r
2346             } catch (IllegalArgumentException e) {\r
2347                 if (!expValid) {\r
2348                     logln("Ok: getInstance(" + ID + ") => " + e.getMessage());\r
2349                 } else {\r
2350                     errln("FAIL: getInstance(" + ID + ") => " + e.getMessage());\r
2351                 }\r
2352             }\r
2353         }\r
2354     }\r
2355 \r
2356     void checkRules(String label, Transliterator t2, String testRulesForward) {\r
2357         String rules2 = t2.toRules(true);\r
2358         //rules2 = TestUtility.replaceAll(rules2, new UnicodeSet("[' '\n\r]"), "");\r
2359         rules2 = TestUtility.replace(rules2, " ", "");\r
2360         rules2 = TestUtility.replace(rules2, "\n", "");\r
2361         rules2 = TestUtility.replace(rules2, "\r", "");\r
2362         testRulesForward = TestUtility.replace(testRulesForward, " ", "");\r
2363 \r
2364         if (!rules2.equals(testRulesForward)) {\r
2365             errln(label);\r
2366             logln("GENERATED RULES: " + rules2);\r
2367             logln("SHOULD BE:       " + testRulesForward);\r
2368         }\r
2369     }\r
2370 \r
2371     /**\r
2372      * Mark's toRules test.\r
2373      */\r
2374     public void TestToRulesMark() {\r
2375 \r
2376         String testRules =\r
2377             "::[[:Latin:][:Mark:]];"\r
2378             + "::NFKD (NFC);"\r
2379             + "::Lower (Lower);"\r
2380             + "a <> \\u03B1;" // alpha\r
2381             + "::NFKC (NFD);"\r
2382             + "::Upper (Lower);"\r
2383             + "::Lower ();"\r
2384             + "::([[:Greek:][:Mark:]]);"\r
2385             ;\r
2386         String testRulesForward =\r
2387             "::[[:Latin:][:Mark:]];"\r
2388             + "::NFKD(NFC);"\r
2389             + "::Lower(Lower);"\r
2390             + "a > \\u03B1;"\r
2391             + "::NFKC(NFD);"\r
2392             + "::Upper (Lower);"\r
2393             + "::Lower ();"\r
2394             ;\r
2395         String testRulesBackward =\r
2396             "::[[:Greek:][:Mark:]];"\r
2397             + "::Lower (Upper);"\r
2398             + "::NFD(NFKC);"\r
2399             + "\\u03B1 > a;"\r
2400             + "::Lower(Lower);"\r
2401             + "::NFC(NFKD);"\r
2402             ;\r
2403         String source = "\u00E1"; // a-acute\r
2404         String target = "\u03AC"; // alpha-acute\r
2405 \r
2406         Transliterator t2 = Transliterator.createFromRules("source-target", testRules, Transliterator.FORWARD);\r
2407         Transliterator t3 = Transliterator.createFromRules("target-source", testRules, Transliterator.REVERSE);\r
2408 \r
2409         expect(t2, source, target);\r
2410         expect(t3, target, source);\r
2411 \r
2412         checkRules("Failed toRules FORWARD", t2, testRulesForward);\r
2413         checkRules("Failed toRules BACKWARD", t3, testRulesBackward);\r
2414     }\r
2415 \r
2416     /**\r
2417      * Test Escape and Unescape transliterators.\r
2418      */\r
2419     public void TestEscape() {\r
2420         expect(Transliterator.getInstance("Hex-Any"),\r
2421                 "\\x{40}\\U00000031&#x32;&#81;",\r
2422         "@12Q");\r
2423         expect(Transliterator.getInstance("Any-Hex/C"),\r
2424                 CharsToUnicodeString("A\\U0010BEEF\\uFEED"),\r
2425         "\\u0041\\U0010BEEF\\uFEED");\r
2426         expect(Transliterator.getInstance("Any-Hex/Java"),\r
2427                 CharsToUnicodeString("A\\U0010BEEF\\uFEED"),\r
2428         "\\u0041\\uDBEF\\uDEEF\\uFEED");\r
2429         expect(Transliterator.getInstance("Any-Hex/Perl"),\r
2430                 CharsToUnicodeString("A\\U0010BEEF\\uFEED"),\r
2431         "\\x{41}\\x{10BEEF}\\x{FEED}");\r
2432     }\r
2433 \r
2434     /**\r
2435      * Make sure display names of variants look reasonable.\r
2436      */\r
2437     public void TestDisplayName() {\r
2438         String DATA[] = {\r
2439                 // ID, forward name, reverse name\r
2440                 // Update the text as necessary -- the important thing is\r
2441                 // not the text itself, but how various cases are handled.\r
2442 \r
2443                 // Basic test\r
2444                 "Any-Hex", "Any to Hex Escape", "Hex Escape to Any",\r
2445 \r
2446                 // Variants\r
2447                 "Any-Hex/Perl", "Any to Hex Escape/Perl", "Hex Escape to Any/Perl",\r
2448 \r
2449                 // Target-only IDs\r
2450                 "NFC", "Any to NFC", "Any to NFD",\r
2451         };\r
2452 \r
2453         Locale US = Locale.US;\r
2454 \r
2455         for (int i=0; i<DATA.length; i+=3) {\r
2456             String name = Transliterator.getDisplayName(DATA[i], US);\r
2457             if (!name.equals(DATA[i+1])) {\r
2458                 errln("FAIL: " + DATA[i] + ".getDisplayName() => " +\r
2459                         name + ", expected " + DATA[i+1]);\r
2460             } else {\r
2461                 logln("Ok: " + DATA[i] + ".getDisplayName() => " + name);\r
2462             }\r
2463             Transliterator t = Transliterator.getInstance(DATA[i], Transliterator.REVERSE);\r
2464             name = Transliterator.getDisplayName(t.getID(), US);\r
2465             if (!name.equals(DATA[i+2])) {\r
2466                 errln("FAIL: " + t.getID() + ".getDisplayName() => " +\r
2467                         name + ", expected " + DATA[i+2]);\r
2468             } else {\r
2469                 logln("Ok: " + t.getID() + ".getDisplayName() => " + name);\r
2470             }\r
2471 \r
2472             // Cover getDisplayName(String)\r
2473             ULocale save = ULocale.getDefault();\r
2474             ULocale.setDefault(ULocale.US);\r
2475             String name2 = Transliterator.getDisplayName(t.getID());\r
2476             if (!name.equals(name2))\r
2477                 errln("FAIL: getDisplayName with default locale failed");\r
2478             ULocale.setDefault(save);\r
2479         }\r
2480     }\r
2481 \r
2482     /**\r
2483      * Test anchor masking\r
2484      */\r
2485     public void TestAnchorMasking() {\r
2486         String rule = "^a > Q; a > q;";\r
2487         try {\r
2488             Transliterator t = Transliterator.createFromRules("ID", rule, Transliterator.FORWARD);\r
2489             if(t==null){\r
2490                 errln("FAIL: Did not get the expected exception");\r
2491             }\r
2492         } catch (IllegalArgumentException e) {\r
2493             errln("FAIL: " + rule + " => " + e);\r
2494         }\r
2495     }\r
2496 \r
2497     /**\r
2498      * This test is not in trnstst.cpp. This test has been moved from com/ibm/icu/dev/test/lang/TestUScript.java\r
2499      * during ICU4J modularization to remove dependency of tests on Transliterator.\r
2500      */\r
2501     public void TestScriptAllCodepoints(){\r
2502         int code;\r
2503         HashSet  scriptIdsChecked   = new HashSet();\r
2504         HashSet  scriptAbbrsChecked = new HashSet();\r
2505         for( int i =0; i <= 0x10ffff; i++){\r
2506             code = UScript.getScript(i);\r
2507             if(code==UScript.INVALID_CODE){\r
2508                 errln("UScript.getScript for codepoint 0x"+ hex(i)+" failed");\r
2509             }\r
2510             String id =UScript.getName(code);\r
2511             String abbr = UScript.getShortName(code);\r
2512             if (!scriptIdsChecked.contains(id)) {\r
2513                 scriptIdsChecked.add(id);\r
2514                 String newId ="[:"+id+":];NFD";\r
2515                 try{\r
2516                     Transliterator t = Transliterator.getInstance(newId);\r
2517                     if(t==null){\r
2518                         errln("Failed to create transliterator for "+hex(i)+\r
2519                                 " script code: " +id);\r
2520                     }\r
2521                 }catch(Exception e){\r
2522                     errln("Failed to create transliterator for "+hex(i)\r
2523                             +" script code: " +id\r
2524                             + " Exception: "+e.getMessage());\r
2525                 }\r
2526             }\r
2527             if (!scriptAbbrsChecked.contains(abbr)) {\r
2528                 scriptAbbrsChecked.add(abbr);\r
2529                 String newAbbrId ="[:"+abbr+":];NFD";\r
2530                 try{\r
2531                     Transliterator t = Transliterator.getInstance(newAbbrId);\r
2532                     if(t==null){\r
2533                         errln("Failed to create transliterator for "+hex(i)+\r
2534                                 " script code: " +abbr);\r
2535                     }\r
2536                 }catch(Exception e){\r
2537                     errln("Failed to create transliterator for "+hex(i)\r
2538                             +" script code: " +abbr\r
2539                             + " Exception: "+e.getMessage());\r
2540                 }\r
2541             }\r
2542         }\r
2543     }\r
2544 \r
2545 \r
2546     static final String[][] registerRules = {\r
2547         {"Any-Dev1", "x > X; y > Y;"},\r
2548         {"Any-Dev2", "XY > Z"},\r
2549         {"Greek-Latin/FAKE",\r
2550             "[^[:L:][:M:]] { \u03bc\u03c0 > b ; "+\r
2551             "\u03bc\u03c0 } [^[:L:][:M:]] > b ; "+\r
2552             "[^[:L:][:M:]] { [\u039c\u03bc][\u03a0\u03c0] > B ; "+\r
2553             "[\u039c\u03bc][\u03a0\u03c0] } [^[:L:][:M:]] > B ;"\r
2554         },\r
2555     };\r
2556 \r
2557     static final String DESERET_DEE = UTF16.valueOf(0x10414);\r
2558     static final String DESERET_dee = UTF16.valueOf(0x1043C);\r
2559 \r
2560     static final String[][] testCases = {\r
2561 \r
2562         // NORMALIZATION\r
2563         // should add more test cases\r
2564         {"NFD" , "a\u0300 \u00E0 \u1100\u1161 \uFF76\uFF9E\u03D3"},\r
2565         {"NFC" , "a\u0300 \u00E0 \u1100\u1161 \uFF76\uFF9E\u03D3"},\r
2566         {"NFKD", "a\u0300 \u00E0 \u1100\u1161 \uFF76\uFF9E\u03D3"},\r
2567         {"NFKC", "a\u0300 \u00E0 \u1100\u1161 \uFF76\uFF9E\u03D3"},\r
2568 \r
2569         // mp -> b BUG\r
2570         {"Greek-Latin/UNGEGN", "(\u03BC\u03C0)", "(b)"},\r
2571         {"Greek-Latin/FAKE", "(\u03BC\u03C0)", "(b)"},\r
2572 \r
2573         // check for devanagari bug\r
2574         {"nfd;Dev1;Dev2;nfc", "xy", "Z"},\r
2575 \r
2576         // ff, i, dotless-i, I, dotted-I, LJLjlj deseret deeDEE\r
2577         {"Title", "ab'cD ffi\u0131I\u0130 \u01C7\u01C8\u01C9 " + DESERET_dee + DESERET_DEE,\r
2578             "Ab'cd Ffi\u0131ii\u0307 \u01C8\u01C9\u01C9 " + DESERET_DEE + DESERET_dee},\r
2579             //TODO: enable this test once Titlecase works right\r
2580             //{"Title", "\uFB00i\u0131I\u0130 \u01C7\u01C8\u01C9 " + DESERET_dee + DESERET_DEE,\r
2581             //          "Ffi\u0131ii \u01C8\u01C9\u01C9 " + DESERET_DEE + DESERET_dee},\r
2582 \r
2583             {"Upper", "ab'cD \uFB00i\u0131I\u0130 \u01C7\u01C8\u01C9 " + DESERET_dee + DESERET_DEE,\r
2584                 "AB'CD FFIII\u0130 \u01C7\u01C7\u01C7 " + DESERET_DEE + DESERET_DEE},\r
2585                 {"Lower", "ab'cD \uFB00i\u0131I\u0130 \u01C7\u01C8\u01C9 " + DESERET_dee + DESERET_DEE,\r
2586                     "ab'cd \uFB00i\u0131ii\u0307 \u01C9\u01C9\u01C9 " + DESERET_dee + DESERET_dee},\r
2587 \r
2588                     {"Upper", "ab'cD \uFB00i\u0131I\u0130 \u01C7\u01C8\u01C9 " + DESERET_dee + DESERET_DEE},\r
2589                     {"Lower", "ab'cD \uFB00i\u0131I\u0130 \u01C7\u01C8\u01C9 " + DESERET_dee + DESERET_DEE},\r
2590 \r
2591                     // FORMS OF S\r
2592                     {"Greek-Latin/UNGEGN", "\u03C3 \u03C3\u03C2 \u03C2\u03C3", "s ss s\u0331s\u0331"},\r
2593                     {"Latin-Greek/UNGEGN", "s ss s\u0331s\u0331", "\u03C3 \u03C3\u03C2 \u03C2\u03C3"},\r
2594                     {"Greek-Latin", "\u03C3 \u03C3\u03C2 \u03C2\u03C3", "s ss s\u0331s\u0331"},\r
2595                     {"Latin-Greek", "s ss s\u0331s\u0331", "\u03C3 \u03C3\u03C2 \u03C2\u03C3"},\r
2596 \r
2597                     // Tatiana bug\r
2598                     // Upper: TAT\u02B9\u00C2NA\r
2599                     // Lower: tat\u02B9\u00E2na\r
2600                     // Title: Tat\u02B9\u00E2na\r
2601                     {"Upper", "tat\u02B9\u00E2na", "TAT\u02B9\u00C2NA"},\r
2602                     {"Lower", "TAT\u02B9\u00C2NA", "tat\u02B9\u00E2na"},\r
2603                     {"Title", "tat\u02B9\u00E2na", "Tat\u02B9\u00E2na"},\r
2604     };\r
2605 \r
2606     public void TestSpecialCases() {\r
2607 \r
2608         for (int i = 0; i < registerRules.length; ++i) {\r
2609             Transliterator t = Transliterator.createFromRules(registerRules[i][0],\r
2610                     registerRules[i][1], Transliterator.FORWARD);\r
2611             DummyFactory.add(registerRules[i][0], t);\r
2612         }\r
2613         for (int i = 0; i < testCases.length; ++i) {\r
2614             String name = testCases[i][0];\r
2615             Transliterator t = Transliterator.getInstance(name);\r
2616             String id = t.getID();\r
2617             String source = testCases[i][1];\r
2618             String target = null;\r
2619 \r
2620             // Automatic generation of targets, to make it simpler to add test cases (and more fail-safe)\r
2621 \r
2622             if (testCases[i].length > 2)    target = testCases[i][2];\r
2623             else if (id.equalsIgnoreCase("NFD"))    target = com.ibm.icu.text.Normalizer.normalize(source, com.ibm.icu.text.Normalizer.NFD);\r
2624             else if (id.equalsIgnoreCase("NFC"))    target = com.ibm.icu.text.Normalizer.normalize(source, com.ibm.icu.text.Normalizer.NFC);\r
2625             else if (id.equalsIgnoreCase("NFKD"))   target = com.ibm.icu.text.Normalizer.normalize(source, com.ibm.icu.text.Normalizer.NFKD);\r
2626             else if (id.equalsIgnoreCase("NFKC"))   target = com.ibm.icu.text.Normalizer.normalize(source, com.ibm.icu.text.Normalizer.NFKC);\r
2627             else if (id.equalsIgnoreCase("Lower"))  target = UCharacter.toLowerCase(Locale.US, source);\r
2628             else if (id.equalsIgnoreCase("Upper"))  target = UCharacter.toUpperCase(Locale.US, source);\r
2629 \r
2630             expect(t, source, target);\r
2631         }\r
2632         for (int i = 0; i < registerRules.length; ++i) {\r
2633             Transliterator.unregister(registerRules[i][0]);\r
2634         }\r
2635     }\r
2636 \r
2637     // seems like there should be an easier way to just register an instance of a transliterator\r
2638 \r
2639     static class DummyFactory implements Transliterator.Factory {\r
2640         static DummyFactory singleton = new DummyFactory();\r
2641         static HashMap m = new HashMap();\r
2642 \r
2643         // Since Transliterators are immutable, we don't have to clone on set & get\r
2644         static void add(String ID, Transliterator t) {\r
2645             m.put(ID, t);\r
2646             //System.out.println("Registering: " + ID + ", " + t.toRules(true));\r
2647             Transliterator.registerFactory(ID, singleton);\r
2648         }\r
2649         public Transliterator getInstance(String ID) {\r
2650             return (Transliterator) m.get(ID);\r
2651         }\r
2652     }\r
2653 \r
2654     public void TestCasing() {\r
2655         Transliterator toLower = Transliterator.getInstance("lower");\r
2656         Transliterator toCasefold = Transliterator.getInstance("casefold");\r
2657         Transliterator toUpper = Transliterator.getInstance("upper");\r
2658         Transliterator toTitle = Transliterator.getInstance("title");\r
2659         for (int i = 0; i < 0x600; ++i) {\r
2660             String s = UTF16.valueOf(i);\r
2661 \r
2662             String lower = UCharacter.toLowerCase(ULocale.ROOT, s);\r
2663             assertEquals("Lowercase", lower, toLower.transform(s));\r
2664 \r
2665             String casefold = UCharacter.foldCase(s, true);\r
2666             assertEquals("Casefold", casefold, toCasefold.transform(s));\r
2667 \r
2668             String title = UCharacter.toTitleCase(ULocale.ROOT, s, null);\r
2669             assertEquals("Title", title, toTitle.transform(s));\r
2670 \r
2671             String upper = UCharacter.toUpperCase(ULocale.ROOT, s);\r
2672             assertEquals("Upper", upper, toUpper.transform(s));\r
2673         }\r
2674     }\r
2675 \r
2676     public void TestSurrogateCasing () {\r
2677         // check that casing handles surrogates\r
2678         // titlecase is currently defective\r
2679         int dee = UTF16.charAt(DESERET_dee,0);\r
2680         int DEE = UCharacter.toTitleCase(dee);\r
2681         if (!UTF16.valueOf(DEE).equals(DESERET_DEE)) {\r
2682             errln("Fails titlecase of surrogates" + Integer.toString(dee,16) + ", " + Integer.toString(DEE,16));\r
2683         }\r
2684 \r
2685         if (!UCharacter.toUpperCase(DESERET_dee + DESERET_DEE).equals(DESERET_DEE + DESERET_DEE)) {\r
2686             errln("Fails uppercase of surrogates");\r
2687         }\r
2688 \r
2689         if (!UCharacter.toLowerCase(DESERET_dee + DESERET_DEE).equals(DESERET_dee + DESERET_dee)) {\r
2690             errln("Fails lowercase of surrogates");\r
2691         }\r
2692     }\r
2693 \r
2694     // Check to see that incremental gets at least part way through a reasonable string.\r
2695 \r
2696     public void TestIncrementalProgress() {\r
2697         String latinTest = "The Quick Brown Fox.";\r
2698         String devaTest = Transliterator.getInstance("Latin-Devanagari").transliterate(latinTest);\r
2699         String kataTest = Transliterator.getInstance("Latin-Katakana").transliterate(latinTest);\r
2700         String[][] tests = {\r
2701                 {"Any", latinTest},\r
2702                 {"Latin", latinTest},\r
2703                 {"Halfwidth", latinTest},\r
2704                 {"Devanagari", devaTest},\r
2705                 {"Katakana", kataTest},\r
2706         };\r
2707 \r
2708         Enumeration sources = Transliterator.getAvailableSources();\r
2709         while(sources.hasMoreElements()) {\r
2710             String source = (String) sources.nextElement();\r
2711             String test = findMatch(source, tests);\r
2712             if (test == null) {\r
2713                 logln("Skipping " + source + "-X");\r
2714                 continue;\r
2715             }\r
2716             Enumeration targets = Transliterator.getAvailableTargets(source);\r
2717             while(targets.hasMoreElements()) {\r
2718                 String target = (String) targets.nextElement();\r
2719                 Enumeration variants = Transliterator.getAvailableVariants(source, target);\r
2720                 while(variants.hasMoreElements()) {\r
2721                     String variant = (String) variants.nextElement();\r
2722                     String id = source + "-" + target + "/" + variant;\r
2723                     logln("id: " + id);\r
2724 \r
2725                     String filter = getTranslitTestFilter();\r
2726                     if (filter != null && id.indexOf(filter) < 0) continue;\r
2727 \r
2728                     Transliterator t = Transliterator.getInstance(id);\r
2729                     CheckIncrementalAux(t, test);\r
2730 \r
2731                     String rev = t.transliterate(test);\r
2732                     Transliterator inv = t.getInverse();\r
2733                     CheckIncrementalAux(inv, rev);\r
2734                 }\r
2735             }\r
2736         }\r
2737     }\r
2738 \r
2739     public String findMatch (String source, String[][] pairs) {\r
2740         for (int i = 0; i < pairs.length; ++i) {\r
2741             if (source.equalsIgnoreCase(pairs[i][0])) return pairs[i][1];\r
2742         }\r
2743         return null;\r
2744     }\r
2745 \r
2746     public void CheckIncrementalAux(Transliterator t, String input) {\r
2747 \r
2748         Replaceable test = new ReplaceableString(input);\r
2749         Transliterator.Position pos = new Transliterator.Position(0, test.length(), 0, test.length());\r
2750         t.transliterate(test, pos);\r
2751         boolean gotError = false;\r
2752 \r
2753         // we have a few special cases. Any-Remove (pos.start = 0, but also = limit) and U+XXXXX?X?\r
2754 \r
2755         if (pos.start == 0 && pos.limit != 0 && !t.getID().equals("Hex-Any/Unicode")) {\r
2756             errln("No Progress, " + t.getID() + ": " + UtilityExtensions.formatInput(test, pos));\r
2757             gotError = true;\r
2758         } else {\r
2759             logln("PASS Progress, " + t.getID() + ": " + UtilityExtensions.formatInput(test, pos));\r
2760         }\r
2761         t.finishTransliteration(test, pos);\r
2762         if (pos.start != pos.limit) {\r
2763             errln("Incomplete, " + t.getID() + ":  " + UtilityExtensions.formatInput(test, pos));\r
2764             gotError = true;\r
2765         }\r
2766         if(!gotError){\r
2767             //errln("FAIL: Did not get expected error");\r
2768         }\r
2769     }\r
2770 \r
2771     public void TestFunction() {\r
2772         // Careful with spacing and ';' here:  Phrase this exactly\r
2773         // as toRules() is going to return it.  If toRules() changes\r
2774         // with regard to spacing or ';', then adjust this string.\r
2775         String rule =\r
2776             "([:Lu:]) > $1 '(' &Lower( $1 ) '=' &Hex( &Any-Lower( $1 ) ) ')';";\r
2777 \r
2778         Transliterator t = Transliterator.createFromRules("Test", rule, Transliterator.FORWARD);\r
2779         if (t == null) {\r
2780             errln("FAIL: createFromRules failed");\r
2781             return;\r
2782         }\r
2783 \r
2784         String r = t.toRules(true);\r
2785         if (r.equals(rule)) {\r
2786             logln("OK: toRules() => " + r);\r
2787         } else {\r
2788             errln("FAIL: toRules() => " + r +\r
2789                     ", expected " + rule);\r
2790         }\r
2791 \r
2792         expect(t, "The Quick Brown Fox",\r
2793         "T(t=\\u0074)he Q(q=\\u0071)uick B(b=\\u0062)rown F(f=\\u0066)ox");\r
2794         rule =\r
2795             "([^\\ -\\u007F]) > &Hex/Unicode( $1 ) ' ' &Name( $1 ) ;";\r
2796 \r
2797         t = Transliterator.createFromRules("Test", rule, Transliterator.FORWARD);\r
2798         if (t == null) {\r
2799             errln("FAIL: createFromRules failed");\r
2800             return;\r
2801         }\r
2802 \r
2803         r = t.toRules(true);\r
2804         if (r.equals(rule)) {\r
2805             logln("OK: toRules() => " + r);\r
2806         } else {\r
2807             errln("FAIL: toRules() => " + r +\r
2808                     ", expected " + rule);\r
2809         }\r
2810 \r
2811         expect(t, "\u0301",\r
2812         "U+0301 \\N{COMBINING ACUTE ACCENT}");\r
2813     }\r
2814 \r
2815     public void TestInvalidBackRef() {\r
2816         String rule =  ". > $1;";\r
2817         String rule2 ="(.) <> &hex/unicode($1) &name($1); . > $1; [{}] >\u0020;";\r
2818         try {\r
2819             Transliterator t = Transliterator.createFromRules("Test", rule, Transliterator.FORWARD);\r
2820             if (t != null) {\r
2821                 errln("FAIL: createFromRules should have returned NULL");\r
2822             }\r
2823             errln("FAIL: Ok: . > $1; => no error");\r
2824             Transliterator t2= Transliterator.createFromRules("Test2", rule2, Transliterator.FORWARD);\r
2825             if (t2 != null) {\r
2826                 errln("FAIL: createFromRules should have returned NULL");\r
2827             }\r
2828             errln("FAIL: Ok: . > $1; => no error");\r
2829         } catch (IllegalArgumentException e) {\r
2830             logln("Ok: . > $1; => " + e.getMessage());\r
2831         }\r
2832     }\r
2833 \r
2834     public void TestMulticharStringSet() {\r
2835         // Basic testing\r
2836         String rule =\r
2837             "       [{aa}]       > x;" +\r
2838             "         a          > y;" +\r
2839             "       [b{bc}]      > z;" +\r
2840             "[{gd}] { e          > q;" +\r
2841             "         e } [{fg}] > r;" ;\r
2842 \r
2843         Transliterator t = Transliterator.createFromRules("Test", rule, Transliterator.FORWARD);\r
2844         if (t == null) {\r
2845             errln("FAIL: createFromRules failed");\r
2846             return;\r
2847         }\r
2848 \r
2849         expect(t, "a aa ab bc d gd de gde gdefg ddefg",\r
2850         "y x yz z d gd de gdq gdqfg ddrfg");\r
2851 \r
2852         // Overlapped string test.  Make sure that when multiple\r
2853         // strings can match that the longest one is matched.\r
2854         rule =\r
2855             "    [a {ab} {abc}]    > x;" +\r
2856             "           b          > y;" +\r
2857             "           c          > z;" +\r
2858             " q [t {st} {rst}] { e > p;" ;\r
2859 \r
2860         t = Transliterator.createFromRules("Test", rule, Transliterator.FORWARD);\r
2861         if (t == null) {\r
2862             errln("FAIL: createFromRules failed");\r
2863             return;\r
2864         }\r
2865 \r
2866         expect(t, "a ab abc qte qste qrste",\r
2867         "x x x qtp qstp qrstp");\r
2868     }\r
2869 \r
2870     /**\r
2871      * Test that user-registered transliterators can be used under function\r
2872      * syntax.\r
2873      */\r
2874     public void TestUserFunction() {\r
2875         Transliterator t;\r
2876 \r
2877         // There's no need to register inverses if we don't use them\r
2878         TestUserFunctionFactory.add("Any-gif",\r
2879                 Transliterator.createFromRules("gif",\r
2880                         "'\\'u(..)(..) > '<img src=\"http://www.unicode.org/gifs/24/' $1 '/U' $1$2 '.gif\">';",\r
2881                         Transliterator.FORWARD));\r
2882         //TestUserFunctionFactory.add("gif-Any", Transliterator.getInstance("Any-Null"));\r
2883 \r
2884         TestUserFunctionFactory.add("Any-RemoveCurly",\r
2885                 Transliterator.createFromRules("RemoveCurly", "[\\{\\}] > ; \\\\N > ;", Transliterator.FORWARD));\r
2886         //TestUserFunctionFactory.add("RemoveCurly-Any", Transliterator.getInstance("Any-Null"));\r
2887 \r
2888         logln("Trying &hex");\r
2889         t = Transliterator.createFromRules("hex2", "(.) > &hex($1);", Transliterator.FORWARD);\r
2890         logln("Registering");\r
2891         TestUserFunctionFactory.add("Any-hex2", t);\r
2892         t = Transliterator.getInstance("Any-hex2");\r
2893         expect(t, "abc", "\\u0061\\u0062\\u0063");\r
2894 \r
2895         logln("Trying &gif");\r
2896         t = Transliterator.createFromRules("gif2", "(.) > &Gif(&Hex2($1));", Transliterator.FORWARD);\r
2897         logln("Registering");\r
2898         TestUserFunctionFactory.add("Any-gif2", t);\r
2899         t = Transliterator.getInstance("Any-gif2");\r
2900         expect(t, "ab", "<img src=\"http://www.unicode.org/gifs/24/00/U0061.gif\">" +\r
2901         "<img src=\"http://www.unicode.org/gifs/24/00/U0062.gif\">");\r
2902 \r
2903         // Test that filters are allowed after &\r
2904         t = Transliterator.createFromRules("test",\r
2905                 "(.) > &Hex($1) ' ' &Any-RemoveCurly(&Name($1)) ' ';", Transliterator.FORWARD);\r
2906         expect(t, "abc", "\\u0061 LATIN SMALL LETTER A \\u0062 LATIN SMALL LETTER B \\u0063 LATIN SMALL LETTER C ");\r
2907 \r
2908         // Unregister our test stuff\r
2909         TestUserFunctionFactory.unregister();\r
2910     }\r
2911 \r
2912     static class TestUserFunctionFactory implements Transliterator.Factory {\r
2913         static TestUserFunctionFactory singleton = new TestUserFunctionFactory();\r
2914         static HashMap m = new HashMap();\r
2915 \r
2916         static void add(String ID, Transliterator t) {\r
2917             m.put(new CaseInsensitiveString(ID), t);\r
2918             Transliterator.registerFactory(ID, singleton);\r
2919         }\r
2920 \r
2921         public Transliterator getInstance(String ID) {\r
2922             return (Transliterator) m.get(new CaseInsensitiveString(ID));\r
2923         }\r
2924 \r
2925         static void unregister() {\r
2926             Iterator ids = m.keySet().iterator();\r
2927             while (ids.hasNext()) {\r
2928                 CaseInsensitiveString id = (CaseInsensitiveString) ids.next();\r
2929                 Transliterator.unregister(id.getString());\r
2930                 ids.remove(); // removes pair from m\r
2931             }\r
2932         }\r
2933     }\r
2934 \r
2935     /**\r
2936      * Test the Any-X transliterators.\r
2937      */\r
2938     public void TestAnyX() {\r
2939         Transliterator anyLatin =\r
2940             Transliterator.getInstance("Any-Latin", Transliterator.FORWARD);\r
2941 \r
2942         expect(anyLatin,\r
2943                 "greek:\u03B1\u03B2\u03BA\u0391\u0392\u039A hiragana:\u3042\u3076\u304F cyrillic:\u0430\u0431\u0446",\r
2944         "greek:abkABK hiragana:abuku cyrillic:abc");\r
2945     }\r
2946 \r
2947     /**\r
2948      * Test Any-X transliterators with sample letters from all scripts.\r
2949      */\r
2950     public void TestAny() {\r
2951         UnicodeSet alphabetic = (UnicodeSet) new UnicodeSet("[:alphabetic:]").freeze();\r
2952         StringBuffer testString = new StringBuffer();\r
2953         for (int i = 0; i < UScript.CODE_LIMIT; ++i) {\r
2954             UnicodeSet sample = new UnicodeSet().applyPropertyAlias("script", UScript.getShortName(i)).retainAll(alphabetic);\r
2955             int count = 5;\r
2956             for (UnicodeSetIterator it = new UnicodeSetIterator(sample); it.next();) {\r
2957                 testString.append(it.getString());\r
2958                 if (--count < 0) break;\r
2959             }\r
2960         }\r
2961         logln("Sample set for Any-Latin: " + testString);\r
2962         Transliterator anyLatin = Transliterator.getInstance("any-Latn");\r
2963         String result = anyLatin.transliterate(testString.toString());\r
2964         logln("Sample result for Any-Latin: " + result);\r
2965     }\r
2966 \r
2967 \r
2968     /**\r
2969      * Test the source and target set API.  These are only implemented\r
2970      * for RBT and CompoundTransliterator at this time.\r
2971      */\r
2972     public void TestSourceTargetSet() {\r
2973         // Rules\r
2974         String r =\r
2975             "a > b; " +\r
2976             "r [x{lu}] > q;";\r
2977 \r
2978         // Expected source\r
2979         UnicodeSet expSrc = new UnicodeSet("[arx{lu}]");\r
2980 \r
2981         // Expected target\r
2982         UnicodeSet expTrg = new UnicodeSet("[bq]");\r
2983 \r
2984         Transliterator t = Transliterator.createFromRules("test", r, Transliterator.FORWARD);\r
2985         UnicodeSet src = t.getSourceSet();\r
2986         UnicodeSet trg = t.getTargetSet();\r
2987 \r
2988         if (src.equals(expSrc) && trg.equals(expTrg)) {\r
2989             logln("Ok: " + r + " => source = " + src.toPattern(true) +\r
2990                     ", target = " + trg.toPattern(true));\r
2991         } else {\r
2992             errln("FAIL: " + r + " => source = " + src.toPattern(true) +\r
2993                     ", expected " + expSrc.toPattern(true) +\r
2994                     "; target = " + trg.toPattern(true) +\r
2995                     ", expected " + expTrg.toPattern(true));\r
2996         }\r
2997     }\r
2998 \r
2999     /**\r
3000      * Test handling of rule whitespace, for both RBT and UnicodeSet.\r
3001      */\r
3002     public void TestRuleWhitespace() {\r
3003         // Rules\r
3004         String r = "a > \u200E b;";\r
3005 \r
3006         Transliterator t = Transliterator.createFromRules("test", r, Transliterator.FORWARD);\r
3007 \r
3008         expect(t, "a", "b");\r
3009 \r
3010         // UnicodeSet\r
3011         UnicodeSet set = new UnicodeSet("[a \u200E]");\r
3012 \r
3013         if (set.contains(0x200E)) {\r
3014             errln("FAIL: U+200E not being ignored by UnicodeSet");\r
3015         }\r
3016     }\r
3017 \r
3018     public void TestAlternateSyntax() {\r
3019         // U+2206 == &\r
3020         // U+2190 == <\r
3021         // U+2192 == >\r
3022         // U+2194 == <>\r
3023         expect("a \u2192 x; b \u2190 y; c \u2194 z",\r
3024                 "abc",\r
3025         "xbz");\r
3026         expect("([:^ASCII:]) \u2192 \u2206Name($1);",\r
3027                 "<=\u2190; >=\u2192; <>=\u2194; &=\u2206",\r
3028         "<=\\N{LEFTWARDS ARROW}; >=\\N{RIGHTWARDS ARROW}; <>=\\N{LEFT RIGHT ARROW}; &=\\N{INCREMENT}");\r
3029     }\r
3030 \r
3031     public void TestPositionAPI() {\r
3032         Transliterator.Position a = new Transliterator.Position(3,5,7,11);\r
3033         Transliterator.Position b = new Transliterator.Position(a);\r
3034         Transliterator.Position c = new Transliterator.Position();\r
3035         c.set(a);\r
3036         // Call the toString() API:\r
3037         if (a.equals(b) && a.equals(c)) {\r
3038             logln("Ok: " + a + " == " + b + " == " + c);\r
3039         } else {\r
3040             errln("FAIL: " + a + " != " + b + " != " + c);\r
3041         }\r
3042     }\r
3043 \r
3044     //======================================================================\r
3045     // New tests for the ::BEGIN/::END syntax\r
3046     //======================================================================\r
3047 \r
    /**
     * Rule sets referenced by index from BEGIN_END_TEST_CASES, TestBeginEnd
     * and TestBeginEndToRules.
     *
     * Entries that relied on the ::BEGIN/::END syntax are commented out and
     * replaced by an empty-string placeholder so that the indexes of the
     * remaining entries do not shift.  Do not add or remove elements without
     * updating every BEGIN_END_RULES[n] reference.
     */
    private static final String[] BEGIN_END_RULES = new String[] {
        // [0]
        "abc > xy;"
        + "aba > z;",

        // [1]
        /*
        "::BEGIN;"
        + "abc > xy;"
        + "::END;"
        + "::BEGIN;"
        + "aba > z;"
        + "::END;",
         */
        "", // test case commented out below, this is here to keep from messing up the indexes

        // [2]
        /*
        "abc > xy;"
        + "::BEGIN;"
        + "aba > z;"
        + "::END;",
         */
        "", // test case commented out below, this is here to keep from messing up the indexes

        // [3]
        /*
        "::BEGIN;"
        + "abc > xy;"
        + "::END;"
        + "aba > z;",
         */
        "", // test case commented out below, this is here to keep from messing up the indexes

        // [4]
        "abc > xy;"
        + "::Null;"
        + "aba > z;",

        // [5]
        "::Upper;"
        + "ABC > xy;"
        + "AB > x;"
        + "C > z;"
        + "::Upper;"
        + "XYZ > p;"
        + "XY > q;"
        + "Z > r;"
        + "::Upper;",

        // [6]
        "$ws = [[:Separator:][\\u0009-\\u000C]$];"
        + "$delim = [\\-$ws];"
        + "$ws $delim* > ' ';"
        + "'-' $delim* > '-';",

        // [7]
        "::Null;"
        + "$ws = [[:Separator:][\\u0009-\\u000C]$];"
        + "$delim = [\\-$ws];"
        + "$ws $delim* > ' ';"
        + "'-' $delim* > '-';",

        // [8]
        "$ws = [[:Separator:][\\u0009-\\u000C]$];"
        + "$delim = [\\-$ws];"
        + "$ws $delim* > ' ';"
        + "'-' $delim* > '-';"
        + "::Null;",

        // [9]
        "$ws = [[:Separator:][\\u0009-\\u000C]$];"
        + "$delim = [\\-$ws];"
        + "::Null;"
        + "$ws $delim* > ' ';"
        + "'-' $delim* > '-';",

        // [10]
        /*
        "::BEGIN;"
        + "$ws = [[:Separator:][\\u0009-\\u000C]$];"
        + "$delim = [\\-$ws];"
        + "::END;"
        + "$ws $delim* > ' ';"
        + "'-' $delim* > '-';",
         */
        "", // test case commented out below, this is here to keep from messing up the indexes

        // [11]
        /*
        "$ws = [[:Separator:][\\u0009-\\u000C]$];"
        + "$delim = [\\-$ws];"
        + "::BEGIN;"
        + "$ws $delim* > ' ';"
        + "'-' $delim* > '-';"
        + "::END;",
         */
        "", // test case commented out below, this is here to keep from messing up the indexes

        // [12]
        /*
        "$ws = [[:Separator:][\\u0009-\\u000C]$];"
        + "$delim = [\\-$ws];"
        + "$ab = [ab];"
        + "::BEGIN;"
        + "$ws $delim* > ' ';"
        + "'-' $delim* > '-';"
        + "::END;"
        + "::BEGIN;"
        + "$ab { ' ' } $ab > '-';"
        + "c { ' ' > ;"
        + "::END;"
        + "::BEGIN;"
        + "'a-a' > a\\%|a;"
        + "::END;",
         */
        "", // test case commented out below, this is here to keep from messing up the indexes

        // [13]
        "$ws = [[:Separator:][\\u0009-\\u000C]$];"
        + "$delim = [\\-$ws];"
        + "$ab = [ab];"
        + "::Null;"
        + "$ws $delim* > ' ';"
        + "'-' $delim* > '-';"
        + "::Null;"
        + "$ab { ' ' } $ab > '-';"
        + "c { ' ' > ;"
        + "::Null;"
        + "'a-a' > a\\%|a;",

        // [14]
        /*
        "::[abc];"
        + "::BEGIN;"
        + "abc > xy;"
        + "::END;"
        + "::BEGIN;"
        + "aba > yz;"
        + "::END;"
        + "::Upper;",
         */
        "", // test case commented out below, this is here to keep from messing up the indexes

        // [15]
        "::[abc];"
        + "abc > xy;"
        + "::Null;"
        + "aba > yz;"
        + "::Upper;",

        // [16]
        /*
        "::[abc];"
        + "::BEGIN;"
        + "abc <> xy;"
        + "::END;"
        + "::BEGIN;"
        + "aba <> yz;"
        + "::END;"
        + "::Upper(Lower);"
        + "::([XYZ]);",
         */
        "", // test case commented out below, this is here to keep from messing up the indexes

        // [17]
        // The only reversible rule set; the tests also instantiate it in REVERSE.
        "::[abc];"
        + "abc <> xy;"
        + "::Null;"
        + "aba <> yz;"
        + "::Upper(Lower);"
        + "::([XYZ]);"
    };
3221 \r
3222     /*\r
3223 (This entire test is commented out below and will need some heavy revision when we re-add\r
3224 the ::BEGIN/::END stuff)\r
3225     private static final String[] BOGUS_BEGIN_END_RULES = new String[] {\r
3226         // [7]\r
3227         "::BEGIN;"\r
3228         + "abc > xy;"\r
3229         + "::BEGIN;"\r
3230         + "aba > z;"\r
3231         + "::END;"\r
3232         + "::END;",\r
3233 \r
3234         // [8]\r
3235         "abc > xy;"\r
3236         + " aba > z;"\r
3237         + "::END;",\r
3238 \r
3239         // [9]\r
3240         "::BEGIN;"\r
3241         + "::Upper;"\r
3242         + "::END;"\r
3243     };\r
3244      */\r
3245 \r
    /**
     * Flat list of test cases consumed three elements at a time by
     * TestBeginEnd and TestBeginEndToRules.  Each case is:
     *   rule set (an entry of BEGIN_END_RULES), input text, expected output.
     * The array length must stay a multiple of three.  Cases whose rule set
     * is currently an empty placeholder in BEGIN_END_RULES are commented out.
     */
    private static final String[] BEGIN_END_TEST_CASES = new String[] {
        BEGIN_END_RULES[0], "abc ababc aba", "xy zbc z",
        //        BEGIN_END_RULES[1], "abc ababc aba", "xy abxy z",
        //        BEGIN_END_RULES[2], "abc ababc aba", "xy abxy z",
        //        BEGIN_END_RULES[3], "abc ababc aba", "xy abxy z",
        BEGIN_END_RULES[4], "abc ababc aba", "xy abxy z",
        BEGIN_END_RULES[5], "abccabaacababcbc", "PXAARXQBR",

        BEGIN_END_RULES[6], "e   e - e---e-  e", "e e e-e-e",
        BEGIN_END_RULES[7], "e   e - e---e-  e", "e e e-e-e",
        BEGIN_END_RULES[8], "e   e - e---e-  e", "e e e-e-e",
        BEGIN_END_RULES[9], "e   e - e---e-  e", "e e e-e-e",
        //        BEGIN_END_RULES[10], "e   e - e---e-  e", "e e e-e-e",
        //        BEGIN_END_RULES[11], "e   e - e---e-  e", "e e e-e-e",
        //        BEGIN_END_RULES[12], "e   e - e---e-  e", "e e e-e-e",
        //        BEGIN_END_RULES[12], "a    a    a    a", "a%a%a%a",
        //        BEGIN_END_RULES[12], "a a-b c b a", "a%a-b cb-a",
        BEGIN_END_RULES[13], "e   e - e---e-  e", "e e e-e-e",
        BEGIN_END_RULES[13], "a    a    a    a", "a%a%a%a",
        BEGIN_END_RULES[13], "a a-b c b a", "a%a-b cb-a",

        //        BEGIN_END_RULES[14], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ",
        BEGIN_END_RULES[15], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ",
        //        BEGIN_END_RULES[16], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ",
        BEGIN_END_RULES[17], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ"
    };
3272 \r
3273     public void TestBeginEnd() {\r
3274         // run through the list of test cases above\r
3275         for (int i = 0; i < BEGIN_END_TEST_CASES.length; i += 3) {\r
3276             expect(BEGIN_END_TEST_CASES[i], BEGIN_END_TEST_CASES[i + 1], BEGIN_END_TEST_CASES[i + 2]);\r
3277         }\r
3278 \r
3279         // instantiate the one reversible rule set in the reverse direction and make sure it does the right thing\r
3280         Transliterator reversed  = Transliterator.createFromRules("Reversed", BEGIN_END_RULES[17],\r
3281                 Transliterator.REVERSE);\r
3282         expect(reversed, "xy XY XYZ yz YZ", "xy abc xaba yz aba");\r
3283 \r
3284         // finally, run through the list of syntactically-ill-formed rule sets above and make sure\r
3285         // that all of them cause errors\r
3286         /*\r
3287 (commented out until we have the real ::BEGIN/::END stuff in place\r
3288         for (int i = 0; i < BOGUS_BEGIN_END_RULES.length; i++) {\r
3289             try {\r
3290                 Transliterator t = Transliterator.createFromRules("foo", BOGUS_BEGIN_END_RULES[i],\r
3291                         Transliterator.FORWARD);\r
3292                 errln("Should have gotten syntax error from " + BOGUS_BEGIN_END_RULES[i]);\r
3293             }\r
3294             catch (IllegalArgumentException e) {\r
3295                 // this is supposed to happen; do nothing here\r
3296             }\r
3297         }\r
3298          */\r
3299     }\r
3300 \r
3301     public void TestBeginEndToRules() {\r
3302         // run through the same list of test cases we used above, but this time, instead of just\r
3303         // instantiating a Transliterator from the rules and running the test against it, we instantiate\r
3304         // a Transliterator from the rules, do toRules() on it, instantiate a Transliterator from\r
3305         // the resulting set of rules, and make sure that the generated rule set is semantically equivalent\r
3306         // to (i.e., does the same thing as) the original rule set\r
3307         for (int i = 0; i < BEGIN_END_TEST_CASES.length; i += 3) {\r
3308             Transliterator t = Transliterator.createFromRules("--", BEGIN_END_TEST_CASES[i],\r
3309                     Transliterator.FORWARD);\r
3310             String rules = t.toRules(false);\r
3311             Transliterator t2 = Transliterator.createFromRules("Test case #" + (i / 3), rules, Transliterator.FORWARD);\r
3312             expect(t2, BEGIN_END_TEST_CASES[i + 1], BEGIN_END_TEST_CASES[i + 2]);\r
3313         }\r
3314 \r
3315         // do the same thing for the reversible test case\r
3316         Transliterator reversed = Transliterator.createFromRules("Reversed", BEGIN_END_RULES[17],\r
3317                 Transliterator.REVERSE);\r
3318         String rules = reversed.toRules(false);\r
3319         Transliterator reversed2 = Transliterator.createFromRules("Reversed", rules, Transliterator.FORWARD);\r
3320         expect(reversed2, "xy XY XYZ yz YZ", "xy abc xaba yz aba");\r
3321     }\r
3322 \r
3323     public void TestRegisterAlias() {\r
3324         String longID = "Lower;[aeiou]Upper";\r
3325         String shortID = "Any-CapVowels";\r
3326         String reallyShortID = "CapVowels";\r
3327 \r
3328         Transliterator.registerAlias(shortID, longID);\r
3329 \r
3330         Transliterator t1 = Transliterator.getInstance(longID);\r
3331         Transliterator t2 = Transliterator.getInstance(reallyShortID);\r
3332 \r
3333         if (!t1.getID().equals(longID))\r
3334             errln("Transliterator instantiated with long ID doesn't have long ID");\r
3335         if (!t2.getID().equals(reallyShortID))\r
3336             errln("Transliterator instantiated with short ID doesn't have short ID");\r
3337 \r
3338         if (!t1.toRules(true).equals(t2.toRules(true)))\r
3339             errln("Alias transliterators aren't the same");\r
3340 \r
3341         Transliterator.unregister(shortID);\r
3342 \r
3343         try {\r
3344             t1 = Transliterator.getInstance(shortID);\r
3345             errln("Instantiation with short ID succeeded after short ID was unregistered");\r
3346         }\r
3347         catch (IllegalArgumentException e) {\r
3348         }\r
3349 \r
3350         // try the same thing again, but this time with something other than\r
3351         // an instance of CompoundTransliterator\r
3352         String realID = "Latin-Greek";\r
3353         String fakeID = "Latin-dlgkjdflkjdl";\r
3354         Transliterator.registerAlias(fakeID, realID);\r
3355 \r
3356         t1 = Transliterator.getInstance(realID);\r
3357         t2 = Transliterator.getInstance(fakeID);\r
3358 \r
3359         if (!t1.toRules(true).equals(t2.toRules(true)))\r
3360             errln("Alias transliterators aren't the same");\r
3361 \r
3362         Transliterator.unregister(fakeID);\r
3363     }\r
3364 \r
3365     /**\r
3366      * Test the Halfwidth-Fullwidth transliterator (ticket 6281).\r
3367      */\r
3368     public void TestHalfwidthFullwidth() {\r
3369         Transliterator hf = Transliterator.getInstance("Halfwidth-Fullwidth");\r
3370         Transliterator fh = Transliterator.getInstance("Fullwidth-Halfwidth");\r
3371 \r
3372         // Array of 3n items\r
3373         // Each item is\r
3374         //   "hf"|"fh"|"both",\r
3375         //   <Halfwidth>,\r
3376         //   <Fullwidth>\r
3377         String[] DATA = {\r
3378                 "both",\r
3379                 "\uFFE9\uFFEA\uFFEB\uFFEC\u0061\uFF71\u00AF\u0020",\r
3380                 "\u2190\u2191\u2192\u2193\uFF41\u30A2\uFFE3\u3000",\r
3381         };\r
3382 \r
3383         for (int i=0; i<DATA.length; i+=3) {\r
3384             switch (DATA[i].charAt(0)) {\r
3385             case 'h': // Halfwidth-Fullwidth only\r
3386                 expect(hf, DATA[i+1], DATA[i+2]);\r
3387                 break;\r
3388             case 'f': // Fullwidth-Halfwidth only\r
3389                 expect(fh, DATA[i+2], DATA[i+1]);\r
3390                 break;\r
3391             case 'b': // both directions\r
3392                 expect(hf, DATA[i+1], DATA[i+2]);\r
3393                 expect(fh, DATA[i+2], DATA[i+1]);\r
3394                 break;\r
3395             }\r
3396         }\r
3397 \r
3398     }\r
3399 \r
    /**
     * Test Thai: transliterates a Thai paragraph with Any-Latin and compares
     * the result against a fixed Latin string.
     *
     * The text is the first paragraph of "What is Unicode" from the
     * Unicode.org web site.
     *
     * TODO: confirm that the expected results are correct.
     *       For now, test just confirms that C++ and Java give identical results.
     */
    public void TestThai() {
        Transliterator tr = Transliterator.getInstance("Any-Latin", Transliterator.FORWARD);
        // Input: Thai text (with a few embedded ASCII words), written as escapes.
        String thaiText = 
            "\u0e42\u0e14\u0e22\u0e1e\u0e37\u0e49\u0e19\u0e10\u0e32\u0e19\u0e41\u0e25\u0e49\u0e27, \u0e04\u0e2d" +
            "\u0e21\u0e1e\u0e34\u0e27\u0e40\u0e15\u0e2d\u0e23\u0e4c\u0e08\u0e30\u0e40\u0e01\u0e35\u0e48\u0e22" +
            "\u0e27\u0e02\u0e49\u0e2d\u0e07\u0e01\u0e31\u0e1a\u0e40\u0e23\u0e37\u0e48\u0e2d\u0e07\u0e02\u0e2d" +
            "\u0e07\u0e15\u0e31\u0e27\u0e40\u0e25\u0e02. \u0e04\u0e2d\u0e21\u0e1e\u0e34\u0e27\u0e40\u0e15\u0e2d" +
            "\u0e23\u0e4c\u0e08\u0e31\u0e14\u0e40\u0e01\u0e47\u0e1a\u0e15\u0e31\u0e27\u0e2d\u0e31\u0e01\u0e29" +
            "\u0e23\u0e41\u0e25\u0e30\u0e2d\u0e31\u0e01\u0e02\u0e23\u0e30\u0e2d\u0e37\u0e48\u0e19\u0e46 \u0e42" +
            "\u0e14\u0e22\u0e01\u0e32\u0e23\u0e01\u0e33\u0e2b\u0e19\u0e14\u0e2b\u0e21\u0e32\u0e22\u0e40\u0e25" +
            "\u0e02\u0e43\u0e2b\u0e49\u0e2a\u0e33\u0e2b\u0e23\u0e31\u0e1a\u0e41\u0e15\u0e48\u0e25\u0e30\u0e15" +
            "\u0e31\u0e27. \u0e01\u0e48\u0e2d\u0e19\u0e2b\u0e19\u0e49\u0e32\u0e17\u0e35\u0e48\u0e4a Unicode \u0e08" +
            "\u0e30\u0e16\u0e39\u0e01\u0e2a\u0e23\u0e49\u0e32\u0e07\u0e02\u0e36\u0e49\u0e19, \u0e44\u0e14\u0e49" +
            "\u0e21\u0e35\u0e23\u0e30\u0e1a\u0e1a encoding \u0e2d\u0e22\u0e39\u0e48\u0e2b\u0e25\u0e32\u0e22\u0e23" +
            "\u0e49\u0e2d\u0e22\u0e23\u0e30\u0e1a\u0e1a\u0e2a\u0e33\u0e2b\u0e23\u0e31\u0e1a\u0e01\u0e32\u0e23" +
            "\u0e01\u0e33\u0e2b\u0e19\u0e14\u0e2b\u0e21\u0e32\u0e22\u0e40\u0e25\u0e02\u0e40\u0e2b\u0e25\u0e48" +
            "\u0e32\u0e19\u0e35\u0e49. \u0e44\u0e21\u0e48\u0e21\u0e35 encoding \u0e43\u0e14\u0e17\u0e35\u0e48" +
            "\u0e21\u0e35\u0e08\u0e33\u0e19\u0e27\u0e19\u0e15\u0e31\u0e27\u0e2d\u0e31\u0e01\u0e02\u0e23\u0e30" +
            "\u0e21\u0e32\u0e01\u0e40\u0e1e\u0e35\u0e22\u0e07\u0e1e\u0e2d: \u0e22\u0e01\u0e15\u0e31\u0e27\u0e2d" +
            "\u0e22\u0e48\u0e32\u0e07\u0e40\u0e0a\u0e48\u0e19, \u0e40\u0e09\u0e1e\u0e32\u0e30\u0e43\u0e19\u0e01" +
            "\u0e25\u0e38\u0e48\u0e21\u0e2a\u0e2b\u0e20\u0e32\u0e1e\u0e22\u0e38\u0e42\u0e23\u0e1b\u0e40\u0e1e" +
            "\u0e35\u0e22\u0e07\u0e41\u0e2b\u0e48\u0e07\u0e40\u0e14\u0e35\u0e22\u0e27 \u0e01\u0e47\u0e15\u0e49" +
            "\u0e2d\u0e07\u0e01\u0e32\u0e23\u0e2b\u0e25\u0e32\u0e22 encoding \u0e43\u0e19\u0e01\u0e32\u0e23\u0e04" +
            "\u0e23\u0e2d\u0e1a\u0e04\u0e25\u0e38\u0e21\u0e17\u0e38\u0e01\u0e20\u0e32\u0e29\u0e32\u0e43\u0e19" +
            "\u0e01\u0e25\u0e38\u0e48\u0e21. \u0e2b\u0e23\u0e37\u0e2d\u0e41\u0e21\u0e49\u0e41\u0e15\u0e48\u0e43" +
            "\u0e19\u0e20\u0e32\u0e29\u0e32\u0e40\u0e14\u0e35\u0e48\u0e22\u0e27 \u0e40\u0e0a\u0e48\u0e19 \u0e20" +
            "\u0e32\u0e29\u0e32\u0e2d\u0e31\u0e07\u0e01\u0e24\u0e29 \u0e01\u0e47\u0e44\u0e21\u0e48\u0e21\u0e35" +
            " encoding \u0e43\u0e14\u0e17\u0e35\u0e48\u0e40\u0e1e\u0e35\u0e22\u0e07\u0e1e\u0e2d\u0e2a\u0e33\u0e2b" +
            "\u0e23\u0e31\u0e1a\u0e17\u0e38\u0e01\u0e15\u0e31\u0e27\u0e2d\u0e31\u0e01\u0e29\u0e23, \u0e40\u0e04" +
            "\u0e23\u0e37\u0e48\u0e2d\u0e07\u0e2b\u0e21\u0e32\u0e22\u0e27\u0e23\u0e23\u0e04\u0e15\u0e2d\u0e19" +
            " \u0e41\u0e25\u0e30\u0e2a\u0e31\u0e0d\u0e25\u0e31\u0e01\u0e29\u0e13\u0e4c\u0e17\u0e32\u0e07\u0e40" +
            "\u0e17\u0e04\u0e19\u0e34\u0e04\u0e17\u0e35\u0e48\u0e43\u0e0a\u0e49\u0e01\u0e31\u0e19\u0e2d\u0e22" +
            "\u0e39\u0e48\u0e17\u0e31\u0e48\u0e27\u0e44\u0e1b.";

        // Expected Any-Latin output for thaiText (see the TODO above).
        String latinText = 
            "doy ph\u1ee5\u0304\u0302n \u1e6d\u0304h\u0101n l\u00e6\u0302w, khxmphiwtexr\u0312 ca ke\u012b\u0300" +
            "ywk\u0304\u0125xng k\u1ea1b re\u1ee5\u0304\u0300xng k\u0304hxng t\u1ea1wlek\u0304h. khxmphiwtexr" +
            "\u0312 c\u1ea1d k\u0115b t\u1ea1w x\u1ea1ks\u0304\u02b9r l\u00e6a x\u1ea1kk\u0304h ra x\u1ee5\u0304" +
            "\u0300n\u00ab doy k\u0101r k\u1ea3h\u0304nd h\u0304m\u0101ylek\u0304h h\u0304\u0131\u0302 s\u0304" +
            "\u1ea3h\u0304r\u1ea1b t\u00e6\u0300la t\u1ea1w. k\u0300xn h\u0304n\u0302\u0101 th\u012b\u0300\u0301" +
            " Unicode ca t\u0304h\u016bk s\u0304r\u0302\u0101ng k\u0304h\u1ee5\u0302n, d\u1ecb\u0302 m\u012b " +
            "rabb encoding xy\u016b\u0300 h\u0304l\u0101y r\u0302xy rabb s\u0304\u1ea3h\u0304r\u1ea1b k\u0101" +
            "r k\u1ea3h\u0304nd h\u0304m\u0101ylek\u0304h h\u0304el\u0300\u0101 n\u012b\u0302. m\u1ecb\u0300m" +
            "\u012b encoding d\u0131 th\u012b\u0300 m\u012b c\u1ea3nwn t\u1ea1w x\u1ea1kk\u0304hra m\u0101k p" +
            "he\u012byng phx: yk t\u1ea1wx\u1ef3\u0101ng ch\u00e8n, c\u0304heph\u0101a n\u0131 kl\u00f9m s\u0304" +
            "h\u0304p\u0323h\u0101ph yurop phe\u012byng h\u0304\u00e6\u0300ng de\u012byw k\u0306 t\u0302xngk\u0101" +
            "r h\u0304l\u0101y encoding n\u0131 k\u0101r khrxbkhlum thuk p\u0323h\u0101s\u0304\u02b9\u0101 n\u0131" +
            " kl\u00f9m. h\u0304r\u1ee5\u0304x m\u00e6\u0302t\u00e6\u0300 n\u0131 p\u0323h\u0101s\u0304\u02b9" +
            "\u0101 de\u012b\u0300yw ch\u00e8n p\u0323h\u0101s\u0304\u02b9\u0101 x\u1ea1ngkvs\u0304\u02b9 k\u0306" +
            " m\u1ecb\u0300m\u012b encoding d\u0131 th\u012b\u0300 phe\u012byng phx s\u0304\u1ea3h\u0304r\u1ea1" +
            "b thuk t\u1ea1w x\u1ea1ks\u0304\u02b9r, kher\u1ee5\u0304\u0300xngh\u0304m\u0101y wrrkh txn l\u00e6" +
            "a s\u0304\u1ea1\u1ef5l\u1ea1ks\u0304\u02b9\u1e47\u0312 th\u0101ng thekhnikh th\u012b\u0300 ch\u0131" +
            "\u0302 k\u1ea1n xy\u016b\u0300 th\u1ea1\u0300wp\u1ecb.";

        expect(tr, thaiText, latinText);
    }
3461 \r
3462 \r
3463     //======================================================================\r
3464     // These tests are not mirrored (yet) in icu4c at\r
3465     // source/test/intltest/transtst.cpp\r
3466     //======================================================================\r
3467 \r
3468     /**\r
3469      * Improve code coverage.\r
3470      */\r
3471     public void TestCoverage() {\r
3472         // NullTransliterator\r
3473         Transliterator t = Transliterator.getInstance("Null", Transliterator.FORWARD);\r
3474         expect(t, "a", "a");\r
3475 \r
3476         // Source, target set\r
3477         t = Transliterator.getInstance("Latin-Greek", Transliterator.FORWARD);\r
3478         t.setFilter(new UnicodeSet("[A-Z]"));\r
3479         logln("source = " + t.getSourceSet());\r
3480         logln("target = " + t.getTargetSet());\r
3481 \r
3482         t = Transliterator.createFromRules("x", "(.) > &Any-Hex($1);", Transliterator.FORWARD);\r
3483         logln("source = " + t.getSourceSet());\r
3484         logln("target = " + t.getTargetSet());\r
3485     }\r
3486     /*\r
3487      * Test case for threading problem in NormalizationTransliterator\r
3488      * reported by ticket#5160\r
3489      */\r
3490     public void TestT5160() {\r
3491         final String[] testData = {\r
3492                 "a",\r
3493                 "b",\r
3494                 "\u09BE",\r
3495                 "A\u0301",\r
3496         };\r
3497         final String[] expected = {\r
3498                 "a",\r
3499                 "b",\r
3500                 "\u09BE",\r
3501                 "\u00C1",\r
3502         };\r
3503         Transliterator translit = Transliterator.getInstance("NFC");\r
3504         NormTranslitTask[] tasks = new NormTranslitTask[testData.length];\r
3505         for (int i = 0; i < tasks.length; i++) {\r
3506             tasks[i] = new NormTranslitTask(translit, testData[i], expected[i]);\r
3507         }\r
3508         TestUtil.runUntilDone(tasks);\r
3509 \r
3510         for (int i = 0; i < tasks.length; i++) {\r
3511             if (tasks[i].getErrorMessage() != null) {\r
3512                 System.out.println("Fail: thread#" + i + " " + tasks[i].getErrorMessage());\r
3513                 break;\r
3514             }\r
3515         }\r
3516     }\r
3517 \r
3518     static class NormTranslitTask implements Runnable {\r
3519         Transliterator translit;\r
3520         String testData;\r
3521         String expectedData;\r
3522         String errorMsg;\r
3523 \r
3524         NormTranslitTask(Transliterator translit, String testData, String expectedData) {\r
3525             this.translit = translit;\r
3526             this.testData = testData;\r
3527             this.expectedData = expectedData;\r
3528         }\r
3529 \r
3530         public void run() {\r
3531             errorMsg = null;\r
3532             StringBuffer inBuf = new StringBuffer(testData);\r
3533             StringBuffer expectedBuf = new StringBuffer(expectedData);\r
3534 \r
3535             for(int i = 0; i < 1000; i++) {\r
3536                 String in = inBuf.toString();\r
3537                 String out = translit.transliterate(in);\r
3538                 String expected = expectedBuf.toString();\r
3539                 if (!out.equals(expected)) {\r
3540                     errorMsg = "in {" + in + "} / out {" + out + "} / expected {" + expected + "}";\r
3541                     break;\r
3542                 }\r
3543                 inBuf.append(testData);\r
3544                 expectedBuf.append(expectedData);\r
3545             }\r
3546         }\r
3547 \r
3548         public String getErrorMessage() {\r
3549             return errorMsg;\r
3550         }\r
3551     }\r
3552 \r
3553     //======================================================================\r
3554     // Support methods\r
3555     //======================================================================\r
3556     void expect(String rules,\r
3557             String source,\r
3558             String expectedResult,\r
3559             Transliterator.Position pos) {\r
3560         Transliterator t = Transliterator.createFromRules("<ID>", rules, Transliterator.FORWARD);\r
3561         expect(t, source, expectedResult, pos);\r
3562     }\r
3563 \r
3564     void expect(String rules, String source, String expectedResult) {\r
3565         expect(rules, source, expectedResult, null);\r
3566     }\r
3567 \r
3568     void expect(Transliterator t, String source, String expectedResult,\r
3569             Transliterator reverseTransliterator) {\r
3570         expect(t, source, expectedResult);\r
3571         if (reverseTransliterator != null) {\r
3572             expect(reverseTransliterator, expectedResult, source);\r
3573         }\r
3574     }\r
3575 \r
3576     void expect(Transliterator t, String source, String expectedResult) {\r
3577         expect(t, source, expectedResult, (Transliterator.Position) null);\r
3578     }\r
3579 \r
3580     void expect(Transliterator t, String source, String expectedResult,\r
3581             Transliterator.Position pos) {\r
3582         if (pos == null) {\r
3583             String result = t.transliterate(source);\r
3584             if (!expectAux(t.getID() + ":String", source, result, expectedResult)) return;\r
3585         }\r
3586 \r
3587         Transliterator.Position index = null;\r
3588         if (pos == null) {\r
3589             index = new Transliterator.Position(0, source.length(), 0, source.length());\r
3590         } else {\r
3591             index = new Transliterator.Position(pos.contextStart, pos.contextLimit,\r
3592                     pos.start, pos.limit);\r
3593         }\r
3594 \r
3595         ReplaceableString rsource = new ReplaceableString(source);\r
3596 \r
3597         t.finishTransliteration(rsource, index);\r
3598         // Do it all at once -- below we do it incrementally\r
3599 \r
3600         if (index.start != index.limit) {\r
3601             expectAux(t.getID() + ":UNFINISHED", source,\r
3602                     "start: " + index.start + ", limit: " + index.limit, false, expectedResult);\r
3603             return;\r
3604         }\r
3605         String result = rsource.toString();\r
3606         if (!expectAux(t.getID() + ":Replaceable", source, result, expectedResult)) return;\r
3607 \r
3608 \r
3609         if (pos == null) {\r
3610             index = new Transliterator.Position();\r
3611         } else {\r
3612             index = new Transliterator.Position(pos.contextStart, pos.contextLimit,\r
3613                     pos.start, pos.limit);\r
3614         }\r
3615 \r
3616         // Test incremental transliteration -- this result\r
3617         // must be the same after we finalize (see below).\r
3618         Vector v = new Vector();\r
3619         v.add(source);\r
3620         rsource.replace(0, rsource.length(), "");\r
3621         if (pos != null) {\r
3622             rsource.replace(0, 0, source);\r
3623             v.add(UtilityExtensions.formatInput(rsource, index));\r
3624             t.transliterate(rsource, index);\r
3625             v.add(UtilityExtensions.formatInput(rsource, index));\r
3626         } else {\r
3627             for (int i=0; i<source.length(); ++i) {\r
3628                 //v.add(i == 0 ? "" : " + " + source.charAt(i) + "");\r
3629                 //log.append(source.charAt(i)).append(" -> "));\r
3630                 t.transliterate(rsource, index, source.charAt(i));\r
3631                 //v.add(UtilityExtensions.formatInput(rsource, index) + source.substring(i+1));\r
3632                 v.add(UtilityExtensions.formatInput(rsource, index) +\r
3633                         ((i<source.length()-1)?(" + '" + source.charAt(i+1) + "' ->"):" =>"));\r
3634             }\r
3635         }\r
3636 \r
3637         // As a final step in keyboard transliteration, we must call\r
3638         // transliterate to finish off any pending partial matches that\r
3639         // were waiting for more input.\r
3640         t.finishTransliteration(rsource, index);\r
3641         result = rsource.toString();\r
3642         //log.append(" => ").append(rsource.toString());\r
3643         v.add(result);\r
3644 \r
3645         String[] results = new String[v.size()];\r
3646         v.copyInto(results);\r
3647         expectAux(t.getID() + ":Incremental", results,\r
3648                 result.equals(expectedResult),\r
3649                 expectedResult);\r
3650     }\r
3651 \r
3652     boolean expectAux(String tag, String source,\r
3653             String result, String expectedResult) {\r
3654         return expectAux(tag, new String[] {source, result},\r
3655                 result.equals(expectedResult),\r
3656                 expectedResult);\r
3657     }\r
3658 \r
3659     boolean expectAux(String tag, String source,\r
3660             String result, boolean pass,\r
3661             String expectedResult) {\r
3662         return expectAux(tag, new String[] {source, result},\r
3663                 pass,\r
3664                 expectedResult);\r
3665     }\r
3666 \r
3667     boolean expectAux(String tag, String source,\r
3668             boolean pass,\r
3669             String expectedResult) {\r
3670         return expectAux(tag, new String[] {source},\r
3671                 pass,\r
3672                 expectedResult);\r
3673     }\r
3674 \r
3675     boolean expectAux(String tag, String[] results, boolean pass,\r
3676             String expectedResult) {\r
3677         msg((pass?"(":"FAIL: (")+tag+")", pass ? LOG : ERR, true, true);\r
3678 \r
3679         for (int i = 0; i < results.length; ++i) {\r
3680             String label;\r
3681             if (i == 0) {\r
3682                 label = "source:   ";\r
3683             } else if (i == results.length - 1) {\r
3684                 label = "result:   ";\r
3685             } else {\r
3686                 if (!isVerbose() && pass) continue;\r
3687                 label = "interm" + i + ":  ";\r
3688             }\r
3689             msg("    " + label + results[i], pass ? LOG : ERR, false, true);\r
3690         }\r
3691 \r
3692         if (!pass) {\r
3693             msg(  "    expected: " + expectedResult, ERR, false, true);\r
3694         }\r
3695 \r
3696         return pass;\r
3697     }\r
3698 \r
3699     private void assertTransform(String message, String expected, StringTransform t, String source) {\r
3700         assertEquals(message + " " + source, expected, t.transform(source));\r
3701     }\r
3702 \r
3703 \r
3704     private void assertTransform(String message, String expected, StringTransform t, StringTransform back, String source, String source2) {\r
3705         assertEquals(message + " " +source, expected, t.transform(source));\r
3706         assertEquals(message + " " +source2, expected, t.transform(source2));\r
3707         assertEquals(message + " " + expected, source, back.transform(expected));\r
3708     }\r
3709 \r
3710     /*\r
3711      * Tests the method public Enumeration<String> getAvailableTargets(String source)\r
3712      */\r
3713     public void TestGetAvailableTargets() {\r
3714         try {\r
3715             // Tests when if (targets == null) is true\r
3716             Transliterator.getAvailableTargets("");\r
3717         } catch (Exception e) {\r
3718             errln("TransliteratorRegistry.getAvailableTargets(String) was not " + "supposed to return an exception.");\r
3719         }\r
3720     }\r
3721 \r
3722     /*\r
3723      * Tests the method public Enumeration<String> getAvailableVariants(String source, String target)\r
3724      */\r
3725     public void TestGetAvailableVariants() {\r
3726         try {\r
3727             // Tests when if (targets == null) is true\r
3728             Transliterator.getAvailableVariants("", "");\r
3729         } catch (Exception e) {\r
3730             errln("TransliteratorRegistry.getAvailableVariants(String) was not " + "supposed to return an exception.");\r
3731         }\r
3732     }\r
3733 \r
3734     /*\r
3735      * Tests the mehtod String nextLine() in RuleBody\r
3736      */\r
3737     public void TestNextLine() {\r
3738         // Tests when "if (s != null && s.length() > 0 && s.charAt(s.length() - 1) == '\\') is true\r
3739         try{\r
3740             Transliterator.createFromRules("gif", "\\", Transliterator.FORWARD);\r
3741         } catch(Exception e){\r
3742             errln("TransliteratorParser.nextLine() was not suppose to return an " +\r
3743                     "exception for a rule of '\\'");\r
3744         }\r
3745     }\r
3746 }\r