]> gitweb.fperrin.net Git - Dictionary.git/blob - jars/icu4j-4_4_2-src/main/tests/translit/src/com/ibm/icu/dev/test/translit/RoundTripTest.java
go
[Dictionary.git] / jars / icu4j-4_4_2-src / main / tests / translit / src / com / ibm / icu / dev / test / translit / RoundTripTest.java
1 /**\r
2  *******************************************************************************\r
3  * Copyright (C) 2000-2009, International Business Machines Corporation and    *\r
4  * others. All Rights Reserved.                                                *\r
5  *******************************************************************************\r
6  */\r
7 package com.ibm.icu.dev.test.translit;\r
8 \r
9 import java.io.BufferedWriter;\r
10 import java.io.ByteArrayOutputStream;\r
11 import java.io.File;\r
12 import java.io.FileNotFoundException;\r
13 import java.io.FileOutputStream;\r
14 import java.io.IOException;\r
15 import java.io.OutputStreamWriter;\r
16 import java.io.PrintWriter;\r
17 import java.io.UnsupportedEncodingException;\r
18 import java.util.MissingResourceException;\r
19 \r
20 import com.ibm.icu.dev.test.TestFmwk;\r
21 import com.ibm.icu.impl.Utility;\r
22 import com.ibm.icu.lang.UCharacter;\r
23 import com.ibm.icu.lang.UProperty;\r
24 import com.ibm.icu.text.Normalizer;\r
25 import com.ibm.icu.text.Transliterator;\r
26 import com.ibm.icu.text.UTF16;\r
27 import com.ibm.icu.text.UnicodeSet;\r
28 import com.ibm.icu.text.UnicodeSetIterator;\r
29 import com.ibm.icu.util.LocaleData;\r
30 import com.ibm.icu.util.ULocale;\r
31 \r
32 /**\r
33  * @test\r
34  * @summary Round trip test of Transliterator\r
35  */\r
36 public class RoundTripTest extends TestFmwk {\r
37 \r
// Compile-time switches for this test class. Neither flag is referenced in the
// part of the file visible here; NOTE(review): presumably they enable additional
// checks and rule dumping further down -- confirm against the rest of the class.
static final boolean EXTRA_TESTS = true;
static final boolean PRINT_RULES = true;
40 \r
41     public static void main(String[] args) throws Exception {\r
42         new RoundTripTest().run(args);\r
43     }\r
44     /*\r
45     public void TestSingle() throws IOException, ParseException {\r
46         Transliterator t = Transliterator.getInstance("Latin-Greek");\r
47         String s = t.transliterate("\u0101\u0069");\r
48     }\r
49      */\r
50 \r
51     /*\r
52     Note: Unicode 3.2 added new Hiragana/Katakana characters:\r
53 \r
54 3095..3096    ; 3.2 #   [2] HIRAGANA LETTER SMALL KA..HIRAGANA LETTER SMALL KE\r
55 309F..30A0    ; 3.2 #   [2] HIRAGANA DIGRAPH YORI..KATAKANA-HIRAGANA DOUBLE HYPHEN\r
56 30FF          ; 3.2 #       KATAKANA DIGRAPH KOTO\r
57 31F0..31FF    ; 3.2 #  [16] KATAKANA LETTER SMALL KU..KATAKANA LETTER SMALL RO\r
58 \r
59     Unicode 5.2 added another Hiragana character:\r
60 1F200         ; 5.2 #       SQUARE HIRAGANA HOKA\r
61 \r
62     We will not add them to the rules until they are more supported (e.g. in fonts on Windows)\r
63     A bug has been filed to remind us to do this: #1979.\r
64      */\r
65 \r
// UnicodeSet patterns shared by the kana round-trip tests (TestKana, TestHiragana,
// TestKatakana).
// KATAKANA: the katakana property set plus U+30A1..U+30FA and U+30FC, with U+30FF and
// U+31F0..U+31FF removed.
static String KATAKANA = "[[[:katakana:][\u30A1-\u30FA\u30FC]]-[\u30FF\u31F0-\u31FF]]";
// HIRAGANA: the hiragana property set plus U+3040..U+3094, with U+3095..U+3096,
// U+309F..U+30A0 and U+1F200..U+1F2FF removed. The supplementary range is written with a
// doubled backslash so the escape reaches the pattern parser rather than the Java compiler.
static String HIRAGANA = "[[[:hiragana:][\u3040-\u3094]]-[\u3095-\u3096\u309F-\u30A0\\U0001F200-\\U0001F2FF]]";
// LENGTH: the single code point U+30FC.
static String LENGTH = "[\u30FC]";
// HALFWIDTH_KATAKANA: U+FF65..U+FF9D.
static String HALFWIDTH_KATAKANA = "[\uFF65-\uFF9D]";
// KATAKANA_ITERATION: U+30FD and U+30FE.
static String KATAKANA_ITERATION = "[\u30FD\u30FE]";
// HIRAGANA_ITERATION: U+309D and U+309E.
static String HIRAGANA_ITERATION = "[\u309D\u309E]";
72 \r
73     //------------------------------------------------------------------\r
74     // AbbreviatedUnicodeSetIterator\r
75     //------------------------------------------------------------------\r
76 \r
77     static class AbbreviatedUnicodeSetIterator extends UnicodeSetIterator {\r
78 \r
79         private boolean abbreviated;\r
80         private int perRange;\r
81 \r
82         public AbbreviatedUnicodeSetIterator() {\r
83             super();\r
84             abbreviated = false;\r
85         }\r
86 \r
87         public void reset(UnicodeSet newSet) {\r
88             reset(newSet, false);\r
89         }\r
90 \r
91         public void reset(UnicodeSet newSet, boolean abb) {\r
92             reset(newSet, abb, 100);\r
93         }\r
94 \r
95         public void reset(UnicodeSet newSet, boolean abb, int density) {\r
96             super.reset(newSet);\r
97             abbreviated = abb;\r
98             perRange = newSet.getRangeCount();\r
99             if (perRange != 0) {\r
100                 perRange = density / perRange;\r
101             }\r
102         }\r
103 \r
104         protected void loadRange(int myRange) {\r
105             super.loadRange(myRange);\r
106             if (abbreviated && (endElement > nextElement + perRange)) {\r
107                 endElement = nextElement + perRange;\r
108             }\r
109         }\r
110     }\r
111 \r
112     //--------------------------------------------------------------------\r
113 \r
114     public void showElapsed(long start, String name) {\r
115         double dur = (System.currentTimeMillis() - start) / 1000.0;\r
116         logln(name + " took " + dur + " seconds");\r
117     }\r
118 \r
119     public void TestKana() throws IOException {\r
120         long start = System.currentTimeMillis();\r
121         new Test("Katakana-Hiragana")\r
122         .test(KATAKANA, "[" + HIRAGANA + LENGTH + "]", "[" + HALFWIDTH_KATAKANA + LENGTH + "]", this, new Legal());\r
123         showElapsed(start, "TestKana");\r
124     }\r
125 \r
126     public void TestHiragana() throws IOException {\r
127         long start = System.currentTimeMillis();\r
128         new Test("Latin-Hiragana")\r
129         .test("[a-zA-Z]", HIRAGANA, HIRAGANA_ITERATION, this, new Legal());\r
130         showElapsed(start, "TestHiragana");\r
131     }\r
132 \r
133     public void TestKatakana() throws IOException {\r
134         long start = System.currentTimeMillis();\r
135         new Test("Latin-Katakana")\r
136         .test("[a-zA-Z]", KATAKANA, "[" + KATAKANA_ITERATION + HALFWIDTH_KATAKANA + "]", this, new Legal());\r
137         showElapsed(start, "TestKatakana");\r
138     }\r
139 \r
140     public void TestJamo() throws IOException {\r
141         long start = System.currentTimeMillis();\r
142         new Test("Latin-Jamo")\r
143         .test("[a-zA-Z]", "[\u1100-\u1112 \u1161-\u1175 \u11A8-\u11C2]", "", this, new LegalJamo());\r
144         showElapsed(start, "TestJamo");\r
145     }\r
146 \r
147     /*\r
148         SBase = 0xAC00, LBase = 0x1100, VBase = 0x1161, TBase = 0x11A7,\r
149         LCount = 19, VCount = 21, TCount = 28,\r
150         NCount = VCount * TCount,   // 588\r
151         SCount = LCount * NCount,   // 11172\r
152         LLimit = LBase + LCount,    // 1113\r
153         VLimit = VBase + VCount,    // 1176\r
154         TLimit = TBase + TCount,    // 11C3\r
155         SLimit = SBase + SCount;    // D7A4\r
156      */\r
157 \r
158     public void TestHangul() throws IOException {\r
159         long start = System.currentTimeMillis();\r
160         Test t = new Test("Latin-Hangul", 5);\r
161         boolean TEST_ALL = "true".equalsIgnoreCase(getProperty("HangulRoundTripAll")); \r
162         if (TEST_ALL && getInclusion() == 10) {\r
163             t.setPairLimit(Integer.MAX_VALUE); // only go to the limit if we have TEST_ALL and getInclusion\r
164         }\r
165         t.test("[a-zA-Z]", "[\uAC00-\uD7A4]", "", this, new Legal());\r
166         showElapsed(start, "TestHangul");\r
167     }\r
168 \r
169     /**\r
170      * This is a shorter version of the test for doubles, that allows us to skip lots of cases, but\r
171      * does check the ones that should cause problems (if any do).\r
172      */\r
173     public void TestHangul2() {\r
174         Transliterator lh = Transliterator.getInstance("Latin-Hangul");\r
175         Transliterator hl = lh.getInverse();\r
176         final UnicodeSet representativeHangul = getRepresentativeHangul();\r
177         for (UnicodeSetIterator it = new UnicodeSetIterator(representativeHangul); it.next();) {\r
178             assertRoundTripTransform("Transform", it.getString(), lh, hl);\r
179         }\r
180     }\r
181 \r
182     private void assertRoundTripTransform(String message, String source, Transliterator lh, Transliterator hl) {\r
183         String to = hl.transform(source);\r
184         String back = lh.transform(to);\r
185         if (!source.equals(back)) {\r
186             String to2 = hl.transform(source.replaceAll("(.)", "$1 ").trim());\r
187             String to3 = hl.transform(back.replaceAll("(.)", "$1 ").trim());\r
188             assertEquals(message + " " + source + " [" + to + "/"+ to2 + "/"+ to3 + "]", source, back);\r
189         }\r
190     }\r
191 \r
192     public static UnicodeSet getRepresentativeHangul() {\r
193         UnicodeSet extraSamples = new UnicodeSet("[\uCE20{\uAD6C\uB514}{\uAD73\uC774}{\uBB34\uB837}{\uBB3C\uC5FF}{\uC544\uAE4C}{\uC544\uB530}{\uC544\uBE60}{\uC544\uC2F8}{\uC544\uC9DC}{\uC544\uCC28}{\uC545\uC0AC}{\uC545\uC2F8}{\uC546\uCE74}{\uC548\uAC00}{\uC548\uC790}{\uC548\uC9DC}{\uC548\uD558}{\uC54C\uAC00}{\uC54C\uB530}{\uC54C\uB9C8}{\uC54C\uBC14}{\uC54C\uBE60}{\uC54C\uC0AC}{\uC54C\uC2F8}{\uC54C\uD0C0}{\uC54C\uD30C}{\uC54C\uD558}{\uC555\uC0AC}{\uC555\uC2F8}{\uC558\uC0AC}{\uC5C5\uC12F\uC501}{\uC5C6\uC5C8\uC2B5}]");\r
194         UnicodeSet sourceSet = new UnicodeSet();\r
195         addRepresentativeHangul(sourceSet, 2, false);\r
196         addRepresentativeHangul(sourceSet, 3, false);\r
197         addRepresentativeHangul(sourceSet, 2, true);\r
198         addRepresentativeHangul(sourceSet, 3, true);\r
199         // add the boundary cases; we want an example of each case of V + L and one example of each case of T+L\r
200 \r
201         UnicodeSet more = getRepresentativeBoundaryHangul();\r
202         sourceSet.addAll(more);\r
203         sourceSet.addAll(extraSamples);\r
204         return sourceSet;\r
205     }\r
206 \r
207     private static UnicodeSet getRepresentativeBoundaryHangul() {\r
208         UnicodeSet resultToAddTo = new UnicodeSet();\r
209         // U+1100 HANGUL CHOSEONG KIYEOK\r
210         // U+1161 HANGUL JUNGSEONG A\r
211         UnicodeSet L = new UnicodeSet("[:hst=L:]");\r
212         UnicodeSet V = new UnicodeSet("[:hst=V:]");\r
213         UnicodeSet T = new UnicodeSet("[:hst=T:]");\r
214 \r
215         String prefixLV = "\u1100\u1161";\r
216         String prefixL = "\u1100";\r
217         String suffixV = "\u1161";\r
218         String nullL = "\u110B"; // HANGUL CHOSEONG IEUNG\r
219 \r
220         UnicodeSet L0 = new UnicodeSet("[\u1100\u110B]");\r
221 \r
222         // do all combinations of L0 + V + nullL + V\r
223 \r
224         for (UnicodeSetIterator iL0 = new UnicodeSetIterator(L0); iL0.next();) {\r
225             for (UnicodeSetIterator iV = new UnicodeSetIterator(V); iV.next();) {\r
226                 for (UnicodeSetIterator iV2 = new UnicodeSetIterator(V); iV2.next();) {\r
227                     String sample = iL0.getString() + iV.getString() + nullL + iV2.getString();\r
228                     String trial = Normalizer.compose(sample, false);\r
229                     if (trial.length() == 2) {\r
230                         resultToAddTo.add(trial);\r
231                     }\r
232                 }\r
233             }\r
234         }\r
235 \r
236         for (UnicodeSetIterator iL = new UnicodeSetIterator(L); iL.next();) {\r
237             // do all combinations of "g" + V + L + "a"\r
238             final String suffix = iL.getString() + suffixV;\r
239             for (UnicodeSetIterator iV = new UnicodeSetIterator(V); iV.next();) {\r
240                 String sample = prefixL + iV.getString() + suffix;\r
241                 String trial = Normalizer.compose(sample, false);\r
242                 if (trial.length() == 2) {\r
243                     resultToAddTo.add(trial);\r
244                 }\r
245             }\r
246             // do all combinations of "ga" + T + L + "a"\r
247             for (UnicodeSetIterator iT = new UnicodeSetIterator(T); iT.next();) {\r
248                 String sample = prefixLV + iT.getString() + suffix;\r
249                 String trial = Normalizer.compose(sample, false);\r
250                 if (trial.length() == 2) {\r
251                     resultToAddTo.add(trial);\r
252                 }\r
253             }\r
254         }\r
255         return resultToAddTo;\r
256     }\r
257 \r
258     private static void addRepresentativeHangul(UnicodeSet resultToAddTo, int leng, boolean noFirstConsonant) {\r
259         UnicodeSet notYetSeen = new UnicodeSet();\r
260         for (char c = '\uAC00'; c <  '\uD7AF'; ++c) {\r
261             String charStr = String.valueOf(c);\r
262             String decomp = Normalizer.decompose(charStr, false);\r
263             if (decomp.length() != leng) {\r
264                 continue; // only take one length at a time\r
265             }\r
266             if (decomp.startsWith("\u110B ") != noFirstConsonant) {\r
267                 continue;\r
268             }\r
269             if (!notYetSeen.containsAll(decomp)) {\r
270                 resultToAddTo.add(c);\r
271                 notYetSeen.addAll(decomp);\r
272             }\r
273         }\r
274     }\r
275 \r
276 \r
277     public void TestHan() throws UnsupportedEncodingException, FileNotFoundException {\r
278         try{\r
279             UnicodeSet exemplars = LocaleData.getExemplarSet(new ULocale("zh"),0);\r
280             // create string with all chars\r
281             StringBuffer b = new StringBuffer();\r
282             for (UnicodeSetIterator it = new UnicodeSetIterator(exemplars); it.next();) {\r
283                 UTF16.append(b,it.codepoint);\r
284             }\r
285             String source = b.toString();\r
286             // transform with Han translit\r
287             Transliterator han = Transliterator.getInstance("Han-Latin");\r
288             String target = han.transliterate(source);\r
289             // now verify that there are no Han characters left\r
290             UnicodeSet allHan = new UnicodeSet("[:han:]");\r
291             assertFalse("No Han must be left after Han-Latin transliteration",allHan.containsSome(target));\r
292             // check the pinyin translit\r
293             Transliterator pn = Transliterator.getInstance("Latin-NumericPinyin");\r
294             String target2 = pn.transliterate(target);\r
295             // verify that there are no marks\r
296             Transliterator nfc = Transliterator.getInstance("nfc");\r
297             String nfced = nfc.transliterate(target2);\r
298             UnicodeSet allMarks = new UnicodeSet("[:mark:]");\r
299             assertFalse("NumericPinyin must contain no marks", allMarks.containsSome(nfced));\r
300             // verify roundtrip\r
301             Transliterator np = pn.getInverse();\r
302             String target3 = np.transliterate(target);\r
303             boolean roundtripOK = target3.equals(target);\r
304             assertTrue("NumericPinyin must roundtrip", roundtripOK);\r
305             if (!roundtripOK) {\r
306                 String filename = "numeric-pinyin.log.txt";\r
307                 PrintWriter out = new PrintWriter(\r
308                         new BufferedWriter(\r
309                                 new OutputStreamWriter(\r
310                                         new FileOutputStream(filename), "UTF8"), 4*1024));\r
311                 errln("Creating log file " + new File(filename).getAbsoluteFile());\r
312                 out.println("Pinyin:                " + target);\r
313                 out.println("Pinyin-Numeric-Pinyin: " + target2);\r
314                 out.close();\r
315             }\r
316         }catch(MissingResourceException ex){\r
317             warnln("Could not load the locale data for fetching the exemplar characters.");\r
318         }\r
319     }\r
320 \r
321     public void TestSingle() {\r
322         Transliterator t = Transliterator.getInstance("Latin-Greek");\r
323         t.transliterate("\u0061\u0101\u0069");\r
324     }\r
325 \r
326     String getGreekSet() {\r
327         // Time bomb\r
328         if (skipIfBeforeICU(4,5,0)) {\r
329             // We temporarily filter against Unicode 4.1, but we only do this\r
330             // before version 3.5.\r
331             logln("TestGreek needs to be updated to remove delete the section marked [:Age=4.0:] filter");\r
332         } else {\r
333             errln("TestGreek needs to be updated to remove delete the [:Age=4.0:] filter ");\r
334         }\r
335         return \r
336         // isICU28() ? "[[\u003B\u00B7[:Greek:]-[\u03D7-\u03EF]]&[:Age=3.2:]]" :\r
337         "[\u003B\u00B7[[:Greek:]&[:Letter:]]-[" +\r
338         "\u1D26-\u1D2A" + // L&   [5] GREEK LETTER SMALL CAPITAL GAMMA..GREEK LETTER SMALL CAPITAL PSI\r
339         "\u1D5D-\u1D61" + // Lm   [5] MODIFIER LETTER SMALL BETA..MODIFIER LETTER SMALL CHI\r
340         "\u1D66-\u1D6A" + // L&   [5] GREEK SUBSCRIPT SMALL LETTER BETA..GREEK SUBSCRIPT SMALL LETTER CHI\r
341         "\u03D7-\u03EF" + // \N{GREEK KAI SYMBOL}..\N{COPTIC SMALL LETTER DEI}\r
342         "] & [:Age=4.0:]]";\r
343     }\r
344 \r
345     public void TestGreek() throws IOException {\r
346         long start = System.currentTimeMillis();\r
347         new Test("Latin-Greek", 50)\r
348         .test("[a-zA-Z]", getGreekSet(),\r
349                 "[\u00B5\u037A\u03D0-\u03F5\u03F9]", /* roundtrip exclusions */\r
350                 this, new LegalGreek(true));\r
351         showElapsed(start, "TestGreek");\r
352     }\r
353 \r
354     public void TestGreekUNGEGN() throws IOException {\r
355         long start = System.currentTimeMillis();\r
356         new Test("Latin-Greek/UNGEGN")\r
357         .test("[a-zA-Z]", getGreekSet(),\r
358                 "[\u00B5\u037A\u03D0-\uFFFF{\u039C\u03C0}]", /* roundtrip exclusions */\r
359                 this, new LegalGreek(false));\r
360         showElapsed(start, "TestGreekUNGEGN");\r
361     }\r
362 \r
363     public void Testel() throws IOException {\r
364         long start = System.currentTimeMillis();\r
365         new Test("Latin-el")\r
366         .test("[a-zA-Z]", getGreekSet(),\r
367                 "[\u00B5\u037A\u03D0-\uFFFF{\u039C\u03C0}]", /* roundtrip exclusions */\r
368                 this, new LegalGreek(false));\r
369         showElapsed(start, "Testel");\r
370     }\r
371 \r
372     public void TestCyrillic() throws IOException {\r
373         long start = System.currentTimeMillis();\r
374         new Test("Latin-Cyrillic")\r
375         .test("[a-zA-Z\u0110\u0111\u02BA\u02B9]", "[\u0400-\u045F]", null, this, new Legal());\r
376         showElapsed(start, "TestCyrillic");\r
377     }\r
378 \r
// UnicodeSet pattern listing the Arabic-block code points that TestArabic passes as the
// second set of its "Latin-Arabic" round-trip test.
static final String ARABIC = "[\u06A9\u060C\u061B\u061F\u0621\u0627-\u063A\u0641-\u0655\u0660-\u066C\u067E\u0686\u0698\u06A4\u06AD\u06AF\u06CB-\u06CC\u06F0-\u06F9]";
380 \r
381     public void TestArabic() throws IOException {\r
382         long start = System.currentTimeMillis();\r
383         new Test("Latin-Arabic")\r
384         .test("[a-zA-Z\u02BE\u02BF]", ARABIC, "[a-zA-Z\u02BE\u02BF\u207F]", null, this, new Legal()); //\r
385         showElapsed(start, "TestArabic");\r
386     }\r
387 \r
388     public void TestHebrew() throws IOException {\r
389         //      Time bomb\r
390         if (skipIfBeforeICU(4,5,0)) {\r
391             // We temporarily filter against Unicode 4.1, but we only do this\r
392             // before version 3.5.\r
393             logln("TestHebrew needs to be updated to remove delete the section marked [:Age=4.0:] filter");\r
394         } else {\r
395             errln("TestHebrew needs to be updated to remove delete the [:Age=4.0:] filter ");\r
396         }\r
397         long start = System.currentTimeMillis();\r
398         new Test("Latin-Hebrew")\r
399         .test("[a-zA-Z\u02BC\u02BB]", "[[[:hebrew:]-[\u05BD\uFB00-\uFBFF]]& [:Age=4.0:]]", "[\u05F0\u05F1\u05F2]", this, new LegalHebrew());\r
400         showElapsed(start, "TestHebrew");\r
401     }\r
402 \r
403     public void TestThai() throws IOException {\r
404         long start = System.currentTimeMillis();\r
405         if(skipIfBeforeICU(4,5,0)){\r
406             new Test("Latin-Thai")\r
407             .test("[a-zA-Z\u0142\u1ECD\u00E6\u0131\u0268\u02CC]",\r
408                     "[\u0E01-\u0E3A\u0E40-\u0E5B]", \r
409                     "[a-zA-Z\u0142\u1ECD\u00E6\u0131\u0268\u02B9\u02CC]",\r
410                     "[\u0E4F]", this, new LegalThai());   \r
411         }else{\r
412             new Test("Latin-Thai")\r
413             .test("[a-zA-Z\u0142\u1ECD\u00E6\u0131\u0268\u02CC]",\r
414                     "[\u0E01-\u0E3A\u0E40-\u0E5B]", \r
415                     "[a-zA-Z\u0142\u1ECD\u00E6\u0131\u0268\u02B9\u02CC]",\r
416                     null, this, new LegalThai());\r
417         }\r
418 \r
419         showElapsed(start, "TestThai");\r
420     }\r
421 \r
422     //----------------------------------\r
423     // Inter-Indic Tests\r
424     //----------------------------------\r
425     public static class LegalIndic extends Legal{\r
426         UnicodeSet vowelSignSet = new UnicodeSet();\r
427 \r
428         public LegalIndic(){\r
429             vowelSignSet.addAll(new UnicodeSet("[\u0901\u0902\u0903\u0904\u093e-\u094c\u0962\u0963]"));               /* Devanagari */\r
430             vowelSignSet.addAll(new UnicodeSet("[\u0981\u0982\u0983\u09be-\u09cc\u09e2\u09e3\u09D7]"));         /* Bengali */\r
431             vowelSignSet.addAll(new UnicodeSet("[\u0a01\u0a02\u0a03\u0a3e-\u0a4c\u0a62\u0a63\u0a70\u0a71]"));   /* Gurmukhi */\r
432             vowelSignSet.addAll(new UnicodeSet("[\u0a81\u0a82\u0a83\u0abe-\u0acc\u0ae2\u0ae3]"));               /* Gujarati */\r
433             vowelSignSet.addAll(new UnicodeSet("[\u0b01\u0b02\u0b03\u0b3e-\u0b4c\u0b62\u0b63\u0b56\u0b57]"));   /* Oriya */\r
434             vowelSignSet.addAll(new UnicodeSet("[\u0b81\u0b82\u0b83\u0bbe-\u0bcc\u0be2\u0be3\u0bd7]"));         /* Tamil */\r
435             vowelSignSet.addAll(new UnicodeSet("[\u0c01\u0c02\u0c03\u0c3e-\u0c4c\u0c62\u0c63\u0c55\u0c56]"));   /* Telugu */\r
436             vowelSignSet.addAll(new UnicodeSet("[\u0c81\u0c82\u0c83\u0cbe-\u0ccc\u0ce2\u0ce3\u0cd5\u0cd6]"));   /* Kannada */\r
437             vowelSignSet.addAll(new UnicodeSet("[\u0d01\u0d02\u0d03\u0d3e-\u0d4c\u0d62\u0d63\u0d57]"));         /* Malayalam */\r
438         }\r
439 \r
440         String avagraha = "\u093d\u09bd\u0abd\u0b3d\u0cbd";\r
441         String nukta = "\u093c\u09bc\u0a3c\u0abc\u0b3c\u0cbc";\r
442         String virama = "\u094d\u09cd\u0a4d\u0acd\u0b4d\u0bcd\u0c4d\u0ccd\u0d4d";\r
443         String sanskritStressSigns = "\u0951\u0952\u0953\u0954\u097d";\r
444         String chandrabindu = "\u0901\u0981\u0A81\u0b01\u0c01";\r
445         public boolean is(String sourceString){\r
446             int cp=sourceString.charAt(0);\r
447 \r
448             // A vowel sign cannot be the first char\r
449             if(vowelSignSet.contains(cp)){\r
450                 return false;\r
451             }else if(avagraha.indexOf(cp)!=-1){\r
452                 return false;\r
453             }else if(virama.indexOf(cp)!=-1){\r
454                 return false;\r
455             }else if(nukta.indexOf(cp)!=-1){\r
456                 return false;\r
457             }else if(sanskritStressSigns.indexOf(cp)!=-1){\r
458                 return false;\r
459             }else if((chandrabindu.indexOf(cp)!=-1) &&\r
460                     (sourceString.length() >1 &&\r
461                             vowelSignSet.contains(sourceString.charAt(1)))){\r
462                 return false;\r
463             }\r
464             return true;\r
465         }\r
466     }\r
// UnicodeSet pattern that TestDevanagariLatin passes as the Latin side of its round-trip
// test. It has the shape [[listed characters] - [removed characters] & [[:latin:][:mark:]]]:
// an explicit list of letters, digits and marks, minus U+E000..U+E080, U+01E2 and U+01E3,
// combined with the Latin script and mark properties.
static String latinForIndic = "[['.0-9A-Za-z~\u00C0-\u00C5\u00C7-\u00CF\u00D1-\u00D6\u00D9-\u00DD"+
"\u00E0-\u00E5\u00E7-\u00EF\u00F1-\u00F6\u00F9-\u00FD\u00FF-\u010F"+
"\u0112-\u0125\u0128-\u0130\u0134-\u0137\u0139-\u013E\u0143-\u0148"+
"\u014C-\u0151\u0154-\u0165\u0168-\u017E\u01A0-\u01A1\u01AF-\u01B0"+
"\u01CD-\u01DC\u01DE-\u01E3\u01E6-\u01ED\u01F0\u01F4-\u01F5\u01F8-\u01FB"+
"\u0200-\u021B\u021E-\u021F\u0226-\u0233\u0294\u0303-\u0304\u0306\u0314-\u0315"+
"\u0325\u040E\u0419\u0439\u045E\u04C1-\u04C2\u04D0-\u04D1\u04D6-\u04D7"+
"\u04E2-\u04E3\u04EE-\u04EF\u1E00-\u1E99\u1EA0-\u1EF9\u1F01\u1F03\u1F05"+
"\u1F07\u1F09\u1F0B\u1F0D\u1F0F\u1F11\u1F13\u1F15\u1F19\u1F1B\u1F1D\u1F21"+
"\u1F23\u1F25\u1F27\u1F29\u1F2B\u1F2D\u1F2F\u1F31\u1F33\u1F35\u1F37\u1F39"+
"\u1F3B\u1F3D\u1F3F\u1F41\u1F43\u1F45\u1F49\u1F4B\u1F4D\u1F51\u1F53\u1F55"+
"\u1F57\u1F59\u1F5B\u1F5D\u1F5F\u1F61\u1F63\u1F65\u1F67\u1F69\u1F6B\u1F6D"+
"\u1F6F\u1F81\u1F83\u1F85\u1F87\u1F89\u1F8B\u1F8D\u1F8F\u1F91\u1F93\u1F95"+
"\u1F97\u1F99\u1F9B\u1F9D\u1F9F\u1FA1\u1FA3\u1FA5\u1FA7\u1FA9\u1FAB\u1FAD"+
"\u1FAF-\u1FB1\u1FB8-\u1FB9\u1FD0-\u1FD1\u1FD8-\u1FD9\u1FE0-\u1FE1\u1FE5"+
"\u1FE8-\u1FE9\u1FEC\u212A-\u212B\uE04D\uE064]"+
"-[\uE000-\uE080 \u01E2\u01E3]& [[:latin:][:mark:]]]";
484 \r
485     public void TestDevanagariLatin() throws IOException {\r
486         long start = System.currentTimeMillis();\r
487         if (skipIfBeforeICU(4,5,0)) {\r
488             logln("Warning: TestDevanagariLatin needs to be updated to remove delete the section marked [:Age=4.1:] filter");\r
489         } else {\r
490             //              We temporarily filter against Unicode 4.1, but we only do this\r
491             // before version 3.4.\r
492             errln("FAIL: TestDevanagariLatin needs to be updated to remove delete the [:Age=4.1:] filter ");\r
493             return;\r
494         }\r
495         new Test("Latin-DEVANAGARI", 50)\r
496         .test(latinForIndic, "[[[:Devanagari:][\u094d][\u0964\u0965]]&[:Age=4.1:]]", "[\u0965\u0904]", this, new LegalIndic());\r
497         showElapsed(start, "TestDevanagariLatin");\r
498     }\r
499 \r
500     private static final String [][] interIndicArray= new String[][]{\r
501         new String [] {  "BENGALI-DEVANAGARI",\r
502                 "[:BENGALI:]", "[:Devanagari:]",\r
503                 "[\u0904\u0951-\u0954\u0943-\u0949\u094a\u0962\u0963\u090D\u090e\u0911\u0912\u0929\u0933\u0934\u0935\u0950\u0958\u0959\u095a\u095b\u095e\u097d]", /*roundtrip exclusions*/\r
504         },\r
505         new String [] {  "DEVANAGARI-BENGALI",\r
506                 "[:Devanagari:]", "[:BENGALI:]",\r
507                 "[\u09D7\u090D\u090e\u0911\u0912\u0929\u0933\u0934\u0935\u0950\u0958\u0959\u095a\u095b\u095e\u09f0\u09f1\u09f2-\u09fa\u09ce]", /*roundtrip exclusions*/\r
508         },\r
509 \r
510         new String [] {  "GURMUKHI-DEVANAGARI",\r
511                 "[:GURMUKHI:]", "[:Devanagari:]",\r
512                 "[\u0904\u0902\u0936\u0933\u0951-\u0954\u0902\u0903\u0943-\u0949\u094a\u0962\u0963\u090B\u090C\u090D\u090e\u0911\u0912\u0934\u0937\u093D\u0950\u0960\u0961\u097d]", /*roundtrip exclusions*/\r
513         },\r
514         new String [] {  "DEVANAGARI-GURMUKHI",\r
515                 "[:Devanagari:]", "[:GURMUKHI:]",\r
516                 "[\u0A02\u0946\u0A5C\u0951-\u0954\u0A70\u0A71\u090B\u090C\u090D\u090e\u0911\u0912\u0934\u0937\u093D\u0950\u0960\u0961\u0a72\u0a73\u0a74]", /*roundtrip exclusions*/\r
517         },\r
518 \r
519         new String [] {  "GUJARATI-DEVANAGARI",\r
520                 "[:GUJARATI:]", "[:Devanagari:]",\r
521                 "[\u0904\u0946\u094A\u0962\u0963\u0951-\u0954\u0961\u090c\u090e\u0912\u097d]", /*roundtrip exclusions*/\r
522         },\r
523         new String [] {  "DEVANAGARI-GUJARATI",\r
524                 "[:Devanagari:]", "[:GUJARATI:]",\r
525                 "[\u0951-\u0954\u0961\u090c\u090e\u0912]", /*roundtrip exclusions*/\r
526         },\r
527 \r
528         new String [] {  "ORIYA-DEVANAGARI",\r
529                 "[:ORIYA:]", "[:Devanagari:]",\r
530                 "[\u0904\u0912\u0911\u090D\u090e\u0931\u0943-\u094a\u0962\u0963\u0951-\u0954\u0950\u097d]", /*roundtrip exclusions*/\r
531         },\r
532         new String [] {  "DEVANAGARI-ORIYA",\r
533                 "[:Devanagari:]", "[:ORIYA:]",\r
534                 "[\u0b5f\u0b56\u0b57\u0b70\u0b71\u0950\u090D\u090e\u0912\u0911\u0931]", /*roundtrip exclusions*/\r
535         },\r
536 \r
537         new String [] {  "Tamil-DEVANAGARI",\r
538                 "[:tamil:]", "[:Devanagari:]",\r
539                 "[\u0901\u0904\u093c\u0943-\u094a\u0951-\u0954\u0962\u0963\u090B\u090C\u090D\u0911\u0916\u0917\u0918\u091B\u091D\u0920\u0921\u0922\u0925\u0926\u0927\u092B\u092C\u092D\u0936\u093d\u0950[\u0958-\u0961]\u097d]", /*roundtrip exclusions*/\r
540         },\r
541         new String [] {  "DEVANAGARI-Tamil",\r
542                 "[:Devanagari:]", "[:tamil:]",\r
543                 "[\u0bd7\u0BF0\u0BF1\u0BF2]", /*roundtrip exclusions*/\r
544         },\r
545 \r
546         new String [] {  "Telugu-DEVANAGARI",\r
547                 "[:telugu:]", "[:Devanagari:]",\r
548                 "[\u0904\u093c\u0950\u0945\u0949\u0951-\u0954\u0962\u0963\u090D\u0911\u093d\u0929\u0934[\u0958-\u095f]\u097d]", /*roundtrip exclusions*/\r
549         },\r
550         new String [] {  "DEVANAGARI-TELUGU",\r
551                 "[:Devanagari:]", "[:TELUGU:]",\r
552                 "[\u0c55\u0c56\u0950\u090D\u0911\u093d\u0929\u0934[\u0958-\u095f]]", /*roundtrip exclusions*/\r
553         },\r
554 \r
555         new String [] {  "KANNADA-DEVANAGARI",\r
556                 "[:KANNADA:]", "[:Devanagari:]",\r
557                 "[\u0901\u0904\u0946\u0950\u0945\u0949\u0951-\u0954\u0962\u0963\u0950\u090D\u0911\u093d\u0929\u0934[\u0958-\u095f]\u097d]", /*roundtrip exclusions*/\r
558         },\r
559         new String [] {  "DEVANAGARI-KANNADA",\r
560                 "[:Devanagari:]", "[:KANNADA:]",\r
561                 "[{\u0cb0\u0cbc}{\u0cb3\u0cbc}\u0cde\u0cd5\u0cd6\u0950\u090D\u0911\u093d\u0929\u0934[\u0958-\u095f]]", /*roundtrip exclusions*/\r
562         },\r
563 \r
564         new String [] {  "MALAYALAM-DEVANAGARI",\r
565                 "[:MALAYALAM:]", "[:Devanagari:]",\r
566                 "[\u0901\u0904\u094a\u094b\u094c\u093c\u0950\u0944\u0945\u0949\u0951-\u0954\u0962\u0963\u090D\u0911\u093d\u0929\u0934[\u0958-\u095f]\u097d]", /*roundtrip exclusions*/\r
567         },\r
568         new String [] {  "DEVANAGARI-MALAYALAM",\r
569                 "[:Devanagari:]", "[:MALAYALAM:]",\r
570                 "[\u0d4c\u0d57\u0950\u090D\u0911\u093d\u0929\u0934[\u0958-\u095f]]", /*roundtrip exclusions*/\r
571         },\r
572 \r
573         new String [] {  "GURMUKHI-BENGALI",\r
574                 "[:GURMUKHI:]", "[:BENGALI:]",\r
575                 "[\u0982\u09b6\u09e2\u09e3\u09c3\u09c4\u09d7\u098B\u098C\u09B7\u09E0\u09E1\u09F0\u09F1\u09f2-\u09fa\u09ce]", /*roundtrip exclusions*/\r
576         },\r
577         new String [] {  "BENGALI-GURMUKHI",\r
578                 "[:BENGALI:]", "[:GURMUKHI:]",\r
579                 "[\u0A02\u0a5c\u0a47\u0a70\u0a71\u0A33\u0A35\u0A59\u0A5A\u0A5B\u0A5E\u0A72\u0A73\u0A74]", /*roundtrip exclusions*/\r
580         },\r
581 \r
582         new String [] {  "GUJARATI-BENGALI",\r
583                 "[:GUJARATI:]", "[:BENGALI:]",\r
584                 "[\u09d7\u09e2\u09e3\u098c\u09e1\u09f0\u09f1\u09f2-\u09fa\u09ce]", /*roundtrip exclusions*/\r
585         },\r
586         new String [] {  "BENGALI-GUJARATI",\r
587                 "[:BENGALI:]", "[:GUJARATI:]",\r
588                 "[\u0A82\u0a83\u0Ac9\u0Ac5\u0ac7\u0A8D\u0A91\u0AB3\u0AB5\u0ABD\u0AD0]", /*roundtrip exclusions*/\r
589         },\r
590 \r
591         new String [] {  "ORIYA-BENGALI",\r
592                 "[:ORIYA:]", "[:BENGALI:]",\r
593                 "[\u09c4\u09e2\u09e3\u09f0\u09f1\u09f2-\u09fa\u09ce]", /*roundtrip exclusions*/\r
594         },\r
595         new String [] {  "BENGALI-ORIYA",\r
596                 "[:BENGALI:]", "[:ORIYA:]",\r
597                 "[\u0b35\u0b71\u0b5f\u0b56\u0b33\u0b3d]", /*roundtrip exclusions*/\r
598         },\r
599 \r
600         new String [] {  "Tamil-BENGALI",\r
601                 "[:tamil:]", "[:BENGALI:]",\r
602                 "[\u0981\u09bc\u09c3\u09c4\u09e2\u09e3\u09f0\u09f1\u098B\u098C\u0996\u0997\u0998\u099B\u099D\u09A0\u09A1\u09A2\u09A5\u09A6\u09A7\u09AB\u09AC\u09AD\u09B6\u09DC\u09DD\u09DF\u09E0\u09E1\u09f2-\u09fa\u09ce]", /*roundtrip exclusions*/\r
603         },\r
604         new String [] {  "BENGALI-Tamil",\r
605                 "[:BENGALI:]", "[:tamil:]",\r
606                 "[\u0bc6\u0bc7\u0bca\u0B8E\u0B92\u0BA9\u0BB1\u0BB3\u0BB4\u0BB5\u0BF0\u0BF1\u0BF2]", /*roundtrip exclusions*/\r
607         },\r
608 \r
609         new String [] {  "Telugu-BENGALI",\r
610                 "[:telugu:]", "[:BENGALI:]",\r
611                 "[\u09e2\u09e3\u09bc\u09d7\u09f0\u09f1\u09dc\u09dd\u09df\u09f2-\u09fa\u09ce]", /*roundtrip exclusions*/\r
612         },\r
613         new String [] {  "BENGALI-TELUGU",\r
614                 "[:BENGALI:]", "[:TELUGU:]",\r
615                 "[\u0c55\u0c56\u0c47\u0c46\u0c4a\u0C0E\u0C12\u0C31\u0C33\u0C35]", /*roundtrip exclusions*/\r
616         },\r
617 \r
618         new String [] {  "KANNADA-BENGALI",\r
619                 "[:KANNADA:]", "[:BENGALI:]",\r
620                 "[\u0981\u09e2\u09e3\u09bc\u09d7\u09f0\u09f1\u09dc\u09dd\u09df\u09f2-\u09fa\u09ce]", /*roundtrip exclusions*/\r
621         },\r
622         new String [] {  "BENGALI-KANNADA",\r
623                 "[:BENGALI:]", "[:KANNADA:]",\r
624                 "[{\u0cb0\u0cbc}{\u0cb3\u0cbc}\u0cc6\u0cca\u0cd5\u0cd6\u0cc7\u0C8E\u0C92\u0CB1\u0cb3\u0cb5\u0cde]", /*roundtrip exclusions*/\r
625         },\r
626 \r
627         new String [] {  "MALAYALAM-BENGALI",\r
628                 "[:MALAYALAM:]", "[:BENGALI:]",\r
629                 "[\u0981\u09e2\u09e3\u09bc\u09c4\u09f0\u09f1\u09dc\u09dd\u09df\u09f2-\u09fa\u09ce]", /*roundtrip exclusions*/\r
630         },\r
631         new String [] {  "BENGALI-MALAYALAM",\r
632                 "[:BENGALI:]", "[:MALAYALAM:]",\r
633                 "[\u0d46\u0d4a\u0d47\u0d31-\u0d35\u0d0e\u0d12]", /*roundtrip exclusions*/\r
634         },\r
635 \r
636         new String [] {  "GUJARATI-GURMUKHI",\r
637                 "[:GUJARATI:]", "[:GURMUKHI:]",\r
638                 "[\u0A02\u0ab3\u0ab6\u0A70\u0a71\u0a82\u0a83\u0ac3\u0ac4\u0ac5\u0ac9\u0a5c\u0a72\u0a73\u0a74\u0a8b\u0a8d\u0a91\u0abd]", /*roundtrip exclusions*/\r
639         },\r
640         new String [] {  "GURMUKHI-GUJARATI",\r
641                 "[:GURMUKHI:]", "[:GUJARATI:]",\r
642                 "[\u0a5c\u0A70\u0a71\u0a72\u0a73\u0a74\u0a82\u0a83\u0a8b\u0a8c\u0a8d\u0a91\u0ab3\u0ab6\u0ab7\u0abd\u0ac3\u0ac4\u0ac5\u0ac9\u0ad0\u0ae0\u0ae1]", /*roundtrip exclusions*/\r
643         },\r
644 \r
645         new String [] {  "ORIYA-GURMUKHI",\r
646                 "[:ORIYA:]", "[:GURMUKHI:]",\r
647                 "[\u0A02\u0a5c\u0a21\u0a47\u0a71\u0b02\u0b03\u0b33\u0b36\u0b43\u0b56\u0b57\u0B0B\u0B0C\u0B37\u0B3D\u0B5F\u0B60\u0B61\u0a35\u0a72\u0a73\u0a74]", /*roundtrip exclusions*/\r
648         },\r
649         new String [] {  "GURMUKHI-ORIYA",\r
650                 "[:GURMUKHI:]", "[:ORIYA:]",\r
651                 "[\u0a71\u0b02\u0b03\u0b33\u0b36\u0b43\u0b56\u0b57\u0B0B\u0B0C\u0B37\u0B3D\u0B5F\u0B60\u0B61\u0b70\u0b71]", /*roundtrip exclusions*/\r
652         },\r
653 \r
654         new String [] {  "TAMIL-GURMUKHI",\r
655                 "[:TAMIL:]", "[:GURMUKHI:]",\r
656                 "[\u0A01\u0A02\u0a33\u0a36\u0a3c\u0a70\u0a71\u0a47\u0A16\u0A17\u0A18\u0A1B\u0A1D\u0A20\u0A21\u0A22\u0A25\u0A26\u0A27\u0A2B\u0A2C\u0A2D\u0A59\u0A5A\u0A5B\u0A5C\u0A5E\u0A72\u0A73\u0A74]", /*roundtrip exclusions*/\r
657         },\r
658         new String [] {  "GURMUKHI-TAMIL",\r
659                 "[:GURMUKHI:]", "[:TAMIL:]",\r
660                 "[\u0b82\u0bc6\u0bca\u0bd7\u0bb7\u0bb3\u0b83\u0B8E\u0B92\u0BA9\u0BB1\u0BB4\u0bb6\u0BF0\u0BF1\u0BF2]", /*roundtrip exclusions*/\r
661         },\r
662 \r
663         new String [] {  "TELUGU-GURMUKHI",\r
664                 "[:TELUGU:]", "[:GURMUKHI:]",\r
665                 "[\u0A02\u0a33\u0a36\u0a3c\u0a70\u0a71\u0A59\u0A5A\u0A5B\u0A5C\u0A5E\u0A72\u0A73\u0A74]", /*roundtrip exclusions*/\r
666         },\r
667         new String [] {  "GURMUKHI-TELUGU",\r
668                 "[:GURMUKHI:]", "[:TELUGU:]",\r
669                 "[\u0c02\u0c03\u0c33\u0c36\u0c44\u0c43\u0c46\u0c4a\u0c56\u0c55\u0C0B\u0C0C\u0C0E\u0C12\u0C31\u0C37\u0C60\u0C61]", /*roundtrip exclusions*/\r
670         },\r
671         new String [] {  "KANNADA-GURMUKHI",\r
672                 "[:KANNADA:]", "[:GURMUKHI:]",\r
673                 "[\u0A01\u0A02\u0a33\u0a36\u0a3c\u0a70\u0a71\u0A59\u0A5A\u0A5B\u0A5C\u0A5E\u0A72\u0A73\u0A74]", /*roundtrip exclusions*/\r
674         },\r
675         new String [] {  "GURMUKHI-KANNADA",\r
676                 "[:GURMUKHI:]", "[:KANNADA:]",\r
677                 "[{\u0cb0\u0cbc}{\u0cb3\u0cbc}\u0c82\u0c83\u0cb3\u0cb6\u0cc4\u0cc3\u0cc6\u0cca\u0cd5\u0cd6\u0C8B\u0C8C\u0C8E\u0C92\u0CB1\u0CB7\u0cbd\u0CE0\u0CE1\u0cde]", /*roundtrip exclusions*/\r
678         },\r
679 \r
680         new String [] {  "MALAYALAM-GURMUKHI",\r
681                 "[:MALAYALAM:]", "[:GURMUKHI:]",\r
682                 "[\u0A01\u0A02\u0a4b\u0a4c\u0a33\u0a36\u0a3c\u0a70\u0a71\u0A59\u0A5A\u0A5B\u0A5C\u0A5E\u0A72\u0A73\u0A74]", /*roundtrip exclusions*/\r
683         },\r
684         new String [] {  "GURMUKHI-MALAYALAM",\r
685                 "[:GURMUKHI:]", "[:MALAYALAM:]",\r
686                 "[\u0d02\u0d03\u0d33\u0d36\u0d43\u0d46\u0d4a\u0d4c\u0d57\u0D0B\u0D0C\u0D0E\u0D12\u0D31\u0D34\u0D37\u0D60\u0D61]", /*roundtrip exclusions*/\r
687         },\r
688 \r
689         new String [] {  "GUJARATI-ORIYA",\r
690                 "[:GUJARATI:]", "[:ORIYA:]",\r
691                 "[\u0b56\u0b57\u0B0C\u0B5F\u0B61\u0b70\u0b71]", /*roundtrip exclusions*/\r
692         },\r
693         new String [] {  "ORIYA-GUJARATI",\r
694                 "[:ORIYA:]", "[:GUJARATI:]",\r
695                 "[\u0Ac4\u0Ac5\u0Ac9\u0Ac7\u0A8D\u0A91\u0AB5\u0Ad0]", /*roundtrip exclusions*/\r
696         },\r
697 \r
698         new String [] {  "TAMIL-GUJARATI",\r
699                 "[:TAMIL:]", "[:GUJARATI:]",\r
700                 "[\u0A81\u0a8c\u0abc\u0ac3\u0Ac4\u0Ac5\u0Ac9\u0Ac7\u0A8B\u0A8D\u0A91\u0A96\u0A97\u0A98\u0A9B\u0A9D\u0AA0\u0AA1\u0AA2\u0AA5\u0AA6\u0AA7\u0AAB\u0AAC\u0AAD\u0AB6\u0ABD\u0AD0\u0AE0\u0AE1]", /*roundtrip exclusions*/\r
701         },\r
702         new String [] {  "GUJARATI-TAMIL",\r
703                 "[:GUJARATI:]", "[:TAMIL:]",\r
704                 "[\u0Bc6\u0Bca\u0Bd7\u0B8E\u0B92\u0BA9\u0BB1\u0BB4\u0BF0\u0BF1\u0BF2]", /*roundtrip exclusions*/\r
705         },\r
706 \r
707         new String [] {  "TELUGU-GUJARATI",\r
708                 "[:TELUGU:]", "[:GUJARATI:]",\r
709                 "[\u0abc\u0Ac5\u0Ac9\u0A8D\u0A91\u0ABD\u0Ad0]", /*roundtrip exclusions*/\r
710         },\r
711         new String [] {  "GUJARATI-TELUGU",\r
712                 "[:GUJARATI:]", "[:TELUGU:]",\r
713                 "[\u0c46\u0c4a\u0c55\u0c56\u0C0C\u0C0E\u0C12\u0C31\u0C61]", /*roundtrip exclusions*/\r
714         },\r
715 \r
716         new String [] {  "KANNADA-GUJARATI",\r
717                 "[:KANNADA:]", "[:GUJARATI:]",\r
718                 "[\u0A81\u0abc\u0Ac5\u0Ac9\u0A8D\u0A91\u0ABD\u0Ad0]", /*roundtrip exclusions*/\r
719         },\r
720         new String [] {  "GUJARATI-KANNADA",\r
721                 "[:GUJARATI:]", "[:KANNADA:]",\r
722                 "[{\u0cb0\u0cbc}{\u0cb3\u0cbc}\u0cc6\u0cca\u0cd5\u0cd6\u0C8C\u0C8E\u0C92\u0CB1\u0CDE\u0CE1]", /*roundtrip exclusions*/\r
723         },\r
724 \r
725         new String [] {  "MALAYALAM-GUJARATI",\r
726                 "[:MALAYALAM:]", "[:GUJARATI:]",\r
727                 "[\u0A81\u0ac4\u0acb\u0acc\u0abc\u0Ac5\u0Ac9\u0A8D\u0A91\u0ABD\u0Ad0]", /*roundtrip exclusions*/\r
728         },\r
729         new String [] {  "GUJARATI-MALAYALAM",\r
730                 "[:GUJARATI:]", "[:MALAYALAM:]",\r
731                 "[\u0d46\u0d4a\u0d4c\u0d55\u0d57\u0D0C\u0D0E\u0D12\u0D31\u0D34\u0D61]", /*roundtrip exclusions*/\r
732         },\r
733 \r
734         new String [] {  "TAMIL-ORIYA",\r
735                 "[:TAMIL:]", "[:ORIYA:]",\r
736                 "[\u0B01\u0b3c\u0b43\u0b56\u0B0B\u0B0C\u0B16\u0B17\u0B18\u0B1B\u0B1D\u0B20\u0B21\u0B22\u0B25\u0B26\u0B27\u0B2B\u0B2C\u0B2D\u0B36\u0B3D\u0B5C\u0B5D\u0B5F\u0B60\u0B61\u0b70\u0b71]", /*roundtrip exclusions*/\r
737         },\r
738         new String [] {  "ORIYA-TAMIL",\r
739                 "[:ORIYA:]", "[:TAMIL:]",\r
740                 "[\u0bc6\u0bca\u0bc7\u0B8E\u0B92\u0BA9\u0BB1\u0BB4\u0BB5\u0BF0\u0BF1\u0BF2]", /*roundtrip exclusions*/\r
741         },\r
742 \r
743         new String [] {  "TELUGU-ORIYA",\r
744                 "[:TELUGU:]", "[:ORIYA:]",\r
745                 "[\u0b3c\u0b57\u0b56\u0B3D\u0B5C\u0B5D\u0B5F\u0b70\u0b71]", /*roundtrip exclusions*/\r
746         },\r
747         new String [] {  "ORIYA-TELUGU",\r
748                 "[:ORIYA:]", "[:TELUGU:]",\r
749                 "[\u0c44\u0c46\u0c4a\u0c55\u0c47\u0C0E\u0C12\u0C31\u0C35]", /*roundtrip exclusions*/\r
750         },\r
751 \r
752         new String [] {  "KANNADA-ORIYA",\r
753                 "[:KANNADA:]", "[:ORIYA:]",\r
754                 "[\u0B01\u0b3c\u0b57\u0B3D\u0B5C\u0B5D\u0B5F\u0b70\u0b71]", /*roundtrip exclusions*/\r
755         },\r
756         new String [] {  "ORIYA-KANNADA",\r
757                 "[:ORIYA:]", "[:KANNADA:]",\r
758                 "[{\u0cb0\u0cbc}{\u0cb3\u0cbc}\u0cc4\u0cc6\u0cca\u0cd5\u0cc7\u0C8E\u0C92\u0CB1\u0CB5\u0CDE]", /*roundtrip exclusions*/\r
759         },\r
760 \r
761         new String [] {  "MALAYALAM-ORIYA",\r
762                 "[:MALAYALAM:]", "[:ORIYA:]",\r
763                 "[\u0B01\u0b3c\u0b56\u0B3D\u0B5C\u0B5D\u0B5F\u0b70\u0b71]", /*roundtrip exclusions*/\r
764         },\r
765         new String [] {  "ORIYA-MALAYALAM",\r
766                 "[:ORIYA:]", "[:MALAYALAM:]",\r
767                 "[\u0D47\u0D46\u0D4a\u0D0E\u0D12\u0D31\u0D34\u0D35]", /*roundtrip exclusions*/\r
768         },\r
769 \r
770         new String [] {  "TELUGU-TAMIL",\r
771                 "[:TELUGU:]", "[:TAMIL:]",\r
772                 "[\u0bd7\u0ba9\u0bb4\u0BF0\u0BF1\u0BF2\u0BF0\u0BF1\u0BF2]", /*roundtrip exclusions*/\r
773         },\r
774         new String [] {  "TAMIL-TELUGU",\r
775                 "[:TAMIL:]", "[:TELUGU:]",\r
776                 "[\u0C01\u0c43\u0c44\u0c46\u0c47\u0c55\u0c56\u0c66\u0C0B\u0C0C\u0C16\u0C17\u0C18\u0C1B\u0C1D\u0C20\u0C21\u0C22\u0C25\u0C26\u0C27\u0C2B\u0C2C\u0C2D\u0C36\u0C60\u0C61]", /*roundtrip exclusions*/\r
777         },\r
778 \r
779         new String [] {  "KANNADA-TAMIL",\r
780                 "[:KANNADA:]", "[:TAMIL:]",\r
781                 "[\u0bd7\u0bc6\u0ba9\u0bb4\u0BF0\u0BF1\u0BF2]", /*roundtrip exclusions*/\r
782         },\r
783         new String [] {  "TAMIL-KANNADA",\r
784                 "[:TAMIL:]", "[:KANNADA:]",\r
785                 "[\u0cc3\u0cc4\u0cc6\u0cc7\u0cd5\u0cd6\u0C8B\u0C8C\u0C96\u0C97\u0C98\u0C9B\u0C9D\u0CA0\u0CA1\u0CA2\u0CA5\u0CA6\u0CA7\u0CAB\u0CAC\u0CAD\u0CB6\u0cbc\u0cbd\u0CDE\u0CE0\u0CE1]", /*roundtrip exclusions*/\r
786         },\r
787 \r
788         new String [] {  "MALAYALAM-TAMIL",\r
789                 "[:MALAYALAM:]", "[:TAMIL:]",\r
790                 "[\u0ba9\u0BF0\u0BF1\u0BF2]", /*roundtrip exclusions*/\r
791         },\r
792         new String [] {  "TAMIL-MALAYALAM",\r
793                 "[:TAMIL:]", "[:MALAYALAM:]",\r
794                 "[\u0d43\u0d12\u0D0B\u0D0C\u0D16\u0D17\u0D18\u0D1B\u0D1D\u0D20\u0D21\u0D22\u0D25\u0D26\u0D27\u0D2B\u0D2C\u0D2D\u0D36\u0D60\u0D61]", /*roundtrip exclusions*/\r
795         },\r
796 \r
797         new String [] {  "KANNADA-TELUGU",\r
798                 "[:KANNADA:]", "[:TELUGU:]",\r
799                 "[\u0C01\u0c3f\u0c46\u0c48\u0c4a]", /*roundtrip exclusions*/\r
800         },\r
801         new String [] {  "TELUGU-KANNADA",\r
802                 "[:TELUGU:]", "[:KANNADA:]",\r
803                 "[\u0cc8\u0cd5\u0cd6\u0CDE\u0cbc\u0cbd]", /*roundtrip exclusions*/\r
804         },\r
805 \r
806         new String [] {  "MALAYALAM-TELUGU",\r
807                 "[:MALAYALAM:]", "[:TELUGU:]",\r
808                 "[\u0C01\u0c44\u0c4a\u0c4c\u0c4b\u0c55\u0c56]", /*roundtrip exclusions*/\r
809         },\r
810         new String [] {  "TELUGU-MALAYALAM",\r
811                 "[:TELUGU:]", "[:MALAYALAM:]",\r
812                 "[\u0d4c\u0d57\u0D34]", /*roundtrip exclusions*/\r
813         },\r
814 \r
815         new String [] {  "MALAYALAM-KANNADA",\r
816                 "[:MALAYALAM:]", "[:KANNADA:]",\r
817                 "[\u0cbc\u0cbd\u0cc4\u0cc6\u0cca\u0ccc\u0ccb\u0cd5\u0cd6\u0cDe]", /*roundtrip exclusions*/\r
818         },\r
819         new String [] {  "Latin-Bengali",\r
820                 latinForIndic, "[[:Bengali:][\u0964\u0965]]",\r
821                 "[\u0965\u09f0-\u09fa\u09ce]", /*roundtrip exclusions*/\r
822         },\r
823         new String [] {  "Latin-Gurmukhi",\r
824                 latinForIndic, "[[:Gurmukhi:][\u0964\u0965]]",\r
825                 "[\u0a01\u0a02\u0965\u0a72\u0a73\u0a74]", /*roundtrip exclusions*/\r
826         },\r
827         new String [] {  "Latin-Gujarati",\r
828                 latinForIndic, "[[:Gujarati:][\u0964\u0965]]",\r
829                 "[\u0965]", /*roundtrip exclusions*/\r
830         },\r
831         new String [] {  "Latin-Oriya",\r
832                 latinForIndic, "[[:Oriya:][\u0964\u0965]]",\r
833                 "[\u0965\u0b70]", /*roundtrip exclusions*/\r
834         },\r
835         new String [] {  "Latin-Tamil",\r
836                 latinForIndic, "[:Tamil:]",\r
837                 "[\u0BF0\u0BF1\u0BF2]", /*roundtrip exclusions*/\r
838         },\r
839         new String [] {  "Latin-Telugu",\r
840                 latinForIndic, "[:Telugu:]",\r
841                 null, /*roundtrip exclusions*/\r
842         },\r
843         new String [] {  "Latin-Kannada",\r
844                 latinForIndic, "[:Kannada:]",\r
845                 null, /*roundtrip exclusions*/\r
846         },\r
847         new String [] {  "Latin-Malayalam",\r
848                 latinForIndic, "[:Malayalam:]",\r
849                 null, /*roundtrip exclusions*/\r
850         },\r
851     };\r
852 \r
853     public void TestInterIndic() throws Exception{\r
854         long start = System.currentTimeMillis();\r
855         int num = interIndicArray.length;\r
856         if (isQuick()) {\r
857             logln("Testing only 5 of "+ interIndicArray.length+" Skipping rest (use -e for exhaustive)");\r
858             num = 5;\r
859         }\r
860         if (skipIfBeforeICU(4,5,0)) {\r
861             logln("Warning: TestInterIndic needs to be updated to remove delete the section marked [:Age=4.1:] filter");\r
862         } else {\r
863             //          We temporarily filter against Unicode 4.1, but we only do this\r
864             // before version 3.4.\r
865             errln("FAIL: TestInterIndic needs to be updated to remove delete the [:Age=4.1:] filter ");\r
866             return;\r
867         }\r
868         for(int i=0; i<num;i++){\r
869             logln("Testing " + interIndicArray[i][0] + " at index " + i   );\r
870             /*TODO: uncomment the line below when the transliterator is fixed\r
871             new Test(interIndicArray[i][0], 50)\r
872                 .test(interIndicArray[i][1],\r
873                       interIndicArray[i][2],\r
874                       interIndicArray[i][3],\r
875                       this, new LegalIndic());\r
876              */\r
877             /* comment lines below  when transliterator is fixed */\r
878             // start\r
879             new Test(interIndicArray[i][0], 50)\r
880             .test("["+interIndicArray[i][1]+" &[:Age=4.1:]]",\r
881                     "["+interIndicArray[i][2]+" &[:Age=4.1:]]",\r
882                     interIndicArray[i][3],\r
883                     this, new LegalIndic());\r
884             //end\r
885         }\r
886         showElapsed(start, "TestInterIndic");\r
887     }\r
888 \r
889     //---------------\r
890     // End Indic\r
891     //---------------\r
892 \r
893     public static class Legal {\r
894         public boolean is(String sourceString) {return true;}\r
895     }\r
896 \r
897     public static class LegalJamo extends Legal {\r
898         // any initial must be followed by a medial (or initial)\r
899         // any medial must follow an initial (or medial)\r
900         // any final must follow a medial (or final)\r
901 \r
902         public boolean is(String sourceString) {\r
903             try {\r
904                 int t;\r
905                 String decomp = Normalizer.normalize(sourceString, Normalizer.NFD);\r
906                 for (int i = 0; i < decomp.length(); ++i) { // don't worry about surrogates\r
907                     switch (getType(decomp.charAt(i))) {\r
908                     case 0:\r
909                         t = getType(decomp.charAt(i+1));\r
910                         if (t != 0 && t != 1) return false;\r
911                         break;\r
912                     case 1:\r
913                         t = getType(decomp.charAt(i-1));\r
914                         if (t != 0 && t != 1) return false;\r
915                         break;\r
916                     case 2:\r
917                         t = getType(decomp.charAt(i-1));\r
918                         if (t != 1 && t != 2) return false;\r
919                         break;\r
920                     }\r
921                 }\r
922                 return true;\r
923             } catch (StringIndexOutOfBoundsException e) {\r
924                 return false;\r
925             }\r
926         }\r
927 \r
928         public int getType(char c) {\r
929             if ('\u1100' <= c && c <= '\u1112') return 0;\r
930             else if ('\u1161' <= c && c  <= '\u1175') return 1;\r
931             else if ('\u11A8' <= c && c  <= '\u11C2') return 2;\r
932             return -1; // other\r
933         }\r
934     }\r
935 \r
936     //static BreakIterator thaiBreak = BreakIterator.getWordInstance(new Locale("th", "TH"));\r
937     // anything is legal except word ending with Logical-order-exception\r
938     public static class LegalThai extends Legal {\r
939         public boolean is(String sourceString) {\r
940             if (sourceString.length() == 0) return true;\r
941             char ch = sourceString.charAt(sourceString.length() - 1); // don't worry about surrogates.\r
942             if (UCharacter.hasBinaryProperty(ch, UProperty.LOGICAL_ORDER_EXCEPTION)) return false;\r
943 \r
944 \r
945             // disallow anything with a wordbreak between\r
946             /*\r
947             if (UTF16.countCodePoint(sourceString) <= 1) return true;\r
948             thaiBreak.setText(sourceString);\r
949             for (int pos = thaiBreak.first(); pos != BreakIterator.DONE; pos = thaiBreak.next()) {\r
950                 if (pos > 0 && pos < sourceString.length()) {\r
951                     System.out.println("Skipping " + Utility.escape(sourceString));\r
952                     return false;\r
953                 }\r
954             }\r
955              */\r
956             return true;\r
957         }\r
958     }\r
959 \r
960     // anything is legal except that Final letters can't be followed by letter; NonFinal must be\r
961     public static class LegalHebrew extends Legal {\r
962         static UnicodeSet FINAL = new UnicodeSet("[\u05DA\u05DD\u05DF\u05E3\u05E5]");\r
963         static UnicodeSet NON_FINAL = new UnicodeSet("[\u05DB\u05DE\u05E0\u05E4\u05E6]");\r
964         static UnicodeSet LETTER = new UnicodeSet("[:letter:]");\r
965         public boolean is(String sourceString) {\r
966             if (sourceString.length() == 0) return true;\r
967             // don't worry about surrogates.\r
968             for (int i = 0; i < sourceString.length(); ++i) {\r
969                 char ch = sourceString.charAt(i);\r
970                 char next = i+1 == sourceString.length() ? '\u0000' : sourceString.charAt(i);\r
971                 if (FINAL.contains(ch)) {\r
972                     if (LETTER.contains(next)) return false;\r
973                 } else if (NON_FINAL.contains(ch)) {\r
974                     if (!LETTER.contains(next)) return false;\r
975                 }\r
976             }\r
977             return true;\r
978         }\r
979     }\r
980 \r
981 \r
982     public static class LegalGreek extends Legal {\r
983 \r
984         boolean full;\r
985 \r
986         public LegalGreek(boolean full) {\r
987             this.full = full;\r
988         }\r
989 \r
990         static final char IOTA_SUBSCRIPT = '\u0345';\r
991         static final UnicodeSet breathing = new UnicodeSet("[\\u0313\\u0314']");\r
992         static final UnicodeSet validSecondVowel = new UnicodeSet("[\\u03C5\\u03B9\\u03A5\\u0399]");\r
993 \r
994         public static boolean isVowel(char c) {\r
995             return "\u03B1\u03B5\u03B7\u03B9\u03BF\u03C5\u03C9\u0391\u0395\u0397\u0399\u039F\u03A5\u03A9".indexOf(c) >= 0;\r
996         }\r
997 \r
998         public static boolean isRho(char c) {\r
999             return "\u03C1\u03A1".indexOf(c) >= 0;\r
1000         }\r
1001 \r
1002         public boolean is(String sourceString) {\r
1003             try {\r
1004                 String decomp = Normalizer.normalize(sourceString, Normalizer.NFD);\r
1005 \r
1006                 // modern is simpler: don't care about anything but a grave\r
1007                 if (!full) {\r
1008                     //if (sourceString.equals("\u039C\u03C0")) return false;\r
1009                     for (int i = 0; i < decomp.length(); ++i) {\r
1010                         char c = decomp.charAt(i);\r
1011                         // exclude all the accents\r
1012                         if (c == '\u0313' || c == '\u0314' || c == '\u0300' || c == '\u0302'\r
1013                             || c == '\u0342' || c == '\u0345'\r
1014                         ) return false;\r
1015                     }\r
1016                     return true;\r
1017                 }\r
1018 \r
1019                 // Legal full Greek has breathing marks IFF there is a vowel or RHO at the start\r
1020                 // IF it has them, it has exactly one.\r
1021                 // IF it starts with a RHO, then the breathing mark must come before the second letter.\r
1022                 // IF it starts with a vowel, then it must before the third letter.\r
1023                 //  it will only come after the second if of the format [vowel] [no iota subscript!] [upsilon or iota]\r
1024                 // Since there are no surrogates in greek, don't worry about them\r
1025 \r
1026                 boolean firstIsVowel = false;\r
1027                 boolean firstIsRho = false;\r
1028                 boolean noLetterYet = true;\r
1029                 int breathingCount = 0;\r
1030                 int letterCount = 0;\r
1031                 //int breathingPosition = -1;\r
1032 \r
1033                 for (int i = 0; i < decomp.length(); ++i) {\r
1034                     char c = decomp.charAt(i);\r
1035                     if (UCharacter.isLetter(c)) {\r
1036                         ++letterCount;\r
1037                         if (firstIsVowel && !validSecondVowel.contains(c) && breathingCount == 0) return false;\r
1038                         if (noLetterYet) {\r
1039                             noLetterYet = false;\r
1040                             firstIsVowel = isVowel(c);\r
1041                             firstIsRho = isRho(c);\r
1042                         }\r
1043                         if (firstIsRho && letterCount == 2 && breathingCount == 0) return false;\r
1044                     }\r
1045                     if (c == IOTA_SUBSCRIPT && firstIsVowel && breathingCount == 0) return false;\r
1046                     if (breathing.contains(c)) {\r
1047                         // breathingPosition = i;\r
1048                         ++breathingCount;\r
1049                     }\r
1050                 }\r
1051 \r
1052                 if (firstIsVowel || firstIsRho) return breathingCount == 1;\r
1053                 return breathingCount == 0;\r
1054             } catch (Throwable t) {\r
1055                 System.out.println(t.getClass().getName() + " " + t.getMessage());\r
1056                 return true;\r
1057             }\r
1058         }\r
1059     }\r
1060 \r
1061     static class Test {\r
1062 \r
1063         PrintWriter out;\r
1064 \r
1065         private String transliteratorID;\r
1066         private int errorLimit = 500;\r
1067         private int errorCount = 0;\r
1068         private long pairLimit  = 1000000; // make default be 1M.\r
1069         private int density = 100;\r
1070         UnicodeSet sourceRange;\r
1071         UnicodeSet targetRange;\r
1072         UnicodeSet toSource;\r
1073         UnicodeSet toTarget;\r
1074         UnicodeSet roundtripExclusions;\r
1075 \r
1076         RoundTripTest log;\r
1077         Legal legalSource;\r
1078         UnicodeSet badCharacters;\r
1079 \r
1080         /*\r
1081          * create a test for the given script transliterator.\r
1082          */\r
1083         Test(String transliteratorID) {\r
1084             this(transliteratorID, 100);\r
1085         }\r
1086 \r
1087         Test(String transliteratorID, int dens) {\r
1088             this.transliteratorID = transliteratorID;\r
1089             this.density = dens;\r
1090         }\r
1091 \r
1092         public void setErrorLimit(int limit) {\r
1093             errorLimit = limit;\r
1094         }\r
1095 \r
1096         public void setPairLimit(int limit) {\r
1097             pairLimit = limit;\r
1098         }\r
1099 \r
1100         // Added to do better equality check.\r
1101 \r
1102         public static boolean isSame(String a, String b) {\r
1103             if (a.equals(b)) return true;\r
1104             if (a.equalsIgnoreCase(b) && isCamel(a)) return true;\r
1105             a = Normalizer.normalize(a, Normalizer.NFD);\r
1106             b = Normalizer.normalize(b, Normalizer.NFD);\r
1107             if (a.equals(b)) return true;\r
1108             if (a.equalsIgnoreCase(b) && isCamel(a)) return true;\r
1109             return false;\r
1110         }\r
1111 \r
1112         /*\r
1113         public boolean includesSome(UnicodeSet set, String a) {\r
1114             int cp;\r
1115             for (int i = 0; i < a.length(); i += UTF16.getCharCount(cp)) {\r
1116                 cp = UTF16.charAt(a, i);\r
1117                 if (set.contains(cp)) return true;\r
1118             }\r
1119             return false;\r
1120         }\r
1121          */\r
1122 \r
1123         public static boolean isCamel(String a) {\r
1124             //System.out.println("CamelTest");\r
1125             // see if string is of the form aB; e.g. lower, then upper or title\r
1126             int cp;\r
1127             boolean haveLower = false;\r
1128             for (int i = 0; i < a.length(); i += UTF16.getCharCount(cp)) {\r
1129                 cp = UTF16.charAt(a, i);\r
1130                 int t = UCharacter.getType(cp);\r
1131                 //System.out.println("\t" + t + " " + Integer.toString(cp,16) + " " + UCharacter.getName(cp));\r
1132                 switch (t) {\r
1133                 case Character.UPPERCASE_LETTER:\r
1134                     if (haveLower) return true;\r
1135                     break;\r
1136                 case Character.TITLECASE_LETTER:\r
1137                     if (haveLower) return true;\r
1138                     // drop through, since second letter is lower.\r
1139                 case Character.LOWERCASE_LETTER:\r
1140                     haveLower = true;\r
1141                     break;\r
1142                 }\r
1143             }\r
1144             //System.out.println("FALSE");\r
1145             return false;\r
1146         }\r
1147 \r
1148         static final UnicodeSet okAnyway = new UnicodeSet("[^[:Letter:]]");\r
1149         static final UnicodeSet neverOk = new UnicodeSet("[:Other:]");\r
1150 \r
1151         public void test(String srcRange, String trgtRange,\r
1152                 String rdtripExclusions, RoundTripTest logger, Legal legalSrc)\r
1153         throws java.io.IOException {\r
1154             test(srcRange, trgtRange, srcRange, rdtripExclusions, logger, legalSrc);\r
1155         }\r
1156 \r
1157         /**\r
1158          * Will test \r
1159          * that everything in sourceRange maps to targetRange,\r
1160          * that everything in targetRange maps to backtoSourceRange\r
1161          * that everything roundtrips from target -> source -> target, except roundtripExceptions\r
1162          */\r
1163         public void test(String srcRange, String trgtRange, String backtoSourceRange,\r
1164                 String rdtripExclusions, RoundTripTest logger, Legal legalSrc)\r
1165         throws java.io.IOException {\r
1166 \r
1167             legalSource = legalSrc;\r
1168             sourceRange = new UnicodeSet(srcRange);\r
1169             sourceRange.removeAll(neverOk);\r
1170 \r
1171             targetRange = new UnicodeSet(trgtRange);\r
1172             targetRange.removeAll(neverOk);\r
1173 \r
1174             toSource = new UnicodeSet(backtoSourceRange);\r
1175             toSource.addAll(okAnyway);\r
1176 \r
1177             toTarget = new UnicodeSet(trgtRange);\r
1178             toTarget.addAll(okAnyway);\r
1179 \r
1180             if (rdtripExclusions != null && rdtripExclusions.length() > 0) {\r
1181                 roundtripExclusions = new UnicodeSet(rdtripExclusions);\r
1182             }else{\r
1183                 roundtripExclusions = new UnicodeSet(); // empty\r
1184             }\r
1185 \r
1186             log = logger;\r
1187 \r
1188             log.logln(Utility.escape("Source:  " + sourceRange));\r
1189             log.logln(Utility.escape("Target:  " + targetRange));\r
1190             log.logln(Utility.escape("Exclude: " + roundtripExclusions));\r
1191             if (log.isQuick()) log.logln("Abbreviated Test");\r
1192 \r
1193             badCharacters = new UnicodeSet("[:other:]");\r
1194 \r
1195             // make a UTF-8 output file we can read with a browser\r
1196 \r
1197             // note: check that every transliterator transliterates the null string correctly!\r
1198 \r
1199             // {dlf} reorganize so can run test in protected security environment\r
1200             //              String logFileName = "test_" + transliteratorID.replace('/', '_') + ".html";\r
1201 \r
1202             //              File lf = new File(logFileName);\r
1203             //              log.logln("Creating log file " + lf.getAbsoluteFile());\r
1204 \r
1205             //              out = new PrintWriter(new BufferedWriter(new OutputStreamWriter(\r
1206             //                        new FileOutputStream(logFileName), "UTF8"), 4*1024));\r
1207 \r
1208             ByteArrayOutputStream bast = new ByteArrayOutputStream();\r
1209             out = new PrintWriter(new BufferedWriter(new OutputStreamWriter(\r
1210                     bast, "UTF8"), 4*1024));\r
1211             //out.write('\uFFEF');    // BOM\r
1212             out.println("<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.0 Transitional//EN\">");\r
1213             out.println("<HTML><HEAD>");\r
1214             out.println("<META content=\"text/html; charset=utf-8\" http-equiv=Content-Type></HEAD>");\r
1215             out.println("<BODY bgcolor='#FFFFFF' style='font-family: Arial Unicode MS'>");\r
1216 \r
1217             try {\r
1218                 test2();\r
1219             } catch (TestTruncated e) {\r
1220                 out.println(e.getMessage());\r
1221             }\r
1222             out.println("</BODY></HTML>");\r
1223             out.close();\r
1224 \r
1225             if (errorCount > 0) {\r
1226                 try {\r
1227                     File translitErrorDirectory = new File("translitErrorLogs");\r
1228                     if (!translitErrorDirectory.exists()) {\r
1229                         translitErrorDirectory.mkdir();\r
1230                     }\r
1231                     String logFileName = "translitErrorLogs/test_" + transliteratorID.replace('/', '_') + ".html";\r
1232                     File lf = new File(logFileName);\r
1233                     logger.logln("Creating log file " + lf.getAbsoluteFile());\r
1234                     FileOutputStream fos = new FileOutputStream(lf);\r
1235                     fos.write(bast.toByteArray());\r
1236                     fos.close();\r
1237                     logger.errln(transliteratorID + " errors: "\r
1238                             + errorCount + (errorCount > errorLimit ? " (at least!)" : "")\r
1239                             + ", see " + lf.getAbsoluteFile());\r
1240                 }\r
1241                 catch (SecurityException e) {\r
1242                     logger.errln(transliteratorID + " errors: "\r
1243                             + errorCount + (errorCount > errorLimit ? " (at least!)" : "")\r
1244                             + ", no log provided due to protected test domain");\r
1245                 }\r
1246             } else {\r
1247                 logger.logln(transliteratorID + " ok");\r
1248                 //                  new File(logFileName).delete();\r
1249             }\r
1250         }\r
1251 \r
1252         // ok if at least one is not equal\r
1253         public boolean checkIrrelevants(Transliterator t, String irrelevants) {\r
1254             for (int i = 0; i < irrelevants.length(); ++i) {\r
1255                 char c = irrelevants.charAt(i);\r
1256                 String cs = UTF16.valueOf(c);\r
1257                 String targ = t.transliterate(cs);\r
1258                 if (cs.equals(targ)) return true;\r
1259             }\r
1260             return false;\r
1261         }\r
1262 \r
        // Reusable iterators: usi drives the (outer) loop of every check below,
        // usi2 drives the inner loop of the pair ("Doubles") checks.
        AbbreviatedUnicodeSetIterator usi = new AbbreviatedUnicodeSetIterator();
        AbbreviatedUnicodeSetIterator usi2 = new AbbreviatedUnicodeSetIterator();

        // Forward transliterator and its inverse; both are assigned at the start of test2().
        Transliterator sourceToTarget;
        Transliterator targetToSource;
1268 \r
1269         public void test2() {\r
1270 \r
1271             sourceToTarget = Transliterator.getInstance(transliteratorID);\r
1272             targetToSource = sourceToTarget.getInverse();\r
1273 \r
1274             log.logln("Checking that at least one irrevant characters is not NFC'ed");\r
1275             out.println("<h3>Checking that at least one irrevant characters is not NFC'ed</h3>");\r
1276 \r
1277             String irrelevants = "\u2000\u2001\u2126\u212A\u212B\u2329"; // string is from NFC_NO in the UCD\r
1278 \r
1279             if (!checkIrrelevants(sourceToTarget, irrelevants)) {\r
1280                 logFails("" + getSourceTarget(transliteratorID) + ", Must not NFC everything");\r
1281             }\r
1282             if (!checkIrrelevants(targetToSource, irrelevants)) {\r
1283                 logFails("" + getTargetSource(transliteratorID) + ", irrelevants");\r
1284             }\r
1285 \r
1286             if (EXTRA_TESTS) {\r
1287                 log.logln("Checking that toRules works");\r
1288                 String rules = "";\r
1289                 Transliterator sourceToTarget2;\r
1290                 Transliterator targetToSource2;\r
1291                 try {\r
1292                     rules = sourceToTarget.toRules(false);\r
1293                     sourceToTarget2 = Transliterator.createFromRules("s2t2", rules, Transliterator.FORWARD);\r
1294                     if (PRINT_RULES) {\r
1295                         out.println("<h3>Forward Rules:</h3><p>");\r
1296                         out.println(TestUtility.replace(rules, "\n", "\u200E<br>\n\u200E"));\r
1297                         out.println("</p>");\r
1298                     }\r
1299                     rules = targetToSource.toRules(false);\r
1300                     targetToSource2 = Transliterator.createFromRules("t2s2", rules, Transliterator.FORWARD);\r
1301                     if (PRINT_RULES) {\r
1302                         out.println("<h3>Backward Rules:</h3><p>");\r
1303                         out.println(TestUtility.replace(rules, "\n", "\u200E<br>\n\u200E"));\r
1304                         out.println("</p>");\r
1305                     }\r
1306                 } catch (RuntimeException e) {\r
1307                     out.println("<h3>Broken Rules:</h3><p>");\r
1308                     out.println(TestUtility.replace(rules, "\n", "<br>\n"));\r
1309                     out.println("</p>");\r
1310                     out.flush();\r
1311                     throw e;\r
1312                 }\r
1313 \r
1314                 out.println("<h3>Roundtrip Exclusions: " + new UnicodeSet(roundtripExclusions) + "</h3>");\r
1315                 out.flush();\r
1316 \r
1317                 checkSourceTargetSource(sourceToTarget2);\r
1318 \r
1319                 checkTargetSourceTarget(targetToSource2);\r
1320             }\r
1321 \r
1322             UnicodeSet failSourceTarg = new UnicodeSet();\r
1323 \r
1324 \r
1325             checkSourceTargetSingles(failSourceTarg);\r
1326 \r
1327             boolean quickRt = checkSourceTargetDoubles(failSourceTarg);\r
1328 \r
1329             UnicodeSet failTargSource = new UnicodeSet();\r
1330             UnicodeSet failRound = new UnicodeSet();\r
1331 \r
1332             checkTargetSourceSingles(failTargSource, failRound);\r
1333             checkTargetSourceDoubles(quickRt, failTargSource, failRound);\r
1334         }\r
1335 \r
1336         private void checkSourceTargetSource(Transliterator sourceToTarget2) {\r
1337             log.logln("Checking that source -> target -> source");\r
1338             out.println("<h3>Checking that source -> target -> source</h3>");\r
1339 \r
1340             usi.reset(sourceRange);\r
1341             while (usi.next()) {\r
1342                 int c = usi.codepoint;\r
1343 \r
1344                 String cs = UTF16.valueOf(c);\r
1345                 String targ = sourceToTarget.transliterate(cs);\r
1346                 String targ2 = sourceToTarget2.transliterate(cs);\r
1347                 if (!targ.equals(targ2)) {\r
1348                     logToRulesFails("" + getSourceTarget(transliteratorID) + ", toRules", cs, targ, targ2);\r
1349                 }\r
1350             }\r
1351         }\r
1352 \r
1353         private void checkTargetSourceTarget(Transliterator targetToSource2) {\r
1354             log.logln("Checking that target -> source -> target");\r
1355             out.println("<h3>Checking that target -> source -> target</h3>");\r
1356             usi.reset(targetRange);\r
1357             while (usi.next()) {\r
1358                 int c = usi.codepoint;\r
1359 \r
1360                 String cs = UTF16.valueOf(c);\r
1361                 String targ = targetToSource.transliterate(cs);\r
1362                 String targ2 = targetToSource2.transliterate(cs);\r
1363                 if (!targ.equals(targ2)) {\r
1364                     logToRulesFails("" + getTargetSource(transliteratorID) + ", toRules", cs, targ, targ2);\r
1365                 }\r
1366             }\r
1367         }\r
1368 \r
1369         private void checkSourceTargetSingles(UnicodeSet failSourceTarg) {\r
1370             log.logln("Checking that source characters convert to target - Singles");\r
1371             out.println("<h3>Checking that source characters convert to target - Singles</h3>");\r
1372 \r
1373 \r
1374             /*\r
1375             for (char c = 0; c < 0xFFFF; ++c) {\r
1376                 if (!sourceRange.contains(c)) continue;\r
1377              */\r
1378             usi.reset(sourceRange);\r
1379             while (usi.next()) {\r
1380                 int c = usi.codepoint;\r
1381 \r
1382                 String cs = UTF16.valueOf(c);\r
1383                 String targ = sourceToTarget.transliterate(cs);\r
1384                 if (!toTarget.containsAll(targ)\r
1385                         || badCharacters.containsSome(targ)) {\r
1386                     String targD = Normalizer.normalize(targ, Normalizer.NFD);\r
1387                     if (!toTarget.containsAll(targD)\r
1388                             || badCharacters.containsSome(targD)) {\r
1389                         logWrongScript("" + getSourceTarget(transliteratorID) + "", cs, targ, toTarget, badCharacters);\r
1390                         failSourceTarg.add(c);\r
1391                         continue;\r
1392                     }\r
1393                 }\r
1394 \r
1395                 String cs2 = Normalizer.normalize(cs, Normalizer.NFD);\r
1396                 String targ2 = sourceToTarget.transliterate(cs2);\r
1397                 if (!targ.equals(targ2)) {\r
1398                     logNotCanonical("" + getSourceTarget(transliteratorID) + "", cs, targ, cs2, targ2);\r
1399                 }\r
1400             }\r
1401         }\r
1402 \r
1403         private boolean checkSourceTargetDoubles(UnicodeSet failSourceTarg) {\r
1404             log.logln("Checking that source characters convert to target - Doubles");\r
1405             out.println("<h3>Checking that source characters convert to target - Doubles</h3>");\r
1406             long count = 0;\r
1407 \r
1408             /*\r
1409             for (char c = 0; c < 0xFFFF; ++c) {\r
1410                 if (TestUtility.isUnassigned(c) ||\r
1411                     !sourceRange.contains(c)) continue;\r
1412                 if (failSourceTarg.get(c)) continue;\r
1413 \r
1414              */\r
1415 \r
1416             UnicodeSet sourceRangeMinusFailures = new UnicodeSet(sourceRange);\r
1417             sourceRangeMinusFailures.removeAll(failSourceTarg);\r
1418 \r
1419             boolean quickRt = log.getInclusion() < 10;\r
1420 \r
1421             usi.reset(sourceRangeMinusFailures, quickRt, density);\r
1422 \r
1423             while (usi.next()) {\r
1424                 int c = usi.codepoint;\r
1425 \r
1426                 /*\r
1427                 for (char d = 0; d < 0xFFFF; ++d) {\r
1428                     if (TestUtility.isUnassigned(d) ||\r
1429                         !sourceRange.contains(d)) continue;\r
1430                     if (failSourceTarg.get(d)) continue;\r
1431                  */\r
1432                 log.logln(count + "/" + pairLimit + " Checking starting with " + UTF16.valueOf(c));\r
1433                 usi2.reset(sourceRangeMinusFailures, quickRt, density);\r
1434 \r
1435                 while (usi2.next()) {\r
1436                     int d = usi2.codepoint;\r
1437                     ++count;\r
1438 \r
1439                     String cs = UTF16.valueOf(c) + UTF16.valueOf(d);\r
1440                     String targ = sourceToTarget.transliterate(cs);\r
1441                     if (!toTarget.containsAll(targ)\r
1442                             || badCharacters.containsSome(targ)) {\r
1443                         String targD = Normalizer.normalize(targ, Normalizer.NFD);\r
1444                         if (!toTarget.containsAll(targD)\r
1445                                 || badCharacters.containsSome(targD)) {\r
1446                             logWrongScript("" + getSourceTarget(transliteratorID) + "", cs, targ, toTarget, badCharacters);\r
1447                             continue;\r
1448                         }\r
1449                     }\r
1450                     String cs2 = Normalizer.normalize(cs, Normalizer.NFD);\r
1451                     String targ2 = sourceToTarget.transliterate(cs2);\r
1452                     if (!targ.equals(targ2)) {\r
1453                         logNotCanonical("" + getSourceTarget(transliteratorID) + "", cs, targ, cs2, targ2);\r
1454                     }\r
1455                 }\r
1456             }\r
1457             return quickRt;\r
1458         }\r
1459 \r
1460         void checkTargetSourceSingles(UnicodeSet failTargSource, UnicodeSet failRound) {\r
1461             log.logln("Checking that target characters convert to source and back - Singles");\r
1462             out.println("<h3>Checking that target characters convert to source and back - Singles</h3>");\r
1463 \r
1464 \r
1465             /*for (char c = 0; c < 0xFFFF; ++c) {\r
1466                 if (TestUtility.isUnassigned(c) ||\r
1467                     !targetRange.contains(c)) continue;\r
1468              */\r
1469 \r
1470             usi.reset(targetRange);\r
1471             while (usi.next()) {\r
1472                 String cs;\r
1473                 int c;\r
1474                 if(usi.codepoint == UnicodeSetIterator.IS_STRING){\r
1475                     cs = usi.string;\r
1476                     c = UTF16.charAt(cs,0);\r
1477                 }else{\r
1478                     c = usi.codepoint;\r
1479                     cs =UTF16.valueOf(c);\r
1480                 }\r
1481 \r
1482                 String targ = targetToSource.transliterate(cs);\r
1483                 String reverse = sourceToTarget.transliterate(targ);\r
1484 \r
1485                 if (!toSource.containsAll(targ)\r
1486                         || badCharacters.containsSome(targ)) {\r
1487                     String targD = Normalizer.normalize(targ, Normalizer.NFD);\r
1488                     if (!toSource.containsAll(targD)\r
1489                             || badCharacters.containsSome(targD)) {\r
1490                         /*UnicodeSet temp = */new UnicodeSet().addAll(targD);\r
1491                         logWrongScript("" + getTargetSource(transliteratorID) + "", cs, targ, toSource, badCharacters);\r
1492                         failTargSource.add(cs);\r
1493                         continue;\r
1494                     }\r
1495                 }\r
1496                 if (!isSame(cs, reverse) && !roundtripExclusions.contains(c)\r
1497                         && !roundtripExclusions.contains(cs)) {\r
1498                     logRoundTripFailure(cs,targetToSource.getID(), targ,sourceToTarget.getID(), reverse);\r
1499                     failRound.add(c);\r
1500                     continue;\r
1501                 }\r
1502                 String targ2 = Normalizer.normalize(targ, Normalizer.NFD);\r
1503                 String reverse2 = sourceToTarget.transliterate(targ2);\r
1504                 if (!reverse.equals(reverse2)) {\r
1505                     logNotCanonical("" + getTargetSource(transliteratorID) + "", targ, reverse, targ2, reverse2);\r
1506                 }\r
1507             }\r
1508 \r
1509         }\r
1510 \r
1511         private void checkTargetSourceDoubles(boolean quickRt, UnicodeSet failTargSource,\r
1512                 UnicodeSet failRound) {\r
1513             log.logln("Checking that target characters convert to source and back - Doubles");\r
1514             out.println("<h3>Checking that target characters convert to source and back - Doubles</h3>");\r
1515             long count = 0;\r
1516 \r
1517             UnicodeSet targetRangeMinusFailures = new UnicodeSet(targetRange);\r
1518             targetRangeMinusFailures.removeAll(failTargSource);\r
1519             targetRangeMinusFailures.removeAll(failRound);\r
1520 \r
1521             //char[] buf = new char[4]; // maximum we can have with 2 code points\r
1522             /*\r
1523             for (char c = 0; c < 0xFFFF; ++c) {\r
1524                 if (TestUtility.isUnassigned(c) ||\r
1525                     !targetRange.contains(c)) continue;\r
1526              */\r
1527             \r
1528             usi.reset(targetRangeMinusFailures, quickRt, density);\r
1529 \r
1530             while (usi.next()) {\r
1531                 int c = usi.codepoint;\r
1532 \r
1533                 //log.log(TestUtility.hex(c));\r
1534 \r
1535                 /*\r
1536                 for (char d = 0; d < 0xFFFF; ++d) {\r
1537                     if (TestUtility.isUnassigned(d) ||\r
1538                         !targetRange.contains(d)) continue;\r
1539                  */\r
1540                 log.logln(count + "/" + pairLimit + " Checking starting with " + UTF16.valueOf(c));\r
1541                 usi2.reset(targetRangeMinusFailures, quickRt, density);\r
1542 \r
1543                 while (usi2.next()) {\r
1544                     \r
1545                     int d = usi2.codepoint;\r
1546                     if (d < 0) break;\r
1547                     \r
1548                     if (++count > pairLimit) {\r
1549                         throw new TestTruncated("Test truncated at " + pairLimit);\r
1550                     }\r
1551 \r
1552                     String cs = UTF16.valueOf(c) + UTF16.valueOf(d);\r
1553                     String targ = targetToSource.transliterate(cs);\r
1554                     String reverse = sourceToTarget.transliterate(targ);\r
1555 \r
1556                     if (!toSource.containsAll(targ) /*&& !failTargSource.contains(c) && !failTargSource.contains(d)*/\r
1557                             || badCharacters.containsSome(targ)) {\r
1558                         String targD = Normalizer.normalize(targ, Normalizer.NFD);\r
1559                         if (!toSource.containsAll(targD) /*&& !failTargSource.contains(c) && !failTargSource.contains(d)*/\r
1560                                 || badCharacters.containsSome(targD)) {\r
1561                             logWrongScript("" + getTargetSource(transliteratorID) + "", cs, targ, toSource, badCharacters);\r
1562                             continue;\r
1563                         }\r
1564                     }\r
1565                     if (!isSame(cs, reverse) /*&& !failRound.contains(c) && !failRound.contains(d)*/\r
1566                             && !roundtripExclusions.contains(c)\r
1567                             && !roundtripExclusions.contains(d)\r
1568                             && !roundtripExclusions.contains(cs)) {\r
1569                         logRoundTripFailure(cs,targetToSource.getID(), targ,sourceToTarget.getID(), reverse);\r
1570                         continue;\r
1571                     }\r
1572                     String targ2 = Normalizer.normalize(targ, Normalizer.NFD);\r
1573                     String reverse2 = sourceToTarget.transliterate(targ2);\r
1574                     if (!reverse.equals(reverse2)) {\r
1575                         logNotCanonical("" + getTargetSource(transliteratorID) + "", targ, reverse, targ2, reverse2);\r
1576                     }\r
1577                 }\r
1578             }\r
1579             log.logln("");\r
1580         }\r
1581 \r
1582         /**\r
1583          * @param transliteratorID2\r
1584          * @return\r
1585          */\r
1586         private String getTargetSource(String transliteratorID2) {\r
1587             return "Target-Source [" + transliteratorID2 + "]";\r
1588         }\r
1589 \r
1590         /**\r
1591          * @param transliteratorID2\r
1592          * @return\r
1593          */\r
1594         private String getSourceTarget(String transliteratorID2) {\r
1595             return "Source-Target [" + transliteratorID2 + "]";\r
1596         }\r
1597 \r
1598         final String info(String s) {\r
1599             StringBuffer result = new StringBuffer();\r
1600             result.append("\u200E").append(s).append("\u200E (").append(TestUtility.hex(s)).append("/");\r
1601             if (false) { // append age, as a check\r
1602                 int cp = 0;    \r
1603                 for (int i = 0; i < s.length(); i += UTF16.getCharCount(cp)) {\r
1604                     cp = UTF16.charAt(s, i);\r
1605                     if (i > 0) result.append(", ");\r
1606                     result.append(UCharacter.getAge(cp));\r
1607                 }\r
1608             }\r
1609             result.append(")");\r
1610             return result.toString();\r
1611         }\r
1612 \r
1613         final void logWrongScript(String label, String from, String to, \r
1614                 UnicodeSet shouldContainAll, UnicodeSet shouldNotContainAny) {\r
1615             if (++errorCount > errorLimit) {\r
1616                 throw new TestTruncated("Test truncated; too many failures");\r
1617             }\r
1618             String toD = Normalizer.normalize(to, Normalizer.NFD);\r
1619             UnicodeSet temp = new UnicodeSet().addAll(toD);\r
1620             UnicodeSet bad = new UnicodeSet(shouldNotContainAny).retainAll(temp)\r
1621             .addAll(new UnicodeSet(temp).removeAll(shouldContainAll));\r
1622 \r
1623             out.println("<br>Fail " + label + ": " +\r
1624                     info(from) + " => " + info(to) + " " + bad\r
1625             );\r
1626         }\r
1627 \r
1628         final void logNotCanonical(String label, String from, String to, String fromCan, String toCan) {\r
1629             if (++errorCount > errorLimit) {\r
1630                 throw new TestTruncated("Test truncated; too many failures");\r
1631             }\r
1632             out.println("<br>Fail (can.equiv) " + label + ": " +\r
1633                     info(from) + " => " + info(to) +\r
1634                     " -- " +\r
1635                     info(fromCan) + " => " + info(toCan) + ")"\r
1636             );\r
1637         }\r
1638 \r
1639         final void logFails(String label) {\r
1640             if (++errorCount > errorLimit) {\r
1641                 throw new TestTruncated("Test truncated; too many failures");\r
1642             }\r
1643             out.println("<br>Fail (can.equiv)" + label);\r
1644         }\r
1645 \r
1646         final void logToRulesFails(String label, String from, String to, String toCan) {\r
1647             if (++errorCount > errorLimit) {\r
1648                 throw new TestTruncated("Test truncated; too many failures");\r
1649             }\r
1650             out.println("<br>Fail " + label + ": " +\r
1651                     info(from) + " => " + info(to) + ", " + info(toCan)\r
1652             );\r
1653         }\r
1654 \r
1655         final void logRoundTripFailure(String from,String toID, String to,String backID, String back) {\r
1656             if (!legalSource.is(from)) return; // skip illegals\r
1657 \r
1658             if (++errorCount > errorLimit) {\r
1659                 throw new TestTruncated("Test truncated; too many failures");\r
1660             }\r
1661             out.println("<br>Fail Roundtrip: " +\r
1662                     info(from) + " "+toID+" => " + info(to) + " " + backID+" => " + info(back)\r
1663             );\r
1664         }\r
1665 \r
1666         /*\r
1667          * Characters to filter for source-target mapping completeness\r
1668          * Typically is base alphabet, minus extended characters\r
1669          * Default is ASCII letters for Latin\r
1670          */\r
1671         /*\r
1672         public boolean isSource(char c) {\r
1673             if (!sourceRange.contains(c)) return false;\r
1674             return true;\r
1675         }\r
1676          */\r
1677 \r
1678         /*\r
1679          * Characters to check for target back to source mapping.\r
1680          * Typically the same as the target script, plus punctuation\r
1681          */\r
1682         /*\r
1683         public boolean isReceivingSource(char c) {\r
1684             if (!targetRange.contains(c)) return false;\r
1685             return true;\r
1686         }\r
1687          */\r
1688         /*\r
1689          * Characters to filter for target-source mapping\r
1690          * Typically is base alphabet, minus extended characters\r
1691          */\r
1692         /*\r
1693         public boolean isTarget(char c) {\r
1694             byte script = TestUtility.getScript(c);\r
1695             if (script != targetScript) return false;\r
1696             if (!TestUtility.isLetter(c)) return false;\r
1697             if (targetRange != null && !targetRange.contains(c)) return false;\r
1698             return true;\r
1699         }\r
1700          */\r
1701 \r
1702         /*\r
1703          * Characters to check for target-source mapping\r
1704          * Typically the same as the source script, plus punctuation\r
1705          */\r
1706         /*\r
1707         public boolean isReceivingTarget(char c) {\r
1708             byte script = TestUtility.getScript(c);\r
1709             return (script == targetScript || script == TestUtility.COMMON_SCRIPT);\r
1710         }\r
1711 \r
1712         final boolean isSource(String s) {\r
1713             for (int i = 0; i < s.length(); ++i) {\r
1714                 if (!isSource(s.charAt(i))) return false;\r
1715             }\r
1716             return true;\r
1717         }\r
1718 \r
1719         final boolean isTarget(String s) {\r
1720             for (int i = 0; i < s.length(); ++i) {\r
1721                 if (!isTarget(s.charAt(i))) return false;\r
1722             }\r
1723             return true;\r
1724         }\r
1725 \r
1726         final boolean isReceivingSource(String s) {\r
1727             for (int i = 0; i < s.length(); ++i) {\r
1728                 if (!isReceivingSource(s.charAt(i))) return false;\r
1729             }\r
1730             return true;\r
1731         }\r
1732 \r
1733         final boolean isReceivingTarget(String s) {\r
1734             for (int i = 0; i < s.length(); ++i) {\r
1735                 if (!isReceivingTarget(s.charAt(i))) return false;\r
1736             }\r
1737             return true;\r
1738         }\r
1739          */\r
1740 \r
1741         static class TestTruncated extends RuntimeException {\r
1742             /**\r
1743              * For serialization\r
1744              */\r
1745             private static final long serialVersionUID = 3361828190488168323L;\r
1746 \r
1747             TestTruncated(String msg) {\r
1748                 super(msg);\r
1749             }\r
1750         }\r
1751     }\r
1752 \r
1753     //  static class TestHangul extends Test {\r
1754     //      TestHangul () {\r
1755     //          super("Jamo-Hangul", TestUtility.JAMO_SCRIPT, TestUtility.HANGUL_SCRIPT);\r
1756     //      }\r
1757     //\r
1758     //      public boolean isSource(char c) {\r
1759     //          if (0x1113 <= c && c <= 0x1160) return false;\r
1760     //          if (0x1176 <= c && c <= 0x11F9) return false;\r
1761     //          if (0x3131 <= c && c <= 0x318E) return false;\r
1762     //          return super.isSource(c);\r
1763     //      }\r
1764     //  }\r
1765 \r
1766 \r
1767 }\r