]> gitweb.fperrin.net Git - Dictionary.git/blob - jars/icu4j-4_2_1-src/src/com/ibm/icu/dev/test/translit/RoundTripTest.java
go
[Dictionary.git] / jars / icu4j-4_2_1-src / src / com / ibm / icu / dev / test / translit / RoundTripTest.java
1 //##header J2SE15
2 /**
3  *******************************************************************************
4  * Copyright (C) 2000-2009, International Business Machines Corporation and    *
5  * others. All Rights Reserved.                                                *
6  *******************************************************************************
7  */
8 package com.ibm.icu.dev.test.translit;
9
10 import com.ibm.icu.dev.test.*;
11 import com.ibm.icu.lang.*;
12 import com.ibm.icu.text.*;
13 import com.ibm.icu.util.LocaleData;
14 import com.ibm.icu.util.ULocale;
15 import com.ibm.icu.impl.Utility;
16
17 import java.io.BufferedWriter;
18 import java.io.ByteArrayOutputStream;
19 import java.io.File;
20 import java.io.FileNotFoundException;
21 import java.io.FileOutputStream;
22 import java.io.IOException;
23 import java.io.OutputStreamWriter;
24 import java.io.PrintWriter;
25 import java.io.UnsupportedEncodingException;
26 import java.util.MissingResourceException;
27
28 /**
29  * @test
30  * @summary Round trip test of Transliterator
31  */
32 public class RoundTripTest extends TestFmwk {
33
34     static final boolean EXTRA_TESTS = true;
35     static final boolean PRINT_RULES = true;
36
37     public static void main(String[] args) throws Exception {
38         new RoundTripTest().run(args);
39     }
40     /*
41     public void TestSingle() throws IOException, ParseException {
42         Transliterator t = Transliterator.getInstance("Latin-Greek");
43         String s = t.transliterate("\u0101\u0069");
44     }
45      */
46
47     /*
48     Note: Unicode 3.2 added new Hiragana/Katakana characters:
49
50 3095..3096    ; 3.2 #   [2] HIRAGANA LETTER SMALL KA..HIRAGANA LETTER SMALL KE
51 309F..30A0    ; 3.2 #   [2] HIRAGANA DIGRAPH YORI..KATAKANA-HIRAGANA DOUBLE HYPHEN
52 30FF          ; 3.2 #       KATAKANA DIGRAPH KOTO
53 31F0..31FF    ; 3.2 #  [16] KATAKANA LETTER SMALL KU..KATAKANA LETTER SMALL RO
54
55     We will not add them to the rules until they are more supported (e.g. in fonts on Windows)
56     A bug has been filed to remind us to do this: #1979.
57      */
58
59     static String KATAKANA = "[[[:katakana:][\u30A1-\u30FA\u30FC]]-[\u30FF\u31F0-\u31FF]]";
60     static String HIRAGANA = "[[[:hiragana:][\u3040-\u3094]]-[\u3095-\u3096\u309F-\u30A0]]";
61     static String LENGTH = "[\u30FC]";
62     static String HALFWIDTH_KATAKANA = "[\uFF65-\uFF9D]";
63     static String KATAKANA_ITERATION = "[\u30FD\u30FE]";
64     static String HIRAGANA_ITERATION = "[\u309D\u309E]";
65
66     //------------------------------------------------------------------
67     // AbbreviatedUnicodeSetIterator
68     //------------------------------------------------------------------
69
70     static class AbbreviatedUnicodeSetIterator extends UnicodeSetIterator {
71
72         private boolean abbreviated;
73         private int perRange;
74
75         public AbbreviatedUnicodeSetIterator() {
76             super();
77             abbreviated = false;
78         }
79
80         public void reset(UnicodeSet newSet) {
81             reset(newSet, false);
82         }
83
84         public void reset(UnicodeSet newSet, boolean abb) {
85             reset(newSet, abb, 100);
86         }
87
88         public void reset(UnicodeSet newSet, boolean abb, int density) {
89             super.reset(newSet);
90             abbreviated = abb;
91             perRange = newSet.getRangeCount();
92             if (perRange != 0) {
93                 perRange = density / perRange;
94             }
95         }
96
97         protected void loadRange(int myRange) {
98             super.loadRange(myRange);
99             if (abbreviated && (endElement > nextElement + perRange)) {
100                 endElement = nextElement + perRange;
101             }
102         }
103     }
104
105     //--------------------------------------------------------------------
106
107     public void showElapsed(long start, String name) {
108         double dur = (System.currentTimeMillis() - start) / 1000.0;
109         logln(name + " took " + dur + " seconds");
110     }
111
112     public void TestKana() throws IOException {
113         long start = System.currentTimeMillis();
114         new Test("Katakana-Hiragana")
115         .test(KATAKANA, "[" + HIRAGANA + LENGTH + "]", "[" + HALFWIDTH_KATAKANA + LENGTH + "]", this, new Legal());
116         showElapsed(start, "TestKana");
117     }
118
119     public void TestHiragana() throws IOException {
120         long start = System.currentTimeMillis();
121         new Test("Latin-Hiragana")
122         .test("[a-zA-Z]", HIRAGANA, HIRAGANA_ITERATION, this, new Legal());
123         showElapsed(start, "TestHiragana");
124     }
125
126     public void TestKatakana() throws IOException {
127         long start = System.currentTimeMillis();
128         new Test("Latin-Katakana")
129         .test("[a-zA-Z]", KATAKANA, "[" + KATAKANA_ITERATION + HALFWIDTH_KATAKANA + "]", this, new Legal());
130         showElapsed(start, "TestKatakana");
131     }
132
133     public void TestJamo() throws IOException {
134         long start = System.currentTimeMillis();
135         new Test("Latin-Jamo")
136         .test("[a-zA-Z]", "[\u1100-\u1112 \u1161-\u1175 \u11A8-\u11C2]", "", this, new LegalJamo());
137         showElapsed(start, "TestJamo");
138     }
139
140     /*
141         SBase = 0xAC00, LBase = 0x1100, VBase = 0x1161, TBase = 0x11A7,
142         LCount = 19, VCount = 21, TCount = 28,
143         NCount = VCount * TCount,   // 588
144         SCount = LCount * NCount,   // 11172
145         LLimit = LBase + LCount,    // 1113
146         VLimit = VBase + VCount,    // 1176
147         TLimit = TBase + TCount,    // 11C3
148         SLimit = SBase + SCount;    // D7A4
149      */
150
151     public void TestHangul() throws IOException {
152         long start = System.currentTimeMillis();
153         Test t = new Test("Latin-Hangul", 5);
154         boolean TEST_ALL = "true".equalsIgnoreCase(getProperty("HangulRoundTripAll")); 
155         if (TEST_ALL && getInclusion() == 10) {
156             t.setPairLimit(Integer.MAX_VALUE); // only go to the limit if we have TEST_ALL and getInclusion
157         }
158         t.test("[a-zA-Z]", "[\uAC00-\uD7A4]", "", this, new Legal());
159         showElapsed(start, "TestHangul");
160     }
161
162     /**
163      * This is a shorter version of the test for doubles, that allows us to skip lots of cases, but
164      * does check the ones that should cause problems (if any do).
165      */
166     public void TestHangul2() {
167         Transliterator lh = Transliterator.getInstance("Latin-Hangul");
168         Transliterator hl = lh.getInverse();
169         final UnicodeSet representativeHangul = getRepresentativeHangul();
170         for (UnicodeSetIterator it = new UnicodeSetIterator(representativeHangul); it.next();) {
171             assertRoundTripTransform("Transform", it.getString(), lh, hl);
172         }
173     }
174
175     private void assertRoundTripTransform(String message, String source, Transliterator lh, Transliterator hl) {
176         String to = hl.transform(source);
177         String back = lh.transform(to);
178 //#if defined(FOUNDATION10) || defined(J2SE13)
179 //##    // No regular expression support in Java 1.3
180 //#else
181         if (!source.equals(back)) {
182             String to2 = hl.transform(source.replaceAll("(.)", "$1 ").trim());
183             String to3 = hl.transform(back.replaceAll("(.)", "$1 ").trim());
184             assertEquals(message + " " + source + " [" + to + "/"+ to2 + "/"+ to3 + "]", source, back);
185         }
186 //#endif
187     }
188
189     public static UnicodeSet getRepresentativeHangul() {
190         UnicodeSet extraSamples = new UnicodeSet("[\uCE20{\uAD6C\uB514}{\uAD73\uC774}{\uBB34\uB837}{\uBB3C\uC5FF}{\uC544\uAE4C}{\uC544\uB530}{\uC544\uBE60}{\uC544\uC2F8}{\uC544\uC9DC}{\uC544\uCC28}{\uC545\uC0AC}{\uC545\uC2F8}{\uC546\uCE74}{\uC548\uAC00}{\uC548\uC790}{\uC548\uC9DC}{\uC548\uD558}{\uC54C\uAC00}{\uC54C\uB530}{\uC54C\uB9C8}{\uC54C\uBC14}{\uC54C\uBE60}{\uC54C\uC0AC}{\uC54C\uC2F8}{\uC54C\uD0C0}{\uC54C\uD30C}{\uC54C\uD558}{\uC555\uC0AC}{\uC555\uC2F8}{\uC558\uC0AC}{\uC5C5\uC12F\uC501}{\uC5C6\uC5C8\uC2B5}]");
191         UnicodeSet sourceSet = new UnicodeSet();
192         addRepresentativeHangul(sourceSet, 2, false);
193         addRepresentativeHangul(sourceSet, 3, false);
194         addRepresentativeHangul(sourceSet, 2, true);
195         addRepresentativeHangul(sourceSet, 3, true);
196         // add the boundary cases; we want an example of each case of V + L and one example of each case of T+L
197
198         UnicodeSet more = getRepresentativeBoundaryHangul();
199         sourceSet.addAll(more);
200         sourceSet.addAll(extraSamples);
201         return sourceSet;
202     }
203
204     private static UnicodeSet getRepresentativeBoundaryHangul() {
205         UnicodeSet resultToAddTo = new UnicodeSet();
206         // U+1100 HANGUL CHOSEONG KIYEOK
207         // U+1161 HANGUL JUNGSEONG A
208         UnicodeSet L = new UnicodeSet("[:hst=L:]");
209         UnicodeSet V = new UnicodeSet("[:hst=V:]");
210         UnicodeSet T = new UnicodeSet("[:hst=T:]");
211
212         String prefixLV = "\u1100\u1161";
213         String prefixL = "\u1100";
214         String suffixV = "\u1161";
215         String nullL = "\u110B"; // HANGUL CHOSEONG IEUNG
216
217         UnicodeSet L0 = new UnicodeSet("[\u1100\u110B]");
218
219         // do all combinations of L0 + V + nullL + V
220
221         for (UnicodeSetIterator iL0 = new UnicodeSetIterator(L0); iL0.next();) {
222             for (UnicodeSetIterator iV = new UnicodeSetIterator(V); iV.next();) {
223                 for (UnicodeSetIterator iV2 = new UnicodeSetIterator(V); iV2.next();) {
224                     String sample = iL0.getString() + iV.getString() + nullL + iV2.getString();
225                     String trial = Normalizer.compose(sample, false);
226                     if (trial.length() == 2) {
227                         resultToAddTo.add(trial);
228                     }
229                 }
230             }
231         }
232
233         for (UnicodeSetIterator iL = new UnicodeSetIterator(L); iL.next();) {
234             // do all combinations of "g" + V + L + "a"
235             final String suffix = iL.getString() + suffixV;
236             for (UnicodeSetIterator iV = new UnicodeSetIterator(V); iV.next();) {
237                 String sample = prefixL + iV.getString() + suffix;
238                 String trial = Normalizer.compose(sample, false);
239                 if (trial.length() == 2) {
240                     resultToAddTo.add(trial);
241                 }
242             }
243             // do all combinations of "ga" + T + L + "a"
244             for (UnicodeSetIterator iT = new UnicodeSetIterator(T); iT.next();) {
245                 String sample = prefixLV + iT.getString() + suffix;
246                 String trial = Normalizer.compose(sample, false);
247                 if (trial.length() == 2) {
248                     resultToAddTo.add(trial);
249                 }
250             }
251         }
252         return resultToAddTo;
253     }
254
255     private static void addRepresentativeHangul(UnicodeSet resultToAddTo, int leng, boolean noFirstConsonant) {
256         UnicodeSet notYetSeen = new UnicodeSet();
257         for (char c = '\uAC00'; c <  '\uD7AF'; ++c) {
258             String charStr = String.valueOf(c);
259             String decomp = Normalizer.decompose(charStr, false);
260             if (decomp.length() != leng) {
261                 continue; // only take one length at a time
262             }
263             if (decomp.startsWith("\u110B ") != noFirstConsonant) {
264                 continue;
265             }
266             if (!notYetSeen.containsAll(decomp)) {
267                 resultToAddTo.add(c);
268                 notYetSeen.addAll(decomp);
269             }
270         }
271     }
272
273
274     public void TestHan() throws UnsupportedEncodingException, FileNotFoundException {
275         try{
276             UnicodeSet exemplars = LocaleData.getExemplarSet(new ULocale("zh"),0);
277             // create string with all chars
278             StringBuffer b = new StringBuffer();
279             for (UnicodeSetIterator it = new UnicodeSetIterator(exemplars); it.next();) {
280                 UTF16.append(b,it.codepoint);
281             }
282             String source = b.toString();
283             // transform with Han translit
284             Transliterator han = Transliterator.getInstance("Han-Latin");
285             String target = han.transliterate(source);
286             // now verify that there are no Han characters left
287             UnicodeSet allHan = new UnicodeSet("[:han:]");
288             assertFalse("No Han must be left after Han-Latin transliteration",allHan.containsSome(target));
289             // check the pinyin translit
290             Transliterator pn = Transliterator.getInstance("Latin-NumericPinyin");
291             String target2 = pn.transliterate(target);
292             // verify that there are no marks
293             Transliterator nfc = Transliterator.getInstance("nfc");
294             String nfced = nfc.transliterate(target2);
295             UnicodeSet allMarks = new UnicodeSet("[:mark:]");
296             assertFalse("NumericPinyin must contain no marks", allMarks.containsSome(nfced));
297             // verify roundtrip
298             Transliterator np = pn.getInverse();
299             String target3 = np.transliterate(target);
300             boolean roundtripOK = target3.equals(target);
301             assertTrue("NumericPinyin must roundtrip", roundtripOK);
302             if (!roundtripOK) {
303                 String filename = "numeric-pinyin.log.txt";
304                 PrintWriter out = new PrintWriter(
305                         new BufferedWriter(
306                                 new OutputStreamWriter(
307                                         new FileOutputStream(filename), "UTF8"), 4*1024));
308                 errln("Creating log file " + new File(filename).getAbsoluteFile());
309                 out.println("Pinyin:                " + target);
310                 out.println("Pinyin-Numeric-Pinyin: " + target2);
311                 out.close();
312             }
313         }catch(MissingResourceException ex){
314             warnln("Could not load the locale data for fetching the exemplar characters.");
315         }
316     }
317
318     public void TestSingle() {
319         Transliterator t = Transliterator.getInstance("Latin-Greek");
320         t.transliterate("\u0061\u0101\u0069");
321     }
322
323     String getGreekSet() {
324         // Time bomb
325         if (skipIfBeforeICU(4,3,0)) {
326             // We temporarily filter against Unicode 4.1, but we only do this
327             // before version 3.5.
328             logln("TestGreek needs to be updated to remove delete the section marked [:Age=4.0:] filter");
329         } else {
330             errln("TestGreek needs to be updated to remove delete the [:Age=4.0:] filter ");
331         }
332         return 
333         // isICU28() ? "[[\u003B\u00B7[:Greek:]-[\u03D7-\u03EF]]&[:Age=3.2:]]" :
334         "[\u003B\u00B7[[:Greek:]&[:Letter:]]-[" +
335         "\u1D26-\u1D2A" + // L&   [5] GREEK LETTER SMALL CAPITAL GAMMA..GREEK LETTER SMALL CAPITAL PSI
336         "\u1D5D-\u1D61" + // Lm   [5] MODIFIER LETTER SMALL BETA..MODIFIER LETTER SMALL CHI
337         "\u1D66-\u1D6A" + // L&   [5] GREEK SUBSCRIPT SMALL LETTER BETA..GREEK SUBSCRIPT SMALL LETTER CHI
338         "\u03D7-\u03EF" + // \N{GREEK KAI SYMBOL}..\N{COPTIC SMALL LETTER DEI}
339         "] & [:Age=4.0:]]";
340     }
341
342     public void TestGreek() throws IOException {
343         long start = System.currentTimeMillis();
344         new Test("Latin-Greek", 50)
345         .test("[a-zA-Z]", getGreekSet(),
346                 "[\u00B5\u037A\u03D0-\u03F5\u03F9]", /* roundtrip exclusions */
347                 this, new LegalGreek(true));
348         showElapsed(start, "TestGreek");
349     }
350
351     public void TestGreekUNGEGN() throws IOException {
352         long start = System.currentTimeMillis();
353         new Test("Latin-Greek/UNGEGN")
354         .test("[a-zA-Z]", getGreekSet(),
355                 "[\u00B5\u037A\u03D0-\uFFFF{\u039C\u03C0}]", /* roundtrip exclusions */
356                 this, new LegalGreek(false));
357         showElapsed(start, "TestGreekUNGEGN");
358     }
359
360     public void Testel() throws IOException {
361         long start = System.currentTimeMillis();
362         new Test("Latin-el")
363         .test("[a-zA-Z]", getGreekSet(),
364                 "[\u00B5\u037A\u03D0-\uFFFF{\u039C\u03C0}]", /* roundtrip exclusions */
365                 this, new LegalGreek(false));
366         showElapsed(start, "Testel");
367     }
368
369     public void TestCyrillic() throws IOException {
370         long start = System.currentTimeMillis();
371         new Test("Latin-Cyrillic")
372         .test("[a-zA-Z\u0110\u0111\u02BA\u02B9]", "[\u0400-\u045F]", null, this, new Legal());
373         showElapsed(start, "TestCyrillic");
374     }
375
376     static final String ARABIC = "[\u06A9\u060C\u061B\u061F\u0621\u0627-\u063A\u0641-\u0655\u0660-\u066C\u067E\u0686\u0698\u06A4\u06AD\u06AF\u06CB-\u06CC\u06F0-\u06F9]";
377
378     public void TestArabic() throws IOException {
379         long start = System.currentTimeMillis();
380         new Test("Latin-Arabic")
381         .test("[a-zA-Z\u02BE\u02BF]", ARABIC, "[a-zA-Z\u02BE\u02BF\u207F]", null, this, new Legal()); //
382         showElapsed(start, "TestArabic");
383     }
384
385     public void TestHebrew() throws IOException {
386         //      Time bomb
387         if (skipIfBeforeICU(4,3,0)) {
388             // We temporarily filter against Unicode 4.1, but we only do this
389             // before version 3.5.
390             logln("TestHebrew needs to be updated to remove delete the section marked [:Age=4.0:] filter");
391         } else {
392             errln("TestHebrew needs to be updated to remove delete the [:Age=4.0:] filter ");
393         }
394         long start = System.currentTimeMillis();
395         new Test("Latin-Hebrew")
396         .test("[a-zA-Z\u02BC\u02BB]", "[[[:hebrew:]-[\u05BD\uFB00-\uFBFF]]& [:Age=4.0:]]", "[\u05F0\u05F1\u05F2]", this, new LegalHebrew());
397         showElapsed(start, "TestHebrew");
398     }
399
400     public void TestThai() throws IOException {
401         long start = System.currentTimeMillis();
402         if(skipIfBeforeICU(4,3,0)){
403             new Test("Latin-Thai")
404             .test("[a-zA-Z\u0142\u1ECD\u00E6\u0131\u0268\u02CC]",
405                     "[\u0E01-\u0E3A\u0E40-\u0E5B]", 
406                     "[a-zA-Z\u0142\u1ECD\u00E6\u0131\u0268\u02B9\u02CC]",
407                     "[\u0E4F]", this, new LegalThai());   
408         }else{
409             new Test("Latin-Thai")
410             .test("[a-zA-Z\u0142\u1ECD\u00E6\u0131\u0268\u02CC]",
411                     "[\u0E01-\u0E3A\u0E40-\u0E5B]", 
412                     "[a-zA-Z\u0142\u1ECD\u00E6\u0131\u0268\u02B9\u02CC]",
413                     null, this, new LegalThai());
414         }
415
416         showElapsed(start, "TestThai");
417     }
418
419     //----------------------------------
420     // Inter-Indic Tests
421     //----------------------------------
422     public static class LegalIndic extends Legal{
423         UnicodeSet vowelSignSet = new UnicodeSet();
424
425         public LegalIndic(){
426             vowelSignSet.addAll(new UnicodeSet("[\u0901\u0902\u0903\u0904\u093e-\u094c\u0962\u0963]"));               /* Devanagari */
427             vowelSignSet.addAll(new UnicodeSet("[\u0981\u0982\u0983\u09be-\u09cc\u09e2\u09e3\u09D7]"));         /* Bengali */
428             vowelSignSet.addAll(new UnicodeSet("[\u0a01\u0a02\u0a03\u0a3e-\u0a4c\u0a62\u0a63\u0a70\u0a71]"));   /* Gurmukhi */
429             vowelSignSet.addAll(new UnicodeSet("[\u0a81\u0a82\u0a83\u0abe-\u0acc\u0ae2\u0ae3]"));               /* Gujarati */
430             vowelSignSet.addAll(new UnicodeSet("[\u0b01\u0b02\u0b03\u0b3e-\u0b4c\u0b62\u0b63\u0b56\u0b57]"));   /* Oriya */
431             vowelSignSet.addAll(new UnicodeSet("[\u0b81\u0b82\u0b83\u0bbe-\u0bcc\u0be2\u0be3\u0bd7]"));         /* Tamil */
432             vowelSignSet.addAll(new UnicodeSet("[\u0c01\u0c02\u0c03\u0c3e-\u0c4c\u0c62\u0c63\u0c55\u0c56]"));   /* Telugu */
433             vowelSignSet.addAll(new UnicodeSet("[\u0c81\u0c82\u0c83\u0cbe-\u0ccc\u0ce2\u0ce3\u0cd5\u0cd6]"));   /* Kannada */
434             vowelSignSet.addAll(new UnicodeSet("[\u0d01\u0d02\u0d03\u0d3e-\u0d4c\u0d62\u0d63\u0d57]"));         /* Malayalam */
435         }
436
437         String avagraha = "\u093d\u09bd\u0abd\u0b3d\u0cbd";
438         String nukta = "\u093c\u09bc\u0a3c\u0abc\u0b3c\u0cbc";
439         String virama = "\u094d\u09cd\u0a4d\u0acd\u0b4d\u0bcd\u0c4d\u0ccd\u0d4d";
440         String sanskritStressSigns = "\u0951\u0952\u0953\u0954\u097d";
441         String chandrabindu = "\u0901\u0981\u0A81\u0b01\u0c01";
442         public boolean is(String sourceString){
443             int cp=sourceString.charAt(0);
444
445             // A vowel sign cannot be the first char
446             if(vowelSignSet.contains(cp)){
447                 return false;
448             }else if(avagraha.indexOf(cp)!=-1){
449                 return false;
450             }else if(virama.indexOf(cp)!=-1){
451                 return false;
452             }else if(nukta.indexOf(cp)!=-1){
453                 return false;
454             }else if(sanskritStressSigns.indexOf(cp)!=-1){
455                 return false;
456             }else if((chandrabindu.indexOf(cp)!=-1) &&
457                     (sourceString.length() >1 &&
458                             vowelSignSet.contains(sourceString.charAt(1)))){
459                 return false;
460             }
461             return true;
462         }
463     }
464     static String latinForIndic = "[['.0-9A-Za-z~\u00C0-\u00C5\u00C7-\u00CF\u00D1-\u00D6\u00D9-\u00DD"+
465     "\u00E0-\u00E5\u00E7-\u00EF\u00F1-\u00F6\u00F9-\u00FD\u00FF-\u010F"+
466     "\u0112-\u0125\u0128-\u0130\u0134-\u0137\u0139-\u013E\u0143-\u0148"+
467     "\u014C-\u0151\u0154-\u0165\u0168-\u017E\u01A0-\u01A1\u01AF-\u01B0"+
468     "\u01CD-\u01DC\u01DE-\u01E3\u01E6-\u01ED\u01F0\u01F4-\u01F5\u01F8-\u01FB"+
469     "\u0200-\u021B\u021E-\u021F\u0226-\u0233\u0294\u0303-\u0304\u0306\u0314-\u0315"+
470     "\u0325\u040E\u0419\u0439\u045E\u04C1-\u04C2\u04D0-\u04D1\u04D6-\u04D7"+
471     "\u04E2-\u04E3\u04EE-\u04EF\u1E00-\u1E99\u1EA0-\u1EF9\u1F01\u1F03\u1F05"+
472     "\u1F07\u1F09\u1F0B\u1F0D\u1F0F\u1F11\u1F13\u1F15\u1F19\u1F1B\u1F1D\u1F21"+
473     "\u1F23\u1F25\u1F27\u1F29\u1F2B\u1F2D\u1F2F\u1F31\u1F33\u1F35\u1F37\u1F39"+
474     "\u1F3B\u1F3D\u1F3F\u1F41\u1F43\u1F45\u1F49\u1F4B\u1F4D\u1F51\u1F53\u1F55"+
475     "\u1F57\u1F59\u1F5B\u1F5D\u1F5F\u1F61\u1F63\u1F65\u1F67\u1F69\u1F6B\u1F6D"+
476     "\u1F6F\u1F81\u1F83\u1F85\u1F87\u1F89\u1F8B\u1F8D\u1F8F\u1F91\u1F93\u1F95"+
477     "\u1F97\u1F99\u1F9B\u1F9D\u1F9F\u1FA1\u1FA3\u1FA5\u1FA7\u1FA9\u1FAB\u1FAD"+
478     "\u1FAF-\u1FB1\u1FB8-\u1FB9\u1FD0-\u1FD1\u1FD8-\u1FD9\u1FE0-\u1FE1\u1FE5"+
479     "\u1FE8-\u1FE9\u1FEC\u212A-\u212B\uE04D\uE064]"+
480     "-[\uE000-\uE080 \u01E2\u01E3]& [[:latin:][:mark:]]]";
481
482     public void TestDevanagariLatin() throws IOException {
483         long start = System.currentTimeMillis();
484         if (skipIfBeforeICU(4,3,0)) {
485             logln("Warning: TestDevanagariLatin needs to be updated to remove delete the section marked [:Age=4.1:] filter");
486         } else {
487             //              We temporarily filter against Unicode 4.1, but we only do this
488             // before version 3.4.
489             errln("FAIL: TestDevanagariLatin needs to be updated to remove delete the [:Age=4.1:] filter ");
490             return;
491         }
492         new Test("Latin-DEVANAGARI", 50)
493         .test(latinForIndic, "[[[:Devanagari:][\u094d][\u0964\u0965]]&[:Age=4.1:]]", "[\u0965\u0904]", this, new LegalIndic());
494         showElapsed(start, "TestDevanagariLatin");
495     }
496
497     private static final String [][] interIndicArray= new String[][]{
498         new String [] {  "BENGALI-DEVANAGARI",
499                 "[:BENGALI:]", "[:Devanagari:]",
500                 "[\u0904\u0951-\u0954\u0943-\u0949\u094a\u0962\u0963\u090D\u090e\u0911\u0912\u0929\u0933\u0934\u0935\u0950\u0958\u0959\u095a\u095b\u095e\u097d]", /*roundtrip exclusions*/
501         },
502         new String [] {  "DEVANAGARI-BENGALI",
503                 "[:Devanagari:]", "[:BENGALI:]",
504                 "[\u09D7\u090D\u090e\u0911\u0912\u0929\u0933\u0934\u0935\u0950\u0958\u0959\u095a\u095b\u095e\u09f0\u09f1\u09f2-\u09fa\u09ce]", /*roundtrip exclusions*/
505         },
506
507         new String [] {  "GURMUKHI-DEVANAGARI",
508                 "[:GURMUKHI:]", "[:Devanagari:]",
509                 "[\u0904\u0902\u0936\u0933\u0951-\u0954\u0902\u0903\u0943-\u0949\u094a\u0962\u0963\u090B\u090C\u090D\u090e\u0911\u0912\u0934\u0937\u093D\u0950\u0960\u0961\u097d]", /*roundtrip exclusions*/
510         },
511         new String [] {  "DEVANAGARI-GURMUKHI",
512                 "[:Devanagari:]", "[:GURMUKHI:]",
513                 "[\u0A02\u0946\u0A5C\u0951-\u0954\u0A70\u0A71\u090B\u090C\u090D\u090e\u0911\u0912\u0934\u0937\u093D\u0950\u0960\u0961\u0a72\u0a73\u0a74]", /*roundtrip exclusions*/
514         },
515
516         new String [] {  "GUJARATI-DEVANAGARI",
517                 "[:GUJARATI:]", "[:Devanagari:]",
518                 "[\u0904\u0946\u094A\u0962\u0963\u0951-\u0954\u0961\u090c\u090e\u0912\u097d]", /*roundtrip exclusions*/
519         },
520         new String [] {  "DEVANAGARI-GUJARATI",
521                 "[:Devanagari:]", "[:GUJARATI:]",
522                 "[\u0951-\u0954\u0961\u090c\u090e\u0912]", /*roundtrip exclusions*/
523         },
524
525         new String [] {  "ORIYA-DEVANAGARI",
526                 "[:ORIYA:]", "[:Devanagari:]",
527                 "[\u0904\u0912\u0911\u090D\u090e\u0931\u0943-\u094a\u0962\u0963\u0951-\u0954\u0950\u097d]", /*roundtrip exclusions*/
528         },
529         new String [] {  "DEVANAGARI-ORIYA",
530                 "[:Devanagari:]", "[:ORIYA:]",
531                 "[\u0b5f\u0b56\u0b57\u0b70\u0b71\u0950\u090D\u090e\u0912\u0911\u0931]", /*roundtrip exclusions*/
532         },
533
534         new String [] {  "Tamil-DEVANAGARI",
535                 "[:tamil:]", "[:Devanagari:]",
536                 "[\u0901\u0904\u093c\u0943-\u094a\u0951-\u0954\u0962\u0963\u090B\u090C\u090D\u0911\u0916\u0917\u0918\u091B\u091D\u0920\u0921\u0922\u0925\u0926\u0927\u092B\u092C\u092D\u0936\u093d\u0950[\u0958-\u0961]\u097d]", /*roundtrip exclusions*/
537         },
538         new String [] {  "DEVANAGARI-Tamil",
539                 "[:Devanagari:]", "[:tamil:]",
540                 "[\u0bd7\u0BF0\u0BF1\u0BF2]", /*roundtrip exclusions*/
541         },
542
543         new String [] {  "Telugu-DEVANAGARI",
544                 "[:telugu:]", "[:Devanagari:]",
545                 "[\u0904\u093c\u0950\u0945\u0949\u0951-\u0954\u0962\u0963\u090D\u0911\u093d\u0929\u0934[\u0958-\u095f]\u097d]", /*roundtrip exclusions*/
546         },
547         new String [] {  "DEVANAGARI-TELUGU",
548                 "[:Devanagari:]", "[:TELUGU:]",
549                 "[\u0c55\u0c56\u0950\u090D\u0911\u093d\u0929\u0934[\u0958-\u095f]]", /*roundtrip exclusions*/
550         },
551
552         new String [] {  "KANNADA-DEVANAGARI",
553                 "[:KANNADA:]", "[:Devanagari:]",
554                 "[\u0901\u0904\u0946\u0950\u0945\u0949\u0951-\u0954\u0962\u0963\u0950\u090D\u0911\u093d\u0929\u0934[\u0958-\u095f]\u097d]", /*roundtrip exclusions*/
555         },
556         new String [] {  "DEVANAGARI-KANNADA",
557                 "[:Devanagari:]", "[:KANNADA:]",
558                 "[{\u0cb0\u0cbc}{\u0cb3\u0cbc}\u0cde\u0cd5\u0cd6\u0950\u090D\u0911\u093d\u0929\u0934[\u0958-\u095f]]", /*roundtrip exclusions*/
559         },
560
561         new String [] {  "MALAYALAM-DEVANAGARI",
562                 "[:MALAYALAM:]", "[:Devanagari:]",
563                 "[\u0901\u0904\u094a\u094b\u094c\u093c\u0950\u0944\u0945\u0949\u0951-\u0954\u0962\u0963\u090D\u0911\u093d\u0929\u0934[\u0958-\u095f]\u097d]", /*roundtrip exclusions*/
564         },
565         new String [] {  "DEVANAGARI-MALAYALAM",
566                 "[:Devanagari:]", "[:MALAYALAM:]",
567                 "[\u0d4c\u0d57\u0950\u090D\u0911\u093d\u0929\u0934[\u0958-\u095f]]", /*roundtrip exclusions*/
568         },
569
570         new String [] {  "GURMUKHI-BENGALI",
571                 "[:GURMUKHI:]", "[:BENGALI:]",
572                 "[\u0982\u09b6\u09e2\u09e3\u09c3\u09c4\u09d7\u098B\u098C\u09B7\u09E0\u09E1\u09F0\u09F1\u09f2-\u09fa\u09ce]", /*roundtrip exclusions*/
573         },
574         new String [] {  "BENGALI-GURMUKHI",
575                 "[:BENGALI:]", "[:GURMUKHI:]",
576                 "[\u0A02\u0a5c\u0a47\u0a70\u0a71\u0A33\u0A35\u0A59\u0A5A\u0A5B\u0A5E\u0A72\u0A73\u0A74]", /*roundtrip exclusions*/
577         },
578
579         new String [] {  "GUJARATI-BENGALI",
580                 "[:GUJARATI:]", "[:BENGALI:]",
581                 "[\u09d7\u09e2\u09e3\u098c\u09e1\u09f0\u09f1\u09f2-\u09fa\u09ce]", /*roundtrip exclusions*/
582         },
583         new String [] {  "BENGALI-GUJARATI",
584                 "[:BENGALI:]", "[:GUJARATI:]",
585                 "[\u0A82\u0a83\u0Ac9\u0Ac5\u0ac7\u0A8D\u0A91\u0AB3\u0AB5\u0ABD\u0AD0]", /*roundtrip exclusions*/
586         },
587
588         new String [] {  "ORIYA-BENGALI",
589                 "[:ORIYA:]", "[:BENGALI:]",
590                 "[\u09c4\u09e2\u09e3\u09f0\u09f1\u09f2-\u09fa\u09ce]", /*roundtrip exclusions*/
591         },
592         new String [] {  "BENGALI-ORIYA",
593                 "[:BENGALI:]", "[:ORIYA:]",
594                 "[\u0b35\u0b71\u0b5f\u0b56\u0b33\u0b3d]", /*roundtrip exclusions*/
595         },
596
597         new String [] {  "Tamil-BENGALI",
598                 "[:tamil:]", "[:BENGALI:]",
599                 "[\u0981\u09bc\u09c3\u09c4\u09e2\u09e3\u09f0\u09f1\u098B\u098C\u0996\u0997\u0998\u099B\u099D\u09A0\u09A1\u09A2\u09A5\u09A6\u09A7\u09AB\u09AC\u09AD\u09B6\u09DC\u09DD\u09DF\u09E0\u09E1\u09f2-\u09fa\u09ce]", /*roundtrip exclusions*/
600         },
601         new String [] {  "BENGALI-Tamil",
602                 "[:BENGALI:]", "[:tamil:]",
603                 "[\u0bc6\u0bc7\u0bca\u0B8E\u0B92\u0BA9\u0BB1\u0BB3\u0BB4\u0BB5\u0BF0\u0BF1\u0BF2]", /*roundtrip exclusions*/
604         },
605
606         new String [] {  "Telugu-BENGALI",
607                 "[:telugu:]", "[:BENGALI:]",
608                 "[\u09e2\u09e3\u09bc\u09d7\u09f0\u09f1\u09dc\u09dd\u09df\u09f2-\u09fa\u09ce]", /*roundtrip exclusions*/
609         },
610         new String [] {  "BENGALI-TELUGU",
611                 "[:BENGALI:]", "[:TELUGU:]",
612                 "[\u0c55\u0c56\u0c47\u0c46\u0c4a\u0C0E\u0C12\u0C31\u0C33\u0C35]", /*roundtrip exclusions*/
613         },
614
615         new String [] {  "KANNADA-BENGALI",
616                 "[:KANNADA:]", "[:BENGALI:]",
617                 "[\u0981\u09e2\u09e3\u09bc\u09d7\u09f0\u09f1\u09dc\u09dd\u09df\u09f2-\u09fa\u09ce]", /*roundtrip exclusions*/
618         },
619         new String [] {  "BENGALI-KANNADA",
620                 "[:BENGALI:]", "[:KANNADA:]",
621                 "[{\u0cb0\u0cbc}{\u0cb3\u0cbc}\u0cc6\u0cca\u0cd5\u0cd6\u0cc7\u0C8E\u0C92\u0CB1\u0cb3\u0cb5\u0cde]", /*roundtrip exclusions*/
622         },
623
624         new String [] {  "MALAYALAM-BENGALI",
625                 "[:MALAYALAM:]", "[:BENGALI:]",
626                 "[\u0981\u09e2\u09e3\u09bc\u09c4\u09f0\u09f1\u09dc\u09dd\u09df\u09f2-\u09fa\u09ce]", /*roundtrip exclusions*/
627         },
628         new String [] {  "BENGALI-MALAYALAM",
629                 "[:BENGALI:]", "[:MALAYALAM:]",
630                 "[\u0d46\u0d4a\u0d47\u0d31-\u0d35\u0d0e\u0d12]", /*roundtrip exclusions*/
631         },
632
633         new String [] {  "GUJARATI-GURMUKHI",
634                 "[:GUJARATI:]", "[:GURMUKHI:]",
635                 "[\u0A02\u0ab3\u0ab6\u0A70\u0a71\u0a82\u0a83\u0ac3\u0ac4\u0ac5\u0ac9\u0a5c\u0a72\u0a73\u0a74\u0a8b\u0a8d\u0a91\u0abd]", /*roundtrip exclusions*/
636         },
637         new String [] {  "GURMUKHI-GUJARATI",
638                 "[:GURMUKHI:]", "[:GUJARATI:]",
639                 "[\u0a5c\u0A70\u0a71\u0a72\u0a73\u0a74\u0a82\u0a83\u0a8b\u0a8c\u0a8d\u0a91\u0ab3\u0ab6\u0ab7\u0abd\u0ac3\u0ac4\u0ac5\u0ac9\u0ad0\u0ae0\u0ae1]", /*roundtrip exclusions*/
640         },
641
642         new String [] {  "ORIYA-GURMUKHI",
643                 "[:ORIYA:]", "[:GURMUKHI:]",
644                 "[\u0A02\u0a5c\u0a21\u0a47\u0a71\u0b02\u0b03\u0b33\u0b36\u0b43\u0b56\u0b57\u0B0B\u0B0C\u0B37\u0B3D\u0B5F\u0B60\u0B61\u0a35\u0a72\u0a73\u0a74]", /*roundtrip exclusions*/
645         },
646         new String [] {  "GURMUKHI-ORIYA",
647                 "[:GURMUKHI:]", "[:ORIYA:]",
648                 "[\u0a71\u0b02\u0b03\u0b33\u0b36\u0b43\u0b56\u0b57\u0B0B\u0B0C\u0B37\u0B3D\u0B5F\u0B60\u0B61\u0b70\u0b71]", /*roundtrip exclusions*/
649         },
650
651         new String [] {  "TAMIL-GURMUKHI",
652                 "[:TAMIL:]", "[:GURMUKHI:]",
653                 "[\u0A01\u0A02\u0a33\u0a36\u0a3c\u0a70\u0a71\u0a47\u0A16\u0A17\u0A18\u0A1B\u0A1D\u0A20\u0A21\u0A22\u0A25\u0A26\u0A27\u0A2B\u0A2C\u0A2D\u0A59\u0A5A\u0A5B\u0A5C\u0A5E\u0A72\u0A73\u0A74]", /*roundtrip exclusions*/
654         },
655         new String [] {  "GURMUKHI-TAMIL",
656                 "[:GURMUKHI:]", "[:TAMIL:]",
657                 "[\u0b82\u0bc6\u0bca\u0bd7\u0bb7\u0bb3\u0b83\u0B8E\u0B92\u0BA9\u0BB1\u0BB4\u0bb6\u0BF0\u0BF1\u0BF2]", /*roundtrip exclusions*/
658         },
659
660         new String [] {  "TELUGU-GURMUKHI",
661                 "[:TELUGU:]", "[:GURMUKHI:]",
662                 "[\u0A02\u0a33\u0a36\u0a3c\u0a70\u0a71\u0A59\u0A5A\u0A5B\u0A5C\u0A5E\u0A72\u0A73\u0A74]", /*roundtrip exclusions*/
663         },
664         new String [] {  "GURMUKHI-TELUGU",
665                 "[:GURMUKHI:]", "[:TELUGU:]",
666                 "[\u0c02\u0c03\u0c33\u0c36\u0c44\u0c43\u0c46\u0c4a\u0c56\u0c55\u0C0B\u0C0C\u0C0E\u0C12\u0C31\u0C37\u0C60\u0C61]", /*roundtrip exclusions*/
667         },
668         new String [] {  "KANNADA-GURMUKHI",
669                 "[:KANNADA:]", "[:GURMUKHI:]",
670                 "[\u0A01\u0A02\u0a33\u0a36\u0a3c\u0a70\u0a71\u0A59\u0A5A\u0A5B\u0A5C\u0A5E\u0A72\u0A73\u0A74]", /*roundtrip exclusions*/
671         },
672         new String [] {  "GURMUKHI-KANNADA",
673                 "[:GURMUKHI:]", "[:KANNADA:]",
674                 "[{\u0cb0\u0cbc}{\u0cb3\u0cbc}\u0c82\u0c83\u0cb3\u0cb6\u0cc4\u0cc3\u0cc6\u0cca\u0cd5\u0cd6\u0C8B\u0C8C\u0C8E\u0C92\u0CB1\u0CB7\u0cbd\u0CE0\u0CE1\u0cde]", /*roundtrip exclusions*/
675         },
676
677         new String [] {  "MALAYALAM-GURMUKHI",
678                 "[:MALAYALAM:]", "[:GURMUKHI:]",
679                 "[\u0A01\u0A02\u0a4b\u0a4c\u0a33\u0a36\u0a3c\u0a70\u0a71\u0A59\u0A5A\u0A5B\u0A5C\u0A5E\u0A72\u0A73\u0A74]", /*roundtrip exclusions*/
680         },
681         new String [] {  "GURMUKHI-MALAYALAM",
682                 "[:GURMUKHI:]", "[:MALAYALAM:]",
683                 "[\u0d02\u0d03\u0d33\u0d36\u0d43\u0d46\u0d4a\u0d4c\u0d57\u0D0B\u0D0C\u0D0E\u0D12\u0D31\u0D34\u0D37\u0D60\u0D61]", /*roundtrip exclusions*/
684         },
685
686         new String [] {  "GUJARATI-ORIYA",
687                 "[:GUJARATI:]", "[:ORIYA:]",
688                 "[\u0b56\u0b57\u0B0C\u0B5F\u0B61\u0b70\u0b71]", /*roundtrip exclusions*/
689         },
690         new String [] {  "ORIYA-GUJARATI",
691                 "[:ORIYA:]", "[:GUJARATI:]",
692                 "[\u0Ac4\u0Ac5\u0Ac9\u0Ac7\u0A8D\u0A91\u0AB5\u0Ad0]", /*roundtrip exclusions*/
693         },
694
695         new String [] {  "TAMIL-GUJARATI",
696                 "[:TAMIL:]", "[:GUJARATI:]",
697                 "[\u0A81\u0a8c\u0abc\u0ac3\u0Ac4\u0Ac5\u0Ac9\u0Ac7\u0A8B\u0A8D\u0A91\u0A96\u0A97\u0A98\u0A9B\u0A9D\u0AA0\u0AA1\u0AA2\u0AA5\u0AA6\u0AA7\u0AAB\u0AAC\u0AAD\u0AB6\u0ABD\u0AD0\u0AE0\u0AE1]", /*roundtrip exclusions*/
698         },
699         new String [] {  "GUJARATI-TAMIL",
700                 "[:GUJARATI:]", "[:TAMIL:]",
701                 "[\u0Bc6\u0Bca\u0Bd7\u0B8E\u0B92\u0BA9\u0BB1\u0BB4\u0BF0\u0BF1\u0BF2]", /*roundtrip exclusions*/
702         },
703
704         new String [] {  "TELUGU-GUJARATI",
705                 "[:TELUGU:]", "[:GUJARATI:]",
706                 "[\u0abc\u0Ac5\u0Ac9\u0A8D\u0A91\u0ABD\u0Ad0]", /*roundtrip exclusions*/
707         },
708         new String [] {  "GUJARATI-TELUGU",
709                 "[:GUJARATI:]", "[:TELUGU:]",
710                 "[\u0c46\u0c4a\u0c55\u0c56\u0C0C\u0C0E\u0C12\u0C31\u0C61]", /*roundtrip exclusions*/
711         },
712
713         new String [] {  "KANNADA-GUJARATI",
714                 "[:KANNADA:]", "[:GUJARATI:]",
715                 "[\u0A81\u0abc\u0Ac5\u0Ac9\u0A8D\u0A91\u0ABD\u0Ad0]", /*roundtrip exclusions*/
716         },
717         new String [] {  "GUJARATI-KANNADA",
718                 "[:GUJARATI:]", "[:KANNADA:]",
719                 "[{\u0cb0\u0cbc}{\u0cb3\u0cbc}\u0cc6\u0cca\u0cd5\u0cd6\u0C8C\u0C8E\u0C92\u0CB1\u0CDE\u0CE1]", /*roundtrip exclusions*/
720         },
721
722         new String [] {  "MALAYALAM-GUJARATI",
723                 "[:MALAYALAM:]", "[:GUJARATI:]",
724                 "[\u0A81\u0ac4\u0acb\u0acc\u0abc\u0Ac5\u0Ac9\u0A8D\u0A91\u0ABD\u0Ad0]", /*roundtrip exclusions*/
725         },
726         new String [] {  "GUJARATI-MALAYALAM",
727                 "[:GUJARATI:]", "[:MALAYALAM:]",
728                 "[\u0d46\u0d4a\u0d4c\u0d55\u0d57\u0D0C\u0D0E\u0D12\u0D31\u0D34\u0D61]", /*roundtrip exclusions*/
729         },
730
731         new String [] {  "TAMIL-ORIYA",
732                 "[:TAMIL:]", "[:ORIYA:]",
733                 "[\u0B01\u0b3c\u0b43\u0b56\u0B0B\u0B0C\u0B16\u0B17\u0B18\u0B1B\u0B1D\u0B20\u0B21\u0B22\u0B25\u0B26\u0B27\u0B2B\u0B2C\u0B2D\u0B36\u0B3D\u0B5C\u0B5D\u0B5F\u0B60\u0B61\u0b70\u0b71]", /*roundtrip exclusions*/
734         },
735         new String [] {  "ORIYA-TAMIL",
736                 "[:ORIYA:]", "[:TAMIL:]",
737                 "[\u0bc6\u0bca\u0bc7\u0B8E\u0B92\u0BA9\u0BB1\u0BB4\u0BB5\u0BF0\u0BF1\u0BF2]", /*roundtrip exclusions*/
738         },
739
740         new String [] {  "TELUGU-ORIYA",
741                 "[:TELUGU:]", "[:ORIYA:]",
742                 "[\u0b3c\u0b57\u0b56\u0B3D\u0B5C\u0B5D\u0B5F\u0b70\u0b71]", /*roundtrip exclusions*/
743         },
744         new String [] {  "ORIYA-TELUGU",
745                 "[:ORIYA:]", "[:TELUGU:]",
746                 "[\u0c44\u0c46\u0c4a\u0c55\u0c47\u0C0E\u0C12\u0C31\u0C35]", /*roundtrip exclusions*/
747         },
748
749         new String [] {  "KANNADA-ORIYA",
750                 "[:KANNADA:]", "[:ORIYA:]",
751                 "[\u0B01\u0b3c\u0b57\u0B3D\u0B5C\u0B5D\u0B5F\u0b70\u0b71]", /*roundtrip exclusions*/
752         },
753         new String [] {  "ORIYA-KANNADA",
754                 "[:ORIYA:]", "[:KANNADA:]",
755                 "[{\u0cb0\u0cbc}{\u0cb3\u0cbc}\u0cc4\u0cc6\u0cca\u0cd5\u0cc7\u0C8E\u0C92\u0CB1\u0CB5\u0CDE]", /*roundtrip exclusions*/
756         },
757
758         new String [] {  "MALAYALAM-ORIYA",
759                 "[:MALAYALAM:]", "[:ORIYA:]",
760                 "[\u0B01\u0b3c\u0b56\u0B3D\u0B5C\u0B5D\u0B5F\u0b70\u0b71]", /*roundtrip exclusions*/
761         },
762         new String [] {  "ORIYA-MALAYALAM",
763                 "[:ORIYA:]", "[:MALAYALAM:]",
764                 "[\u0D47\u0D46\u0D4a\u0D0E\u0D12\u0D31\u0D34\u0D35]", /*roundtrip exclusions*/
765         },
766
767         new String [] {  "TELUGU-TAMIL",
768                 "[:TELUGU:]", "[:TAMIL:]",
769                 "[\u0bd7\u0ba9\u0bb4\u0BF0\u0BF1\u0BF2\u0BF0\u0BF1\u0BF2]", /*roundtrip exclusions*/
770         },
771         new String [] {  "TAMIL-TELUGU",
772                 "[:TAMIL:]", "[:TELUGU:]",
773                 "[\u0C01\u0c43\u0c44\u0c46\u0c47\u0c55\u0c56\u0c66\u0C0B\u0C0C\u0C16\u0C17\u0C18\u0C1B\u0C1D\u0C20\u0C21\u0C22\u0C25\u0C26\u0C27\u0C2B\u0C2C\u0C2D\u0C36\u0C60\u0C61]", /*roundtrip exclusions*/
774         },
775
776         new String [] {  "KANNADA-TAMIL",
777                 "[:KANNADA:]", "[:TAMIL:]",
778                 "[\u0bd7\u0bc6\u0ba9\u0bb4\u0BF0\u0BF1\u0BF2]", /*roundtrip exclusions*/
779         },
780         new String [] {  "TAMIL-KANNADA",
781                 "[:TAMIL:]", "[:KANNADA:]",
782                 "[\u0cc3\u0cc4\u0cc6\u0cc7\u0cd5\u0cd6\u0C8B\u0C8C\u0C96\u0C97\u0C98\u0C9B\u0C9D\u0CA0\u0CA1\u0CA2\u0CA5\u0CA6\u0CA7\u0CAB\u0CAC\u0CAD\u0CB6\u0cbc\u0cbd\u0CDE\u0CE0\u0CE1]", /*roundtrip exclusions*/
783         },
784
785         new String [] {  "MALAYALAM-TAMIL",
786                 "[:MALAYALAM:]", "[:TAMIL:]",
787                 "[\u0ba9\u0BF0\u0BF1\u0BF2]", /*roundtrip exclusions*/
788         },
789         new String [] {  "TAMIL-MALAYALAM",
790                 "[:TAMIL:]", "[:MALAYALAM:]",
791                 "[\u0d43\u0d12\u0D0B\u0D0C\u0D16\u0D17\u0D18\u0D1B\u0D1D\u0D20\u0D21\u0D22\u0D25\u0D26\u0D27\u0D2B\u0D2C\u0D2D\u0D36\u0D60\u0D61]", /*roundtrip exclusions*/
792         },
793
794         new String [] {  "KANNADA-TELUGU",
795                 "[:KANNADA:]", "[:TELUGU:]",
796                 "[\u0C01\u0c3f\u0c46\u0c48\u0c4a]", /*roundtrip exclusions*/
797         },
798         new String [] {  "TELUGU-KANNADA",
799                 "[:TELUGU:]", "[:KANNADA:]",
800                 "[\u0cc8\u0cd5\u0cd6\u0CDE\u0cbc\u0cbd]", /*roundtrip exclusions*/
801         },
802
803         new String [] {  "MALAYALAM-TELUGU",
804                 "[:MALAYALAM:]", "[:TELUGU:]",
805                 "[\u0C01\u0c44\u0c4a\u0c4c\u0c4b\u0c55\u0c56]", /*roundtrip exclusions*/
806         },
807         new String [] {  "TELUGU-MALAYALAM",
808                 "[:TELUGU:]", "[:MALAYALAM:]",
809                 "[\u0d4c\u0d57\u0D34]", /*roundtrip exclusions*/
810         },
811
812         new String [] {  "MALAYALAM-KANNADA",
813                 "[:MALAYALAM:]", "[:KANNADA:]",
814                 "[\u0cbc\u0cbd\u0cc4\u0cc6\u0cca\u0ccc\u0ccb\u0cd5\u0cd6\u0cDe]", /*roundtrip exclusions*/
815         },
816         new String [] {  "Latin-Bengali",
817                 latinForIndic, "[[:Bengali:][\u0964\u0965]]",
818                 "[\u0965\u09f0-\u09fa\u09ce]", /*roundtrip exclusions*/
819         },
820         new String [] {  "Latin-Gurmukhi",
821                 latinForIndic, "[[:Gurmukhi:][\u0964\u0965]]",
822                 "[\u0a01\u0a02\u0965\u0a72\u0a73\u0a74]", /*roundtrip exclusions*/
823         },
824         new String [] {  "Latin-Gujarati",
825                 latinForIndic, "[[:Gujarati:][\u0964\u0965]]",
826                 "[\u0965]", /*roundtrip exclusions*/
827         },
828         new String [] {  "Latin-Oriya",
829                 latinForIndic, "[[:Oriya:][\u0964\u0965]]",
830                 "[\u0965\u0b70]", /*roundtrip exclusions*/
831         },
832         new String [] {  "Latin-Tamil",
833                 latinForIndic, "[:Tamil:]",
834                 "[\u0BF0\u0BF1\u0BF2]", /*roundtrip exclusions*/
835         },
836         new String [] {  "Latin-Telugu",
837                 latinForIndic, "[:Telugu:]",
838                 null, /*roundtrip exclusions*/
839         },
840         new String [] {  "Latin-Kannada",
841                 latinForIndic, "[:Kannada:]",
842                 null, /*roundtrip exclusions*/
843         },
844         new String [] {  "Latin-Malayalam",
845                 latinForIndic, "[:Malayalam:]",
846                 null, /*roundtrip exclusions*/
847         },
848     };
849
850     public void TestInterIndic() throws Exception{
851         long start = System.currentTimeMillis();
852         int num = interIndicArray.length;
853         if (isQuick()) {
854             logln("Testing only 5 of "+ interIndicArray.length+" Skipping rest (use -e for exhaustive)");
855             num = 5;
856         }
857         if (skipIfBeforeICU(4,3,0)) {
858             logln("Warning: TestInterIndic needs to be updated to remove delete the section marked [:Age=4.1:] filter");
859         } else {
860             //          We temporarily filter against Unicode 4.1, but we only do this
861             // before version 3.4.
862             errln("FAIL: TestInterIndic needs to be updated to remove delete the [:Age=4.1:] filter ");
863             return;
864         }
865         for(int i=0; i<num;i++){
866             logln("Testing " + interIndicArray[i][0] + " at index " + i   );
867             /*TODO: uncomment the line below when the transliterator is fixed
868             new Test(interIndicArray[i][0], 50)
869                 .test(interIndicArray[i][1],
870                       interIndicArray[i][2],
871                       interIndicArray[i][3],
872                       this, new LegalIndic());
873              */
874             /* comment lines below  when transliterator is fixed */
875             // start
876             new Test(interIndicArray[i][0], 50)
877             .test("["+interIndicArray[i][1]+" &[:Age=4.1:]]",
878                     "["+interIndicArray[i][2]+" &[:Age=4.1:]]",
879                     interIndicArray[i][3],
880                     this, new LegalIndic());
881             //end
882         }
883         showElapsed(start, "TestInterIndic");
884     }
885
886     //---------------
887     // End Indic
888     //---------------
889
890     public static class Legal {
891         public boolean is(String sourceString) {return true;}
892     }
893
894     public static class LegalJamo extends Legal {
895         // any initial must be followed by a medial (or initial)
896         // any medial must follow an initial (or medial)
897         // any final must follow a medial (or final)
898
899         public boolean is(String sourceString) {
900             try {
901                 int t;
902                 String decomp = Normalizer.normalize(sourceString, Normalizer.NFD);
903                 for (int i = 0; i < decomp.length(); ++i) { // don't worry about surrogates
904                     switch (getType(decomp.charAt(i))) {
905                     case 0:
906                         t = getType(decomp.charAt(i+1));
907                         if (t != 0 && t != 1) return false;
908                         break;
909                     case 1:
910                         t = getType(decomp.charAt(i-1));
911                         if (t != 0 && t != 1) return false;
912                         break;
913                     case 2:
914                         t = getType(decomp.charAt(i-1));
915                         if (t != 1 && t != 2) return false;
916                         break;
917                     }
918                 }
919                 return true;
920             } catch (StringIndexOutOfBoundsException e) {
921                 return false;
922             }
923         }
924
925         public int getType(char c) {
926             if ('\u1100' <= c && c <= '\u1112') return 0;
927             else if ('\u1161' <= c && c  <= '\u1175') return 1;
928             else if ('\u11A8' <= c && c  <= '\u11C2') return 2;
929             return -1; // other
930         }
931     }
932
933     //static BreakIterator thaiBreak = BreakIterator.getWordInstance(new Locale("th", "TH"));
934     // anything is legal except word ending with Logical-order-exception
935     public static class LegalThai extends Legal {
936         public boolean is(String sourceString) {
937             if (sourceString.length() == 0) return true;
938             char ch = sourceString.charAt(sourceString.length() - 1); // don't worry about surrogates.
939             if (UCharacter.hasBinaryProperty(ch, UProperty.LOGICAL_ORDER_EXCEPTION)) return false;
940
941
942             // disallow anything with a wordbreak between
943             /*
944             if (UTF16.countCodePoint(sourceString) <= 1) return true;
945             thaiBreak.setText(sourceString);
946             for (int pos = thaiBreak.first(); pos != BreakIterator.DONE; pos = thaiBreak.next()) {
947                 if (pos > 0 && pos < sourceString.length()) {
948                     System.out.println("Skipping " + Utility.escape(sourceString));
949                     return false;
950                 }
951             }
952              */
953             return true;
954         }
955     }
956
    // anything is legal except that Final letters can't be followed by a letter; NonFinal letters must be followed by one
958     public static class LegalHebrew extends Legal {
959         static UnicodeSet FINAL = new UnicodeSet("[\u05DA\u05DD\u05DF\u05E3\u05E5]");
960         static UnicodeSet NON_FINAL = new UnicodeSet("[\u05DB\u05DE\u05E0\u05E4\u05E6]");
961         static UnicodeSet LETTER = new UnicodeSet("[:letter:]");
962         public boolean is(String sourceString) {
963             if (sourceString.length() == 0) return true;
964             // don't worry about surrogates.
965             for (int i = 0; i < sourceString.length(); ++i) {
966                 char ch = sourceString.charAt(i);
967                 char next = i+1 == sourceString.length() ? '\u0000' : sourceString.charAt(i);
968                 if (FINAL.contains(ch)) {
969                     if (LETTER.contains(next)) return false;
970                 } else if (NON_FINAL.contains(ch)) {
971                     if (!LETTER.contains(next)) return false;
972                 }
973             }
974             return true;
975         }
976     }
977
978
979     public static class LegalGreek extends Legal {
980
981         boolean full;
982
983         public LegalGreek(boolean full) {
984             this.full = full;
985         }
986
987         static final char IOTA_SUBSCRIPT = '\u0345';
988         static final UnicodeSet breathing = new UnicodeSet("[\\u0313\\u0314']");
989         static final UnicodeSet validSecondVowel = new UnicodeSet("[\\u03C5\\u03B9\\u03A5\\u0399]");
990
991         public static boolean isVowel(char c) {
992             return "\u03B1\u03B5\u03B7\u03B9\u03BF\u03C5\u03C9\u0391\u0395\u0397\u0399\u039F\u03A5\u03A9".indexOf(c) >= 0;
993         }
994
995         public static boolean isRho(char c) {
996             return "\u03C1\u03A1".indexOf(c) >= 0;
997         }
998
999         public boolean is(String sourceString) {
1000             try {
1001                 String decomp = Normalizer.normalize(sourceString, Normalizer.NFD);
1002
1003                 // modern is simpler: don't care about anything but a grave
1004                 if (!full) {
1005                     //if (sourceString.equals("\u039C\u03C0")) return false;
1006                     for (int i = 0; i < decomp.length(); ++i) {
1007                         char c = decomp.charAt(i);
1008                         // exclude all the accents
1009                         if (c == '\u0313' || c == '\u0314' || c == '\u0300' || c == '\u0302'
1010                             || c == '\u0342' || c == '\u0345'
1011                         ) return false;
1012                     }
1013                     return true;
1014                 }
1015
1016                 // Legal full Greek has breathing marks IFF there is a vowel or RHO at the start
1017                 // IF it has them, it has exactly one.
1018                 // IF it starts with a RHO, then the breathing mark must come before the second letter.
1019                 // IF it starts with a vowel, then it must before the third letter.
1020                 //  it will only come after the second if of the format [vowel] [no iota subscript!] [upsilon or iota]
1021                 // Since there are no surrogates in greek, don't worry about them
1022
1023                 boolean firstIsVowel = false;
1024                 boolean firstIsRho = false;
1025                 boolean noLetterYet = true;
1026                 int breathingCount = 0;
1027                 int letterCount = 0;
1028                 //int breathingPosition = -1;
1029
1030                 for (int i = 0; i < decomp.length(); ++i) {
1031                     char c = decomp.charAt(i);
1032                     if (UCharacter.isLetter(c)) {
1033                         ++letterCount;
1034                         if (firstIsVowel && !validSecondVowel.contains(c) && breathingCount == 0) return false;
1035                         if (noLetterYet) {
1036                             noLetterYet = false;
1037                             firstIsVowel = isVowel(c);
1038                             firstIsRho = isRho(c);
1039                         }
1040                         if (firstIsRho && letterCount == 2 && breathingCount == 0) return false;
1041                     }
1042                     if (c == IOTA_SUBSCRIPT && firstIsVowel && breathingCount == 0) return false;
1043                     if (breathing.contains(c)) {
1044                         // breathingPosition = i;
1045                         ++breathingCount;
1046                     }
1047                 }
1048
1049                 if (firstIsVowel || firstIsRho) return breathingCount == 1;
1050                 return breathingCount == 0;
1051             } catch (Throwable t) {
1052                 System.out.println(t.getClass().getName() + " " + t.getMessage());
1053                 return true;
1054             }
1055         }
1056     }
1057
1058     static class Test {
1059
        // HTML report writer; test(...) points it at an in-memory buffer.
        PrintWriter out;

        // ID passed to Transliterator.getInstance() in test2().
        private String transliteratorID;
        // When errorCount exceeds this, the summary is marked " (at least!)".
        private int errorLimit = 500;
        // Number of failures recorded; a non-zero value makes test(...) write the error log.
        private int errorCount = 0;
        // Default is one million; can be changed with setPairLimit().
        private long pairLimit  = 1000000; // make default be 1M.
        // Default is 100; can be overridden through the two-argument constructor.
        private int density = 100;
        // Source and target sets with [:Other:] removed (see test(...)).
        UnicodeSet sourceRange;
        UnicodeSet targetRange;
        // Back-to-source and target sets with all non-letters added (see test(...)).
        UnicodeSet toSource;
        UnicodeSet toTarget;
        // Set built from the exclusion pattern given to test(...); empty when none is given.
        UnicodeSet roundtripExclusions;

        // Owning test object, used for logln/errln/isQuick.
        RoundTripTest log;
        // Filter deciding which source strings are legal input.
        Legal legalSource;
        // Initialised to [:other:] in test(...).
        UnicodeSet badCharacters;
1076
        /**
         * Creates a test for the given script transliterator, using a density
         * of 100.
         *
         * @param transliteratorID ID of the transliterator to test
         */
        Test(String transliteratorID) {
            this(transliteratorID, 100);
        }
1083
        /**
         * Creates a test for the given script transliterator with an explicit
         * density.
         *
         * @param transliteratorID ID of the transliterator to test
         * @param dens value stored in the density field
         */
        Test(String transliteratorID, int dens) {
            this.transliteratorID = transliteratorID;
            this.density = dens;
        }
1088
1089         public void setErrorLimit(int limit) {
1090             errorLimit = limit;
1091         }
1092
1093         public void setPairLimit(int limit) {
1094             pairLimit = limit;
1095         }
1096
1097         // Added to do better equality check.
1098
1099         public static boolean isSame(String a, String b) {
1100             if (a.equals(b)) return true;
1101             if (a.equalsIgnoreCase(b) && isCamel(a)) return true;
1102             a = Normalizer.normalize(a, Normalizer.NFD);
1103             b = Normalizer.normalize(b, Normalizer.NFD);
1104             if (a.equals(b)) return true;
1105             if (a.equalsIgnoreCase(b) && isCamel(a)) return true;
1106             return false;
1107         }
1108
1109         /*
1110         public boolean includesSome(UnicodeSet set, String a) {
1111             int cp;
1112             for (int i = 0; i < a.length(); i += UTF16.getCharCount(cp)) {
1113                 cp = UTF16.charAt(a, i);
1114                 if (set.contains(cp)) return true;
1115             }
1116             return false;
1117         }
1118          */
1119
1120         public static boolean isCamel(String a) {
1121             //System.out.println("CamelTest");
1122             // see if string is of the form aB; e.g. lower, then upper or title
1123             int cp;
1124             boolean haveLower = false;
1125             for (int i = 0; i < a.length(); i += UTF16.getCharCount(cp)) {
1126                 cp = UTF16.charAt(a, i);
1127                 int t = UCharacter.getType(cp);
1128                 //System.out.println("\t" + t + " " + Integer.toString(cp,16) + " " + UCharacter.getName(cp));
1129                 switch (t) {
1130                 case Character.UPPERCASE_LETTER:
1131                     if (haveLower) return true;
1132                     break;
1133                 case Character.TITLECASE_LETTER:
1134                     if (haveLower) return true;
1135                     // drop through, since second letter is lower.
1136                 case Character.LOWERCASE_LETTER:
1137                     haveLower = true;
1138                     break;
1139                 }
1140             }
1141             //System.out.println("FALSE");
1142             return false;
1143         }
1144
        // Every non-letter; test(...) adds this set to toSource and toTarget.
        static final UnicodeSet okAnyway = new UnicodeSet("[^[:Letter:]]");
        // General category Other; test(...) removes this set from sourceRange and targetRange.
        static final UnicodeSet neverOk = new UnicodeSet("[:Other:]");
1147
        /**
         * Convenience overload of the six-argument {@code test} that passes
         * {@code srcRange} as the back-to-source range as well.
         *
         * @param srcRange UnicodeSet pattern for the source characters
         * @param trgtRange UnicodeSet pattern for the target characters
         * @param rdtripExclusions UnicodeSet pattern for the round-trip exclusions, may be null
         * @param logger test object that receives log and error output
         * @param legalSrc filter for legal source strings
         * @throws java.io.IOException propagated from the six-argument overload
         */
        public void test(String srcRange, String trgtRange,
                String rdtripExclusions, RoundTripTest logger, Legal legalSrc)
        throws java.io.IOException {
            test(srcRange, trgtRange, srcRange, rdtripExclusions, logger, legalSrc);
        }
1153
1154         /**
1155          * Will test 
1156          * that everything in sourceRange maps to targetRange,
1157          * that everything in targetRange maps to backtoSourceRange
1158          * that everything roundtrips from target -> source -> target, except roundtripExceptions
1159          */
1160         public void test(String srcRange, String trgtRange, String backtoSourceRange,
1161                 String rdtripExclusions, RoundTripTest logger, Legal legalSrc)
1162         throws java.io.IOException {
1163
1164             legalSource = legalSrc;
1165             sourceRange = new UnicodeSet(srcRange);
1166             sourceRange.removeAll(neverOk);
1167
1168             targetRange = new UnicodeSet(trgtRange);
1169             targetRange.removeAll(neverOk);
1170
1171             toSource = new UnicodeSet(backtoSourceRange);
1172             toSource.addAll(okAnyway);
1173
1174             toTarget = new UnicodeSet(trgtRange);
1175             toTarget.addAll(okAnyway);
1176
1177             if (rdtripExclusions != null && rdtripExclusions.length() > 0) {
1178                 roundtripExclusions = new UnicodeSet(rdtripExclusions);
1179             }else{
1180                 roundtripExclusions = new UnicodeSet(); // empty
1181             }
1182
1183             log = logger;
1184
1185             log.logln(Utility.escape("Source:  " + sourceRange));
1186             log.logln(Utility.escape("Target:  " + targetRange));
1187             log.logln(Utility.escape("Exclude: " + roundtripExclusions));
1188             if (log.isQuick()) log.logln("Abbreviated Test");
1189
1190             badCharacters = new UnicodeSet("[:other:]");
1191
1192             // make a UTF-8 output file we can read with a browser
1193
1194             // note: check that every transliterator transliterates the null string correctly!
1195
1196             // {dlf} reorganize so can run test in protected security environment
1197             //              String logFileName = "test_" + transliteratorID.replace('/', '_') + ".html";
1198
1199             //              File lf = new File(logFileName);
1200             //              log.logln("Creating log file " + lf.getAbsoluteFile());
1201
1202             //              out = new PrintWriter(new BufferedWriter(new OutputStreamWriter(
1203             //                        new FileOutputStream(logFileName), "UTF8"), 4*1024));
1204
1205             ByteArrayOutputStream bast = new ByteArrayOutputStream();
1206             out = new PrintWriter(new BufferedWriter(new OutputStreamWriter(
1207                     bast, "UTF8"), 4*1024));
1208             //out.write('\uFFEF');    // BOM
1209             out.println("<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.0 Transitional//EN\">");
1210             out.println("<HTML><HEAD>");
1211             out.println("<META content=\"text/html; charset=utf-8\" http-equiv=Content-Type></HEAD>");
1212             out.println("<BODY bgcolor='#FFFFFF' style='font-family: Arial Unicode MS'>");
1213
1214             try {
1215                 test2();
1216             } catch (TestTruncated e) {
1217                 out.println(e.getMessage());
1218             }
1219             out.println("</BODY></HTML>");
1220             out.close();
1221
1222             if (errorCount > 0) {
1223                 try {
1224                     File translitErrorDirectory = new File("translitErrorLogs");
1225                     if (!translitErrorDirectory.exists()) {
1226                         translitErrorDirectory.mkdir();
1227                     }
1228                     String logFileName = "translitErrorLogs/test_" + transliteratorID.replace('/', '_') + ".html";
1229                     File lf = new File(logFileName);
1230                     logger.logln("Creating log file " + lf.getAbsoluteFile());
1231                     FileOutputStream fos = new FileOutputStream(lf);
1232                     fos.write(bast.toByteArray());
1233                     fos.close();
1234                     logger.errln(transliteratorID + " errors: "
1235                             + errorCount + (errorCount > errorLimit ? " (at least!)" : "")
1236                             + ", see " + lf.getAbsoluteFile());
1237                 }
1238                 catch (SecurityException e) {
1239                     logger.errln(transliteratorID + " errors: "
1240                             + errorCount + (errorCount > errorLimit ? " (at least!)" : "")
1241                             + ", no log provided due to protected test domain");
1242                 }
1243             } else {
1244                 logger.logln(transliteratorID + " ok");
1245                 //                  new File(logFileName).delete();
1246             }
1247         }
1248
1249         // ok if at least one is not equal
1250         public boolean checkIrrelevants(Transliterator t, String irrelevants) {
1251             for (int i = 0; i < irrelevants.length(); ++i) {
1252                 char c = irrelevants.charAt(i);
1253                 String cs = UTF16.valueOf(c);
1254                 String targ = t.transliterate(cs);
1255                 if (cs.equals(targ)) return true;
1256             }
1257             return false;
1258         }
1259
        // Two reusable set iterators. NOTE(review): their use is outside this
        // section of the file; presumably they drive the nested character loops.
        AbbreviatedUnicodeSetIterator usi = new AbbreviatedUnicodeSetIterator();
        AbbreviatedUnicodeSetIterator usi2 = new AbbreviatedUnicodeSetIterator();

        // Assigned in test2(): the transliterator for transliteratorID ...
        Transliterator sourceToTarget;
        // ... and its inverse, obtained with getInverse().
        Transliterator targetToSource;
1265
1266         public void test2() {
1267
1268             sourceToTarget = Transliterator.getInstance(transliteratorID);
1269             targetToSource = sourceToTarget.getInverse();
1270
1271             log.logln("Checking that at least one irrevant characters is not NFC'ed");
1272             out.println("<h3>Checking that at least one irrevant characters is not NFC'ed</h3>");
1273
1274             String irrelevants = "\u2000\u2001\u2126\u212A\u212B\u2329"; // string is from NFC_NO in the UCD
1275
1276             if (!checkIrrelevants(sourceToTarget, irrelevants)) {
1277                 logFails("" + getSourceTarget(transliteratorID) + ", Must not NFC everything");
1278             }
1279             if (!checkIrrelevants(targetToSource, irrelevants)) {
1280                 logFails("" + getTargetSource(transliteratorID) + ", irrelevants");
1281             }
1282
1283             if (EXTRA_TESTS) {
1284                 log.logln("Checking that toRules works");
1285                 String rules = "";
1286                 Transliterator sourceToTarget2;
1287                 Transliterator targetToSource2;
1288                 try {
1289                     rules = sourceToTarget.toRules(false);
1290                     sourceToTarget2 = Transliterator.createFromRules("s2t2", rules, Transliterator.FORWARD);
1291                     if (PRINT_RULES) {
1292                         out.println("<h3>Forward Rules:</h3><p>");
1293                         out.println(TestUtility.replace(rules, "\n", "\u200E<br>\n\u200E"));
1294                         out.println("</p>");
1295                     }
1296                     rules = targetToSource.toRules(false);
1297                     targetToSource2 = Transliterator.createFromRules("t2s2", rules, Transliterator.FORWARD);
1298                     if (PRINT_RULES) {
1299                         out.println("<h3>Backward Rules:</h3><p>");
1300                         out.println(TestUtility.replace(rules, "\n", "\u200E<br>\n\u200E"));
1301                         out.println("</p>");
1302                     }
1303                 } catch (RuntimeException e) {
1304                     out.println("<h3>Broken Rules:</h3><p>");
1305                     out.println(TestUtility.replace(rules, "\n", "<br>\n"));
1306                     out.println("</p>");
1307                     out.flush();
1308                     throw e;
1309                 }
1310
1311                 out.println("<h3>Roundtrip Exclusions: " + new UnicodeSet(roundtripExclusions) + "</h3>");
1312                 out.flush();
1313
1314                 checkSourceTargetSource(sourceToTarget2);
1315
1316                 checkTargetSourceTarget(targetToSource2);
1317             }
1318
1319             UnicodeSet failSourceTarg = new UnicodeSet();
1320
1321
1322             checkSourceTargetSingles(failSourceTarg);
1323
1324             boolean quickRt = checkSourceTargetDoubles(failSourceTarg);
1325
1326             UnicodeSet failTargSource = new UnicodeSet();
1327             UnicodeSet failRound = new UnicodeSet();
1328
1329             checkTargetSourceSingles(failTargSource, failRound);
1330             checkTargetSourceDoubles(quickRt, failTargSource, failRound);
1331         }
1332
1333         private void checkSourceTargetSource(Transliterator sourceToTarget2) {
1334             log.logln("Checking that source -> target -> source");
1335             out.println("<h3>Checking that source -> target -> source</h3>");
1336
1337             usi.reset(sourceRange);
1338             while (usi.next()) {
1339                 int c = usi.codepoint;
1340
1341                 String cs = UTF16.valueOf(c);
1342                 String targ = sourceToTarget.transliterate(cs);
1343                 String targ2 = sourceToTarget2.transliterate(cs);
1344                 if (!targ.equals(targ2)) {
1345                     logToRulesFails("" + getSourceTarget(transliteratorID) + ", toRules", cs, targ, targ2);
1346                 }
1347             }
1348         }
1349
1350         private void checkTargetSourceTarget(Transliterator targetToSource2) {
1351             log.logln("Checking that target -> source -> target");
1352             out.println("<h3>Checking that target -> source -> target</h3>");
1353             usi.reset(targetRange);
1354             while (usi.next()) {
1355                 int c = usi.codepoint;
1356
1357                 String cs = UTF16.valueOf(c);
1358                 String targ = targetToSource.transliterate(cs);
1359                 String targ2 = targetToSource2.transliterate(cs);
1360                 if (!targ.equals(targ2)) {
1361                     logToRulesFails("" + getTargetSource(transliteratorID) + ", toRules", cs, targ, targ2);
1362                 }
1363             }
1364         }
1365
1366         private void checkSourceTargetSingles(UnicodeSet failSourceTarg) {
1367             log.logln("Checking that source characters convert to target - Singles");
1368             out.println("<h3>Checking that source characters convert to target - Singles</h3>");
1369
1370
1371             /*
1372             for (char c = 0; c < 0xFFFF; ++c) {
1373                 if (!sourceRange.contains(c)) continue;
1374              */
1375             usi.reset(sourceRange);
1376             while (usi.next()) {
1377                 int c = usi.codepoint;
1378
1379                 String cs = UTF16.valueOf(c);
1380                 String targ = sourceToTarget.transliterate(cs);
1381                 if (!toTarget.containsAll(targ)
1382                         || badCharacters.containsSome(targ)) {
1383                     String targD = Normalizer.normalize(targ, Normalizer.NFD);
1384                     if (!toTarget.containsAll(targD)
1385                             || badCharacters.containsSome(targD)) {
1386                         logWrongScript("" + getSourceTarget(transliteratorID) + "", cs, targ, toTarget, badCharacters);
1387                         failSourceTarg.add(c);
1388                         continue;
1389                     }
1390                 }
1391
1392                 String cs2 = Normalizer.normalize(cs, Normalizer.NFD);
1393                 String targ2 = sourceToTarget.transliterate(cs2);
1394                 if (!targ.equals(targ2)) {
1395                     logNotCanonical("" + getSourceTarget(transliteratorID) + "", cs, targ, cs2, targ2);
1396                 }
1397             }
1398         }
1399
1400         private boolean checkSourceTargetDoubles(UnicodeSet failSourceTarg) {
1401             log.logln("Checking that source characters convert to target - Doubles");
1402             out.println("<h3>Checking that source characters convert to target - Doubles</h3>");
1403             long count = 0;
1404
1405             /*
1406             for (char c = 0; c < 0xFFFF; ++c) {
1407                 if (TestUtility.isUnassigned(c) ||
1408                     !sourceRange.contains(c)) continue;
1409                 if (failSourceTarg.get(c)) continue;
1410
1411              */
1412
1413             UnicodeSet sourceRangeMinusFailures = new UnicodeSet(sourceRange);
1414             sourceRangeMinusFailures.removeAll(failSourceTarg);
1415
1416             boolean quickRt = log.getInclusion() < 10;
1417
1418             usi.reset(sourceRangeMinusFailures, quickRt, density);
1419
1420             while (usi.next()) {
1421                 int c = usi.codepoint;
1422
1423                 /*
1424                 for (char d = 0; d < 0xFFFF; ++d) {
1425                     if (TestUtility.isUnassigned(d) ||
1426                         !sourceRange.contains(d)) continue;
1427                     if (failSourceTarg.get(d)) continue;
1428                  */
1429                 log.logln(count + "/" + pairLimit + " Checking starting with " + UTF16.valueOf(c));
1430                 usi2.reset(sourceRangeMinusFailures, quickRt, density);
1431
1432                 while (usi2.next()) {
1433                     int d = usi2.codepoint;
1434                     ++count;
1435
1436                     String cs = UTF16.valueOf(c) + UTF16.valueOf(d);
1437                     String targ = sourceToTarget.transliterate(cs);
1438                     if (!toTarget.containsAll(targ)
1439                             || badCharacters.containsSome(targ)) {
1440                         String targD = Normalizer.normalize(targ, Normalizer.NFD);
1441                         if (!toTarget.containsAll(targD)
1442                                 || badCharacters.containsSome(targD)) {
1443                             logWrongScript("" + getSourceTarget(transliteratorID) + "", cs, targ, toTarget, badCharacters);
1444                             continue;
1445                         }
1446                     }
1447                     String cs2 = Normalizer.normalize(cs, Normalizer.NFD);
1448                     String targ2 = sourceToTarget.transliterate(cs2);
1449                     if (!targ.equals(targ2)) {
1450                         logNotCanonical("" + getSourceTarget(transliteratorID) + "", cs, targ, cs2, targ2);
1451                     }
1452                 }
1453             }
1454             return quickRt;
1455         }
1456
1457         void checkTargetSourceSingles(UnicodeSet failTargSource, UnicodeSet failRound) {
1458             log.logln("Checking that target characters convert to source and back - Singles");
1459             out.println("<h3>Checking that target characters convert to source and back - Singles</h3>");
1460
1461
1462             /*for (char c = 0; c < 0xFFFF; ++c) {
1463                 if (TestUtility.isUnassigned(c) ||
1464                     !targetRange.contains(c)) continue;
1465              */
1466
1467             usi.reset(targetRange);
1468             while (usi.next()) {
1469                 String cs;
1470                 int c;
1471                 if(usi.codepoint == UnicodeSetIterator.IS_STRING){
1472                     cs = usi.string;
1473                     c = UTF16.charAt(cs,0);
1474                 }else{
1475                     c = usi.codepoint;
1476                     cs =UTF16.valueOf(c);
1477                 }
1478
1479                 String targ = targetToSource.transliterate(cs);
1480                 String reverse = sourceToTarget.transliterate(targ);
1481
1482                 if (!toSource.containsAll(targ)
1483                         || badCharacters.containsSome(targ)) {
1484                     String targD = Normalizer.normalize(targ, Normalizer.NFD);
1485                     if (!toSource.containsAll(targD)
1486                             || badCharacters.containsSome(targD)) {
1487                         /*UnicodeSet temp = */new UnicodeSet().addAll(targD);
1488                         logWrongScript("" + getTargetSource(transliteratorID) + "", cs, targ, toSource, badCharacters);
1489                         failTargSource.add(cs);
1490                         continue;
1491                     }
1492                 }
1493                 if (!isSame(cs, reverse) && !roundtripExclusions.contains(c)
1494                         && !roundtripExclusions.contains(cs)) {
1495                     logRoundTripFailure(cs,targetToSource.getID(), targ,sourceToTarget.getID(), reverse);
1496                     failRound.add(c);
1497                     continue;
1498                 }
1499                 String targ2 = Normalizer.normalize(targ, Normalizer.NFD);
1500                 String reverse2 = sourceToTarget.transliterate(targ2);
1501                 if (!reverse.equals(reverse2)) {
1502                     logNotCanonical("" + getTargetSource(transliteratorID) + "", targ, reverse, targ2, reverse2);
1503                 }
1504             }
1505
1506         }
1507
1508         private void checkTargetSourceDoubles(boolean quickRt, UnicodeSet failTargSource,
1509                 UnicodeSet failRound) {
1510             log.logln("Checking that target characters convert to source and back - Doubles");
1511             out.println("<h3>Checking that target characters convert to source and back - Doubles</h3>");
1512             long count = 0;
1513
1514             UnicodeSet targetRangeMinusFailures = new UnicodeSet(targetRange);
1515             targetRangeMinusFailures.removeAll(failTargSource);
1516             targetRangeMinusFailures.removeAll(failRound);
1517
1518             //char[] buf = new char[4]; // maximum we can have with 2 code points
1519             /*
1520             for (char c = 0; c < 0xFFFF; ++c) {
1521                 if (TestUtility.isUnassigned(c) ||
1522                     !targetRange.contains(c)) continue;
1523              */
1524             
1525             usi.reset(targetRangeMinusFailures, quickRt, density);
1526
1527             while (usi.next()) {
1528                 int c = usi.codepoint;
1529
1530                 //log.log(TestUtility.hex(c));
1531
1532                 /*
1533                 for (char d = 0; d < 0xFFFF; ++d) {
1534                     if (TestUtility.isUnassigned(d) ||
1535                         !targetRange.contains(d)) continue;
1536                  */
1537                 log.logln(count + "/" + pairLimit + " Checking starting with " + UTF16.valueOf(c));
1538                 usi2.reset(targetRangeMinusFailures, quickRt, density);
1539
1540                 while (usi2.next()) {
1541                     
1542                     int d = usi2.codepoint;
1543                     if (d < 0) break;
1544                     
1545                     if (++count > pairLimit) {
1546                         throw new TestTruncated("Test truncated at " + pairLimit);
1547                     }
1548
1549                     String cs = UTF16.valueOf(c) + UTF16.valueOf(d);
1550                     String targ = targetToSource.transliterate(cs);
1551                     String reverse = sourceToTarget.transliterate(targ);
1552
1553                     if (!toSource.containsAll(targ) /*&& !failTargSource.contains(c) && !failTargSource.contains(d)*/
1554                             || badCharacters.containsSome(targ)) {
1555                         String targD = Normalizer.normalize(targ, Normalizer.NFD);
1556                         if (!toSource.containsAll(targD) /*&& !failTargSource.contains(c) && !failTargSource.contains(d)*/
1557                                 || badCharacters.containsSome(targD)) {
1558                             logWrongScript("" + getTargetSource(transliteratorID) + "", cs, targ, toSource, badCharacters);
1559                             continue;
1560                         }
1561                     }
1562                     if (!isSame(cs, reverse) /*&& !failRound.contains(c) && !failRound.contains(d)*/
1563                             && !roundtripExclusions.contains(c)
1564                             && !roundtripExclusions.contains(d)
1565                             && !roundtripExclusions.contains(cs)) {
1566                         logRoundTripFailure(cs,targetToSource.getID(), targ,sourceToTarget.getID(), reverse);
1567                         continue;
1568                     }
1569                     String targ2 = Normalizer.normalize(targ, Normalizer.NFD);
1570                     String reverse2 = sourceToTarget.transliterate(targ2);
1571                     if (!reverse.equals(reverse2)) {
1572                         logNotCanonical("" + getTargetSource(transliteratorID) + "", targ, reverse, targ2, reverse2);
1573                     }
1574                 }
1575             }
1576             log.logln("");
1577         }
1578
1579         /**
1580          * @param transliteratorID2
1581          * @return
1582          */
1583         private String getTargetSource(String transliteratorID2) {
1584             return "Target-Source [" + transliteratorID2 + "]";
1585         }
1586
1587         /**
1588          * @param transliteratorID2
1589          * @return
1590          */
1591         private String getSourceTarget(String transliteratorID2) {
1592             return "Source-Target [" + transliteratorID2 + "]";
1593         }
1594
1595         final String info(String s) {
1596             StringBuffer result = new StringBuffer();
1597             result.append("\u200E").append(s).append("\u200E (").append(TestUtility.hex(s)).append("/");
1598             if (false) { // append age, as a check
1599                 int cp = 0;    
1600                 for (int i = 0; i < s.length(); i += UTF16.getCharCount(cp)) {
1601                     cp = UTF16.charAt(s, i);
1602                     if (i > 0) result.append(", ");
1603                     result.append(UCharacter.getAge(cp));
1604                 }
1605             }
1606             result.append(")");
1607             return result.toString();
1608         }
1609
1610         final void logWrongScript(String label, String from, String to, 
1611                 UnicodeSet shouldContainAll, UnicodeSet shouldNotContainAny) {
1612             if (++errorCount > errorLimit) {
1613                 throw new TestTruncated("Test truncated; too many failures");
1614             }
1615             String toD = Normalizer.normalize(to, Normalizer.NFD);
1616             UnicodeSet temp = new UnicodeSet().addAll(toD);
1617             UnicodeSet bad = new UnicodeSet(shouldNotContainAny).retainAll(temp)
1618             .addAll(new UnicodeSet(temp).removeAll(shouldContainAll));
1619
1620             out.println("<br>Fail " + label + ": " +
1621                     info(from) + " => " + info(to) + " " + bad
1622             );
1623         }
1624
1625         final void logNotCanonical(String label, String from, String to, String fromCan, String toCan) {
1626             if (++errorCount > errorLimit) {
1627                 throw new TestTruncated("Test truncated; too many failures");
1628             }
1629             out.println("<br>Fail (can.equiv) " + label + ": " +
1630                     info(from) + " => " + info(to) +
1631                     " -- " +
1632                     info(fromCan) + " => " + info(toCan) + ")"
1633             );
1634         }
1635
1636         final void logFails(String label) {
1637             if (++errorCount > errorLimit) {
1638                 throw new TestTruncated("Test truncated; too many failures");
1639             }
1640             out.println("<br>Fail (can.equiv)" + label);
1641         }
1642
1643         final void logToRulesFails(String label, String from, String to, String toCan) {
1644             if (++errorCount > errorLimit) {
1645                 throw new TestTruncated("Test truncated; too many failures");
1646             }
1647             out.println("<br>Fail " + label + ": " +
1648                     info(from) + " => " + info(to) + ", " + info(toCan)
1649             );
1650         }
1651
1652         final void logRoundTripFailure(String from,String toID, String to,String backID, String back) {
1653             if (!legalSource.is(from)) return; // skip illegals
1654
1655             if (++errorCount > errorLimit) {
1656                 throw new TestTruncated("Test truncated; too many failures");
1657             }
1658             out.println("<br>Fail Roundtrip: " +
1659                     info(from) + " "+toID+" => " + info(to) + " " + backID+" => " + info(back)
1660             );
1661         }
1662
1663         /*
1664          * Characters to filter for source-target mapping completeness
1665          * Typically is base alphabet, minus extended characters
1666          * Default is ASCII letters for Latin
1667          */
1668         /*
1669         public boolean isSource(char c) {
1670             if (!sourceRange.contains(c)) return false;
1671             return true;
1672         }
1673          */
1674
1675         /*
1676          * Characters to check for target back to source mapping.
1677          * Typically the same as the target script, plus punctuation
1678          */
1679         /*
1680         public boolean isReceivingSource(char c) {
1681             if (!targetRange.contains(c)) return false;
1682             return true;
1683         }
1684          */
1685         /*
1686          * Characters to filter for target-source mapping
1687          * Typically is base alphabet, minus extended characters
1688          */
1689         /*
1690         public boolean isTarget(char c) {
1691             byte script = TestUtility.getScript(c);
1692             if (script != targetScript) return false;
1693             if (!TestUtility.isLetter(c)) return false;
1694             if (targetRange != null && !targetRange.contains(c)) return false;
1695             return true;
1696         }
1697          */
1698
1699         /*
1700          * Characters to check for target-source mapping
1701          * Typically the same as the source script, plus punctuation
1702          */
1703         /*
1704         public boolean isReceivingTarget(char c) {
1705             byte script = TestUtility.getScript(c);
1706             return (script == targetScript || script == TestUtility.COMMON_SCRIPT);
1707         }
1708
1709         final boolean isSource(String s) {
1710             for (int i = 0; i < s.length(); ++i) {
1711                 if (!isSource(s.charAt(i))) return false;
1712             }
1713             return true;
1714         }
1715
1716         final boolean isTarget(String s) {
1717             for (int i = 0; i < s.length(); ++i) {
1718                 if (!isTarget(s.charAt(i))) return false;
1719             }
1720             return true;
1721         }
1722
1723         final boolean isReceivingSource(String s) {
1724             for (int i = 0; i < s.length(); ++i) {
1725                 if (!isReceivingSource(s.charAt(i))) return false;
1726             }
1727             return true;
1728         }
1729
1730         final boolean isReceivingTarget(String s) {
1731             for (int i = 0; i < s.length(); ++i) {
1732                 if (!isReceivingTarget(s.charAt(i))) return false;
1733             }
1734             return true;
1735         }
1736          */
1737
1738         static class TestTruncated extends RuntimeException {
1739             /**
1740              * For serialization
1741              */
1742             private static final long serialVersionUID = 3361828190488168323L;
1743
1744             TestTruncated(String msg) {
1745                 super(msg);
1746             }
1747         }
1748     }
1749
1750     //  static class TestHangul extends Test {
1751     //      TestHangul () {
1752     //          super("Jamo-Hangul", TestUtility.JAMO_SCRIPT, TestUtility.HANGUL_SCRIPT);
1753     //      }
1754     //
1755     //      public boolean isSource(char c) {
1756     //          if (0x1113 <= c && c <= 0x1160) return false;
1757     //          if (0x1176 <= c && c <= 0x11F9) return false;
1758     //          if (0x3131 <= c && c <= 0x318E) return false;
1759     //          return super.isSource(c);
1760     //      }
1761     //  }
1762
1763
1764 }