]> gitweb.fperrin.net Git - Dictionary.git/blob - jars/icu4j-4_2_1-src/src/com/ibm/icu/dev/test/collator/CollationThaiTest.java
icu4jsrc
[Dictionary.git] / jars / icu4j-4_2_1-src / src / com / ibm / icu / dev / test / collator / CollationThaiTest.java
1 /*\r
2  *******************************************************************************\r
3  * Copyright (C) 2002-2005, International Business Machines Corporation and    *\r
4  * others. All Rights Reserved.                                                *\r
5  *******************************************************************************\r
6  */\r
7 \r
8 /** \r
 * Port From:   ICU4C v2.1 : collate/CollationThaiTest
 * Source File: $ICU4CRoot/source/test/intltest/thcoll.cpp
11  **/\r
12  \r
13 package com.ibm.icu.dev.test.collator;\r
14 \r
15 import com.ibm.icu.dev.test.*;\r
16 import com.ibm.icu.text.*;\r
17 import java.util.Locale;\r
18 import java.util.Comparator;\r
19 import java.util.Arrays;\r
20 import java.io.*;\r
21 \r
22 public class CollationThaiTest extends TestFmwk {\r
23     \r
24     final int MAX_FAILURES_TO_SHOW = -1;\r
25     \r
26     public static void main(String[] args) throws Exception {\r
27         new CollationThaiTest().run(args);\r
28     }\r
29     \r
30     /**\r
31      * Odd corner conditions taken from "How to Sort Thai Without Rewriting Sort",\r
32      * by Doug Cooper, http://seasrc.th.net/paper/thaisort.zip\r
33      */\r
34     public void TestCornerCases() {\r
35         String TESTS[] = {\r
36             // Shorter words precede longer\r
37             "\u0e01",                               "<",    "\u0e01\u0e01",\r
38     \r
39             // Tone marks are considered after letters (i.e. are primary ignorable)\r
40             "\u0e01\u0e32",                        "<",    "\u0e01\u0e49\u0e32",\r
41     \r
42             // ditto for other over-marks\r
43             "\u0e01\u0e32",                        "<",    "\u0e01\u0e32\u0e4c",\r
44     \r
45             // commonly used mark-in-context order.\r
46             // In effect, marks are sorted after each syllable.\r
47             "\u0e01\u0e32\u0e01\u0e49\u0e32",   "<",    "\u0e01\u0e48\u0e32\u0e01\u0e49\u0e32",\r
48     \r
49             // Hyphens and other punctuation follow whitespace but come before letters\r
50             "\u0e01\u0e32",                        "<",    "\u0e01\u0e32-",\r
51             "\u0e01\u0e32-",                       "<",    "\u0e01\u0e32\u0e01\u0e32",\r
52     \r
53             // Doubler follows an indentical word without the doubler\r
54             "\u0e01\u0e32",                        "<",    "\u0e01\u0e32\u0e46",\r
55             "\u0e01\u0e32\u0e46",                 "<",    "\u0e01\u0e32\u0e01\u0e32",\r
56     \r
57             // \u0e45 after either \u0e24 or \u0e26 is treated as a single\r
58             // combining character, similar to "c < ch" in traditional spanish.\r
59             // TODO: beef up this case\r
60             "\u0e24\u0e29\u0e35",                 "<",    "\u0e24\u0e45\u0e29\u0e35",\r
61             "\u0e26\u0e29\u0e35",                 "<",    "\u0e26\u0e45\u0e29\u0e35",\r
62     \r
63             // Vowels reorder, should compare \u0e2d and \u0e34\r
64             "\u0e40\u0e01\u0e2d",                 "<",    "\u0e40\u0e01\u0e34",\r
65     \r
66             // Tones are compared after the rest of the word (e.g. primary ignorable)\r
67             "\u0e01\u0e32\u0e01\u0e48\u0e32",   "<",    "\u0e01\u0e49\u0e32\u0e01\u0e32",\r
68     \r
69             // Periods are ignored entirely\r
70             "\u0e01.\u0e01.",                      "<",    "\u0e01\u0e32",\r
71         };\r
72         \r
73         RuleBasedCollator coll = null;\r
74         try {\r
75             coll = getThaiCollator();\r
76         } catch (Exception e) {\r
77             warnln("could not construct Thai collator");\r
78             return;\r
79         }\r
80         compareArray(coll, TESTS); \r
81     }\r
82     \r
83     void compareArray(RuleBasedCollator c, String[] tests) {\r
84         for (int i = 0; i < tests.length; i += 3) {\r
85             int expect = 0;\r
86             if (tests[i+1].equals("<")) {\r
87                 expect = -1;\r
88             } else if (tests[i+1].equals(">")) {\r
89                 expect = 1;\r
90             } else if (tests[i+1].equals("=")) {\r
91                 expect = 0;\r
92             } else {\r
93                 // expect = Integer.decode(tests[i+1]).intValue();\r
94                 errln("Error: unknown operator " + tests[i+1]);\r
95                 return;\r
96             }\r
97             String s1 = tests[i];\r
98             String s2 = tests[i+2];\r
99             CollationTest.doTest(this, c, s1, s2, expect);\r
100         }\r
101     }\r
102     \r
103     int sign(int i ) {\r
104         if (i < 0) return -1;\r
105         if (i > 0) return 1;\r
106         return 0;\r
107     }\r
108     \r
109     /**\r
110      * Read the external dictionary file, which is already in proper\r
111      * sorted order, and confirm that the collator compares each line as\r
112      * preceding the following line.\r
113      */\r
114     public void TestDictionary() {\r
115         RuleBasedCollator coll = null;\r
116         try {\r
117             coll = getThaiCollator();\r
118         } catch (Exception e) {\r
119             warnln("could not construct Thai collator");\r
120             return;\r
121         }\r
122      \r
123         // Read in a dictionary of Thai words\r
124         BufferedReader in = null;\r
125         String fileName = "riwords.txt";\r
126         try {\r
127             in = TestUtil.getDataReader(fileName, "UTF-8");\r
128         } catch (SecurityException e) {\r
129             warnln("Security exception encountered reading test data file.");\r
130                    return;\r
131         } catch (Exception e) {\r
132             try {\r
133                 if (in != null) {\r
134                     in.close();\r
135                 }\r
136             } catch (IOException ioe) {}\r
137             errln("Error: could not open test file: " + fileName \r
138                   + ". Aborting test.");\r
139             return;        \r
140         }\r
141     \r
142         //\r
143         // Loop through each word in the dictionary and compare it to the previous\r
144         // word.  They should be in sorted order.\r
145         //\r
146         String lastWord = "";\r
147         int line = 0;\r
148         int failed = 0;\r
149         int wordCount = 0;\r
150         try {\r
151         String word = in.readLine();\r
152         while (word != null) {\r
153             line++;\r
154              \r
155             // Skip comments and blank lines\r
156             if (word.length() == 0 || word.charAt(0) == 0x23) {\r
157                 word = in.readLine();\r
158                 continue;\r
159             }\r
160     \r
161             // Show the first 8 words being compared, so we can see what's happening\r
162             ++wordCount;\r
163             if (wordCount <= 8) {\r
164                 logln("Word " + wordCount + ": " + word);\r
165             }\r
166     \r
167             if (lastWord.length() > 0) {\r
168                 CollationTest.doTest(this, coll, lastWord, word, -1);\r
169                 int result = coll.compare(lastWord, word); \r
170         \r
171                 if (result >= 0) {\r
172                     failed++;\r
173                     if (MAX_FAILURES_TO_SHOW < 0 || failed <= MAX_FAILURES_TO_SHOW) {\r
174                         String msg = "--------------------------------------------\n"\r
175                                     + line\r
176                                     + " compare(" + lastWord\r
177                                     + ", " + word + ") returned " + result\r
178                                     + ", expected -1\n";\r
179                         CollationKey k1, k2;\r
180                         try {\r
181                             k1 = coll.getCollationKey(lastWord);\r
182                             k2 = coll.getCollationKey(word);\r
183                         } catch (Exception e) {\r
184                             errln("Fail: getCollationKey returned ");\r
185                             return;\r
186                         }\r
187                         msg += "key1: " + prettify(k1) + "\n"\r
188                                     + "key2: " + prettify(k2);\r
189                         errln(msg);\r
190                     }\r
191                 }\r
192             }\r
193             lastWord = word;\r
194             word = in.readLine();\r
195         }\r
196         } catch (IOException e) {\r
197             errln("IOException " + e.getMessage());\r
198         }\r
199     \r
200         if (failed != 0) {\r
201             if (failed > MAX_FAILURES_TO_SHOW) {\r
202                 errln("Too many failures; only the first " +\r
203                       MAX_FAILURES_TO_SHOW + " failures were shown");\r
204             }\r
205             errln("Summary: " + failed + " of " + (line - 1) +\r
206                   " comparisons failed");\r
207         }\r
208     \r
209         logln("Words checked: " + wordCount);\r
210     }\r
211     \r
212     public void TestInvalidThai() \r
213     {\r
214         String tests[] = { "\u0E44\u0E01\u0E44\u0E01",\r
215                            "\u0E44\u0E01\u0E01\u0E44",\r
216                            "\u0E01\u0E44\u0E01\u0E44",\r
217                            "\u0E01\u0E01\u0E44\u0E44",\r
218                            "\u0E44\u0E44\u0E01\u0E01",\r
219                            "\u0E01\u0E44\u0E44\u0E01",\r
220                          };\r
221      \r
222         RuleBasedCollator collator;\r
223         StrCmp comparator;\r
224         try {\r
225             collator = getThaiCollator();\r
226             comparator = new StrCmp();\r
227         } catch (Exception e) {\r
228             warnln("could not construct Thai collator");\r
229             return;\r
230         }\r
231         \r
232         Arrays.sort(tests, comparator);\r
233      \r
234         for (int i = 0; i < tests.length; i ++)\r
235         {\r
236             for (int j = i + 1; j < tests.length; j ++) {\r
237                 if (collator.compare(tests[i], tests[j]) > 0) {\r
238                     // inconsistency ordering found!\r
239                     errln("Inconsistent ordering between strings " + i \r
240                           + " and " + j);\r
241                 }\r
242             }\r
243             CollationElementIterator iterator \r
244                 = collator.getCollationElementIterator(tests[i]);\r
245             CollationTest.backAndForth(this, iterator);\r
246         }\r
247     }\r
248     \r
249     public void TestReordering() \r
250     {\r
251         String tests[] = {\r
252             "\u0E41c\u0301",      "=", "\u0E41\u0107", // composition\r
253             "\u0E41\uD835\uDFCE", "<", "\u0E41\uD835\uDFCF", // supplementaries\r
254             "\u0E41\uD834\uDD5F", "=", "\u0E41\uD834\uDD58\uD834\uDD65", // supplementary composition decomps to supplementary\r
255             "\u0E41\uD87E\uDC02", "=", "\u0E41\u4E41", // supplementary composition decomps to BMP\r
256             "\u0E41\u0301",       "=", "\u0E41\u0301", // unsafe (just checking backwards iteration)\r
257             "\u0E41\u0301\u0316", "=", "\u0E41\u0316\u0301",\r
258 \r
259             "abc\u0E41c\u0301",      "=", "abc\u0E41\u0107", // composition\r
260             "abc\u0E41\uD834\uDC00", "<", "abc\u0E41\uD834\uDC01", // supplementaries\r
261             "abc\u0E41\uD834\uDD5F", "=", "abc\u0E41\uD834\uDD58\uD834\uDD65", // supplementary composition decomps to supplementary\r
262             "abc\u0E41\uD87E\uDC02", "=", "abc\u0E41\u4E41", // supplementary composition decomps to BMP\r
263             "abc\u0E41\u0301",       "=", "abc\u0E41\u0301", // unsafe (just checking backwards iteration)\r
264             "abc\u0E41\u0301\u0316", "=", "abc\u0E41\u0316\u0301",\r
265 \r
266             "\u0E41c\u0301abc",      "=", "\u0E41\u0107abc", // composition\r
267             "\u0E41\uD834\uDC00abc", "<", "\u0E41\uD834\uDC01abc", // supplementaries\r
268             "\u0E41\uD834\uDD5Fabc", "=", "\u0E41\uD834\uDD58\uD834\uDD65abc", // supplementary composition decomps to supplementary\r
269             "\u0E41\uD87E\uDC02abc", "=", "\u0E41\u4E41abc", // supplementary composition decomps to BMP\r
270             "\u0E41\u0301abc",       "=", "\u0E41\u0301abc", // unsafe (just checking backwards iteration)\r
271             "\u0E41\u0301\u0316abc", "=", "\u0E41\u0316\u0301abc",\r
272 \r
273             "abc\u0E41c\u0301abc",      "=", "abc\u0E41\u0107abc", // composition\r
274             "abc\u0E41\uD834\uDC00abc", "<", "abc\u0E41\uD834\uDC01abc", // supplementaries\r
275             "abc\u0E41\uD834\uDD5Fabc", "=", "abc\u0E41\uD834\uDD58\uD834\uDD65abc", // supplementary composition decomps to supplementary\r
276             "abc\u0E41\uD87E\uDC02abc", "=", "abc\u0E41\u4E41abc", // supplementary composition decomps to BMP\r
277             "abc\u0E41\u0301abc",       "=", "abc\u0E41\u0301abc", // unsafe (just checking backwards iteration)\r
278             "abc\u0E41\u0301\u0316abc", "=", "abc\u0E41\u0316\u0301abc",\r
279         };\r
280 \r
281         RuleBasedCollator collator;\r
282         try {\r
283             collator = (RuleBasedCollator)getThaiCollator();\r
284         } catch (Exception e) {\r
285             warnln("could not construct Thai collator");\r
286             return;\r
287         }\r
288         compareArray(collator, tests);\r
289     \r
290         String rule = "& c < ab";\r
291         String testcontraction[] = { "\u0E41ab", ">", "\u0E41c"};\r
292         try {\r
293             collator = new RuleBasedCollator(rule);\r
294         } catch (Exception e) {\r
295             errln("Error: could not construct collator with rule " + rule);\r
296             return;\r
297         }\r
298         compareArray(collator, testcontraction);\r
299     }\r
300     \r
301     \r
302     \r
303     \r
304     \r
305     \r
306     \r
307     \r
308     \r
309 \r
310     \r
311     String prettify(CollationKey sourceKey) {\r
312         int i;\r
313         byte[] bytes= sourceKey.toByteArray();\r
314         String target = "[";\r
315     \r
316         for (i = 0; i < bytes.length; i++) {\r
317             target += Integer.toHexString(bytes[i]);\r
318             target += " ";\r
319         }\r
320         target += "]";\r
321         return target;\r
322     }\r
323     \r
324     // private inner class -------------------------------------------------\r
325     \r
326     private static final class StrCmp implements Comparator \r
327     {\r
328         public int compare(Object string1, Object string2) \r
329         {\r
330             return collator.compare(string1, string2);\r
331         }\r
332         \r
333         StrCmp() throws Exception\r
334         {\r
335             collator = getThaiCollator();\r
336         }\r
337         \r
338         Collator collator;\r
339     }\r
340     \r
341     // private data members ------------------------------------------------\r
342     \r
343     private static RuleBasedCollator m_collator_;\r
344     \r
345     // private methods -----------------------------------------------------\r
346     \r
347     private static RuleBasedCollator getThaiCollator() throws Exception\r
348     {\r
349         if (m_collator_ == null) {\r
350             m_collator_ = (RuleBasedCollator)Collator.getInstance(\r
351                                                 new Locale("th", "TH", ""));\r
352         }\r
353         return m_collator_;\r
354     }\r
355 }\r