]> gitweb.fperrin.net Git - Dictionary.git/blob - jars/icu4j-4_4_2-src/main/tests/collate/src/com/ibm/icu/dev/test/collator/CollationThaiTest.java
go
[Dictionary.git] / jars / icu4j-4_4_2-src / main / tests / collate / src / com / ibm / icu / dev / test / collator / CollationThaiTest.java
1 /*\r
2  *******************************************************************************\r
3  * Copyright (C) 2002-2010, International Business Machines Corporation and    *\r
4  * others. All Rights Reserved.                                                *\r
5  *******************************************************************************\r
6  */\r
7 \r
8 /** \r
9  * Port From:   ICU4C v2.1 : collate/CollationRegressionTest\r
10  * Source File: $ICU4CRoot/source/test/intltest/regcoll.cpp\r
11  **/\r
12  \r
13 package com.ibm.icu.dev.test.collator;\r
14 \r
15 import java.io.BufferedReader;\r
16 import java.io.IOException;\r
17 import java.util.Arrays;\r
18 import java.util.Comparator;\r
19 import java.util.Locale;\r
20 \r
21 import com.ibm.icu.dev.test.TestFmwk;\r
22 import com.ibm.icu.dev.test.TestUtil;\r
23 import com.ibm.icu.text.CollationElementIterator;\r
24 import com.ibm.icu.text.CollationKey;\r
25 import com.ibm.icu.text.Collator;\r
26 import com.ibm.icu.text.RuleBasedCollator;\r
27 \r
28 public class CollationThaiTest extends TestFmwk {\r
29     \r
30     final int MAX_FAILURES_TO_SHOW = -1;\r
31     \r
32     public static void main(String[] args) throws Exception {\r
33         new CollationThaiTest().run(args);\r
34     }\r
35     \r
36     /**\r
37      * Odd corner conditions taken from "How to Sort Thai Without Rewriting Sort",\r
38      * by Doug Cooper, http://seasrc.th.net/paper/thaisort.zip\r
39      */\r
40     public void TestCornerCases() {\r
41         String TESTS[] = {\r
42             // Shorter words precede longer\r
43             "\u0e01",                               "<",    "\u0e01\u0e01",\r
44     \r
45             // Tone marks are considered after letters (i.e. are primary ignorable)\r
46             "\u0e01\u0e32",                        "<",    "\u0e01\u0e49\u0e32",\r
47     \r
48             // ditto for other over-marks\r
49             "\u0e01\u0e32",                        "<",    "\u0e01\u0e32\u0e4c",\r
50     \r
51             // commonly used mark-in-context order.\r
52             // In effect, marks are sorted after each syllable.\r
53             "\u0e01\u0e32\u0e01\u0e49\u0e32",   "<",    "\u0e01\u0e48\u0e32\u0e01\u0e49\u0e32",\r
54     \r
55             // Hyphens and other punctuation follow whitespace but come before letters\r
56             "\u0e01\u0e32",                        "<",    "\u0e01\u0e32-",\r
57             "\u0e01\u0e32-",                       "<",    "\u0e01\u0e32\u0e01\u0e32",\r
58     \r
59             // Doubler follows an indentical word without the doubler\r
60             "\u0e01\u0e32",                        "<",    "\u0e01\u0e32\u0e46",\r
61             "\u0e01\u0e32\u0e46",                 "<",    "\u0e01\u0e32\u0e01\u0e32",\r
62     \r
63             // \u0e45 after either \u0e24 or \u0e26 is treated as a single\r
64             // combining character, similar to "c < ch" in traditional spanish.\r
65             // TODO: beef up this case\r
66             "\u0e24\u0e29\u0e35",                 "<",    "\u0e24\u0e45\u0e29\u0e35",\r
67             "\u0e26\u0e29\u0e35",                 "<",    "\u0e26\u0e45\u0e29\u0e35",\r
68     \r
69             // Vowels reorder, should compare \u0e2d and \u0e34\r
70             "\u0e40\u0e01\u0e2d",                 "<",    "\u0e40\u0e01\u0e34",\r
71     \r
72             // Tones are compared after the rest of the word (e.g. primary ignorable)\r
73             "\u0e01\u0e32\u0e01\u0e48\u0e32",   "<",    "\u0e01\u0e49\u0e32\u0e01\u0e32",\r
74     \r
75             // Periods are ignored entirely\r
76             "\u0e01.\u0e01.",                      "<",    "\u0e01\u0e32",\r
77         };\r
78         \r
79         RuleBasedCollator coll = null;\r
80         try {\r
81             coll = getThaiCollator();\r
82         } catch (Exception e) {\r
83             warnln("could not construct Thai collator");\r
84             return;\r
85         }\r
86         compareArray(coll, TESTS); \r
87     }\r
88     \r
89     void compareArray(RuleBasedCollator c, String[] tests) {\r
90         for (int i = 0; i < tests.length; i += 3) {\r
91             int expect = 0;\r
92             if (tests[i+1].equals("<")) {\r
93                 expect = -1;\r
94             } else if (tests[i+1].equals(">")) {\r
95                 expect = 1;\r
96             } else if (tests[i+1].equals("=")) {\r
97                 expect = 0;\r
98             } else {\r
99                 // expect = Integer.decode(tests[i+1]).intValue();\r
100                 errln("Error: unknown operator " + tests[i+1]);\r
101                 return;\r
102             }\r
103             String s1 = tests[i];\r
104             String s2 = tests[i+2];\r
105             CollationTest.doTest(this, c, s1, s2, expect);\r
106         }\r
107     }\r
108     \r
109     int sign(int i ) {\r
110         if (i < 0) return -1;\r
111         if (i > 0) return 1;\r
112         return 0;\r
113     }\r
114     \r
115     /**\r
116      * Read the external dictionary file, which is already in proper\r
117      * sorted order, and confirm that the collator compares each line as\r
118      * preceding the following line.\r
119      */\r
120     public void TestDictionary() {\r
121         RuleBasedCollator coll = null;\r
122         try {\r
123             coll = getThaiCollator();\r
124         } catch (Exception e) {\r
125             warnln("could not construct Thai collator");\r
126             return;\r
127         }\r
128      \r
129         // Read in a dictionary of Thai words\r
130         BufferedReader in = null;\r
131         String fileName = "riwords.txt";\r
132         try {\r
133             in = TestUtil.getDataReader(fileName, "UTF-8");\r
134         } catch (SecurityException e) {\r
135             warnln("Security exception encountered reading test data file.");\r
136                    return;\r
137         } catch (Exception e) {\r
138             try {\r
139                 if (in != null) {\r
140                     in.close();\r
141                 }\r
142             } catch (IOException ioe) {}\r
143             errln("Error: could not open test file: " + fileName \r
144                   + ". Aborting test.");\r
145             return;        \r
146         }\r
147     \r
148         //\r
149         // Loop through each word in the dictionary and compare it to the previous\r
150         // word.  They should be in sorted order.\r
151         //\r
152         String lastWord = "";\r
153         int line = 0;\r
154         int failed = 0;\r
155         int wordCount = 0;\r
156         try {\r
157         String word = in.readLine();\r
158         while (word != null) {\r
159             line++;\r
160              \r
161             // Skip comments and blank lines\r
162             if (word.length() == 0 || word.charAt(0) == 0x23) {\r
163                 word = in.readLine();\r
164                 continue;\r
165             }\r
166     \r
167             // Show the first 8 words being compared, so we can see what's happening\r
168             ++wordCount;\r
169             if (wordCount <= 8) {\r
170                 logln("Word " + wordCount + ": " + word);\r
171             }\r
172     \r
173             if (lastWord.length() > 0) {\r
174                 CollationTest.doTest(this, coll, lastWord, word, -1);\r
175                 int result = coll.compare(lastWord, word); \r
176         \r
177                 if (result >= 0) {\r
178                     failed++;\r
179                     if (MAX_FAILURES_TO_SHOW < 0 || failed <= MAX_FAILURES_TO_SHOW) {\r
180                         String msg = "--------------------------------------------\n"\r
181                                     + line\r
182                                     + " compare(" + lastWord\r
183                                     + ", " + word + ") returned " + result\r
184                                     + ", expected -1\n";\r
185                         CollationKey k1, k2;\r
186                         try {\r
187                             k1 = coll.getCollationKey(lastWord);\r
188                             k2 = coll.getCollationKey(word);\r
189                         } catch (Exception e) {\r
190                             errln("Fail: getCollationKey returned ");\r
191                             return;\r
192                         }\r
193                         msg += "key1: " + prettify(k1) + "\n"\r
194                                     + "key2: " + prettify(k2);\r
195                         errln(msg);\r
196                     }\r
197                 }\r
198             }\r
199             lastWord = word;\r
200             word = in.readLine();\r
201         }\r
202         } catch (IOException e) {\r
203             errln("IOException " + e.getMessage());\r
204         }\r
205     \r
206         if (failed != 0) {\r
207             if (failed > MAX_FAILURES_TO_SHOW) {\r
208                 errln("Too many failures; only the first " +\r
209                       MAX_FAILURES_TO_SHOW + " failures were shown");\r
210             }\r
211             errln("Summary: " + failed + " of " + (line - 1) +\r
212                   " comparisons failed");\r
213         }\r
214     \r
215         logln("Words checked: " + wordCount);\r
216     }\r
217     \r
218     public void TestInvalidThai() \r
219     {\r
220         String tests[] = { "\u0E44\u0E01\u0E44\u0E01",\r
221                            "\u0E44\u0E01\u0E01\u0E44",\r
222                            "\u0E01\u0E44\u0E01\u0E44",\r
223                            "\u0E01\u0E01\u0E44\u0E44",\r
224                            "\u0E44\u0E44\u0E01\u0E01",\r
225                            "\u0E01\u0E44\u0E44\u0E01",\r
226                          };\r
227      \r
228         RuleBasedCollator collator;\r
229         StrCmp comparator;\r
230         try {\r
231             collator = getThaiCollator();\r
232             comparator = new StrCmp();\r
233         } catch (Exception e) {\r
234             warnln("could not construct Thai collator");\r
235             return;\r
236         }\r
237         \r
238         Arrays.sort(tests, comparator);\r
239      \r
240         for (int i = 0; i < tests.length; i ++)\r
241         {\r
242             for (int j = i + 1; j < tests.length; j ++) {\r
243                 if (collator.compare(tests[i], tests[j]) > 0) {\r
244                     // inconsistency ordering found!\r
245                     errln("Inconsistent ordering between strings " + i \r
246                           + " and " + j);\r
247                 }\r
248             }\r
249             CollationElementIterator iterator \r
250                 = collator.getCollationElementIterator(tests[i]);\r
251             CollationTest.backAndForth(this, iterator);\r
252         }\r
253     }\r
254     \r
255     public void TestReordering() \r
256     {\r
257         String tests[] = {\r
258             "\u0E41c\u0301",      "=", "\u0E41\u0107", // composition\r
259             "\u0E41\uD835\uDFCE", "<", "\u0E41\uD835\uDFCF", // supplementaries\r
260             "\u0E41\uD834\uDD5F", "=", "\u0E41\uD834\uDD58\uD834\uDD65", // supplementary composition decomps to supplementary\r
261             "\u0E41\uD87E\uDC02", "=", "\u0E41\u4E41", // supplementary composition decomps to BMP\r
262             "\u0E41\u0301",       "=", "\u0E41\u0301", // unsafe (just checking backwards iteration)\r
263             "\u0E41\u0301\u0316", "=", "\u0E41\u0316\u0301",\r
264 \r
265             "abc\u0E41c\u0301",      "=", "abc\u0E41\u0107", // composition\r
266             "abc\u0E41\uD834\uDC00", "<", "abc\u0E41\uD834\uDC01", // supplementaries\r
267             "abc\u0E41\uD834\uDD5F", "=", "abc\u0E41\uD834\uDD58\uD834\uDD65", // supplementary composition decomps to supplementary\r
268             "abc\u0E41\uD87E\uDC02", "=", "abc\u0E41\u4E41", // supplementary composition decomps to BMP\r
269             "abc\u0E41\u0301",       "=", "abc\u0E41\u0301", // unsafe (just checking backwards iteration)\r
270             "abc\u0E41\u0301\u0316", "=", "abc\u0E41\u0316\u0301",\r
271 \r
272             "\u0E41c\u0301abc",      "=", "\u0E41\u0107abc", // composition\r
273             "\u0E41\uD834\uDC00abc", "<", "\u0E41\uD834\uDC01abc", // supplementaries\r
274             "\u0E41\uD834\uDD5Fabc", "=", "\u0E41\uD834\uDD58\uD834\uDD65abc", // supplementary composition decomps to supplementary\r
275             "\u0E41\uD87E\uDC02abc", "=", "\u0E41\u4E41abc", // supplementary composition decomps to BMP\r
276             "\u0E41\u0301abc",       "=", "\u0E41\u0301abc", // unsafe (just checking backwards iteration)\r
277             "\u0E41\u0301\u0316abc", "=", "\u0E41\u0316\u0301abc",\r
278 \r
279             "abc\u0E41c\u0301abc",      "=", "abc\u0E41\u0107abc", // composition\r
280             "abc\u0E41\uD834\uDC00abc", "<", "abc\u0E41\uD834\uDC01abc", // supplementaries\r
281             "abc\u0E41\uD834\uDD5Fabc", "=", "abc\u0E41\uD834\uDD58\uD834\uDD65abc", // supplementary composition decomps to supplementary\r
282             "abc\u0E41\uD87E\uDC02abc", "=", "abc\u0E41\u4E41abc", // supplementary composition decomps to BMP\r
283             "abc\u0E41\u0301abc",       "=", "abc\u0E41\u0301abc", // unsafe (just checking backwards iteration)\r
284             "abc\u0E41\u0301\u0316abc", "=", "abc\u0E41\u0316\u0301abc",\r
285         };\r
286 \r
287         RuleBasedCollator collator;\r
288         try {\r
289             collator = (RuleBasedCollator)getThaiCollator();\r
290         } catch (Exception e) {\r
291             warnln("could not construct Thai collator");\r
292             return;\r
293         }\r
294         compareArray(collator, tests);\r
295     \r
296         String rule = "& c < ab";\r
297         String testcontraction[] = { "\u0E41ab", ">", "\u0E41c"};\r
298         try {\r
299             collator = new RuleBasedCollator(rule);\r
300         } catch (Exception e) {\r
301             errln("Error: could not construct collator with rule " + rule);\r
302             return;\r
303         }\r
304         compareArray(collator, testcontraction);\r
305     }\r
306     \r
307     \r
308     \r
309     \r
310     \r
311     \r
312     \r
313     \r
314     \r
315 \r
316     \r
317     String prettify(CollationKey sourceKey) {\r
318         int i;\r
319         byte[] bytes= sourceKey.toByteArray();\r
320         String target = "[";\r
321     \r
322         for (i = 0; i < bytes.length; i++) {\r
323             target += Integer.toHexString(bytes[i]);\r
324             target += " ";\r
325         }\r
326         target += "]";\r
327         return target;\r
328     }\r
329     \r
330     // private inner class -------------------------------------------------\r
331     \r
332     private static final class StrCmp implements Comparator<String> \r
333     {\r
334         public int compare(String string1, String string2) \r
335         {\r
336             return collator.compare(string1, string2);\r
337         }\r
338         \r
339         StrCmp() throws Exception\r
340         {\r
341             collator = getThaiCollator();\r
342         }\r
343         \r
344         Collator collator;\r
345     }\r
346     \r
347     // private data members ------------------------------------------------\r
348     \r
349     private static RuleBasedCollator m_collator_;\r
350     \r
351     // private methods -----------------------------------------------------\r
352     \r
353     private static RuleBasedCollator getThaiCollator() throws Exception\r
354     {\r
355         if (m_collator_ == null) {\r
356             m_collator_ = (RuleBasedCollator)Collator.getInstance(\r
357                                                 new Locale("th", "TH", ""));\r
358         }\r
359         return m_collator_;\r
360     }\r
361 }\r