]> gitweb.fperrin.net Git - Dictionary.git/blob - jars/icu4j-4_4_2-src/main/tests/core/src/com/ibm/icu/dev/test/util/TrieTest.java
go
[Dictionary.git] / jars / icu4j-4_4_2-src / main / tests / core / src / com / ibm / icu / dev / test / util / TrieTest.java
1 /**\r
2 *******************************************************************************\r
3 * Copyright (C) 1996-2010, International Business Machines Corporation and    *\r
4 * others. All Rights Reserved.                                                *\r
5 *******************************************************************************\r
6 */\r
7 \r
8 package com.ibm.icu.dev.test.util;\r
9 \r
10 import com.ibm.icu.dev.test.TestFmwk;\r
11 import com.ibm.icu.impl.CharTrie;\r
12 import com.ibm.icu.impl.IntTrie;\r
13 import com.ibm.icu.impl.IntTrieBuilder;\r
14 import com.ibm.icu.impl.Trie;\r
15 import com.ibm.icu.impl.TrieBuilder;\r
16 import com.ibm.icu.impl.TrieIterator;\r
17 import com.ibm.icu.impl.UCharacterProperty;\r
18 import com.ibm.icu.text.UTF16;\r
19 import com.ibm.icu.util.RangeValueIterator;\r
20 \r
21 /**\r
22 * Testing class for Trie. Tests here will be simple, since both CharTrie and \r
23 * IntTrie are very similar and are heavily used in other parts of ICU4J.\r
24 * Codes using Tries are expected to have detailed tests.\r
25 * @author Syn Wee Quek\r
26 * @since release 2.1 Jan 01 2002\r
27 */\r
28 public final class TrieTest extends TestFmwk \r
29\r
30     // constructor ---------------------------------------------------\r
31   \r
32     /**\r
33     * Constructor\r
34     */\r
35     public TrieTest()\r
36     {\r
37     }\r
38       \r
39     // public methods -----------------------------------------------\r
40     \r
41     public static void main(String arg[]) \r
42     {\r
43         TrieTest test = new TrieTest();\r
44         try {\r
45             test.run(arg);\r
46         } catch (Exception e) {\r
47             test.errln("Error testing trietest");\r
48         }\r
49     }\r
50     \r
51     /** \r
52      * Values for setting possibly overlapping, out-of-order ranges of values\r
53      */\r
54     private static final class SetRange \r
55     {\r
56         SetRange(int start, int limit, int value, boolean overwrite)\r
57         {\r
58             this.start = start;\r
59             this.limit = limit;\r
60             this.value = value;\r
61             this.overwrite = overwrite;\r
62         }\r
63         \r
64         int start, limit;\r
65         int value;\r
66         boolean overwrite;\r
67     }\r
68     \r
69     /**\r
70      * Values for testing:\r
71      * value is set from the previous boundary's limit to before\r
72      * this boundary's limit\r
73      */\r
74     private static final class CheckRange \r
75     {\r
76         CheckRange(int limit, int value)\r
77         {\r
78             this.limit = limit;\r
79             this.value = value;\r
80         }\r
81         \r
82         int limit;\r
83         int value;\r
84     }\r
85     \r
86     private static final class _testFoldedValue \r
87                                         implements TrieBuilder.DataManipulate  \r
88     {\r
89         public _testFoldedValue(IntTrieBuilder builder)\r
90         {\r
91             m_builder_ = builder;\r
92         }\r
93         \r
94         public int getFoldedValue(int start, int offset)\r
95         {\r
96             int foldedValue = 0;\r
97             int limit = start + 0x400;\r
98             while (start < limit) {\r
99                 int value = m_builder_.getValue(start);\r
100                 if (m_builder_.isInZeroBlock(start)) {\r
101                     start += TrieBuilder.DATA_BLOCK_LENGTH;\r
102                 } \r
103                 else {\r
104                     foldedValue |= value;\r
105                     ++ start;\r
106                 }\r
107             }\r
108         \r
109             if (foldedValue != 0) {\r
110                 return (offset << 16) | foldedValue;\r
111             } \r
112             return 0;\r
113         }\r
114         \r
115         private IntTrieBuilder m_builder_;\r
116     }\r
117     \r
118     private static final class _testFoldingOffset \r
119                                                 implements Trie.DataManipulate \r
120     {\r
121         public int getFoldingOffset(int value)\r
122         {\r
123             return value >>> 16;\r
124         }\r
125     }\r
126     \r
127     private static final class _testEnumValue extends TrieIterator\r
128     {\r
129         public _testEnumValue(Trie data)\r
130         {\r
131             super(data);\r
132         }\r
133         \r
134         protected int extract(int value)\r
135         {\r
136             return value ^ 0x5555;\r
137         }\r
138     }\r
139     \r
140     private void _testTrieIteration(IntTrie trie, CheckRange checkRanges[],\r
141                                     int countCheckRanges) \r
142     {\r
143         // write a string\r
144         int countValues = 0;\r
145         StringBuffer s = new StringBuffer();\r
146         int values[] = new int[30];\r
147         for (int i = 0; i < countCheckRanges; ++ i) {\r
148             int c = checkRanges[i].limit;\r
149             if (c != 0) {\r
150                 -- c;\r
151                 UTF16.append(s, c);\r
152                 values[countValues ++] = checkRanges[i].value;\r
153             }\r
154         }\r
155         int limit = s.length();\r
156         // try forward\r
157         int p = 0;\r
158         int i = 0;\r
159         while(p < limit) {\r
160             int c = UTF16.charAt(s, p);\r
161             p += UTF16.getCharCount(c);\r
162             int value = trie.getCodePointValue(c);\r
163             if (value != values[i]) {\r
164                 errln("wrong value from UTRIE_NEXT(U+" \r
165                       + Integer.toHexString(c) + "): 0x" \r
166                       + Integer.toHexString(value) + " instead of 0x"\r
167                       + Integer.toHexString(values[i]));\r
168             }\r
169             // unlike the c version lead is 0 if c is non-supplementary\r
170             char lead = UTF16.getLeadSurrogate(c);\r
171             char trail = UTF16.getTrailSurrogate(c); \r
172             if (lead == 0 \r
173                 ? trail != s.charAt(p - 1) \r
174                 : !UTF16.isLeadSurrogate(lead) \r
175                   || !UTF16.isTrailSurrogate(trail) || lead != s.charAt(p - 2) \r
176                   || trail != s.charAt(p - 1)) {\r
177                 errln("wrong (lead, trail) from UTRIE_NEXT(U+" \r
178                       + Integer.toHexString(c));\r
179                 continue;\r
180             }\r
181             if (lead != 0) {\r
182                 value = trie.getLeadValue(lead);\r
183                 value = trie.getTrailValue(value, trail);\r
184                 if (value != trie.getSurrogateValue(lead, trail)\r
185                     && value != values[i]) {\r
186                     errln("wrong value from getting supplementary " \r
187                           + "values (U+" \r
188                           + Integer.toHexString(c) + "): 0x"\r
189                           + Integer.toHexString(value) + " instead of 0x"\r
190                           + Integer.toHexString(values[i]));\r
191                 }\r
192             }\r
193             ++ i;\r
194         }\r
195     }\r
196     \r
197     private void _testTrieRanges(SetRange setRanges[], int countSetRanges, \r
198                                  CheckRange checkRanges[], int countCheckRanges,\r
199                                  boolean latin1Linear) \r
200     {\r
201         IntTrieBuilder newTrie = new IntTrieBuilder(null, 2000,\r
202                                                     checkRanges[0].value, \r
203                                                     checkRanges[0].value,\r
204                                                     latin1Linear);\r
205     \r
206         // set values from setRanges[]\r
207         boolean ok = true;\r
208         for (int i = 0; i < countSetRanges; ++ i) {\r
209             int start = setRanges[i].start;\r
210             int limit = setRanges[i].limit;\r
211             int value = setRanges[i].value;\r
212             boolean overwrite = setRanges[i].overwrite;\r
213             if ((limit - start) == 1 && overwrite) {\r
214                 ok &= newTrie.setValue(start, value);\r
215             } \r
216             else {\r
217                 ok &= newTrie.setRange(start, limit, value, overwrite);\r
218             }\r
219         }\r
220         if (!ok) {\r
221             errln("setting values into a trie failed");\r
222             return;\r
223         }\r
224     \r
225         // verify that all these values are in the new Trie\r
226         int start = 0;\r
227         for (int i = 0; i < countCheckRanges; ++ i) {\r
228             int limit = checkRanges[i].limit;\r
229             int value = checkRanges[i].value;\r
230     \r
231             while (start < limit) {\r
232                 if (value != newTrie.getValue(start)) {\r
233                     errln("newTrie [U+" \r
234                           + Integer.toHexString(start) + "]==0x" \r
235                           + Integer.toHexString(newTrie.getValue(start)) \r
236                           + " instead of 0x" + Integer.toHexString(value));\r
237                 }\r
238                 ++ start;\r
239             }\r
240         }\r
241     \r
242         IntTrie trie = newTrie.serialize(new _testFoldedValue(newTrie), \r
243                                          new _testFoldingOffset());\r
244     \r
245         // test linear Latin-1 range from utrie_getData()\r
246         if (latin1Linear) {\r
247             start = 0;\r
248             for (int i = 0; i < countCheckRanges && start <= 0xff; ++ i) {\r
249                 int limit = checkRanges[i].limit;\r
250                 int value = checkRanges[i].value;\r
251     \r
252                 while (start < limit && start <= 0xff) {\r
253                     if (value != trie.getLatin1LinearValue((char)start)) {\r
254                         errln("IntTrie.getLatin1LinearValue[U+" \r
255                               + Integer.toHexString(start) + "]==0x"\r
256                               + Integer.toHexString(\r
257                                         trie.getLatin1LinearValue((char) start)) \r
258                               + " instead of 0x" + Integer.toHexString(value));\r
259                     }\r
260                     ++ start;\r
261                 }\r
262             }\r
263         }\r
264     \r
265         if (latin1Linear != trie.isLatin1Linear()) {\r
266             errln("trie serialization did not preserve "\r
267                   + "Latin-1-linearity");\r
268         }\r
269     \r
270         // verify that all these values are in the serialized Trie\r
271         start = 0;\r
272         for (int i = 0; i < countCheckRanges; ++ i) {\r
273             int limit = checkRanges[i].limit;\r
274             int value = checkRanges[i].value;\r
275     \r
276             if (start == 0xd800) {\r
277                 // skip surrogates\r
278                 start = limit;\r
279                 continue;\r
280             }\r
281     \r
282             while (start < limit) {\r
283                 if (start <= 0xffff) {\r
284                     int value2 = trie.getBMPValue((char)start);\r
285                     if (value != value2) {\r
286                         errln("serialized trie.getBMPValue(U+"\r
287                               + Integer.toHexString(start) + " == 0x" \r
288                               + Integer.toHexString(value2) + " instead of 0x"\r
289                               + Integer.toHexString(value));\r
290                     }\r
291                     if (!UTF16.isLeadSurrogate((char)start)) {\r
292                         value2 = trie.getLeadValue((char)start);\r
293                         if (value != value2) {\r
294                             errln("serialized trie.getLeadValue(U+"\r
295                               + Integer.toHexString(start) + " == 0x" \r
296                               + Integer.toHexString(value2) + " instead of 0x"\r
297                               + Integer.toHexString(value));\r
298                         }\r
299                     }\r
300                 }\r
301                 int value2 = trie.getCodePointValue(start);\r
302                 if (value != value2) {\r
303                     errln("serialized trie.getCodePointValue(U+"\r
304                           + Integer.toHexString(start) + ")==0x" \r
305                           + Integer.toHexString(value2) + " instead of 0x" \r
306                           + Integer.toHexString(value));\r
307                 }\r
308                 ++ start;\r
309             }\r
310         }\r
311     \r
312         // enumerate and verify all ranges\r
313         \r
314         int enumRanges = 1;\r
315         TrieIterator iter  = new _testEnumValue(trie);\r
316         RangeValueIterator.Element result = new RangeValueIterator.Element();\r
317         while (iter.next(result)) {\r
318             if (result.start != checkRanges[enumRanges -1].limit \r
319                 || result.limit != checkRanges[enumRanges].limit\r
320                 || (result.value ^ 0x5555) != checkRanges[enumRanges].value) {\r
321                 errln("utrie_enum() delivers wrong range [U+"\r
322                       + Integer.toHexString(result.start) + "..U+" \r
323                       + Integer.toHexString(result.limit) + "].0x" \r
324                       + Integer.toHexString(result.value ^ 0x5555) \r
325                       + " instead of [U+"\r
326                       + Integer.toHexString(checkRanges[enumRanges -1].limit) \r
327                       + "..U+" \r
328                       + Integer.toHexString(checkRanges[enumRanges].limit) \r
329                       + "].0x" \r
330                       + Integer.toHexString(checkRanges[enumRanges].value));\r
331             }\r
332             enumRanges ++;\r
333         }\r
334     \r
335         // test linear Latin-1 range\r
336         if (trie.isLatin1Linear()) {\r
337             for (start = 0; start < 0x100; ++ start) {\r
338                 if (trie.getLatin1LinearValue((char)start) \r
339                     != trie.getLeadValue((char)start)) {\r
340                     errln("trie.getLatin1LinearValue[U+"\r
341                           + Integer.toHexString(start) + "]=0x"\r
342                           + Integer.toHexString(\r
343                                         trie.getLatin1LinearValue((char)start))\r
344                           + " instead of 0x" \r
345                           + Integer.toHexString(\r
346                                         trie.getLeadValue((char)start)));\r
347                 }\r
348             }\r
349         }\r
350     \r
351         _testTrieIteration(trie, checkRanges, countCheckRanges);\r
352     }\r
353     \r
354     private void _testTrieRanges2(SetRange setRanges[], \r
355                                   int countSetRanges, \r
356                                   CheckRange checkRanges[], \r
357                                   int countCheckRanges) \r
358     {\r
359         _testTrieRanges(setRanges, countSetRanges, checkRanges, countCheckRanges,\r
360                         false);\r
361         \r
362         _testTrieRanges(setRanges, countSetRanges, checkRanges, countCheckRanges,\r
363                         true);\r
364     }\r
365     \r
366     private void _testTrieRanges4(SetRange setRanges[], int countSetRanges,\r
367                                   CheckRange checkRanges[], \r
368                                   int countCheckRanges) \r
369     {\r
370         _testTrieRanges2(setRanges, countSetRanges, checkRanges, \r
371                          countCheckRanges);\r
372     }\r
373     \r
374     // test data ------------------------------------------------------------\r
375     \r
376     /** \r
377      * set consecutive ranges, even with value 0\r
378      */\r
379     private static SetRange setRanges1[]={\r
380         new SetRange(0,      0x20,       0,      false),\r
381         new SetRange(0x20,   0xa7,       0x1234, false),\r
382         new SetRange(0xa7,   0x3400,     0,      false),\r
383         new SetRange(0x3400, 0x9fa6,     0x6162, false),\r
384         new SetRange(0x9fa6, 0xda9e,     0x3132, false),\r
385         // try to disrupt _testFoldingOffset16()\r
386         new SetRange(0xdada, 0xeeee,     0x87ff, false), \r
387         new SetRange(0xeeee, 0x11111,    1,      false),\r
388         new SetRange(0x11111, 0x44444,   0x6162, false),\r
389         new SetRange(0x44444, 0x60003,   0,      false),\r
390         new SetRange(0xf0003, 0xf0004,   0xf,    false),\r
391         new SetRange(0xf0004, 0xf0006,   0x10,   false),\r
392         new SetRange(0xf0006, 0xf0007,   0x11,   false),\r
393         new SetRange(0xf0007, 0xf0020,   0x12,   false),\r
394         new SetRange(0xf0020, 0x110000,  0,      false)\r
395     };\r
396     \r
397     private static CheckRange checkRanges1[]={\r
398         new CheckRange(0,      0), // dummy start range to make _testEnumRange() simpler\r
399         new CheckRange(0x20,   0),\r
400         new CheckRange(0xa7,   0x1234),\r
401         new CheckRange(0x3400, 0),\r
402         new CheckRange(0x9fa6, 0x6162),\r
403         new CheckRange(0xda9e, 0x3132),\r
404         new CheckRange(0xdada, 0),\r
405         new CheckRange(0xeeee, 0x87ff),\r
406         new CheckRange(0x11111,1),\r
407         new CheckRange(0x44444,0x6162),\r
408         new CheckRange(0xf0003,0),\r
409         new CheckRange(0xf0004,0xf),\r
410         new CheckRange(0xf0006,0x10),\r
411         new CheckRange(0xf0007,0x11),\r
412         new CheckRange(0xf0020,0x12),\r
413         new CheckRange(0x110000, 0)\r
414     };\r
415     \r
416     /** \r
417      * set some interesting overlapping ranges\r
418      */\r
419     private static SetRange setRanges2[]={\r
420         new SetRange(0x21,   0x7f,       0x5555, true),\r
421         new SetRange(0x2f800,0x2fedc,    0x7a,   true),\r
422         new SetRange(0x72,   0xdd,       3,      true),\r
423         new SetRange(0xdd,   0xde,       4,      false),\r
424         new SetRange(0x2f987,0x2fa98,    5,      true),\r
425         new SetRange(0x2f777,0x2f833,    0,      true),\r
426         new SetRange(0x2f900,0x2ffee,    1,      false),\r
427         new SetRange(0x2ffee,0x2ffef,    2,      true)\r
428     };\r
429     \r
430     private static CheckRange checkRanges2[]={\r
431         // dummy start range to make _testEnumRange() simpler\r
432         new CheckRange(0,      0),       \r
433         new CheckRange(0x21,   0),\r
434         new CheckRange(0x72,   0x5555),\r
435         new CheckRange(0xdd,   3),\r
436         new CheckRange(0xde,   4),\r
437         new CheckRange(0x2f833,0),\r
438         new CheckRange(0x2f987,0x7a),\r
439         new CheckRange(0x2fa98,5),\r
440         new CheckRange(0x2fedc,0x7a),\r
441         new CheckRange(0x2ffee,1),\r
442         new CheckRange(0x2ffef,2),\r
443         new CheckRange(0x110000, 0)\r
444     };\r
445     \r
446     /** \r
447      * use a non-zero initial value\r
448      */\r
449     private static SetRange setRanges3[]={\r
450         new SetRange(0x31,   0xa4,   1,  false),\r
451         new SetRange(0x3400, 0x6789, 2,  false),\r
452         new SetRange(0x30000,0x34567,9,  true),\r
453         new SetRange(0x45678,0x56789,3,  true)\r
454     };\r
455     \r
456     private static CheckRange checkRanges3[]={\r
457         // dummy start range, also carries the initial value\r
458         new CheckRange(0,      9),  \r
459         new CheckRange(0x31,   9),\r
460         new CheckRange(0xa4,   1),\r
461         new CheckRange(0x3400, 9),\r
462         new CheckRange(0x6789, 2),\r
463         new CheckRange(0x45678,9),\r
464         new CheckRange(0x56789,3),\r
465         new CheckRange(0x110000,9)\r
466     };\r
467     \r
468     public void TestIntTrie() \r
469     {\r
470         _testTrieRanges4(setRanges1, setRanges1.length, checkRanges1, \r
471                          checkRanges1.length);\r
472         _testTrieRanges4(setRanges2, setRanges2.length, checkRanges2, \r
473                          checkRanges2.length); \r
474         _testTrieRanges4(setRanges3, setRanges3.length, checkRanges3, \r
475                          checkRanges3.length);\r
476     }\r
477     \r
478     public void TestCharValues()\r
479     {\r
480         CharTrie trie = null;\r
481         try {\r
482              trie = UCharacterProperty.INSTANCE.m_trie_;\r
483         } catch (Exception e) {\r
484             warnln("Error creating ucharacter trie");\r
485             return;\r
486         }\r
487         \r
488         for (int i = 0; i < 0xFFFF; i ++) {\r
489             if (i < 0xFF \r
490                 && trie.getBMPValue((char)i) \r
491                     != trie.getLatin1LinearValue((char)i)) {\r
492                 errln("For latin 1 codepoint, getBMPValue should be the same " +\r
493                        "as getLatin1LinearValue");\r
494             }\r
495             if (trie.getBMPValue((char)i) != trie.getCodePointValue(i)) {\r
496                 errln("For BMP codepoint, getBMPValue should be the same " +\r
497                        "as getCodepointValue");\r
498             }\r
499         }\r
500         for (int i = 0x10000; i < 0x10ffff; i ++) {\r
501             char lead = UTF16.getLeadSurrogate(i);\r
502             char trail = UTF16.getTrailSurrogate(i);\r
503             char value = trie.getCodePointValue(i);\r
504             if (value != trie.getSurrogateValue(lead, trail) ||\r
505                 value != trie.getTrailValue(trie.getLeadValue(lead), \r
506                                             trail)) {\r
507                 errln("For Non-BMP codepoints, getSurrogateValue should be "\r
508                       + "the same s getCodepointValue and getTrailValue");\r
509             }        \r
510         }\r
511     }\r
512 \r
513     private static class DummyGetFoldingOffset implements Trie.DataManipulate {\r
514         public int getFoldingOffset(int value) {\r
515             return -1; /* never get non-initialValue data for supplementary code points */\r
516         }\r
517     }\r
518 \r
519     public void TestDummyCharTrie() {\r
520         CharTrie trie;\r
521         final int initialValue=0x313, leadUnitValue=0xaffe; \r
522         int value;\r
523         int c;\r
524         trie=new CharTrie(initialValue, leadUnitValue, new DummyGetFoldingOffset());\r
525 \r
526         /* test that all code points have initialValue */\r
527         for(c=0; c<=0x10ffff; ++c) {\r
528             value=trie.getCodePointValue(c);\r
529             if(value!=initialValue) {\r
530                 errln("CharTrie/dummy.getCodePointValue(c)(U+"+hex(c)+")=0x"+hex(value)+" instead of 0x"+hex(initialValue));\r
531             }\r
532         }\r
533 \r
534         /* test that the lead surrogate code units have leadUnitValue */\r
535         for(c=0xd800; c<=0xdbff; ++c) {\r
536             value=trie.getLeadValue((char)c);\r
537             if(value!=leadUnitValue) {\r
538                 errln("CharTrie/dummy.getLeadValue(c)(U+"+hex(c)+")=0x"+hex(value)+" instead of 0x"+hex(leadUnitValue));\r
539             }\r
540         }\r
541     }\r
542 \r
543     public void TestDummyIntTrie() {\r
544         IntTrie trie;\r
545         final int initialValue=0x01234567, leadUnitValue=0x89abcdef; \r
546         int value;\r
547         int c;\r
548         trie=new IntTrie(initialValue, leadUnitValue, new DummyGetFoldingOffset());\r
549 \r
550         /* test that all code points have initialValue */\r
551         for(c=0; c<=0x10ffff; ++c) {\r
552             value=trie.getCodePointValue(c);\r
553             if(value!=initialValue) {\r
554                 errln("IntTrie/dummy.getCodePointValue(c)(U+"+hex(c)+")=0x"+hex(value)+" instead of 0x"+hex(initialValue));\r
555             }\r
556         }\r
557 \r
558         /* test that the lead surrogate code units have leadUnitValue */\r
559         for(c=0xd800; c<=0xdbff; ++c) {\r
560             value=trie.getLeadValue((char)c);\r
561             if(value!=leadUnitValue) {\r
562                 errln("IntTrie/dummy.getLeadValue(c)(U+"+hex(c)+")=0x"+hex(value)+" instead of 0x"+hex(leadUnitValue));\r
563             }\r
564         }\r
565     }\r
566 }\r