2 *******************************************************************************
\r
3 * Copyright (C) 1996-2007, International Business Machines Corporation and *
\r
4 * others. All Rights Reserved. *
\r
5 *******************************************************************************
\r
8 package com.ibm.icu.dev.test.util;
\r
10 import com.ibm.icu.dev.test.TestFmwk;
\r
11 import com.ibm.icu.impl.Trie;
\r
12 import com.ibm.icu.impl.IntTrie;
\r
13 import com.ibm.icu.impl.CharTrie;
\r
14 import com.ibm.icu.impl.TrieBuilder;
\r
15 import com.ibm.icu.impl.IntTrieBuilder;
\r
16 import com.ibm.icu.impl.TrieIterator;
\r
17 import com.ibm.icu.impl.UCharacterProperty;
\r
18 import com.ibm.icu.text.UTF16;
\r
19 import com.ibm.icu.util.RangeValueIterator;
\r
22 * Testing class for Trie. Tests here will be simple, since both CharTrie and
\r
23 * IntTrie are very similar and are heavily used in other parts of ICU4J.
\r
24 * Codes using Tries are expected to have detailed tests.
\r
25 * @author Syn Wee Quek
\r
26 * @since release 2.1 Jan 01 2002
\r
28 public final class TrieTest extends TestFmwk
\r
30 // constructor ---------------------------------------------------
\r
39 // public methods -----------------------------------------------
\r
41 public static void main(String arg[])
\r
43 TrieTest test = new TrieTest();
\r
46 } catch (Exception e) {
\r
47 test.errln("Error testing trietest");
\r
52 * Values for setting possibly overlapping, out-of-order ranges of values
\r
54 private static final class SetRange
\r
56 SetRange(int start, int limit, int value, boolean overwrite)
\r
61 this.overwrite = overwrite;
\r
70 * Values for testing:
\r
71 * value is set from the previous boundary's limit to before
\r
72 * this boundary's limit
\r
74 private static final class CheckRange
\r
76 CheckRange(int limit, int value)
\r
86 private static final class _testFoldedValue
\r
87 implements TrieBuilder.DataManipulate
\r
89 public _testFoldedValue(IntTrieBuilder builder)
\r
91 m_builder_ = builder;
\r
94 public int getFoldedValue(int start, int offset)
\r
96 int foldedValue = 0;
\r
97 int limit = start + 0x400;
\r
98 while (start < limit) {
\r
99 int value = m_builder_.getValue(start);
\r
100 if (m_builder_.isInZeroBlock(start)) {
\r
101 start += TrieBuilder.DATA_BLOCK_LENGTH;
\r
104 foldedValue |= value;
\r
109 if (foldedValue != 0) {
\r
110 return (offset << 16) | foldedValue;
\r
115 private IntTrieBuilder m_builder_;
\r
118 private static final class _testFoldingOffset
\r
119 implements Trie.DataManipulate
\r
121 public int getFoldingOffset(int value)
\r
123 return value >>> 16;
\r
127 private static final class _testEnumValue extends TrieIterator
\r
129 public _testEnumValue(Trie data)
\r
134 protected int extract(int value)
\r
136 return value ^ 0x5555;
\r
140 private void _testTrieIteration(IntTrie trie, CheckRange checkRanges[],
\r
141 int countCheckRanges)
\r
144 int countValues = 0;
\r
145 StringBuffer s = new StringBuffer();
\r
146 int values[] = new int[30];
\r
147 for (int i = 0; i < countCheckRanges; ++ i) {
\r
148 int c = checkRanges[i].limit;
\r
151 UTF16.append(s, c);
\r
152 values[countValues ++] = checkRanges[i].value;
\r
155 int limit = s.length();
\r
160 int c = UTF16.charAt(s, p);
\r
161 p += UTF16.getCharCount(c);
\r
162 int value = trie.getCodePointValue(c);
\r
163 if (value != values[i]) {
\r
164 errln("wrong value from UTRIE_NEXT(U+"
\r
165 + Integer.toHexString(c) + "): 0x"
\r
166 + Integer.toHexString(value) + " instead of 0x"
\r
167 + Integer.toHexString(values[i]));
\r
169 // unlike the c version lead is 0 if c is non-supplementary
\r
170 char lead = UTF16.getLeadSurrogate(c);
\r
171 char trail = UTF16.getTrailSurrogate(c);
\r
173 ? trail != s.charAt(p - 1)
\r
174 : !UTF16.isLeadSurrogate(lead)
\r
175 || !UTF16.isTrailSurrogate(trail) || lead != s.charAt(p - 2)
\r
176 || trail != s.charAt(p - 1)) {
\r
177 errln("wrong (lead, trail) from UTRIE_NEXT(U+"
\r
178 + Integer.toHexString(c));
\r
182 value = trie.getLeadValue(lead);
\r
183 value = trie.getTrailValue(value, trail);
\r
184 if (value != trie.getSurrogateValue(lead, trail)
\r
185 && value != values[i]) {
\r
186 errln("wrong value from getting supplementary "
\r
188 + Integer.toHexString(c) + "): 0x"
\r
189 + Integer.toHexString(value) + " instead of 0x"
\r
190 + Integer.toHexString(values[i]));
\r
197 private void _testTrieRanges(SetRange setRanges[], int countSetRanges,
\r
198 CheckRange checkRanges[], int countCheckRanges,
\r
199 boolean latin1Linear)
\r
201 IntTrieBuilder newTrie = new IntTrieBuilder(null, 2000,
\r
202 checkRanges[0].value,
\r
203 checkRanges[0].value,
\r
206 // set values from setRanges[]
\r
208 for (int i = 0; i < countSetRanges; ++ i) {
\r
209 int start = setRanges[i].start;
\r
210 int limit = setRanges[i].limit;
\r
211 int value = setRanges[i].value;
\r
212 boolean overwrite = setRanges[i].overwrite;
\r
213 if ((limit - start) == 1 && overwrite) {
\r
214 ok &= newTrie.setValue(start, value);
\r
217 ok &= newTrie.setRange(start, limit, value, overwrite);
\r
221 errln("setting values into a trie failed");
\r
225 // verify that all these values are in the new Trie
\r
227 for (int i = 0; i < countCheckRanges; ++ i) {
\r
228 int limit = checkRanges[i].limit;
\r
229 int value = checkRanges[i].value;
\r
231 while (start < limit) {
\r
232 if (value != newTrie.getValue(start)) {
\r
233 errln("newTrie [U+"
\r
234 + Integer.toHexString(start) + "]==0x"
\r
235 + Integer.toHexString(newTrie.getValue(start))
\r
236 + " instead of 0x" + Integer.toHexString(value));
\r
242 IntTrie trie = newTrie.serialize(new _testFoldedValue(newTrie),
\r
243 new _testFoldingOffset());
\r
245 // test linear Latin-1 range from utrie_getData()
\r
246 if (latin1Linear) {
\r
248 for (int i = 0; i < countCheckRanges && start <= 0xff; ++ i) {
\r
249 int limit = checkRanges[i].limit;
\r
250 int value = checkRanges[i].value;
\r
252 while (start < limit && start <= 0xff) {
\r
253 if (value != trie.getLatin1LinearValue((char)start)) {
\r
254 errln("IntTrie.getLatin1LinearValue[U+"
\r
255 + Integer.toHexString(start) + "]==0x"
\r
256 + Integer.toHexString(
\r
257 trie.getLatin1LinearValue((char) start))
\r
258 + " instead of 0x" + Integer.toHexString(value));
\r
265 if (latin1Linear != trie.isLatin1Linear()) {
\r
266 errln("trie serialization did not preserve "
\r
267 + "Latin-1-linearity");
\r
270 // verify that all these values are in the serialized Trie
\r
272 for (int i = 0; i < countCheckRanges; ++ i) {
\r
273 int limit = checkRanges[i].limit;
\r
274 int value = checkRanges[i].value;
\r
276 if (start == 0xd800) {
\r
282 while (start < limit) {
\r
283 if (start <= 0xffff) {
\r
284 int value2 = trie.getBMPValue((char)start);
\r
285 if (value != value2) {
\r
286 errln("serialized trie.getBMPValue(U+"
\r
287 + Integer.toHexString(start) + " == 0x"
\r
288 + Integer.toHexString(value2) + " instead of 0x"
\r
289 + Integer.toHexString(value));
\r
291 if (!UTF16.isLeadSurrogate((char)start)) {
\r
292 value2 = trie.getLeadValue((char)start);
\r
293 if (value != value2) {
\r
294 errln("serialized trie.getLeadValue(U+"
\r
295 + Integer.toHexString(start) + " == 0x"
\r
296 + Integer.toHexString(value2) + " instead of 0x"
\r
297 + Integer.toHexString(value));
\r
301 int value2 = trie.getCodePointValue(start);
\r
302 if (value != value2) {
\r
303 errln("serialized trie.getCodePointValue(U+"
\r
304 + Integer.toHexString(start) + ")==0x"
\r
305 + Integer.toHexString(value2) + " instead of 0x"
\r
306 + Integer.toHexString(value));
\r
312 // enumerate and verify all ranges
\r
314 int enumRanges = 1;
\r
315 TrieIterator iter = new _testEnumValue(trie);
\r
316 RangeValueIterator.Element result = new RangeValueIterator.Element();
\r
317 while (iter.next(result)) {
\r
318 if (result.start != checkRanges[enumRanges -1].limit
\r
319 || result.limit != checkRanges[enumRanges].limit
\r
320 || (result.value ^ 0x5555) != checkRanges[enumRanges].value) {
\r
321 errln("utrie_enum() delivers wrong range [U+"
\r
322 + Integer.toHexString(result.start) + "..U+"
\r
323 + Integer.toHexString(result.limit) + "].0x"
\r
324 + Integer.toHexString(result.value ^ 0x5555)
\r
325 + " instead of [U+"
\r
326 + Integer.toHexString(checkRanges[enumRanges -1].limit)
\r
328 + Integer.toHexString(checkRanges[enumRanges].limit)
\r
330 + Integer.toHexString(checkRanges[enumRanges].value));
\r
335 // test linear Latin-1 range
\r
336 if (trie.isLatin1Linear()) {
\r
337 for (start = 0; start < 0x100; ++ start) {
\r
338 if (trie.getLatin1LinearValue((char)start)
\r
339 != trie.getLeadValue((char)start)) {
\r
340 errln("trie.getLatin1LinearValue[U+"
\r
341 + Integer.toHexString(start) + "]=0x"
\r
342 + Integer.toHexString(
\r
343 trie.getLatin1LinearValue((char)start))
\r
344 + " instead of 0x"
\r
345 + Integer.toHexString(
\r
346 trie.getLeadValue((char)start)));
\r
351 _testTrieIteration(trie, checkRanges, countCheckRanges);
\r
354 private void _testTrieRanges2(SetRange setRanges[],
\r
355 int countSetRanges,
\r
356 CheckRange checkRanges[],
\r
357 int countCheckRanges)
\r
359 _testTrieRanges(setRanges, countSetRanges, checkRanges, countCheckRanges,
\r
362 _testTrieRanges(setRanges, countSetRanges, checkRanges, countCheckRanges,
\r
366 private void _testTrieRanges4(SetRange setRanges[], int countSetRanges,
\r
367 CheckRange checkRanges[],
\r
368 int countCheckRanges)
\r
370 _testTrieRanges2(setRanges, countSetRanges, checkRanges,
\r
374 // test data ------------------------------------------------------------
\r
377 * set consecutive ranges, even with value 0
\r
379 private static SetRange setRanges1[]={
\r
380 new SetRange(0, 0x20, 0, false),
\r
381 new SetRange(0x20, 0xa7, 0x1234, false),
\r
382 new SetRange(0xa7, 0x3400, 0, false),
\r
383 new SetRange(0x3400, 0x9fa6, 0x6162, false),
\r
384 new SetRange(0x9fa6, 0xda9e, 0x3132, false),
\r
385 // try to disrupt _testFoldingOffset16()
\r
386 new SetRange(0xdada, 0xeeee, 0x87ff, false),
\r
387 new SetRange(0xeeee, 0x11111, 1, false),
\r
388 new SetRange(0x11111, 0x44444, 0x6162, false),
\r
389 new SetRange(0x44444, 0x60003, 0, false),
\r
390 new SetRange(0xf0003, 0xf0004, 0xf, false),
\r
391 new SetRange(0xf0004, 0xf0006, 0x10, false),
\r
392 new SetRange(0xf0006, 0xf0007, 0x11, false),
\r
393 new SetRange(0xf0007, 0xf0020, 0x12, false),
\r
394 new SetRange(0xf0020, 0x110000, 0, false)
\r
397 private static CheckRange checkRanges1[]={
\r
398 new CheckRange(0, 0), // dummy start range to make _testEnumRange() simpler
\r
399 new CheckRange(0x20, 0),
\r
400 new CheckRange(0xa7, 0x1234),
\r
401 new CheckRange(0x3400, 0),
\r
402 new CheckRange(0x9fa6, 0x6162),
\r
403 new CheckRange(0xda9e, 0x3132),
\r
404 new CheckRange(0xdada, 0),
\r
405 new CheckRange(0xeeee, 0x87ff),
\r
406 new CheckRange(0x11111,1),
\r
407 new CheckRange(0x44444,0x6162),
\r
408 new CheckRange(0xf0003,0),
\r
409 new CheckRange(0xf0004,0xf),
\r
410 new CheckRange(0xf0006,0x10),
\r
411 new CheckRange(0xf0007,0x11),
\r
412 new CheckRange(0xf0020,0x12),
\r
413 new CheckRange(0x110000, 0)
\r
417 * set some interesting overlapping ranges
\r
419 private static SetRange setRanges2[]={
\r
420 new SetRange(0x21, 0x7f, 0x5555, true),
\r
421 new SetRange(0x2f800,0x2fedc, 0x7a, true),
\r
422 new SetRange(0x72, 0xdd, 3, true),
\r
423 new SetRange(0xdd, 0xde, 4, false),
\r
424 new SetRange(0x2f987,0x2fa98, 5, true),
\r
425 new SetRange(0x2f777,0x2f833, 0, true),
\r
426 new SetRange(0x2f900,0x2ffee, 1, false),
\r
427 new SetRange(0x2ffee,0x2ffef, 2, true)
\r
430 private static CheckRange checkRanges2[]={
\r
431 // dummy start range to make _testEnumRange() simpler
\r
432 new CheckRange(0, 0),
\r
433 new CheckRange(0x21, 0),
\r
434 new CheckRange(0x72, 0x5555),
\r
435 new CheckRange(0xdd, 3),
\r
436 new CheckRange(0xde, 4),
\r
437 new CheckRange(0x2f833,0),
\r
438 new CheckRange(0x2f987,0x7a),
\r
439 new CheckRange(0x2fa98,5),
\r
440 new CheckRange(0x2fedc,0x7a),
\r
441 new CheckRange(0x2ffee,1),
\r
442 new CheckRange(0x2ffef,2),
\r
443 new CheckRange(0x110000, 0)
\r
447 * use a non-zero initial value
\r
449 private static SetRange setRanges3[]={
\r
450 new SetRange(0x31, 0xa4, 1, false),
\r
451 new SetRange(0x3400, 0x6789, 2, false),
\r
452 new SetRange(0x30000,0x34567,9, true),
\r
453 new SetRange(0x45678,0x56789,3, true)
\r
456 private static CheckRange checkRanges3[]={
\r
457 // dummy start range, also carries the initial value
\r
458 new CheckRange(0, 9),
\r
459 new CheckRange(0x31, 9),
\r
460 new CheckRange(0xa4, 1),
\r
461 new CheckRange(0x3400, 9),
\r
462 new CheckRange(0x6789, 2),
\r
463 new CheckRange(0x45678,9),
\r
464 new CheckRange(0x56789,3),
\r
465 new CheckRange(0x110000,9)
\r
468 public void TestIntTrie()
\r
470 _testTrieRanges4(setRanges1, setRanges1.length, checkRanges1,
\r
471 checkRanges1.length);
\r
472 _testTrieRanges4(setRanges2, setRanges2.length, checkRanges2,
\r
473 checkRanges2.length);
\r
474 _testTrieRanges4(setRanges3, setRanges3.length, checkRanges3,
\r
475 checkRanges3.length);
\r
478 public void TestCharValues()
\r
480 CharTrie trie = null;
\r
482 trie = UCharacterProperty.getInstance().m_trie_;
\r
483 } catch (Exception e) {
\r
484 warnln("Error creating ucharacter trie");
\r
488 for (int i = 0; i < 0xFFFF; i ++) {
\r
490 && trie.getBMPValue((char)i)
\r
491 != trie.getLatin1LinearValue((char)i)) {
\r
492 errln("For latin 1 codepoint, getBMPValue should be the same " +
\r
493 "as getLatin1LinearValue");
\r
495 if (trie.getBMPValue((char)i) != trie.getCodePointValue(i)) {
\r
496 errln("For BMP codepoint, getBMPValue should be the same " +
\r
497 "as getCodepointValue");
\r
500 for (int i = 0x10000; i < 0x10ffff; i ++) {
\r
501 char lead = UTF16.getLeadSurrogate(i);
\r
502 char trail = UTF16.getTrailSurrogate(i);
\r
503 char value = trie.getCodePointValue(i);
\r
504 if (value != trie.getSurrogateValue(lead, trail) ||
\r
505 value != trie.getTrailValue(trie.getLeadValue(lead),
\r
507 errln("For Non-BMP codepoints, getSurrogateValue should be "
\r
508 + "the same s getCodepointValue and getTrailValue");
\r
513 private static class DummyGetFoldingOffset implements Trie.DataManipulate {
\r
514 public int getFoldingOffset(int value) {
\r
515 return -1; /* never get non-initialValue data for supplementary code points */
\r
519 public void TestDummyCharTrie() {
\r
521 final int initialValue=0x313, leadUnitValue=0xaffe;
\r
524 trie=new CharTrie(initialValue, leadUnitValue, new DummyGetFoldingOffset());
\r
526 /* test that all code points have initialValue */
\r
527 for(c=0; c<=0x10ffff; ++c) {
\r
528 value=trie.getCodePointValue(c);
\r
529 if(value!=initialValue) {
\r
530 errln("CharTrie/dummy.getCodePointValue(c)(U+"+hex(c)+")=0x"+hex(value)+" instead of 0x"+hex(initialValue));
\r
534 /* test that the lead surrogate code units have leadUnitValue */
\r
535 for(c=0xd800; c<=0xdbff; ++c) {
\r
536 value=trie.getLeadValue((char)c);
\r
537 if(value!=leadUnitValue) {
\r
538 errln("CharTrie/dummy.getLeadValue(c)(U+"+hex(c)+")=0x"+hex(value)+" instead of 0x"+hex(leadUnitValue));
\r
543 public void TestDummyIntTrie() {
\r
545 final int initialValue=0x01234567, leadUnitValue=0x89abcdef;
\r
548 trie=new IntTrie(initialValue, leadUnitValue, new DummyGetFoldingOffset());
\r
550 /* test that all code points have initialValue */
\r
551 for(c=0; c<=0x10ffff; ++c) {
\r
552 value=trie.getCodePointValue(c);
\r
553 if(value!=initialValue) {
\r
554 errln("IntTrie/dummy.getCodePointValue(c)(U+"+hex(c)+")=0x"+hex(value)+" instead of 0x"+hex(initialValue));
\r
558 /* test that the lead surrogate code units have leadUnitValue */
\r
559 for(c=0xd800; c<=0xdbff; ++c) {
\r
560 value=trie.getLeadValue((char)c);
\r
561 if(value!=leadUnitValue) {
\r
562 errln("IntTrie/dummy.getLeadValue(c)(U+"+hex(c)+")=0x"+hex(value)+" instead of 0x"+hex(leadUnitValue));
\r