2 *******************************************************************************
\r
3 * Copyright (C) 2002-2007, International Business Machines Corporation and *
\r
4 * others. All Rights Reserved. *
\r
5 *******************************************************************************
\r
9 * Port From: ICU4C v2.1 : collate/CollationIteratorTest
\r
10 * Source File: $ICU4CRoot/source/test/intltest/itercoll.cpp
\r
13 package com.ibm.icu.dev.test.collator;
\r
15 import java.util.Locale;
\r
16 import java.util.Arrays;
\r
17 import java.text.CharacterIterator;
\r
18 import java.text.StringCharacterIterator;
\r
19 import com.ibm.icu.dev.test.*;
\r
20 import com.ibm.icu.text.*;
\r
21 import com.ibm.icu.lang.UCharacter;
\r
23 public class CollationIteratorTest extends TestFmwk {
\r
// Sample sentences used as iterator input by the tests in this class:
// test1 is read by TestOffset, TestPrevious and TestSetText; test2 by TestSetText.
25 String test1 = "What subset of all possible test cases?";
\r
26 String test2 = "has the highest probability of detecting";
\r
// Command-line entry point: builds a CollationIteratorTest and hands the
// arguments to its run(args) method.
28 public static void main(String[] args) throws Exception {
\r
29 new CollationIteratorTest().run(args);
\r
// Disabled alternative that would call a single test method directly.
30 // new CollationIteratorTest().TestNormalizedUnicodeChar();
\r
// Intended to verify that setOffset() leaves no stale state behind: the
// iterator is moved to offset 3, partly consumed there, moved back to offset 0,
// and the element then returned by next() is reported against the element
// saved from the very first next() call.
36 public void TestClearBuffers(/* char* par */) {
\r
37 RuleBasedCollator c = null;
\r
// The rule "& ab = d" is what makes offset 3 of "abcd" (the 'd') the
// "expanding character" referred to below.
39 c = new RuleBasedCollator("&a < b < c & ab = d");
\r
40 } catch (Exception e) {
\r
// Collator construction failure is reported with warnln, unlike the
// iterator failures below, which use errln.
41 warnln("Couldn't create a RuleBasedCollator.");
\r
45 String source = "abcd";
\r
46 CollationElementIterator i = c.getCollationElementIterator(source);
\r
49 e0 = i.next(); // save the first collation element
\r
50 } catch (Exception e) {
\r
51 errln("call to i.next() failed.");
\r
56 i.setOffset(3); // go to the expanding character
\r
57 } catch (Exception e) {
\r
58 errln("call to i.setOffset(3) failed.");
\r
63 i.next(); // but only use up half of it
\r
64 } catch (Exception e) {
\r
65 errln("call to i.next() failed.");
\r
70 i.setOffset(0); // go back to the beginning
\r
71 } catch (Exception e) {
\r
72 errln("call to i.setOffset(0) failed. ");
\r
77 e = i.next(); // and get this one again
\r
// The catch variable is named ee because e is already the int that receives
// the re-read element.
78 } catch (Exception ee) {
\r
79 errln("call to i.next() failed. ");
\r
// Mismatch report: prints the re-read element (e) and the saved one (e0) in hex.
84 errln("got 0x" + Integer.toHexString(e) + ", expected 0x" + Integer.toHexString(e0));
\r
89 * Test for getMaxExpansion()
\r
// Exercises CollationElementIterator.getMaxExpansion(): first against a
// collator built from a rule containing an expansion (c/aba) and a
// contraction (ch), then against a second collator whose rule attaches an
// expansion to the jamo U+1165.
91 public void TestMaxExpansion(/* char* par */) {
\r
// A supplementary code point, converted to a String with UCharacter.toString below.
// NOTE(review): the name suggests it is unassigned in Unicode -- confirm.
92 int unassigned = 0xEFFFD;
\r
93 String rule = "&a < ab < c/aba < d < z < ch";
\r
94 RuleBasedCollator coll = null;
\r
96 coll = new RuleBasedCollator(rule);
\r
97 } catch (Exception e) {
\r
98 warnln("Fail to create RuleBasedCollator");
\r
102 String str = String.valueOf(ch);
\r
104 CollationElementIterator iter = coll.getCollationElementIterator(str);
\r
// Sweep over UTF-16 code units below 0xFFFF. Each character is walked
// backwards with previous(), and a failure is logged when
// getMaxExpansion(order) is smaller than count.
// NOTE(review): count is presumably the number of elements previous()
// produced for this character -- confirm.
106 while (ch < 0xFFFF) {
\r
109 str = String.valueOf(ch);
\r
111 int order = iter.previous();
\r
113 // thai management
\r
115 order = iter.previous();
\r
118 while (iter.previous() != CollationElementIterator.NULLORDER) {
\r
122 if (iter.getMaxExpansion(order) < count) {
\r
123 errln("Failure at codepoint " + ch + ", maximum expansion count < " + count);
\r
127 // testing for exact max expansion
\r
// Every character below 0x61 ('a') must report a maximum expansion of exactly 1.
129 while (ch < 0x61) {
\r
130 str = String.valueOf(ch);
\r
132 int order = iter.previous();
\r
134 if (iter.getMaxExpansion(order) != 1) {
\r
135 errln("Failure at codepoint 0x" + Integer.toHexString(ch)
\r
136 + " maximum expansion count == 1");
\r
142 str = String.valueOf(ch);
\r
144 int temporder = iter.previous();
\r
// This character's last collation element must report exactly 3.
146 if (iter.getMaxExpansion(temporder) != 3) {
\r
147 errln("Failure at codepoint 0x" + Integer.toHexString(ch)
\r
148 + " maximum expansion count == 3");
\r
152 str = String.valueOf(ch);
\r
154 temporder = iter.previous();
\r
// This character's last collation element must report exactly 1.
156 if (iter.getMaxExpansion(temporder) != 1) {
\r
157 errln("Failure at codepoint 0x" + Integer.toHexString(ch)
\r
158 + " maximum expansion count == 1");
\r
// The supplementary code point held in 'unassigned' must report exactly 2.
161 str = UCharacter.toString(unassigned);
\r
163 temporder = iter.previous();
\r
165 if (iter.getMaxExpansion(temporder) != 2) {
\r
// NOTE(review): the message prints ch, but the text under test here was
// built from 'unassigned'; the reported code point looks wrong.
166 errln("Failure at codepoint 0x" + Integer.toHexString(ch)
\r
167 + " maximum expansion count == 2");
\r
173 str = String.valueOf(ch);
\r
175 temporder = iter.previous();
\r
// Upper-bound check only: a value of 3 or less is accepted.
// NOTE(review): the message text says "< 3" although the failing
// condition is "> 3".
177 if (iter.getMaxExpansion(temporder) > 3) {
\r
178 errln("Failure at codepoint 0x" + Integer.toHexString(ch)
\r
179 + " maximum expansion count < 3");
\r
// testing special jamo: the escapes below spell & q < U+1165 / q q q q
// (the reset character is 'q', U+0071)
182 // testing special jamo
\r
183 rule = "\u0026\u0071\u003c\u1165\u002f\u0071\u0071\u0071\u0071";
\r
186 coll = new RuleBasedCollator(rule);
\r
187 } catch (Exception e) {
\r
// Unlike the first collator (warnln), failing to build this one is an error.
188 errln("Fail to create RuleBasedCollator");
\r
191 iter = coll.getCollationElementIterator(str);
\r
193 temporder = iter.previous();
\r
// With the jamo rule in effect the last element must report exactly 6.
195 if (iter.getMaxExpansion(temporder) != 6) {
\r
// NOTE(review): as above, the message prints ch rather than the text
// that was actually iterated.
196 errln("Failure at codepoint 0x" + Integer.toHexString(ch)
\r
197 + " maximum expansion count == 6");
\r
202 * Test for getOffset() and setOffset()
\r
// Exercises getOffset()/setOffset() on CollationElementIterator in four areas:
// the two boundary offsets, the offset reported after a complete forward
// pass, resetting to the start (compared against a fresh iterator), and
// offsets that fall inside a contraction or a surrogate pair.
204 public void TestOffset(/* char* par */) {
\r
205 RuleBasedCollator en_us;
\r
207 en_us = (RuleBasedCollator)Collator.getInstance(Locale.US);
\r
208 } catch (Exception e) {
\r
209 warnln("ERROR: in creation of collator of ENGLISH locale");
\r
213 CollationElementIterator iter = en_us.getCollationElementIterator(test1);
\r
214 // testing boundaries
\r
// At the start of the text previous() must have nothing to return ...
216 if (iter.previous() != CollationElementIterator.NULLORDER) {
\r
217 errln("Error: After setting offset to 0, we should be at the end "
\r
218 + "of the backwards iteration");
\r
// ... and at the end of the text next() must have nothing to return.
220 iter.setOffset(test1.length());
\r
221 if (iter.next() != CollationElementIterator.NULLORDER) {
\r
222 errln("Error: After setting offset to the end of the string, we "
\r
223 + "should be at the end of the forwards iteration");
\r
226 // Run all the way through the iterator, then get the offset
\r
// NOTE(review): CollationTest.getOrders is defined outside this file; it is
// assumed to reset the iterator and collect every element up to NULLORDER --
// confirm.
227 int[] orders = CollationTest.getOrders(iter);
\r
228 logln("orders.length = " + orders.length);
\r
230 int offset = iter.getOffset();
\r
// After a full pass the reported offset must equal the text length.
232 if (offset != test1.length()) {
\r
233 String msg1 = "offset at end != length: ";
\r
234 String msg2 = " vs ";
\r
235 errln(msg1 + offset + msg2 + test1.length());
\r
238 // Now set the offset back to the beginning and see if it works
\r
// A never-used iterator over the same text serves as the reference.
239 CollationElementIterator pristine = en_us.getCollationElementIterator(test1);
\r
243 } catch(Exception e) {
\r
244 errln("setOffset failed.");
\r
246 assertEqual(iter, pristine);
\r
248 // setting offset in the middle of a contraction
\r
249 String contraction = "change";
\r
250 RuleBasedCollator tailored = null;
\r
// "& a < ch" turns the pair "ch" into a contraction.
252 tailored = new RuleBasedCollator("& a < ch");
\r
253 } catch (Exception e) {
\r
254 errln("Error: in creation of Spanish collator");
\r
257 iter = tailored.getCollationElementIterator(contraction);
\r
// Each case below captures the elements of the whole text, moves the offset
// to the second code unit of a multi-unit sequence, captures the elements
// again, and requires both arrays to be equal.
258 int order[] = CollationTest.getOrders(iter);
\r
// index 1 of "change" is the 'h' of the leading "ch"
259 iter.setOffset(1); // sets offset in the middle of ch
\r
260 int order2[] = CollationTest.getOrders(iter);
\r
261 if (!Arrays.equals(order, order2)) {
\r
262 errln("Error: setting offset in the middle of a contraction should be the same as setting it to the start of the contraction");
\r
264 contraction = "peache";
\r
265 iter = tailored.getCollationElementIterator(contraction);
\r
267 order = CollationTest.getOrders(iter);
\r
// index 4 of "peache" is the 'h' of the embedded "ch"
268 iter.setOffset(4); // sets offset in the middle of ch
\r
269 order2 = CollationTest.getOrders(iter);
\r
270 if (!Arrays.equals(order, order2)) {
\r
271 errln("Error: setting offset in the middle of a contraction should be the same as setting it to the start of the contraction");
\r
273 // setting offset in the middle of a surrogate pair
\r
274 String surrogate = "\ud800\udc00str";
\r
275 iter = tailored.getCollationElementIterator(surrogate);
\r
276 order = CollationTest.getOrders(iter);
\r
// index 1 is the trail surrogate of the pair at the start of the text
277 iter.setOffset(1); // sets offset in the middle of surrogate
\r
278 order2 = CollationTest.getOrders(iter);
\r
279 if (!Arrays.equals(order, order2)) {
\r
280 errln("Error: setting offset in the middle of a surrogate pair should be the same as setting it to the start of the surrogate pair");
\r
282 surrogate = "simple\ud800\udc00str";
\r
283 iter = tailored.getCollationElementIterator(surrogate);
\r
285 order = CollationTest.getOrders(iter);
\r
// "simple" occupies indices 0-5, so index 7 is the trail surrogate
286 iter.setOffset(7); // sets offset in the middle of surrogate
\r
287 order2 = CollationTest.getOrders(iter);
\r
288 if (!Arrays.equals(order, order2)) {
\r
289 errln("Error: setting offset in the middle of a surrogate pair should be the same as setting it to the start of the surrogate pair");
\r
291 // TODO: try iterating halfway through a messy string.
\r
// Helper shared by TestOffset and TestSetText: walks two iterators in step,
// logging an error that shows both elements in hex, and stops once the first
// iterator yields NULLORDER. Afterwards each iterator is passed to
// CollationTest.backAndForth.
// NOTE(review): the message compares "strength(...)" of the two elements, so
// the check is presumably made on a derived strength value rather than on the
// raw elements -- confirm.
296 void assertEqual(CollationElementIterator i1, CollationElementIterator i2) {
\r
// count is the position printed at the start of the mismatch message.
297 int c1, c2, count = 0;
\r
302 errln(" " + count + ": strength(0x" +
\r
303 Integer.toHexString(c1) + ") != strength(0x" + Integer.toHexString(c2) + ")");
\r
// Termination depends on c1 only; c2 is not part of the loop condition.
307 } while (c1 != CollationElementIterator.NULLORDER);
\r
308 CollationTest.backAndForth(this, i1);
\r
309 CollationTest.backAndForth(this, i2);
\r
313 * Test for CollationElementIterator.previous()
\r
315 * @bug 4108758 - Make sure it works with contracting characters
\r
// Runs CollationTest.backAndForth over iterators produced by several
// collators: the US English collator, rule-based collators containing a
// contraction, an expansion, and both together, and finally the Thai and
// Japanese locale collators.
// NOTE(review): backAndForth is defined outside this file; it is assumed to
// check that previous() mirrors next() -- confirm.
318 public void TestPrevious(/* char* par */) {
\r
319 RuleBasedCollator en_us = (RuleBasedCollator)Collator.getInstance(Locale.US);
\r
320 CollationElementIterator iter = en_us.getCollationElementIterator(test1);
\r
322 // A basic test to see if it's working at all
\r
323 CollationTest.backAndForth(this, iter);
\r
325 // Test with a contracting character sequence
\r
327 RuleBasedCollator c1 = null;
\r
// "ch" in all four case combinations is tailored as a contraction after z.
329 c1 = new RuleBasedCollator("&a,A < b,B < c,C, d,D < z,Z < ch,cH,Ch,CH");
\r
330 } catch (Exception e) {
\r
331 errln("Couldn't create a RuleBasedCollator with a contracting sequence.");
\r
// The text contains "ch" in the middle so the contraction is hit in both directions.
335 source = "abchdcba";
\r
336 iter = c1.getCollationElementIterator(source);
\r
337 CollationTest.backAndForth(this, iter);
\r
339 // Test with an expanding character sequence
\r
340 RuleBasedCollator c2 = null;
\r
// "c/abd" gives 'c' the expansion "abd".
342 c2 = new RuleBasedCollator("&a < b < c/abd < d");
\r
343 } catch (Exception e ) {
\r
344 errln("Couldn't create a RuleBasedCollator with an expanding sequence.");
\r
349 iter = c2.getCollationElementIterator(source);
\r
350 CollationTest.backAndForth(this, iter);
\r
// Same rule string as TestMaxExpansion: an expansion (c/aba) and a
// contraction (ch) in one collator.
353 RuleBasedCollator c3 = null;
\r
355 c3 = new RuleBasedCollator("&a < b < c/aba < d < z < ch");
\r
356 } catch (Exception e) {
\r
357 errln("Couldn't create a RuleBasedCollator with both an expanding and a contracting sequence.");
\r
361 source = "abcdbchdc";
\r
362 iter = c3.getCollationElementIterator(source);
\r
363 CollationTest.backAndForth(this, iter);
\r
// Thai text followed by ASCII letters, iterated with the th_TH collator.
365 source= "\u0e41\u0e02\u0e41\u0e02\u0e27abc";
\r
366 Collator c4 = null;
\r
368 c4 = Collator.getInstance(new Locale("th", "TH", ""));
\r
369 } catch (Exception e) {
\r
370 errln("Couldn't create a collator");
\r
// The locale collator is cast to RuleBasedCollator to obtain an element iterator.
374 iter = ((RuleBasedCollator)c4).getCollationElementIterator(source);
\r
375 CollationTest.backAndForth(this, iter);
\r
// 'a', a katakana letter, a combining mark (U+3099) and U+30FC, iterated
// with the ja_JP collator.
377 source= "\u0061\u30CF\u3099\u30FC";
\r
378 Collator c5 = null;
\r
380 c5 = Collator.getInstance(new Locale("ja", "JP", ""));
\r
381 } catch (Exception e) {
\r
382 errln("Couldn't create Japanese collator\n");
\r
385 iter = ((RuleBasedCollator)c5).getCollationElementIterator(source);
\r
387 CollationTest.backAndForth(this, iter);
\r
393 * Test for setText()
\r
// Exercises the three setText overloads (String, CharacterIterator,
// UCharacterIterator): iter2 starts on test2, is partly consumed, is then
// re-targeted at test1 through each overload in turn, and after each call
// must agree with iter1, which was created on test1.
395 public void TestSetText(/* char* par */) {
\r
396 RuleBasedCollator en_us = (RuleBasedCollator)Collator.getInstance(Locale.US);
\r
397 CollationElementIterator iter1 = en_us.getCollationElementIterator(test1);
\r
398 CollationElementIterator iter2 = en_us.getCollationElementIterator(test2);
\r
400 // Run through the second iterator just to exercise it
\r
401 int c = iter2.next();
\r
// The warm-up loop is bounded by the ++i < 10 guard and also stops early on NULLORDER.
404 while ( ++i < 10 && c != CollationElementIterator.NULLORDER) {
\r
407 } catch (Exception e) {
\r
408 errln("iter2.next() returned an error.");
\r
413 // Now set it to point to the same string as the first iterator
\r
415 iter2.setText(test1);
\r
416 } catch (Exception e) {
\r
417 errln("call to iter2->setText(test1) failed.");
\r
420 assertEqual(iter1, iter2);
\r
423 // now use the overloaded setText(CharacterIterator) to set the text
\r
424 CharacterIterator chariter = new StringCharacterIterator(test1);
\r
426 iter2.setText(chariter);
\r
427 } catch (Exception e ) {
\r
428 errln("call to iter2->setText(chariter(test1)) failed.");
\r
431 assertEqual(iter1, iter2);
\r
434 // now use the overloaded setText(UCharacterIterator) to set the text
\r
435 UCharacterIterator uchariter = UCharacterIterator.getInstance(test1);
\r
437 iter2.setText(uchariter);
\r
438 } catch (Exception e ) {
\r
439 errln("call to iter2->setText(uchariter(test1)) failed.");
\r
442 assertEqual(iter1, iter2);
\r
446 * Test for CollationElementIterator previous and next for the whole set of
\r
447 * unicode characters.
\r
// Runs CollationTest.backAndForth with the US English collator over the
// defined characters from 1 up to (but excluding) 0xFFFE, fed to the iterator
// in batches rather than one character at a time.
449 public void TestUnicodeChar() {
\r
450 RuleBasedCollator en_us = (RuleBasedCollator)Collator.getInstance(Locale.US);
\r
451 CollationElementIterator iter;
\r
453 StringBuffer source = new StringBuffer();
\r
// Initial smoke test on three Thai code points before the full sweep.
454 source.append("\u0e4d\u0e4e\u0e4f");
\r
455 // source.append("\u04e8\u04e9");
\r
456 iter = en_us.getCollationElementIterator(source.toString());
\r
457 // A basic test to see if it's working at all
\r
458 CollationTest.backAndForth(this, iter);
\r
// The for header has no update clause; codepoint is advanced inside the body.
459 for (codepoint = 1; codepoint < 0xFFFE;) {
\r
// Start every batch from an empty buffer.
460 source.delete(0, source.length());
\r
// Collect defined characters until codepoint reaches a multiple of 0xFF.
// NOTE(review): append(codepoint) appends a character only if codepoint is
// declared as char; with an int it would append decimal digits -- confirm
// the declaration.
461 while (codepoint % 0xFF != 0) {
\r
462 if (UCharacter.isDefined(codepoint)) {
\r
463 source.append(codepoint);
\r
// The boundary character itself is added too when it is defined.
468 if (UCharacter.isDefined(codepoint)) {
\r
469 source.append(codepoint);
\r
472 if (codepoint != 0xFFFF) {
\r
475 /*if (codepoint >= 0x04fc) {
\r
476 System.out.println("codepoint " + Integer.toHexString(codepoint));
\r
477 String str = source.substring(230, 232);
\r
478 System.out.println(com.ibm.icu.impl.Utility.escape(str));
\r
479 System.out.println("codepoint " + Integer.toHexString(codepoint)
\r
480 + "length " + str.length());
\r
481 iter = en_us.getCollationElementIterator(str);
\r
482 CollationTest.backAndForth(this, iter);
\r
485 iter = en_us.getCollationElementIterator(source.toString());
\r
486 // A basic test to see if it's working at all
\r
487 CollationTest.backAndForth(this, iter);
\r
492 * Test for CollationElementIterator previous and next for the whole set of
\r
493 * unicode characters with normalization on.
\r
// Same batch sweep as TestUnicodeChar, but using the Thai (th_TH) collator,
// which the comment below says has normalization turned on.
495 public void TestNormalizedUnicodeChar()
\r
497 // thai should have normalization on
\r
498 RuleBasedCollator th_th = null;
\r
500 th_th = (RuleBasedCollator)Collator.getInstance(
\r
501 new Locale("th", "TH"));
\r
502 } catch (Exception e) {
\r
503 warnln("Error creating Thai collator");
\r
506 StringBuffer source = new StringBuffer();
\r
// Initial smoke test on the single character U+FDFA before the full sweep.
507 source.append('\uFDFA');
\r
508 CollationElementIterator iter
\r
509 = th_th.getCollationElementIterator(source.toString());
\r
510 CollationTest.backAndForth(this, iter);
\r
// codepoint is a char here, so append(codepoint) adds one UTF-16 code unit.
// The for header has no update clause; codepoint is advanced inside the body.
511 for (char codepoint = 0x1; codepoint < 0xfffe;) {
\r
// Start every batch from an empty buffer.
512 source.delete(0, source.length());
\r
// Collect defined characters until codepoint reaches a multiple of 0xFF.
513 while (codepoint % 0xFF != 0) {
\r
514 if (UCharacter.isDefined(codepoint)) {
\r
515 source.append(codepoint);
\r
// The boundary character itself is added too when it is defined.
520 if (UCharacter.isDefined(codepoint)) {
\r
521 source.append(codepoint);
\r
524 if (codepoint != 0xFFFF) {
\r
528 /*if (((int)codepoint) >= 0xfe00) {
\r
529 String str = source.substring(185, 190);
\r
530 System.out.println(com.ibm.icu.impl.Utility.escape(str));
\r
531 System.out.println("codepoint "
\r
532 + Integer.toHexString(codepoint)
\r
533 + "length " + str.length());
\r
534 iter = th_th.getCollationElementIterator(str);
\r
535 CollationTest.backAndForth(this, iter);
\r
537 iter = th_th.getCollationElementIterator(source.toString());
\r
538 // A basic test to see if it's working at all
\r
539 CollationTest.backAndForth(this, iter);
\r
544 * Testing the discontiguous contractions
\r
// Table-driven check of discontiguous contractions. src[i] is the text to
// iterate; tgt[i] is the same text split by spaces into the pieces that are
// expected to be mapped as units. The collation elements of each piece,
// taken on its own, must match the elements produced for src[i], in order.
546 public void TestDiscontiguous()
\r
// The rule defines the contractions AB, X+U+0300, ABC and X+U+0300+U+0315.
548 String rulestr ="&z < AB < X\u0300 < ABC < X\u0300\u0315";
\r
549 String src[] = {"ADB", "ADBC", "A\u0315B", "A\u0315BC",
\r
550 // base character blocked
\r
551 "XD\u0300", "XD\u0300\u0315",
\r
552 // non blocking combining character
\r
553 "X\u0319\u0300", "X\u0319\u0300\u0315",
\r
554 // blocking combining character
\r
555 "X\u0314\u0300", "X\u0314\u0300\u0315",
\r
556 // contraction prefix
\r
557 "ABDC", "AB\u0315C","X\u0300D\u0315",
\r
558 "X\u0300\u0319\u0315", "X\u0300\u031A\u0315",
\r
559 // ends not with a contraction character
\r
560 "X\u0319\u0300D", "X\u0319\u0300\u0315D",
\r
561 "X\u0300D\u0315D", "X\u0300\u0319\u0315D",
\r
562 "X\u0300\u031A\u0315D"
\r
// Expected segmentation, index-aligned with src.
564 String tgt[] = {// non blocking combining character
\r
565 "A D B", "A D BC", "A \u0315 B", "A \u0315 BC",
\r
566 // base character blocked
\r
567 "X D \u0300", "X D \u0300\u0315",
\r
568 // non blocking combining character
\r
569 "X\u0300 \u0319", "X\u0300\u0315 \u0319",
\r
570 // blocking combining character
\r
571 "X \u0314 \u0300", "X \u0314 \u0300\u0315",
\r
572 // contraction prefix
\r
573 "AB DC", "AB \u0315 C","X\u0300 D \u0315",
\r
574 "X\u0300\u0315 \u0319", "X\u0300 \u031A \u0315",
\r
575 // ends not with a contraction character
\r
576 "X\u0300 \u0319D", "X\u0300\u0315 \u0319D",
\r
577 "X\u0300 D\u0315D", "X\u0300\u0315 \u0319D",
\r
578 "X\u0300 \u031A\u0315D"
\r
582 RuleBasedCollator coll = new RuleBasedCollator(rulestr);
\r
// Both iterators are created on empty text and re-targeted with setText:
// iter walks the source text, resultiter walks one expected piece at a time.
583 CollationElementIterator iter
\r
584 = coll.getCollationElementIterator("");
\r
585 CollationElementIterator resultiter
\r
586 = coll.getCollationElementIterator("");
\r
587 while (count < src.length) {
\r
588 iter.setText(src[count]);
\r
// s is the start of the current piece of tgt[count], e the index of the next space.
590 while (s < tgt[count].length()) {
\r
591 int e = tgt[count].indexOf(' ', s);
\r
// Fallback end index for the final piece.
593 e = tgt[count].length();
\r
595 String resultstr = tgt[count].substring(s, e);
\r
596 resultiter.setText(resultstr);
\r
597 int ce = resultiter.next();
\r
// Every element of the expected piece must equal the next element of the source text.
598 while (ce != CollationElementIterator.NULLORDER) {
\r
599 if (ce != iter.next()) {
\r
600 errln("Discontiguos contraction test mismatch at"
\r
604 ce = resultiter.next();
\r
609 CollationTest.backAndForth(this, iter);
\r
// Any exception is reported with warnln rather than failing the test.
613 catch (Exception e) {
\r
614 warnln("Error running discontiguous tests " + e.toString());
\r
619 * Test the incremental normalization
\r
// Builds a rule-based collator with canonical decomposition switched on and
// runs CollationTest.backAndForth over a list of strings that contain
// combining marks, several of them as the same marks in different orders.
621 public void TestNormalization()
\r
// The tailoring contains contractions made of, or ending in, combining marks.
623 String rules = "&a < \u0300\u0315 < A\u0300\u0315 < \u0316\u0315B < \u0316\u0300\u0315";
\r
624 String testdata[] = {"\u1ED9", "o\u0323\u0302",
\r
625 "\u0300\u0315", "\u0315\u0300",
\r
626 "A\u0300\u0315B", "A\u0315\u0300B",
\r
627 "A\u0316\u0315B", "A\u0315\u0316B",
\r
628 "\u0316\u0300\u0315", "\u0315\u0300\u0316",
\r
629 "A\u0316\u0300\u0315B", "A\u0315\u0300\u0316B",
\r
630 "\u0316\u0315\u0300", "A\u0316\u0315\u0300B"};
\r
631 RuleBasedCollator coll = null;
\r
633 coll = new RuleBasedCollator(rules);
\r
634 coll.setDecomposition(Collator.CANONICAL_DECOMPOSITION);
\r
635 } catch (Exception e) {
\r
636 warnln("ERROR: in creation of collator using rules " + rules);
\r
// One iterator is reused for every entry; the initial text "testing" is
// replaced by setText before each check.
640 CollationElementIterator iter = coll.getCollationElementIterator("testing");
\r
641 for (int count = 0; count < testdata.length; count ++) {
\r
642 iter.setText(testdata[count]);
\r
643 CollationTest.backAndForth(this, iter);
\r