2 *******************************************************************************
\r
3 * Copyright (C) 2002-2010, International Business Machines Corporation and *
\r
4 * others. All Rights Reserved. *
\r
5 *******************************************************************************
\r
9 * Port From: ICU4C v2.1 : collate/CollationIteratorTest
\r
10 * Source File: $ICU4CRoot/source/test/intltest/itercoll.cpp
\r
13 package com.ibm.icu.dev.test.collator;
\r
15 import java.text.CharacterIterator;
\r
16 import java.text.StringCharacterIterator;
\r
17 import java.util.Arrays;
\r
18 import java.util.Locale;
\r
20 import com.ibm.icu.dev.test.TestFmwk;
\r
21 import com.ibm.icu.lang.UCharacter;
\r
22 import com.ibm.icu.text.CollationElementIterator;
\r
23 import com.ibm.icu.text.Collator;
\r
24 import com.ibm.icu.text.RuleBasedCollator;
\r
25 import com.ibm.icu.text.UCharacterIterator;
\r
// Tests for CollationElementIterator; per the file header, a port of ICU4C's
// collate/CollationIteratorTest (itercoll.cpp). Extends the TestFmwk test framework class.
27 public class CollationIteratorTest extends TestFmwk {
\r
// Sample text shared by the tests in this class (read by TestOffset, TestPrevious and TestSetText).
29 String test1 = "What subset of all possible test cases?";
\r
// Second sample text; TestSetText uses it as the initial text of its second iterator.
30 String test2 = "has the highest probability of detecting";
\r
// Command-line entry point: creates a CollationIteratorTest and hands the arguments to run(),
// which is not defined in this file (presumably inherited from TestFmwk - confirm).
32 public static void main(String[] args) throws Exception {
\r
33 new CollationIteratorTest().run(args);
\r
// Disabled alternative below: invoke a single test method directly instead of going through run().
34 // new CollationIteratorTest().TestNormalizedUnicodeChar();
\r
// Checks that repositioning the iterator discards a partially consumed expansion:
// the first element is saved, the iterator is moved to offset 3 and read once, moved back
// to offset 0, and the element read there is reported against the saved one.
// NOTE(review): this listing is missing lines (the try openers, the declarations of e0 and e,
// and the condition guarding the final errln - presumably e != e0); confirm against the full file.
40 public void TestClearBuffers(/* char* par */) {
\r
41 RuleBasedCollator c = null;
\r
// The rule tailors "d" as equal to "ab"; the inline comment further down treats the 'd'
// at offset 3 of "abcd" as the expanding character.
43 c = new RuleBasedCollator("&a < b < c & ab = d");
\r
44 } catch (Exception e) {
\r
// Collator creation failure is reported with warnln, unlike the later failures which use errln.
45 warnln("Couldn't create a RuleBasedCollator.");
\r
49 String source = "abcd";
\r
50 CollationElementIterator i = c.getCollationElementIterator(source);
\r
53 e0 = i.next(); // save the first collation element
\r
54 } catch (Exception e) {
\r
55 errln("call to i.next() failed.");
\r
60 i.setOffset(3); // go to the expanding character
\r
61 } catch (Exception e) {
\r
62 errln("call to i.setOffset(3) failed.");
\r
67 i.next(); // but only use up half of it
\r
68 } catch (Exception e) {
\r
69 errln("call to i.next() failed.");
\r
74 i.setOffset(0); // go back to the beginning
\r
75 } catch (Exception e) {
\r
76 errln("call to i.setOffset(0) failed. ");
\r
81 e = i.next(); // and get this one again
\r
// The exception variable is named ee here, presumably because e is the int element being read.
82 } catch (Exception ee) {
\r
83 errln("call to i.next() failed. ");
\r
// Failure report: the element read after returning to offset 0 versus the one saved at the start.
88 errln("got 0x" + Integer.toHexString(e) + ", expected 0x" + Integer.toHexString(e0));
\r
93 * Test for getMaxExpansion()
\r
// Exercises CollationElementIterator.getMaxExpansion() on a tailored collator:
// first a sweep over characters below 0xFFFF comparing the reported maximum with a count
// of elements read backwards, then spot checks expecting specific maximum values.
// NOTE(review): this listing is missing lines (the declaration of ch, the try openers, the
// updates of ch and count, and the setText calls that presumably precede each previous());
// the comments below describe only what is visible.
95 public void TestMaxExpansion(/* char* par */) {
\r
// A supplementary code point (above 0xFFFF); the name says it is unassigned - not verifiable here.
96 int unassigned = 0xEFFFD;
\r
// Tailoring with an expansion ("c" expands with "aba") and a contraction ("ch").
97 String rule = "&a < ab < c/aba < d < z < ch";
\r
98 RuleBasedCollator coll = null;
\r
100 coll = new RuleBasedCollator(rule);
\r
101 } catch (Exception e) {
\r
102 warnln("Fail to create RuleBasedCollator");
\r
106 String str = String.valueOf(ch);
\r
108 CollationElementIterator iter = coll.getCollationElementIterator(str);
\r
// Sweep: for each ch, read the last collation element with previous() and compare
// getMaxExpansion(order) against count.
110 while (ch < 0xFFFF) {
\r
113 str = String.valueOf(ch);
\r
115 int order = iter.previous();
\r
117 // thai management
\r
// Re-reads the element; the condition guarding this statement is not visible in this listing.
119 order = iter.previous();
\r
// Drains the remaining elements backwards; count is presumably incremented in the loop body - confirm.
122 while (iter.previous() != CollationElementIterator.NULLORDER) {
\r
126 if (iter.getMaxExpansion(order) < count) {
\r
127 errln("Failure at codepoint " + ch + ", maximum expansion count < " + count);
\r
131 // testing for exact max expansion
\r
// For characters below 0x61 ('a') the reported maximum is expected to be exactly 1.
133 while (ch < 0x61) {
\r
134 str = String.valueOf(ch);
\r
136 int order = iter.previous();
\r
138 if (iter.getMaxExpansion(order) != 1) {
\r
139 errln("Failure at codepoint 0x" + Integer.toHexString(ch)
\r
140 + " maximum expansion count == 1");
\r
// Spot check expecting a maximum of 3; the value assigned to ch beforehand is not visible here.
146 str = String.valueOf(ch);
\r
148 int temporder = iter.previous();
\r
150 if (iter.getMaxExpansion(temporder) != 3) {
\r
151 errln("Failure at codepoint 0x" + Integer.toHexString(ch)
\r
152 + " maximum expansion count == 3");
\r
// Spot check expecting a maximum of 1; the value assigned to ch beforehand is not visible here.
156 str = String.valueOf(ch);
\r
158 temporder = iter.previous();
\r
160 if (iter.getMaxExpansion(temporder) != 1) {
\r
161 errln("Failure at codepoint 0x" + Integer.toHexString(ch)
\r
162 + " maximum expansion count == 1");
\r
// Spot check for the supplementary code point held in unassigned; a maximum of 2 is expected.
165 str = UCharacter.toString(unassigned);
\r
167 temporder = iter.previous();
\r
169 if (iter.getMaxExpansion(temporder) != 2) {
\r
// NOTE(review): this message prints ch although the string under test was built from
// unassigned, so the reported code point may be misleading - confirm.
170 errln("Failure at codepoint 0x" + Integer.toHexString(ch)
\r
171 + " maximum expansion count == 2");
\r
// Spot check with an upper bound: fails only when the reported maximum exceeds 3.
177 str = String.valueOf(ch);
\r
179 temporder = iter.previous();
\r
181 if (iter.getMaxExpansion(temporder) > 3) {
\r
182 errln("Failure at codepoint 0x" + Integer.toHexString(ch)
\r
183 + " maximum expansion count < 3");
\r
186 // testing special jamo &a<\u1165
\r
// NOTE(review): the escapes in the rule below spell "&q<" + U+1165 + "/qqqq" - the reset
// is on 'q' with a four-'q' expansion, not on 'a' as the comment above says.
187 rule = "\u0026\u0071\u003c\u1165\u002f\u0071\u0071\u0071\u0071";
\r
190 coll = new RuleBasedCollator(rule);
\r
191 } catch (Exception e) {
\r
// Unlike the first creation failure (warnln), a failure here is reported with errln.
192 errln("Fail to create RuleBasedCollator");
\r
// New iterator from the second collator; str is whatever was last assigned above unless a
// missing line reassigns it - confirm.
195 iter = coll.getCollationElementIterator(str);
\r
197 temporder = iter.previous();
\r
199 if (iter.getMaxExpansion(temporder) != 6) {
\r
200 errln("Failure at codepoint 0x" + Integer.toHexString(ch)
\r
201 + " maximum expansion count == 6");
\r
206 * Test for getOffset() and setOffset()
\r
// Exercises getOffset()/setOffset(): boundary positions, the offset after a full pass,
// resetting to the start, and offsets placed inside a contraction or a surrogate pair.
// NOTE(review): this listing is missing lines (try openers, returns, and at least one
// setOffset call before the first boundary check); comments describe only what is visible.
208 public void TestOffset(/* char* par */) {
\r
209 RuleBasedCollator en_us;
\r
211 en_us = (RuleBasedCollator)Collator.getInstance(Locale.US);
\r
212 } catch (Exception e) {
\r
213 warnln("ERROR: in creation of collator of ENGLISH locale");
\r
217 CollationElementIterator iter = en_us.getCollationElementIterator(test1);
\r
218 // testing boundaries
\r
// At the start, previous() must return NULLORDER (the message implies a setOffset(0) call
// precedes this check - not visible here).
220 if (iter.previous() != CollationElementIterator.NULLORDER) {
\r
221 errln("Error: After setting offset to 0, we should be at the end "
\r
222 + "of the backwards iteration");
\r
// At the end of the text, next() must return NULLORDER.
224 iter.setOffset(test1.length());
\r
225 if (iter.next() != CollationElementIterator.NULLORDER) {
\r
226 errln("Error: After setting offset to the end of the string, we "
\r
227 + "should be at the end of the forwards iteration");
\r
230 // Run all the way through the iterator, then get the offset
\r
231 int[] orders = CollationTest.getOrders(iter);
\r
232 logln("orders.length = " + orders.length);
\r
234 int offset = iter.getOffset();
\r
// After the full pass the offset is expected to equal the text length.
236 if (offset != test1.length()) {
\r
237 String msg1 = "offset at end != length: ";
\r
238 String msg2 = " vs ";
\r
239 errln(msg1 + offset + msg2 + test1.length());
\r
242 // Now set the offset back to the beginning and see if it works
\r
// A fresh iterator over the same text serves as the reference for the reset one.
243 CollationElementIterator pristine = en_us.getCollationElementIterator(test1);
\r
247 } catch(Exception e) {
\r
248 errln("setOffset failed.");
\r
250 assertEqual(iter, pristine);
\r
252 // setting offset in the middle of a contraction
\r
253 String contraction = "change";
\r
254 RuleBasedCollator tailored = null;
\r
// Tailoring that makes "ch" a contraction.
256 tailored = new RuleBasedCollator("& a < ch");
\r
257 } catch (Exception e) {
\r
// NOTE(review): the message says "Spanish collator" but the collator is built from the
// rule string above, not from a Spanish locale.
258 errln("Error: in creation of Spanish collator");
\r
261 iter = tailored.getCollationElementIterator(contraction);
\r
// Orders collected from the initial position versus orders collected after setOffset(1);
// the two arrays are expected to be equal.
262 int order[] = CollationTest.getOrders(iter);
\r
263 iter.setOffset(1); // sets offset in the middle of ch
\r
264 int order2[] = CollationTest.getOrders(iter);
\r
265 if (!Arrays.equals(order, order2)) {
\r
266 errln("Error: setting offset in the middle of a contraction should be the same as setting it to the start of the contraction");
\r
// Same idea with the contraction not at the start of the text ("ch" occupies indices 3-4).
268 contraction = "peache";
\r
269 iter = tailored.getCollationElementIterator(contraction);
\r
271 order = CollationTest.getOrders(iter);
\r
272 iter.setOffset(4); // sets offset in the middle of ch
\r
273 order2 = CollationTest.getOrders(iter);
\r
274 if (!Arrays.equals(order, order2)) {
\r
275 errln("Error: setting offset in the middle of a contraction should be the same as setting it to the start of the contraction");
\r
277 // setting offset in the middle of a surrogate pair
\r
// The text starts with the surrogate pair D800 DC00; offset 1 falls between its two halves.
278 String surrogate = "\ud800\udc00str";
\r
279 iter = tailored.getCollationElementIterator(surrogate);
\r
280 order = CollationTest.getOrders(iter);
\r
281 iter.setOffset(1); // sets offset in the middle of surrogate
\r
282 order2 = CollationTest.getOrders(iter);
\r
283 if (!Arrays.equals(order, order2)) {
\r
284 errln("Error: setting offset in the middle of a surrogate pair should be the same as setting it to the start of the surrogate pair");
\r
// Surrogate pair after six BMP characters; offset 7 falls between its two halves.
286 surrogate = "simple\ud800\udc00str";
\r
287 iter = tailored.getCollationElementIterator(surrogate);
\r
289 order = CollationTest.getOrders(iter);
\r
290 iter.setOffset(7); // sets offset in the middle of surrogate
\r
291 order2 = CollationTest.getOrders(iter);
\r
292 if (!Arrays.equals(order, order2)) {
\r
293 errln("Error: setting offset in the middle of a surrogate pair should be the same as setting it to the start of the surrogate pair");
\r
295 // TODO: try iterating halfway through a messy string.
\r
// Helper used by the tests to compare two collation element iterators.
// Visible behavior: a do/while loop that ends when c1 is NULLORDER and reports a mismatch
// (with a running count) through errln, followed by CollationTest.backAndForth on each iterator.
// NOTE(review): the loop body that assigns c1/c2 and the exact comparison are not visible
// in this listing; the message wording suggests a strength-level comparison - confirm.
300 void assertEqual(CollationElementIterator i1, CollationElementIterator i2) {
\r
301 int c1, c2, count = 0;
\r
306 errln(" " + count + ": strength(0x" +
\r
307 Integer.toHexString(c1) + ") != strength(0x" + Integer.toHexString(c2) + ")");
\r
311 } while (c1 != CollationElementIterator.NULLORDER);
\r
// Both iterators are additionally run through the shared back-and-forth iteration check.
312 CollationTest.backAndForth(this, i1);
\r
313 CollationTest.backAndForth(this, i2);
\r
317 * Test for CollationElementIterator.previous()
\r
319 * @bug 4108758 - Make sure it works with contracting characters
\r
// Runs CollationTest.backAndForth over iterators from several collators: the en_US collator,
// three rule-based tailorings (contraction, expansion, both), and the Thai and Japanese
// locale collators.
// NOTE(review): this listing is missing lines (try openers, returns, the declaration of
// source, and the source text used with the expanding tailoring).
322 public void TestPrevious(/* char* par */) {
\r
323 RuleBasedCollator en_us = (RuleBasedCollator)Collator.getInstance(Locale.US);
\r
324 CollationElementIterator iter = en_us.getCollationElementIterator(test1);
\r
326 // A basic test to see if it's working at all
\r
327 CollationTest.backAndForth(this, iter);
\r
329 // Test with a contracting character sequence
\r
331 RuleBasedCollator c1 = null;
\r
// "ch" in its four case variants is tailored as a contraction after z,Z.
333 c1 = new RuleBasedCollator("&a,A < b,B < c,C, d,D < z,Z < ch,cH,Ch,CH");
\r
334 } catch (Exception e) {
\r
335 errln("Couldn't create a RuleBasedCollator with a contracting sequence.");
\r
// The text contains the contraction "ch" in the middle.
339 source = "abchdcba";
\r
340 iter = c1.getCollationElementIterator(source);
\r
341 CollationTest.backAndForth(this, iter);
\r
343 // Test with an expanding character sequence
\r
344 RuleBasedCollator c2 = null;
\r
// "c" is tailored with the expansion "abd".
346 c2 = new RuleBasedCollator("&a < b < c/abd < d");
\r
347 } catch (Exception e ) {
\r
348 errln("Couldn't create a RuleBasedCollator with an expanding sequence.");
\r
353 iter = c2.getCollationElementIterator(source);
\r
354 CollationTest.backAndForth(this, iter);
\r
357 RuleBasedCollator c3 = null;
\r
// Tailoring that combines an expansion ("c" with "aba") and a contraction ("ch").
359 c3 = new RuleBasedCollator("&a < b < c/aba < d < z < ch");
\r
360 } catch (Exception e) {
\r
361 errln("Couldn't create a RuleBasedCollator with both an expanding and a contracting sequence.");
\r
365 source = "abcdbchdc";
\r
366 iter = c3.getCollationElementIterator(source);
\r
367 CollationTest.backAndForth(this, iter);
\r
// Thai-block characters (U+0E41, U+0E02, U+0E27) followed by ASCII, iterated with the th_TH collator.
369 source= "\u0e41\u0e02\u0e41\u0e02\u0e27abc";
\r
370 Collator c4 = null;
\r
372 c4 = Collator.getInstance(new Locale("th", "TH", ""));
\r
373 } catch (Exception e) {
\r
374 errln("Couldn't create a collator");
\r
// The cast assumes the locale collator is a RuleBasedCollator.
378 iter = ((RuleBasedCollator)c4).getCollationElementIterator(source);
\r
379 CollationTest.backAndForth(this, iter);
\r
// 'a' followed by katakana U+30CF, the combining mark U+3099 and U+30FC, iterated with the ja_JP collator.
381 source= "\u0061\u30CF\u3099\u30FC";
\r
382 Collator c5 = null;
\r
384 c5 = Collator.getInstance(new Locale("ja", "JP", ""));
\r
385 } catch (Exception e) {
\r
386 errln("Couldn't create Japanese collator\n");
\r
389 iter = ((RuleBasedCollator)c5).getCollationElementIterator(source);
\r
391 CollationTest.backAndForth(this, iter);
\r
397 * Test for setText()
\r
// Exercises the three setText overloads used here (String, CharacterIterator,
// UCharacterIterator): after each call, iter2 is compared via assertEqual with iter1,
// which was created directly over test1.
// NOTE(review): this listing is missing lines (try openers, the declaration of i and the
// body of the warm-up loop).
399 public void TestSetText(/* char* par */) {
\r
400 RuleBasedCollator en_us = (RuleBasedCollator)Collator.getInstance(Locale.US);
\r
401 CollationElementIterator iter1 = en_us.getCollationElementIterator(test1);
\r
402 CollationElementIterator iter2 = en_us.getCollationElementIterator(test2);
\r
404 // Run through the second iterator just to exercise it
\r
405 int c = iter2.next();
\r
// The loop stops once ++i reaches 10 or the iterator returns NULLORDER.
408 while ( ++i < 10 && c != CollationElementIterator.NULLORDER) {
\r
411 } catch (Exception e) {
\r
412 errln("iter2.next() returned an error.");
\r
417 // Now set it to point to the same string as the first iterator
\r
419 iter2.setText(test1);
\r
420 } catch (Exception e) {
\r
421 errln("call to iter2->setText(test1) failed.");
\r
424 assertEqual(iter1, iter2);
\r
427 //now use the overloaded setText(ChracterIterator&, UErrorCode) function to set the text
\r
// (The signature in the comment above is the C++ one; the Java call below is
// setText(java.text.CharacterIterator).)
428 CharacterIterator chariter = new StringCharacterIterator(test1);
\r
430 iter2.setText(chariter);
\r
431 } catch (Exception e ) {
\r
432 errln("call to iter2->setText(chariter(test1)) failed.");
\r
435 assertEqual(iter1, iter2);
\r
438 //now use the overloaded setText(ChracterIterator&, UErrorCode) function to set the text
\r
// (This third case actually exercises the setText(UCharacterIterator) overload.)
439 UCharacterIterator uchariter = UCharacterIterator.getInstance(test1);
\r
441 iter2.setText(uchariter);
\r
442 } catch (Exception e ) {
\r
443 errln("call to iter2->setText(uchariter(test1)) failed.");
\r
446 assertEqual(iter1, iter2);
\r
450 * Test for CollationElementIterator previous and next for the whole set of
\r
451 * unicode characters.
\r
// Runs CollationTest.backAndForth with the en_US collator over batches of defined
// code points taken from the range 1..0xFFFE.
// NOTE(review): this listing is missing lines (the declaration of codepoint, the increments
// of codepoint, and closing braces); comments describe only what is visible.
453 public void TestUnicodeChar() {
\r
454 RuleBasedCollator en_us = (RuleBasedCollator)Collator.getInstance(Locale.US);
\r
455 CollationElementIterator iter;
\r
457 StringBuffer source = new StringBuffer();
\r
// Initial smoke test on three characters from the Thai block.
458 source.append("\u0e4d\u0e4e\u0e4f");
\r
459 // source.append("\u04e8\u04e9");
\r
460 iter = en_us.getCollationElementIterator(source.toString());
\r
461 // A basic test to see if it's working at all
\r
462 CollationTest.backAndForth(this, iter);
\r
// The for header has no update clause; codepoint is presumably advanced inside the loops - confirm.
463 for (codepoint = 1; codepoint < 0xFFFE;) {
\r
// Start each batch with an empty buffer.
464 source.delete(0, source.length());
\r
// The batch boundary is a multiple of 0xFF (255), not 0x100.
465 while (codepoint % 0xFF != 0) {
\r
466 if (UCharacter.isDefined(codepoint)) {
\r
// NOTE(review): append(codepoint) adds a character only if codepoint is declared as char;
// with an int it would append decimal digits. The declaration is not visible here - confirm.
467 source.append(codepoint);
\r
// Handles the code point on which the inner loop stopped.
472 if (UCharacter.isDefined(codepoint)) {
\r
473 source.append(codepoint);
\r
476 if (codepoint != 0xFFFF) {
\r
// NOTE(review): the disabled debug block below opens a block comment whose terminator is
// not visible in this listing.
479 /*if (codepoint >= 0x04fc) {
\r
480 System.out.println("codepoint " + Integer.toHexString(codepoint));
\r
481 String str = source.substring(230, 232);
\r
482 System.out.println(com.ibm.icu.impl.Utility.escape(str));
\r
483 System.out.println("codepoint " + Integer.toHexString(codepoint)
\r
484 + "length " + str.length());
\r
485 iter = en_us.getCollationElementIterator(str);
\r
486 CollationTest.backAndForth(this, iter);
\r
489 iter = en_us.getCollationElementIterator(source.toString());
\r
490 // A basic test to see if it's working at all
\r
491 CollationTest.backAndForth(this, iter);
\r
496 * Test for CollationElementIterator previous and next for the whole set of
\r
497 * unicode characters with normalization on.
\r
// Same batch walk as the plain Unicode test, but with the Thai (th_TH) collator, which the
// comment below expects to have normalization on: batches of defined characters from
// 1..0xFFFE are run through CollationTest.backAndForth.
// NOTE(review): this listing is missing lines (the opening brace, the try opener, the
// increments of codepoint, and closing braces).
499 public void TestNormalizedUnicodeChar()
\r
501 // thai should have normalization on
\r
502 RuleBasedCollator th_th = null;
\r
504 th_th = (RuleBasedCollator)Collator.getInstance(
\r
505 new Locale("th", "TH"));
\r
506 } catch (Exception e) {
\r
507 warnln("Error creating Thai collator");
\r
510 StringBuffer source = new StringBuffer();
\r
// Initial smoke test on the single character U+FDFA.
511 source.append('\uFDFA');
\r
512 CollationElementIterator iter
\r
513 = th_th.getCollationElementIterator(source.toString());
\r
514 CollationTest.backAndForth(this, iter);
\r
// codepoint is a char here, so append(codepoint) below appends one character.
// The for header has no update clause; codepoint is presumably advanced inside the loops - confirm.
515 for (char codepoint = 0x1; codepoint < 0xfffe;) {
\r
// Start each batch with an empty buffer.
516 source.delete(0, source.length());
\r
// The batch boundary is a multiple of 0xFF (255), not 0x100.
517 while (codepoint % 0xFF != 0) {
\r
518 if (UCharacter.isDefined(codepoint)) {
\r
519 source.append(codepoint);
\r
// Handles the character on which the inner loop stopped.
524 if (UCharacter.isDefined(codepoint)) {
\r
525 source.append(codepoint);
\r
528 if (codepoint != 0xFFFF) {
\r
// NOTE(review): the disabled debug block below opens a block comment whose terminator is
// not visible in this listing.
532 /*if (((int)codepoint) >= 0xfe00) {
\r
533 String str = source.substring(185, 190);
\r
534 System.out.println(com.ibm.icu.impl.Utility.escape(str));
\r
535 System.out.println("codepoint "
\r
536 + Integer.toHexString(codepoint)
\r
537 + "length " + str.length());
\r
538 iter = th_th.getCollationElementIterator(str);
\r
539 CollationTest.backAndForth(this, iter);
\r
541 iter = th_th.getCollationElementIterator(source.toString());
\r
542 // A basic test to see if it's working at all
\r
543 CollationTest.backAndForth(this, iter);
\r
548 * Testing the discontiguous contractions
\r
// Tests discontiguous contractions. For each src[i], the expected result tgt[i] is a
// space-separated list of pieces; the collation elements of each piece (read through
// resultiter) must match the successive elements returned by the iterator over src[i].
// Each source iterator is afterwards run through CollationTest.backAndForth.
// NOTE(review): this listing is missing lines (array terminators, the try opener, the
// declarations and updates of count and s, and closing braces).
550 public void TestDiscontiguous()
\r
// Tailoring with contractions AB, ABC and X followed by combining marks U+0300 / U+0300 U+0315.
552 String rulestr ="&z < AB < X\u0300 < ABC < X\u0300\u0315";
\r
// Source strings; src and tgt are parallel arrays indexed by count.
553 String src[] = {"ADB", "ADBC", "A\u0315B", "A\u0315BC",
\r
554 // base character blocked
\r
555 "XD\u0300", "XD\u0300\u0315",
\r
556 // non blocking combining character
\r
557 "X\u0319\u0300", "X\u0319\u0300\u0315",
\r
558 // blocking combining character
\r
559 "X\u0314\u0300", "X\u0314\u0300\u0315",
\r
560 // contraction prefix
\r
561 "ABDC", "AB\u0315C","X\u0300D\u0315",
\r
562 "X\u0300\u0319\u0315", "X\u0300\u031A\u0315",
\r
563 // ends not with a contraction character
\r
564 "X\u0319\u0300D", "X\u0319\u0300\u0315D",
\r
565 "X\u0300D\u0315D", "X\u0300\u0319\u0315D",
\r
566 "X\u0300\u031A\u0315D"
\r
// Expected segmentation of each source string; a space separates the pieces.
568 String tgt[] = {// non blocking combining character
\r
569 "A D B", "A D BC", "A \u0315 B", "A \u0315 BC",
\r
570 // base character blocked
\r
571 "X D \u0300", "X D \u0300\u0315",
\r
572 // non blocking combining character
\r
573 "X\u0300 \u0319", "X\u0300\u0315 \u0319",
\r
574 // blocking combining character
\r
575 "X \u0314 \u0300", "X \u0314 \u0300\u0315",
\r
576 // contraction prefix
\r
577 "AB DC", "AB \u0315 C","X\u0300 D \u0315",
\r
578 "X\u0300\u0315 \u0319", "X\u0300 \u031A \u0315",
\r
579 // ends not with a contraction character
\r
580 "X\u0300 \u0319D", "X\u0300\u0315 \u0319D",
\r
581 "X\u0300 D\u0315D", "X\u0300\u0315 \u0319D",
\r
582 "X\u0300 \u031A\u0315D"
\r
586 RuleBasedCollator coll = new RuleBasedCollator(rulestr);
\r
// Both iterators start on the empty string and are re-targeted with setText inside the loop.
587 CollationElementIterator iter
\r
588 = coll.getCollationElementIterator("");
\r
589 CollationElementIterator resultiter
\r
590 = coll.getCollationElementIterator("");
\r
591 while (count < src.length) {
\r
592 iter.setText(src[count]);
\r
// Walk the expected string piece by piece; s is the start index of the current piece.
594 while (s < tgt[count].length()) {
\r
595 int e = tgt[count].indexOf(' ', s);
\r
// Fallback end index for the last piece (presumably taken when no further space is found - confirm).
597 e = tgt[count].length();
\r
599 String resultstr = tgt[count].substring(s, e);
\r
600 resultiter.setText(resultstr);
\r
601 int ce = resultiter.next();
\r
// Every element of the expected piece must equal the next element of the source iterator.
602 while (ce != CollationElementIterator.NULLORDER) {
\r
603 if (ce != iter.next()) {
\r
604 errln("Discontiguos contraction test mismatch at"
\r
608 ce = resultiter.next();
\r
613 CollationTest.backAndForth(this, iter);
\r
// Any exception is reported with warnln.
617 catch (Exception e) {
\r
618 warnln("Error running discontiguous tests " + e.toString());
\r
623 * Test the incremental normalization
\r
// Tests iteration with canonical decomposition switched on: a tailored collator is built
// from rules containing combining-mark sequences, and each test string is run through
// CollationTest.backAndForth.
// NOTE(review): this listing is missing lines (the opening brace, the try opener, the
// return after the warning, and closing braces).
625 public void TestNormalization()
\r
// Tailoring made of sequences with the combining marks U+0300, U+0315 and U+0316.
627 String rules = "&a < \u0300\u0315 < A\u0300\u0315 < \u0316\u0315B < \u0316\u0300\u0315";
\r
// Inputs: a precomposed character (U+1ED9) next to a base-plus-combining-marks spelling,
// and the same combining marks written in different orders, with and without A/B around them.
628 String testdata[] = {"\u1ED9", "o\u0323\u0302",
\r
629 "\u0300\u0315", "\u0315\u0300",
\r
630 "A\u0300\u0315B", "A\u0315\u0300B",
\r
631 "A\u0316\u0315B", "A\u0315\u0316B",
\r
632 "\u0316\u0300\u0315", "\u0315\u0300\u0316",
\r
633 "A\u0316\u0300\u0315B", "A\u0315\u0300\u0316B",
\r
634 "\u0316\u0315\u0300", "A\u0316\u0315\u0300B"};
\r
635 RuleBasedCollator coll = null;
\r
637 coll = new RuleBasedCollator(rules);
\r
// Turn on canonical decomposition for this collator.
638 coll.setDecomposition(Collator.CANONICAL_DECOMPOSITION);
\r
639 } catch (Exception e) {
\r
640 warnln("ERROR: in creation of collator using rules " + rules);
\r
// The initial text "testing" is a placeholder; setText replaces it on every iteration.
644 CollationElementIterator iter = coll.getCollationElementIterator("testing");
\r
645 for (int count = 0; count < testdata.length; count ++) {
\r
646 iter.setText(testdata[count]);
\r
647 CollationTest.backAndForth(this, iter);
\r