2 *******************************************************************************
3 * Copyright (C) 2002-2013, International Business Machines Corporation and *
4 * others. All Rights Reserved. *
5 *******************************************************************************
9 * Port From: ICU4C v2.1 : collate/CollationRegressionTest
10 * Source File: $ICU4CRoot/source/test/intltest/regcoll.cpp
13 package com.ibm.icu.dev.test.collator;
15 import java.text.ParseException;
16 import java.util.ArrayList;
17 import java.util.List;
18 import java.util.Locale;
20 import com.ibm.icu.dev.test.TestFmwk;
21 import com.ibm.icu.text.CollationElementIterator;
22 import com.ibm.icu.text.CollationKey;
23 import com.ibm.icu.text.Collator;
24 import com.ibm.icu.text.RuleBasedCollator;
26 public class CollationRegressionTest extends TestFmwk {
// Command-line entry point: creates a CollationRegressionTest and calls run(args) on it.
27 public static void main(String[] args) throws Exception{
28 new CollationRegressionTest().run(args);
33 // CollationElementIterator.reset() doesn't work
// Regression test for bug 4048446 (CollationElementIterator.reset()).
// Creates two element iterators over the same text from the en_US collator and
// reports an error if either one is null.
35 public void Test4048446() {
36 final String test1 = "XFILE What subset of all possible test cases has the highest probability of detecting the most errors?";
37 //final String test2 = "Xf_ile What subset of all possible test cases has the lowest probability of detecting the least errors?";
38 RuleBasedCollator en_us = (RuleBasedCollator) Collator.getInstance(Locale.US);
39 CollationElementIterator i1 = en_us.getCollationElementIterator(test1);
40 CollationElementIterator i2 = en_us.getCollationElementIterator(test1);
42 if (i1 == null || i2 == null) {
43 errln("Could not create CollationElementIterator's");
// Advance i1 until it reports the end of the text (NULLORDER).
47 while (i1.next() != CollationElementIterator.NULLORDER) {
// Helper that works on a pair of CollationElementIterators.
// c1 and c2 are reported in hex inside a "strength(..) != strength(..)" message;
// presumably they hold the current elements of i1 and i2 - TODO confirm.
// The do/while loop stops once c1 equals NULLORDER.
55 void assertEqual(CollationElementIterator i1, CollationElementIterator i2) {
56 int c1, c2, count = 0;
67 msg += ": strength(0x" + Integer.toHexString(c1);
68 msg += ") != strength(0x" + Integer.toHexString(c2);
74 } while (c1 != CollationElementIterator.NULLORDER);
79 // Collator -> rules -> Collator round-trip broken for expanding characters
// Regression test for bug 4051866: Collator -> rules -> Collator round trip.
// Builds c1 from a rule string, builds c2 from c1.getRules(), and reports an
// error if the two collators return different rule strings.
81 public void Test4051866() {
82 String rules = "< o & oe ,o\u3080& oe ,\u1530 ,O& OE ,O\u3080& OE ,\u1520< p ,P";
84 // Build a collator containing expanding characters
85 RuleBasedCollator c1 = null;
88 c1 = new RuleBasedCollator(rules);
89 } catch (Exception e) {
90 errln("Fail to create RuleBasedCollator with rules:" + rules);
94 // Build another using the rules from the first
95 RuleBasedCollator c2 = null;
97 c2 = new RuleBasedCollator(c1.getRules());
98 } catch (Exception e) {
// NOTE(review): c2 is built from c1.getRules(), but this message prints the
// original `rules` string - confirm which one should be reported.
99 errln("Fail to create RuleBasedCollator with rules:" + rules);
103 // Make sure they're the same
104 if (!(c1.getRules().equals(c2.getRules())))
106 errln("Rules are not equal");
112 // Collator thinks "black-bird" == "black"
// Regression test for bug 4053636: reports an error if the en_US collator's
// equals(String, String) returns true for the two literals below.
// NOTE(review): the compared literal is "black_bird" (underscore) while the
// error message says "black-bird" (hyphen) - confirm which is intended.
114 public void Test4053636() {
115 RuleBasedCollator en_us = (RuleBasedCollator) Collator.getInstance(Locale.US);
116 if (en_us.equals("black_bird", "black")) {
117 errln("black-bird == black");
123 // CollationElementIterator will not work correctly if the associated
124 // Collator object's mode is changed
// Regression test for bug 4054238: sets NO_DECOMPOSITION on an en_US collator,
// creates an element iterator over test3 and logs the iterator's offset.
126 public void Test4054238(/* char* par */) {
// Note that the array ends with a 0 element, so test3 ends with a U+0000 character.
127 final char[] chars3 = {0x61, 0x00FC, 0x62, 0x65, 0x63, 0x6b, 0x20, 0x47, 0x72, 0x00F6, 0x00DF, 0x65, 0x20, 0x4c, 0x00FC, 0x62, 0x63, 0x6b, 0};
128 final String test3 = new String(chars3);
129 RuleBasedCollator c = (RuleBasedCollator) Collator.getInstance(Locale.US);
131 // NOTE: The Java code uses en_us to create the CollationElementIterators
132 // but I'm pretty sure that's wrong, so I've changed this to use c.
133 c.setDecomposition(Collator.NO_DECOMPOSITION);
134 CollationElementIterator i1 = c.getCollationElementIterator(test3);
135 logln("Offset:" + i1.getOffset());
140 // Collator::IDENTICAL documented but not implemented
// Regression test for bug 4054734 (IDENTICAL strength).
// Runs compareArray() on `decomp` with an en_US collator set to IDENTICAL
// strength and CANONICAL_DECOMPOSITION. The (source, operator, target) triples
// below are presumably the contents of `decomp` - TODO confirm.
142 public void Test4054734(/* char* par */) {
144 //Here's the original Java:
147 "\u0001", "<", "\u0002",
148 "\u0001", "=", "\u0001",
149 "A\u0001", ">", "~\u0002", // Ensure A and ~ are not compared bitwise
150 "\u00C0", "=", "A\u0300", // Decomp should make these equal
153 RuleBasedCollator c = (RuleBasedCollator) Collator.getInstance(Locale.US);
154 c.setStrength(Collator.IDENTICAL);
155 c.setDecomposition(Collator.CANONICAL_DECOMPOSITION);
156 compareArray(c, decomp);
// Runs a table of comparisons against collator c.
// `tests` is read in groups of three: source string, operator ("<", ">" or "="),
// target string. Any other operator is reported as a bogus comparison string.
// For each group the strings are compared with c.compare() and through their
// CollationKeys, and the results are handed to reportCResult().
// Because tests[i + 1] and tests[i + 2] are read unconditionally, tests.length
// must be a multiple of 3.
159 void compareArray(Collator c, String[] tests) {
161 int expectedResult = 0;
163 for (int i = 0; i < tests.length; i += 3) {
164 String source = tests[i];
165 String comparison = tests[i + 1];
166 String target = tests[i + 2];
168 if (comparison.equals("<")) {
170 } else if (comparison.equals(">")) {
172 } else if (comparison.equals("=")) {
175 errln("Bogus comparison string \"" + comparison + "\"");
178 int compareResult = 0;
184 compareResult = c.compare(source, target);
185 } catch (Exception e) {
189 CollationKey sourceKey = null, targetKey = null;
191 sourceKey = c.getCollationKey(source);
192 } catch (Exception e) {
193 errln("Couldn't get collationKey for source");
198 targetKey = c.getCollationKey(target);
199 } catch (Exception e) {
200 errln("Couldn't get collationKey for target");
204 int keyResult = sourceKey.compareTo(targetKey);
// compareResult is passed twice: once as compareResult and once as incResult.
205 reportCResult( source, target, sourceKey, targetKey, compareResult, keyResult, compareResult, expectedResult );
// Reports the outcome of one comparison.
// compareResult, keyResult and incResult are each checked against
// expectedResult; "Ok: ..." lines go to logln and "FAIL: ..." lines, which also
// include the expected value, go to errln.
// expectedResult must be within -1..1; otherwise the call itself is reported as invalid.
209 void reportCResult( String source, String target, CollationKey sourceKey, CollationKey targetKey,
210 int compareResult, int keyResult, int incResult, int expectedResult ){
211 if (expectedResult < -1 || expectedResult > 1)
213 errln("***** invalid call to reportCResult ****");
217 boolean ok1 = (compareResult == expectedResult);
218 boolean ok2 = (keyResult == expectedResult);
219 boolean ok3 = (incResult == expectedResult);
// All three checks passed and the run is not verbose.
221 if (ok1 && ok2 && ok3 && !isVerbose()){
// --- Collator.compare() result ---
224 String msg1 = ok1? "Ok: compare(\"" : "FAIL: compare(\"";
225 String msg2 = "\", \"";
226 String msg3 = "\") returned ";
227 String msg4 = "; expected ";
229 String sExpect = new String("");
230 String sResult = new String("");
231 sResult = appendCompareResult(compareResult, sResult);
232 sExpect = appendCompareResult(expectedResult, sExpect);
234 logln(msg1 + source + msg2 + target + msg3 + sResult);
236 errln(msg1 + source + msg2 + target + msg3 + sResult + msg4 + sExpect);
// --- CollationKey.compareTo() result ---
239 msg1 = ok2 ? "Ok: key(\"" : "FAIL: key(\"";
240 msg2 = "\").compareTo(key(\"";
241 msg3 = "\")) returned ";
242 sResult = appendCompareResult(keyResult, sResult);
244 logln(msg1 + source + msg2 + target + msg3 + sResult);
246 errln(msg1 + source + msg2 + target + msg3 + sResult + msg4 + sExpect);
// Also print both keys as hex text via prettify().
249 errln(msg1 + prettify(sourceKey) + msg2 + prettify(targetKey));
// --- incResult ("incCompare") result ---
252 msg1 = ok3 ? "Ok: incCompare(\"" : "FAIL: incCompare(\"";
254 msg3 = "\") returned ";
256 sResult = appendCompareResult(incResult, sResult);
259 logln(msg1 + source + msg2 + target + msg3 + sResult);
261 errln(msg1 + source + msg2 + target + msg3 + sResult + msg4 + sExpect);
// Builds the text for a comparison result, distinguishing -1 (LESS), 0 (EQUAL)
// and 1 (GREATER). Any other value is appended to `target` after `huh`.
266 String appendCompareResult(int result, String target) {
267 if (result == -1) { //LESS
269 } else if (result == 0) { //EQUAL
271 } else if (result == 1) { //GREATER
275 target += huh + result;
// Renders the bytes of a CollationKey as hexadecimal text.
280 String prettify(CollationKey sourceKey) {
282 byte[] bytes= sourceKey.toByteArray();
285 for (i = 0; i < bytes.length; i++) {
// NOTE(review): bytes[i] is widened to int with sign extension, so a negative
// byte prints as eight hex digits (e.g. "ffffff85"). printKey() in this file
// masks with 0xff first - consider doing the same here.
286 target += Integer.toHexString(bytes[i]);
295 // Full Decomposition mode not implemented
// Regression test for bug 4054736: with SECONDARY strength and NO_DECOMPOSITION
// on the en_US collator, U+FB4F must compare equal to U+05D0 U+05DC.
297 public void Test4054736(/* char* par */) {
298 RuleBasedCollator c = (RuleBasedCollator) Collator.getInstance(Locale.US);
300 c.setStrength(Collator.SECONDARY);
301 c.setDecomposition(Collator.NO_DECOMPOSITION);
// "\u003d" is the "=" operator understood by compareArray().
303 final String[] tests = { "\uFB4F", "\u003d", "\u05D0\u05DC" }; // Alef-Lamed vs. Alef, Lamed
304 compareArray(c, tests);
309 // Collator::createInstance() causes an ArrayIndexOutofBoundsException for Korean
// Regression test for bug 4058613: switches the JVM default locale to Korean,
// creates a collator, checks that its decomposition mode is NO_DECOMPOSITION,
// and puts the previous default locale back.
311 public void Test4058613(/* char* par */) {
312 // Creating a default collator doesn't work when Korean is the default
315 Locale oldDefault = Locale.getDefault();
316 Locale.setDefault(new Locale("ko", ""));
// NOTE(review): the collator is requested explicitly for en_US, not for the
// (Korean) default locale, although the messages below call it a Korean collator.
320 c = Collator.getInstance(new Locale("en", "US"));
323 errln("Could not create a Korean collator");
324 Locale.setDefault(oldDefault);
328 // Since the fix to this bug was to turn off decomposition for Korean collators,
329 // ensure that's what we got
330 if (c.getDecomposition() != Collator.NO_DECOMPOSITION) {
331 errln("Decomposition is not set to NO_DECOMPOSITION for Korean collator");
// NOTE(review): confirm the default locale is also restored if an exception is
// thrown above; the default locale is JVM-wide state.
334 Locale.setDefault(oldDefault);
339 // RuleBasedCollator.getRules does not return the exact pattern as input
340 // for expanding character sequences
// Regression test for bug 4059820: builds a collator from rules containing the
// expansion "c/a" and reports an error if getRules() no longer contains "c/a".
342 public void Test4059820(/* char* par */) {
343 RuleBasedCollator c = null;
344 String rules = "< a < b , c/a < d < z";
346 c = new RuleBasedCollator(rules);
347 } catch (Exception e) {
348 errln("Failure building a collator.");
352 if ( c.getRules().indexOf("c/a") == -1)
354 errln("returned rules do not contain 'c/a'");
360 // MergeCollation::fixEntry broken for "& H < \u0131, \u0130, i, I"
// Regression test for bug 4060154: builds a collator from rules that reposition
// U+0131 and U+0130 after H, then runs two comparison tables with
// NO_DECOMPOSITION: `tertiary` at TERTIARY strength and `secondary` at PRIMARY strength.
362 public void Test4060154(/* char* par */) {
363 String rules ="< g, G < h, H < i, I < j, J & H < \u0131, \u0130, i, I";
365 RuleBasedCollator c = null;
367 c = new RuleBasedCollator(rules);
368 } catch (Exception e) {
369 //System.out.println(e);
370 errln("failure building collator.");
374 c.setDecomposition(Collator.NO_DECOMPOSITION);
376 String[] tertiary = {
380 "\u0131", "<", "\u0130",
385 c.setStrength(Collator.TERTIARY);
386 compareArray(c, tertiary);
388 String[] secondary = {
390 "\u0131", "=", "\u0130",
// NOTE(review): the array is named `secondary` but the strength set here is
// PRIMARY - confirm this is intentional.
393 c.setStrength(Collator.PRIMARY);
394 compareArray(c, secondary);
399 // Secondary/Tertiary comparison incorrect in French Secondary
// Regression test for bug 4062418: with the CANADA_FRENCH collator at SECONDARY
// strength, "p\u00eache" must sort before "p\u00e9ch\u00e9".
401 public void Test4062418(/* char* par */) {
402 RuleBasedCollator c = null;
404 c = (RuleBasedCollator) Collator.getInstance(Locale.CANADA_FRENCH);
405 } catch (Exception e) {
406 errln("Failed to create collator for Locale.CANADA_FRENCH");
409 c.setStrength(Collator.SECONDARY);
412 "p\u00eache", "<", "p\u00e9ch\u00e9", // Comparing accents from end, p\u00e9ch\u00e9 is greater
415 compareArray(c, tests);
420 // Collator::compare() method broken if either string contains spaces
// Regression test for bug 4065540: two strings that contain a space and differ
// only in their last letter must not compare as equal with the en_US collator.
422 public void Test4065540(/* char* par */) {
423 RuleBasedCollator en_us = (RuleBasedCollator) Collator.getInstance(Locale.US);
424 if (en_us.compare("abcd e", "abcd f") == 0) {
425 errln("'abcd e' == 'abcd f'");
431 // Unicode characters need to be recursively decomposed to get the
432 // correct result. For example,
433 // u1EB1 -> \u0103 + \u0300 -> a + \u0306 + \u0300.
// Regression test for bug 4066189 (recursive decomposition).
// Creates iterator i1 over the precomposed U+1EB1 with CANONICAL_DECOMPOSITION
// and iterator i2 over the decomposed a + U+0306 + U+0300 with NO_DECOMPOSITION.
435 public void Test4066189(/* char* par */) {
436 final String test1 = "\u1EB1";
437 final String test2 = "\u0061\u0306\u0300";
439 // NOTE: The java code used en_us to create the
440 // CollationElementIterator's. I'm pretty sure that
441 // was wrong, so I've changed the code to use c1 and c2
442 RuleBasedCollator c1 = (RuleBasedCollator) Collator.getInstance(Locale.US);
443 c1.setDecomposition(Collator.CANONICAL_DECOMPOSITION);
444 CollationElementIterator i1 = c1.getCollationElementIterator(test1);
446 RuleBasedCollator c2 = (RuleBasedCollator) Collator.getInstance(Locale.US);
447 c2.setDecomposition(Collator.NO_DECOMPOSITION);
448 CollationElementIterator i2 = c2.getCollationElementIterator(test2);
455 // French secondary collation checking at the end of compare iteration fails
// Regression test for bug 4066696: French secondary ordering, checked with the
// CANADA_FRENCH collator at SECONDARY strength via compareArray().
457 public void Test4066696(/* char* par */) {
458 RuleBasedCollator c = null;
460 c = (RuleBasedCollator)Collator.getInstance(Locale.CANADA_FRENCH);
461 } catch(Exception e) {
462 errln("Failure creating collator for Locale.CANADA_FRENCH");
465 c.setStrength(Collator.SECONDARY);
// NOTE(review): the operator tested on the next line is ">", but its trailing
// comment says "<" - confirm which is intended.
468 "\u00e0", ">", "\u01fa", // a-grave < A-ring-acute
470 compareArray(c, tests);
475 // Bad canonicalization of same-class combining characters
// Regression test for bug 4076676: s1 and s2 carry the same three combining
// marks in a different order; at TERTIARY strength the en_US collator must not
// treat them as equal.
477 public void Test4076676(/* char* par */) {
478 // These combining characters are all in the same class, so they should not
479 // be reordered, and they should compare as unequal.
480 final String s1 = "\u0041\u0301\u0302\u0300";
481 final String s2 = "\u0041\u0302\u0300\u0301";
483 RuleBasedCollator c = (RuleBasedCollator) Collator.getInstance(Locale.US);
484 c.setStrength(Collator.TERTIARY);
486 if (c.compare(s1,s2) == 0) {
487 errln("Same-class combining chars were reordered");
493 // RuleBasedCollator breaks on "< a < bb" rule
// Regression test for bug 4078588: builds a collator from the rule "< a < bb"
// and compares "a" with "bb"; the error message states that -1 is expected.
495 public void Test4078588(/* char *par */) {
496 RuleBasedCollator rbc = null;
498 rbc = new RuleBasedCollator("< a < bb");
499 } catch (Exception e) {
500 errln("Failed to create RuleBasedCollator.");
504 int result = rbc.compare("a","bb");
507 errln("Compare(a,bb) returned " + result + "; expected -1");
513 // RuleBasedCollator::operator==(NULL) throws NullPointerException
// Regression test for bug 4079231: equals(null) on a collator must return false
// and must not throw; either failure is reported through errln.
515 public void Test4079231(/* char* par */) {
516 RuleBasedCollator en_us = (RuleBasedCollator) Collator.getInstance(Locale.US);
518 if (en_us.equals(null)) {
519 errln("en_us.equals(null) returned true");
521 } catch (Exception e) {
522 errln("en_us.equals(null) threw " + e.toString());
528 // Combining characters in different classes not reordered properly.
// Regression test for bug 4081866: s1 and s2 carry the same combining marks in
// a different order; with TERTIARY strength and CANONICAL_DECOMPOSITION the
// en_US collator must compare them as equal.
530 public void Test4081866(/* char* par */) {
531 // These combining characters are all in different classes,
532 // so they should be reordered and the strings should compare as equal.
533 String s1 = "\u0041\u0300\u0316\u0327\u0315";
534 String s2 = "\u0041\u0327\u0316\u0315\u0300";
536 RuleBasedCollator c = (RuleBasedCollator) Collator.getInstance(Locale.US);
537 c.setStrength(Collator.TERTIARY);
539 // Now that the default collators are set to NO_DECOMPOSITION
540 // (as a result of fixing bug 4114077), we must set it explicitly
541 // when we're testing reordering behavior. -- lwerner, 5/5/98
542 c.setDecomposition(Collator.CANONICAL_DECOMPOSITION);
543 if (c.compare(s1,s2) != 0) {
544 errln("Combining chars were not reordered");
550 // string comparison errors in Scandinavian collators
// Regression test for bug 4087241: checks three orderings with the da_DK
// collator at SECONDARY strength. "\u003c" is the "<" operator for compareArray().
552 public void Test4087241(/* char* par */) {
553 Locale da_DK = new Locale("da", "DK");
554 RuleBasedCollator c = null;
556 c = (RuleBasedCollator) Collator.getInstance(da_DK);
557 } catch (Exception e) {
558 errln("Failed to create collator for da_DK locale");
561 c.setStrength(Collator.SECONDARY);
563 "\u007a", "\u003c", "\u00E6", // z < ae
564 "\u0061\u0308", "\u003c", "\u0061\u030A", // a-umlaut < a-ring
565 "\u0059", "\u003c", "\u0075\u0308", // Y < u-umlaut
567 compareArray(c, tests);
572 // CollationKey takes ignorable strings into account when it shouldn't
// Regression test for bug 4087243: at TERTIARY strength the en_US collator must
// treat "123" and "123" followed by U+0001 as equal ("\u003d" is "=").
574 public void Test4087243(/* char* par */) {
575 RuleBasedCollator c = (RuleBasedCollator) Collator.getInstance(Locale.US);
576 c.setStrength(Collator.TERTIARY);
578 "\u0031\u0032\u0033", "\u003d", "\u0031\u0032\u0033\u0001" // 1 2 3 = 1 2 3 ctrl-A
580 compareArray(c, tests);
586 // Micro symbol and greek lowercase letter Mu should sort identically
// Regression test for bug 4092260: with the "el" collator at SECONDARY strength,
// U+00B5 (micro sign) and U+03BC (Greek small mu) must compare as equal.
588 public void Test4092260(/* char* par */) {
589 Locale el = new Locale("el", "");
592 c = Collator.getInstance(el);
593 } catch (Exception e) {
594 errln("Failed to create collator for el locale.");
597 // These now have tertiary differences in UCA
598 c.setStrength(Collator.SECONDARY);
600 "\u00B5", "\u003d", "\u03BC",
602 compareArray(c, tests);
// Regression test for bug 4095316: with the el_GR collator at SECONDARY
// strength, U+03D4 and U+03AB must compare as equal.
607 public void Test4095316(/* char* par */) {
608 Locale el_GR = new Locale("el", "GR");
611 c = Collator.getInstance(el_GR);
612 } catch (Exception e) {
613 errln("Failed to create collator for el_GR locale");
616 // These now have tertiary differences in UCA
// The two commented-out lines below are leftovers from the C++ original.
617 //c->setStrength(Collator::TERTIARY);
618 //c->setAttribute(UCOL_STRENGTH, UCOL_SECONDARY, status);
619 c.setStrength(Collator.SECONDARY);
621 "\u03D4", "\u003d", "\u03AB",
623 compareArray(c, tests);
// Regression test for bug 4101940: the first next() on an iterator created over
// `nothing` must return NULLORDER.
// `nothing` is presumably an empty string - TODO confirm.
628 public void Test4101940(/* char* par */) {
629 RuleBasedCollator c = null;
630 String rules = "< a < b";
633 c = new RuleBasedCollator(rules);
634 } catch (Exception e) {
635 errln("Failed to create RuleBasedCollator");
638 CollationElementIterator i = c.getCollationElementIterator(nothing);
640 if (i.next() != CollationElementIterator.NULLORDER) {
641 errln("next did not return NULLORDER");
647 // Collator::compare not handling spaces properly
// Regression test for bug 4103436: at TERTIARY strength the en_US collator must
// sort "file" before both "file access" and "fileaccess".
649 public void Test4103436(/* char* par */) {
650 RuleBasedCollator c = (RuleBasedCollator) Collator.getInstance(Locale.US);
651 c.setStrength(Collator.TERTIARY);
653 "\u0066\u0069\u006c\u0065", "\u003c", "\u0066\u0069\u006c\u0065\u0020\u0061\u0063\u0063\u0065\u0073\u0073", // "file" < "file access"
654 "\u0066\u0069\u006c\u0065", "\u003c", "\u0066\u0069\u006c\u0065\u0061\u0063\u0063\u0065\u0073\u0073", // "file" < "fileaccess"
656 compareArray(c, tests);
661 // Collation not Unicode conformant with Hangul syllables
// Regression test for bug 4114076: with TERTIARY strength and
// CANONICAL_DECOMPOSITION, the Hangul syllable U+D4DB must compare equal to the
// Jamo sequence U+1111 U+1171 U+11B6.
663 public void Test4114076(/* char* par */) {
664 RuleBasedCollator c = (RuleBasedCollator) Collator.getInstance(Locale.US);
665 c.setStrength(Collator.TERTIARY);
668 // With Canonical decomposition, Hangul syllables should get decomposed
669 // into Jamo, but Jamo characters should not be decomposed into
673 "\ud4db", "\u003d", "\u1111\u1171\u11b6"
676 c.setDecomposition(Collator.CANONICAL_DECOMPOSITION);
677 compareArray(c, test1);
680 // (In earlier versions of Unicode, jamo characters like ksf
681 // had compatibility mappings to kf + sf. These mappings were
682 // removed in Unicode 2.1.9 to ensure that Hangul syllables are maintained.)
683 // That is, the following test is obsolete as of 2.1.9
685 //obsolete- // With Full decomposition, it should go all the way down to
686 //obsolete- // conjoining Jamo characters.
688 //obsolete- static const UChar test2[][CollationRegressionTest::MAX_TOKEN_LEN] =
690 //obsolete- {0xd4db, 0}, {0x3d, 0}, {0x1111, 0x116e, 0x1175, 0x11af, 0x11c2, 0}
693 //obsolete- c->setDecomposition(Normalizer::DECOMP_COMPAT);
694 //obsolete- compareArray(*c, test2, ARRAY_LENGTH(test2));
699 // Collation with decomposition off doesn't work for Europe
// Regression test for bug 4114077: runs table test1 with NO_DECOMPOSITION and
// table test2 with CANONICAL_DECOMPOSITION on an en_US collator at TERTIARY
// strength. "\u003d", "\u003c" and "\u003e" are the "=", "<" and ">" operators.
701 public void Test4114077(/* char* par */) {
702 // Ensure that we get the same results with decomposition off
703 // as we do with it on....
704 RuleBasedCollator c = (RuleBasedCollator) Collator.getInstance(Locale.US);
705 c.setStrength(Collator.TERTIARY);
707 "\u00C0", "\u003d", "\u0041\u0300", // Should be equivalent
708 "\u0070\u00ea\u0063\u0068\u0065", "\u003e", "\u0070\u00e9\u0063\u0068\u00e9",
709 "\u0204", "\u003d", "\u0045\u030F",
710 "\u01fa", "\u003d", "\u0041\u030a\u0301", // a-ring-acute -> a-ring, acute
712 "\u0041\u0300\u0316", "\u003c", "\u0041\u0316\u0300" // No reordering --> unequal
715 c.setDecomposition(Collator.NO_DECOMPOSITION);
716 compareArray(c, test1);
719 "\u0041\u0300\u0316", "\u003d", "\u0041\u0316\u0300" // Reordering --> equal
722 c.setDecomposition(Collator.CANONICAL_DECOMPOSITION);
723 compareArray(c, test2);
728 // Collator::getCollationKey was hanging on certain character sequences
// Regression test for bug 4124632: requests a CollationKey from the
// Locale.JAPAN collator for A + U+0308 + "bc" and logs the key's source string;
// an exception is reported as a failure.
730 public void Test4124632(/* char* par */) {
731 Collator coll = null;
733 coll = Collator.getInstance(Locale.JAPAN);
734 } catch (Exception e) {
735 errln("Failed to create collator for Locale::JAPAN");
738 String test = "\u0041\u0308\u0062\u0063";
741 key = coll.getCollationKey(test);
742 logln(key.getSourceString());
743 } catch (Exception e) {
744 errln("CollationKey creation failed.");
750 // sort order of french words with multiple accents has errors
// Regression test for bug 4132736: checks the order of strings with multiple
// accents using the CANADA_FRENCH collator at TERTIARY strength.
752 public void Test4132736(/* char* par */) {
755 c = Collator.getInstance(Locale.CANADA_FRENCH);
756 c.setStrength(Collator.TERTIARY);
757 } catch (Exception e) {
758 errln("Failed to create a collator for Locale.CANADA_FRENCH");
762 "\u0065\u0300\u0065\u0301", "\u003c", "\u0065\u0301\u0065\u0300", // e-grave e-acute < e-acute e-grave
763 "\u0065\u0300\u0301", "\u003c", "\u0065\u0301\u0300", // e + grave + acute < e + acute + grave
765 compareArray(c, test1);
770 // The sorting using java.text.CollationKey is not in the exact order
// Regression test for bug 4133509: each word must sort before a longer word that
// starts with it, checked with the en_US collator via compareArray().
772 public void Test4133509(/* char* par */) {
773 RuleBasedCollator en_us = (RuleBasedCollator) Collator.getInstance(Locale.US);
775 "\u0045\u0078\u0063\u0065\u0070\u0074\u0069\u006f\u006e", "\u003c", "\u0045\u0078\u0063\u0065\u0070\u0074\u0069\u006f\u006e\u0049\u006e\u0049\u006e\u0069\u0074\u0069\u0061\u006c\u0069\u007a\u0065\u0072\u0045\u0072\u0072\u006f\u0072", // "Exception" < "ExceptionInInitializerError"
776 "\u0047\u0072\u0061\u0070\u0068\u0069\u0063\u0073", "\u003c", "\u0047\u0072\u0061\u0070\u0068\u0069\u0063\u0073\u0045\u006e\u0076\u0069\u0072\u006f\u006e\u006d\u0065\u006e\u0074", // "Graphics" < "GraphicsEnvironment"
777 "\u0053\u0074\u0072\u0069\u006e\u0067", "\u003c", "\u0053\u0074\u0072\u0069\u006e\u0067\u0042\u0075\u0066\u0066\u0065\u0072", // "String" < "StringBuffer"
780 compareArray(en_us, test1);
785 // getCollationKey throws exception for spanish text
786 // Cannot reproduce this bug on 1.2, however it DOES fail on 1.1.6
// Regression test for bug 4139572: requests a CollationKey for a Spanish phrase
// from a collator created for Locale("es", "es") and logs the key's source
// string; an exception is reported as a failure.
788 public void Test4139572(/* char* par */) {
790 // Code pasted straight from the bug report
791 // (and then translated to C++ ;-)
793 // create spanish locale and collator
794 Locale l = new Locale("es", "es");
797 col = Collator.getInstance(l);
798 } catch (Exception e) {
799 errln("Failed to create a collator for es_es locale.");
802 CollationKey key = null;
803 // this spanish phrase kills it!
805 key = col.getCollationKey("Nombre De Objeto");
806 logln("source:" + key.getSourceString());
807 } catch (Exception e) {
// NOTE(review): the message spells the phrase "Ojbeto"; the key above is built
// from "Nombre De Objeto" - likely a typo in the message.
808 errln("Error creating CollationKey for \"Nombre De Ojbeto\"");
814 // Support for Swedish gone in 1.1.6 (Can't create Swedish collator)
// Regression test for bug 4141640: creates a collator for every locale returned
// by Collator.getAvailableLocales() and builds a failure message naming the
// locale when creation throws.
816 public void Test4141640(/* char* par */) {
818 // Rather than just creating a Swedish collator, we might as well
819 // try to instantiate one for every locale available on the system
820 // in order to prevent this sort of bug from cropping up in the future
822 Locale locales[] = Collator.getAvailableLocales();
824 for (int i = 0; i < locales.length; i += 1)
828 c = Collator.getInstance(locales[i]);
// The value logged here is the collator's strength, despite the "source: " label.
829 logln("source: " + c.getStrength());
830 } catch (Exception e) {
832 msg += "Could not create collator for locale ";
833 msg += locales[i].getDisplayName();
// Verifies that sortedList is in strictly ascending order according to c.
// Each adjacent pair is compared; a result >= 0 (equal or descending) is
// reported with the index of the first element of the pair.
839 private void checkListOrder(String[] sortedList, Collator c) {
840 // this function uses the specified Collator to make sure the
841 // passed-in list is already sorted into ascending order
842 for (int i = 0; i < sortedList.length - 1; i++) {
843 if (c.compare(sortedList[i], sortedList[i + 1]) >= 0) {
844 errln("List out of order at element #" + i + ": "
845 + sortedList[i] + " >= "
846 + sortedList[i + 1]);
// Regression test for bug 4171974: checks with checkListOrder() that
// englishList is in ascending order for the Locale.ENGLISH collator, first as
// created and then again with NO_DECOMPOSITION.
// The French list and its two checks are inside a block comment and do not run.
851 public void Test4171974() {
852 // test French accent ordering more thoroughly
853 /*String[] frenchList = {
854 "\u0075\u0075", // u u
855 "\u00fc\u0075", // u-umlaut u
856 "\u01d6\u0075", // u-umlaut-macron u
857 "\u016b\u0075", // u-macron u
858 "\u1e7b\u0075", // u-macron-umlaut u
859 "\u0075\u00fc", // u u-umlaut
860 "\u00fc\u00fc", // u-umlaut u-umlaut
861 "\u01d6\u00fc", // u-umlaut-macron u-umlaut
862 "\u016b\u00fc", // u-macron u-umlaut
863 "\u1e7b\u00fc", // u-macron-umlaut u-umlaut
864 "\u0075\u01d6", // u u-umlaut-macron
865 "\u00fc\u01d6", // u-umlaut u-umlaut-macron
866 "\u01d6\u01d6", // u-umlaut-macron u-umlaut-macron
867 "\u016b\u01d6", // u-macron u-umlaut-macron
868 "\u1e7b\u01d6", // u-macron-umlaut u-umlaut-macron
869 "\u0075\u016b", // u u-macron
870 "\u00fc\u016b", // u-umlaut u-macron
871 "\u01d6\u016b", // u-umlaut-macron u-macron
872 "\u016b\u016b", // u-macron u-macron
873 "\u1e7b\u016b", // u-macron-umlaut u-macron
874 "\u0075\u1e7b", // u u-macron-umlaut
875 "\u00fc\u1e7b", // u-umlaut u-macron-umlaut
876 "\u01d6\u1e7b", // u-umlaut-macron u-macron-umlaut
877 "\u016b\u1e7b", // u-macron u-macron-umlaut
878 "\u1e7b\u1e7b" // u-macron-umlaut u-macron-umlaut
880 Collator french = Collator.getInstance(Locale.FRENCH);
882 logln("Testing French order...");
883 checkListOrder(frenchList, french);
885 logln("Testing French order without decomposition...");
886 french.setDecomposition(Collator.NO_DECOMPOSITION);
887 checkListOrder(frenchList, french);*/
// Expected English order: the list is grouped by the first character, then by
// the second character within each group.
889 String[] englishList = {
890 "\u0075\u0075", // u u
891 "\u0075\u00fc", // u u-umlaut
892 "\u0075\u01d6", // u u-umlaut-macron
893 "\u0075\u016b", // u u-macron
894 "\u0075\u1e7b", // u u-macron-umlaut
895 "\u00fc\u0075", // u-umlaut u
896 "\u00fc\u00fc", // u-umlaut u-umlaut
897 "\u00fc\u01d6", // u-umlaut u-umlaut-macron
898 "\u00fc\u016b", // u-umlaut u-macron
899 "\u00fc\u1e7b", // u-umlaut u-macron-umlaut
900 "\u01d6\u0075", // u-umlaut-macron u
901 "\u01d6\u00fc", // u-umlaut-macron u-umlaut
902 "\u01d6\u01d6", // u-umlaut-macron u-umlaut-macron
903 "\u01d6\u016b", // u-umlaut-macron u-macron
904 "\u01d6\u1e7b", // u-umlaut-macron u-macron-umlaut
905 "\u016b\u0075", // u-macron u
906 "\u016b\u00fc", // u-macron u-umlaut
907 "\u016b\u01d6", // u-macron u-umlaut-macron
908 "\u016b\u016b", // u-macron u-macron
909 "\u016b\u1e7b", // u-macron u-macron-umlaut
910 "\u1e7b\u0075", // u-macron-umlaut u
911 "\u1e7b\u00fc", // u-macron-umlaut u-umlaut
912 "\u1e7b\u01d6", // u-macron-umlaut u-umlaut-macron
913 "\u1e7b\u016b", // u-macron-umlaut u-macron
914 "\u1e7b\u1e7b" // u-macron-umlaut u-macron-umlaut
916 Collator english = Collator.getInstance(Locale.ENGLISH);
918 logln("Testing English order...");
919 checkListOrder(englishList, english);
921 logln("Testing English order without decomposition...");
922 english.setDecomposition(Collator.NO_DECOMPOSITION);
923 checkListOrder(englishList, english);
// Regression test for bug 4179216: iterator positioning inside contracting
// sequences. Builds a collator from the en_US rules plus contractions for
// "ch", "cat" and "crunchy", then
//  1. compares the primary orders read for elt0, elt4 and elt5,
//  2. compares the primary orders read for elt14..elt19,
//  3. collects the text pieces covered by each element in two arrays
//     (nextElements and setOffsetElements) and reports any difference.
// The variable names and messages suggest each element is read after moving the
// iterator to that text offset - TODO confirm.
926 public void Test4179216() throws Exception {
927 // you can position a CollationElementIterator in the middle of
928 // a contracting character sequence, yielding a bogus collation
930 RuleBasedCollator coll = (RuleBasedCollator)Collator.getInstance(Locale.US);
931 coll = new RuleBasedCollator(coll.getRules()
932 + " & C < ch , cH , Ch , CH < cat < crunchy");
933 String testText = "church church catcatcher runcrunchynchy";
934 CollationElementIterator iter = coll.getCollationElementIterator(
937 // test that the "ch" combination works properly
939 int elt4 = CollationElementIterator.primaryOrder(iter.next());
942 int elt0 = CollationElementIterator.primaryOrder(iter.next());
945 int elt5 = CollationElementIterator.primaryOrder(iter.next());
947 if (elt4 != elt0 || elt5 != elt0)
948 errln("The collation elements at positions 0 (" + elt0 + "), 4 ("
949 + elt4 + "), and 5 (" + elt5 + ") don't match.");
951 // test that the "cat" combination works properly
953 int elt14 = CollationElementIterator.primaryOrder(iter.next());
956 int elt15 = CollationElementIterator.primaryOrder(iter.next());
959 int elt16 = CollationElementIterator.primaryOrder(iter.next());
962 int elt17 = CollationElementIterator.primaryOrder(iter.next());
965 int elt18 = CollationElementIterator.primaryOrder(iter.next());
968 int elt19 = CollationElementIterator.primaryOrder(iter.next());
970 if (elt14 != elt15 || elt14 != elt16 || elt14 != elt17
971 || elt14 != elt18 || elt14 != elt19)
972 errln("\"cat\" elements don't match: elt14 = " + elt14 + ", elt15 = "
973 + elt15 + ", elt16 = " + elt16 + ", elt17 = " + elt17
974 + ", elt18 = " + elt18 + ", elt19 = " + elt19);
976 // now generate a complete list of the collation elements,
977 // first using next() and then using setOffset(), and
978 // make sure both interfaces return the same set of elements
981 int elt = iter.next();
983 while (elt != CollationElementIterator.NULLORDER) {
// Both arrays are sized with `count`.
988 String[] nextElements = new String[count];
989 String[] setOffsetElements = new String[count];
995 while (elt != CollationElementIterator.NULLORDER) {
// Record the slice of text between the previous offset and the current one.
996 nextElements[count++] = testText.substring(lastPos, iter.getOffset());
997 lastPos = iter.getOffset();
// The loop variable i is advanced inside the body from the iterator's offset.
1001 for (int i = 0; i < testText.length(); ) {
1003 lastPos = iter.getOffset();
1005 setOffsetElements[count++] = testText.substring(lastPos, iter.getOffset());
1006 i = iter.getOffset();
1008 for (int i = 0; i < nextElements.length; i++) {
1009 if (nextElements[i].equals(setOffsetElements[i])) {
1010 logln(nextElements[i]);
1012 errln("Error: next() yielded " + nextElements[i] + ", but setOffset() yielded "
1013 + setOffsetElements[i]);
// Regression test for bug 4216006.
// First expects the rule string "\u00e0<a\u0300" to be rejected with a
// ParseException and throws an Exception if it is accepted. Then builds a
// collator from "<\u00e0=a\u0300" at IDENTICAL strength and checks that
// a + U+0300 and U+00E0 compare equal in both directions.
1018 public void Test4216006() throws Exception {
1019 // rule parser barfs on "<\u00e0=a\u0300", and on other cases
1020 // where the same token (after normalization) appears twice in a row
1021 boolean caughtException = false;
1023 new RuleBasedCollator("\u00e0<a\u0300");
1025 catch (ParseException e) {
1026 caughtException = true;
1028 if (!caughtException) {
1029 throw new Exception("\"a<a\" collation sequence didn't cause parse error!");
1032 RuleBasedCollator collator = new RuleBasedCollator("<\u00e0=a\u0300");
1033 //commented by Kevin 2003/10/21
1034 //for "FULL_DECOMPOSITION is not supported here." in ICU4J DOC
1035 //collator.setDecomposition(Collator.FULL_DECOMPOSITION);
1036 collator.setStrength(Collator.IDENTICAL);
1039 "a\u0300", "=", "\u00e0",
1040 "\u00e0", "=", "a\u0300"
1043 compareArray(collator, tests);
1046 // CollationElementIterator.previous broken for expanding char sequences
// Regression test for bug 4179686: CollationElementIterator.previous() with
// expanding sequences. Collects every element returned by next() into a list,
// then walks backwards with previous() and compares each element with the
// list entry at `index`, which starts at the last entry.
1048 public void Test4179686() throws Exception {
1049 RuleBasedCollator en_us = (RuleBasedCollator) Collator.getInstance(Locale.US);
1050 // Create a collator with a few expanding character sequences in it....
1051 RuleBasedCollator coll = new RuleBasedCollator(en_us.getRules()
1052 + " & ae ; \u00e4 & AE ; \u00c4"
1053 + " & oe ; \u00f6 & OE ; \u00d6"
1054 + " & ue ; \u00fc & UE ; \u00dc");
1056 String text = "T\u00f6ne"; // o-umlaut
1058 CollationElementIterator iter = coll.getCollationElementIterator(text);
// NOTE(review): raw List/ArrayList; List<Integer> would remove the cast below.
1059 List elements = new ArrayList();
1062 // Iterate forward and collect all of the elements into a list
1063 while ((elem = iter.next()) != CollationElementIterator.NULLORDER) {
// NOTE(review): new Integer(int) is deprecated; Integer.valueOf(elem) is the usual replacement.
1064 elements.add(new Integer(elem));
1067 // Now iterate backward and make sure they're the same
1069 int index = elements.size() - 1;
1070 while ((elem = iter.previous()) != CollationElementIterator.NULLORDER) {
1071 int expect = ((Integer)elements.get(index)).intValue();
1073 if (elem != expect) {
1074 errln("Mismatch at index " + index
1075 + ": got " + Integer.toString(elem,16)
1076 + ", expected " + Integer.toString(expect,16));
// Initialization hook, declared to throw Exception.
// NOTE(review): no statements of the body are visible here - confirm what, if anything, it sets up.
1082 protected void init()throws Exception{
// Regression test for bug 4244884: builds a collator from the en_US rules plus
// contractions for "ch", "cat" and "crunchy", and verifies that testStrings is
// in strictly ascending order (each adjacent pair must compare < 0).
1085 public void Test4244884() throws Exception {
1086 RuleBasedCollator coll = (RuleBasedCollator)Collator.getInstance(Locale.US);
1087 coll = new RuleBasedCollator(coll.getRules()
1088 + " & C < ch , cH , Ch , CH < cat < crunchy");
1090 String[] testStrings = new String[] {
// Start at 1 so that each element is compared with its predecessor.
1102 for (int i = 1; i < testStrings.length; i++) {
1103 if (coll.compare(testStrings[i - 1], testStrings[i]) >= 0) {
1104 errln("error: \"" + testStrings[i - 1]
1105 + "\" is greater than or equal to \"" + testStrings[i]
1111 // CollationElementIterator set doesn't work properly with next/prev
// Regression test for bug 4663220: reads three collation elements for "fox"
// twice from an iterator created over a StringCharacterIterator - once with
// plain next() calls (elements_next) and once in a "set/next" pass
// (elements_fwd) - and reports any position where the two arrays differ.
1112 public void Test4663220() {
1113 RuleBasedCollator collator = (RuleBasedCollator)Collator.getInstance(Locale.US);
1114 java.text.StringCharacterIterator stringIter = new java.text.StringCharacterIterator("fox");
1115 CollationElementIterator iter = collator.getCollationElementIterator(stringIter);
1117 int[] elements_next = new int[3];
1118 logln("calling next:");
1119 for (int i = 0; i < 3; ++i) {
// The element is stored and logged in the same expression.
1120 logln("[" + i + "] " + (elements_next[i] = iter.next()));
1123 int[] elements_fwd = new int[3];
1124 logln("calling set/next:");
1125 for (int i = 0; i < 3; ++i) {
1127 logln("[" + i + "] " + (elements_fwd[i] = iter.next()));
1130 for (int i = 0; i < 3; ++i) {
1131 if (elements_next[i] != elements_fwd[i]) {
1132 errln("mismatch at position " + i +
1133 ": " + elements_next[i] +
1134 " != " + elements_fwd[i]);
1139 // Fixing the infinite loop for surrogates
// Regression test for ticket 8484 (infinite loop on surrogates): requests a
// collation key from the default-locale collator and logs success when the
// call returns.
1140 public void Test8484()
// The string ends with U+DA7C, a lead surrogate that has no trail surrogate after it.
1142 String s = "\u9FE1\uCEF3\u2798\uAAB6\uDA7C";
1143 Collator coll = Collator.getInstance();
1144 CollationKey collKey = coll.getCollationKey(s);
1145 logln("Pass: " + collKey.toString() + " generated OK.");
// Builds a RuleBasedCollator from a rule string given as a char array
// (0x26 is '&', 0x3c is '<'; the other values are code points in the Bengali
// block) and compares U+09BE with U+0B70. On failure the message says a
// negative result was wanted, and the keys of both strings are printed through printKey().
1148 public void TestBengaliSortKey() throws Exception {
1149 char rules[] = { 0x26, 0x9fa, 0x3c, 0x98c, 0x3c, 0x9e1, 0x3c, 0x98f, 0x3c, 0x990, 0x3c, 0x993,
1150 0x3c, 0x994, 0x3c, 0x9bc, 0x3c, 0x982, 0x3c, 0x983, 0x3c, 0x981, 0x3c, 0x9b0, 0x3c,
1151 0x9b8, 0x3c, 0x9b9, 0x3c, 0x9bd, 0x3c, 0x9be, 0x3c, 0x9bf, 0x3c, 0x9c8, 0x3c, 0x9cb,
1154 Collator col = new RuleBasedCollator(String.copyValueOf(rules));
1156 String str1 = "\u09be";
1157 String str2 = "\u0b70";
1159 int result = col.compare(str1, str2);
1163 errln("\nERROR: result is " + result + " , wanted negative.");
1164 errln(printKey(col, str1).toString());
1165 errln(printKey(col, str2).toString());
1167 logln("Pass: result is OK.");
// Formats a string and its collation key for diagnostics: each char of str1 is
// appended as "\\u" + hex, then each key byte as " 0x" + unsigned hex.
1171 private static StringBuilder printKey(Collator col, String str1) {
1172 StringBuilder sb = new StringBuilder();
1173 CollationKey sortk1 = col.getCollationKey(str1);
1174 byte[] bytes = sortk1.toByteArray();
1175 for(int i=0;i<str1.length();i++) {
1176 sb.append("\\u"+Integer.toHexString(str1.charAt(i)));
// NOTE(review): this separator is written to System.out instead of being
// appended to sb, so it is not part of the text built in sb - likely meant
// to be sb.append(": ").
1178 System.out.print(": ");
1179 for(int i=0;i<bytes.length;i++) {
// Mask with 0xff so that negative bytes print as two hex digits at most.
1180 sb.append(" 0x"+Integer.toHexString(((int)bytes[i])&0xff));
1187 * Test case for ticket#8624
1188 * Bad collation key with upper first option.
// Test for ticket #8624: runs caseFirstCompressionSub() on the en_US collator
// three times - with its default settings, after setUpperCaseFirst(true), and
// after setLowerCaseFirst(true).
1190 public void TestCaseFirstCompression() {
1191 RuleBasedCollator col = (RuleBasedCollator)Collator.getInstance(Locale.US);
1194 caseFirstCompressionSub(col, "default");
1197 col.setUpperCaseFirst(true);
1198 caseFirstCompressionSub(col, "upper first");
1201 col.setLowerCaseFirst(true);
1202 caseFirstCompressionSub(col, "lower first");
1206 * Compare two strings - "aaa...A" and "aaa...a" with
1207 * Collation#compare and CollationKey#compareTo, called from
1208 * TestCaseFirstCompression.
// For each length n from 1 to maxLength, takes two strings from buf1 and buf2,
// compares them both through their CollationKeys and through col.compare(),
// and reports an error when the two results differ in sign.
// `opt` is only used to label the error message.
1210 private void caseFirstCompressionSub(RuleBasedCollator col, String opt) {
1211 final int maxLength = 50;
1213 StringBuilder buf1 = new StringBuilder();
1214 StringBuilder buf2 = new StringBuilder();
1217 for (int n = 1; n <= maxLength; n++) {
1221 for (int i = 0; i < n - 1; i++) {
1228 str1 = buf1.toString();
1229 str2 = buf2.toString();
1231 CollationKey key1 = col.getCollationKey(str1);
1232 CollationKey key2 = col.getCollationKey(str2);
1234 int cmpKey = key1.compareTo(key2);
1235 int cmpCol = col.compare(str1, str2);
// Only the sign is compared (negative, zero, positive), not the exact values.
1237 if ((cmpKey < 0 && cmpCol >= 0) || (cmpKey > 0 && cmpCol <= 0) || (cmpKey == 0 && cmpCol != 0)) {
1238 errln("Inconsistent comparison(" + opt + "): str1=" + str1 + ", str2=" + str2 + ", cmpKey=" + cmpKey + " , cmpCol=" + cmpCol);
1243 /* RuleBasedCollator not subclassable
1246 // RuleBasedCollator doesn't use createCollationElementIterator internally
1248 public void Test4146160() {
1250 // Use a custom collator class whose createCollationElementIterator
1251 // methods increment a count....
1253 RuleBasedCollator en_us = (RuleBasedCollator) Collator.getInstance(Locale.US);
1254 My4146160Collator.count = 0;
1255 My4146160Collator mc = null;
1257 mc = new My4146160Collator(en_us);
1258 } catch (Exception e) {
1259 errln("Failed to create a My4146160Collator.");
1263 CollationKey key = null;
1265 key = mc.getCollationKey("1");
1266 } catch (Exception e) {
1267 errln("Failure to get a CollationKey from a My4146160Collator.");
1271 if (My4146160Collator.count < 1) {
1272 errln("My4146160Collator.getCollationElementIterator not called for getCollationKey");
1275 My4146160Collator.count = 0;
1276 mc.compare("1", "2");
1278 if (My4146160Collator.count < 1) {
1279 errln("My4146160Collator.getCollationElementIterator not called for compare");
1284 /* RuleBasedCollator not subclassable
1285 * class My4146160Collator extends RuleBasedCollator {
1286 static int count = 0;
1288 public My4146160Collator(RuleBasedCollator rbc) throws Exception {
1289 super(rbc.getRules());
1292 public CollationElementIterator getCollationElementIterator(String text) {
1294 return super.getCollationElementIterator(text);
1297 public CollationElementIterator getCollationElementIterator(java.text.CharacterIterator text) {
1299 return super.getCollationElementIterator(text);