2 *******************************************************************************
\r
3 * Copyright (C) 2002-2010, International Business Machines Corporation and *
\r
4 * others. All Rights Reserved. *
\r
5 *******************************************************************************
\r
9 * Port From: ICU4C v2.1 : collate/CollationRegressionTest
\r
10 * Source File: $ICU4CRoot/source/test/intltest/regcoll.cpp
\r
13 package com.ibm.icu.dev.test.collator;
\r
15 import java.text.ParseException;
\r
16 import java.util.Locale;
\r
17 import java.util.Vector;
\r
19 import com.ibm.icu.dev.test.TestFmwk;
\r
20 import com.ibm.icu.text.CollationElementIterator;
\r
21 import com.ibm.icu.text.CollationKey;
\r
22 import com.ibm.icu.text.Collator;
\r
23 import com.ibm.icu.text.RuleBasedCollator;
\r
25 public class CollationRegressionTest extends TestFmwk {
\r
26 public static void main(String[] args) throws Exception{
\r
27 new CollationRegressionTest().run(args);
\r
32 // CollationElementIterator.reset() doesn't work
\r
34 public void Test4048446() {
\r
35 final String test1 = "XFILE What subset of all possible test cases has the highest probability of detecting the most errors?";
\r
36 //final String test2 = "Xf_ile What subset of all possible test cases has the lowest probability of detecting the least errors?";
\r
37 RuleBasedCollator en_us = (RuleBasedCollator) Collator.getInstance(Locale.US);
\r
38 CollationElementIterator i1 = en_us.getCollationElementIterator(test1);
\r
39 CollationElementIterator i2 = en_us.getCollationElementIterator(test1);
\r
41 if (i1 == null || i2 == null) {
\r
42 errln("Could not create CollationElementIterator's");
\r
46 while (i1.next() != CollationElementIterator.NULLORDER) {
\r
51 assertEqual(i1, i2);
\r
54 void assertEqual(CollationElementIterator i1, CollationElementIterator i2) {
\r
55 int c1, c2, count = 0;
\r
65 msg += msg1 + count;
\r
66 msg += ": strength(0x" + Integer.toHexString(c1);
\r
67 msg += ") != strength(0x" + Integer.toHexString(c2);
\r
73 } while (c1 != CollationElementIterator.NULLORDER);
\r
78 // Collator -> rules -> Collator round-trip broken for expanding characters
\r
80 public void Test4051866() {
\r
81 String rules = "< o & oe ,o\u3080& oe ,\u1530 ,O& OE ,O\u3080& OE ,\u1520< p ,P";
\r
83 // Build a collator containing expanding characters
\r
84 RuleBasedCollator c1 = null;
\r
87 c1 = new RuleBasedCollator(rules);
\r
88 } catch (Exception e) {
\r
89 errln("Fail to create RuleBasedCollator with rules:" + rules);
\r
93 // Build another using the rules from the first
\r
94 RuleBasedCollator c2 = null;
\r
96 c2 = new RuleBasedCollator(c1.getRules());
\r
97 } catch (Exception e) {
\r
98 errln("Fail to create RuleBasedCollator with rules:" + rules);
\r
102 // Make sure they're the same
\r
103 if (!(c1.getRules().equals(c2.getRules())))
\r
105 errln("Rules are not equal");
\r
111 // Collator thinks "black-bird" == "black"
\r
113 public void Test4053636() {
\r
114 RuleBasedCollator en_us = (RuleBasedCollator) Collator.getInstance(Locale.US);
\r
115 if (en_us.equals("black_bird", "black")) {
\r
116 errln("black-bird == black");
\r
122 // CollationElementIterator will not work correctly if the associated
\r
123 // Collator object's mode is changed
\r
125 public void Test4054238(/* char* par */) {
\r
126 final char[] chars3 = {0x61, 0x00FC, 0x62, 0x65, 0x63, 0x6b, 0x20, 0x47, 0x72, 0x00F6, 0x00DF, 0x65, 0x20, 0x4c, 0x00FC, 0x62, 0x63, 0x6b, 0};
\r
127 final String test3 = new String(chars3);
\r
128 RuleBasedCollator c = (RuleBasedCollator) Collator.getInstance(Locale.US);
\r
130 // NOTE: The Java code uses en_us to create the CollationElementIterators
\r
131 // but I'm pretty sure that's wrong, so I've changed this to use c.
\r
132 c.setDecomposition(Collator.NO_DECOMPOSITION);
\r
133 CollationElementIterator i1 = c.getCollationElementIterator(test3);
\r
134 logln("Offset:" + i1.getOffset());
\r
139 // Collator::IDENTICAL documented but not implemented
\r
141 public void Test4054734(/* char* par */) {
\r
143 //Here's the original Java:
\r
145 String[] decomp = {
\r
146 "\u0001", "<", "\u0002",
\r
147 "\u0001", "=", "\u0001",
\r
148 "A\u0001", ">", "~\u0002", // Ensure A and ~ are not compared bitwise
\r
149 "\u00C0", "=", "A\u0300", // Decomp should make these equal
\r
152 RuleBasedCollator c = (RuleBasedCollator) Collator.getInstance(Locale.US);
\r
153 c.setStrength(Collator.IDENTICAL);
\r
154 c.setDecomposition(Collator.CANONICAL_DECOMPOSITION);
\r
155 compareArray(c, decomp);
\r
158 void compareArray(Collator c, String[] tests) {
\r
160 int expectedResult = 0;
\r
162 for (int i = 0; i < tests.length; i += 3) {
\r
163 String source = tests[i];
\r
164 String comparison = tests[i + 1];
\r
165 String target = tests[i + 2];
\r
167 if (comparison.equals("<")) {
\r
168 expectedResult = -1;
\r
169 } else if (comparison.equals(">")) {
\r
170 expectedResult = 1;
\r
171 } else if (comparison.equals("=")) {
\r
172 expectedResult = 0;
\r
174 errln("Bogus comparison string \"" + comparison + "\"");
\r
177 int compareResult = 0;
\r
183 compareResult = c.compare(source, target);
\r
184 } catch (Exception e) {
\r
185 errln(e.toString());
\r
188 CollationKey sourceKey = null, targetKey = null;
\r
190 sourceKey = c.getCollationKey(source);
\r
191 } catch (Exception e) {
\r
192 errln("Couldn't get collationKey for source");
\r
197 targetKey = c.getCollationKey(target);
\r
198 } catch (Exception e) {
\r
199 errln("Couldn't get collationKey for target");
\r
203 int keyResult = sourceKey.compareTo(targetKey);
\r
204 reportCResult( source, target, sourceKey, targetKey, compareResult, keyResult, compareResult, expectedResult );
\r
208 void reportCResult( String source, String target, CollationKey sourceKey, CollationKey targetKey,
\r
209 int compareResult, int keyResult, int incResult, int expectedResult ){
\r
210 if (expectedResult < -1 || expectedResult > 1)
\r
212 errln("***** invalid call to reportCResult ****");
\r
216 boolean ok1 = (compareResult == expectedResult);
\r
217 boolean ok2 = (keyResult == expectedResult);
\r
218 boolean ok3 = (incResult == expectedResult);
\r
220 if (ok1 && ok2 && ok3 && !isVerbose()){
\r
223 String msg1 = ok1? "Ok: compare(\"" : "FAIL: compare(\"";
\r
224 String msg2 = "\", \"";
\r
225 String msg3 = "\") returned ";
\r
226 String msg4 = "; expected ";
\r
228 String sExpect = new String("");
\r
229 String sResult = new String("");
\r
230 sResult = appendCompareResult(compareResult, sResult);
\r
231 sExpect = appendCompareResult(expectedResult, sExpect);
\r
233 logln(msg1 + source + msg2 + target + msg3 + sResult);
\r
235 errln(msg1 + source + msg2 + target + msg3 + sResult + msg4 + sExpect);
\r
238 msg1 = ok2 ? "Ok: key(\"" : "FAIL: key(\"";
\r
239 msg2 = "\").compareTo(key(\"";
\r
240 msg3 = "\")) returned ";
\r
241 sResult = appendCompareResult(keyResult, sResult);
\r
243 logln(msg1 + source + msg2 + target + msg3 + sResult);
\r
245 errln(msg1 + source + msg2 + target + msg3 + sResult + msg4 + sExpect);
\r
248 errln(msg1 + prettify(sourceKey) + msg2 + prettify(targetKey));
\r
251 msg1 = ok3 ? "Ok: incCompare(\"" : "FAIL: incCompare(\"";
\r
253 msg3 = "\") returned ";
\r
255 sResult = appendCompareResult(incResult, sResult);
\r
258 logln(msg1 + source + msg2 + target + msg3 + sResult);
\r
260 errln(msg1 + source + msg2 + target + msg3 + sResult + msg4 + sExpect);
\r
265 String appendCompareResult(int result, String target) {
\r
266 if (result == -1) { //LESS
\r
268 } else if (result == 0) { //EQUAL
\r
270 } else if (result == 1) { //GREATER
\r
271 target += "GREATER";
\r
274 target += huh + result;
\r
279 String prettify(CollationKey sourceKey) {
\r
281 byte[] bytes= sourceKey.toByteArray();
\r
282 String target = "[";
\r
284 for (i = 0; i < bytes.length; i++) {
\r
285 target += Integer.toHexString(bytes[i]);
\r
294 // Full Decomposition mode not implemented
\r
296 public void Test4054736(/* char* par */) {
\r
297 RuleBasedCollator c = (RuleBasedCollator) Collator.getInstance(Locale.US);
\r
299 c.setStrength(Collator.SECONDARY);
\r
300 c.setDecomposition(Collator.NO_DECOMPOSITION);
\r
302 final String[] tests = { "\uFB4F", "\u003d", "\u05D0\u05DC" }; // Alef-Lamed vs. Alef, Lamed
\r
303 compareArray(c, tests);
\r
308 // Collator::createInstance() causes an ArrayIndexOutOfBoundsException for Korean
\r
310 public void Test4058613(/* char* par */) {
\r
311 // Creating a default collator doesn't work when Korean is the default
\r
314 Locale oldDefault = Locale.getDefault();
\r
315 Locale.setDefault(new Locale("ko", ""));
\r
319 c = Collator.getInstance(new Locale("en", "US"));
\r
322 errln("Could not create a Korean collator");
\r
323 Locale.setDefault(oldDefault);
\r
327 // Since the fix to this bug was to turn off decomposition for Korean collators,
\r
328 // ensure that's what we got
\r
329 if (c.getDecomposition() != Collator.NO_DECOMPOSITION) {
\r
330 errln("Decomposition is not set to NO_DECOMPOSITION for Korean collator");
\r
333 Locale.setDefault(oldDefault);
\r
338 // RuleBasedCollator.getRules does not return the exact pattern as input
\r
339 // for expanding character sequences
\r
341 public void Test4059820(/* char* par */) {
\r
342 RuleBasedCollator c = null;
\r
343 String rules = "< a < b , c/a < d < z";
\r
345 c = new RuleBasedCollator(rules);
\r
346 } catch (Exception e) {
\r
347 errln("Failure building a collator.");
\r
351 if ( c.getRules().indexOf("c/a") == -1)
\r
353 errln("returned rules do not contain 'c/a'");
\r
359 // MergeCollation::fixEntry broken for "& H < \u0131, \u0130, i, I"
\r
361 public void Test4060154(/* char* par */) {
\r
362 String rules ="< g, G < h, H < i, I < j, J & H < \u0131, \u0130, i, I";
\r
364 RuleBasedCollator c = null;
\r
366 c = new RuleBasedCollator(rules);
\r
367 } catch (Exception e) {
\r
368 //System.out.println(e);
\r
369 errln("failure building collator.");
\r
373 c.setDecomposition(Collator.NO_DECOMPOSITION);
\r
375 String[] tertiary = {
\r
377 "H", "<", "\u0131",
\r
379 "\u0131", "<", "\u0130",
\r
380 "\u0130", "<", "i",
\r
381 "\u0130", ">", "H",
\r
384 c.setStrength(Collator.TERTIARY);
\r
385 compareArray(c, tertiary);
\r
387 String[] secondary = {
\r
389 "\u0131", "=", "\u0130",
\r
392 c.setStrength(Collator.PRIMARY);
\r
393 compareArray(c, secondary);
\r
398 // Secondary/Tertiary comparison incorrect in French Secondary
\r
400 public void Test4062418(/* char* par */) {
\r
401 RuleBasedCollator c = null;
\r
403 c = (RuleBasedCollator) Collator.getInstance(Locale.FRANCE);
\r
404 } catch (Exception e) {
\r
405 errln("Failed to create collator for Locale::FRANCE()");
\r
408 c.setStrength(Collator.SECONDARY);
\r
411 "p\u00eache", "<", "p\u00e9ch\u00e9", // Comparing accents from end, p\u00e9ch\u00e9 is greater
\r
414 compareArray(c, tests);
\r
419 // Collator::compare() method broken if either string contains spaces
\r
421 public void Test4065540(/* char* par */) {
\r
422 RuleBasedCollator en_us = (RuleBasedCollator) Collator.getInstance(Locale.US);
\r
423 if (en_us.compare("abcd e", "abcd f") == 0) {
\r
424 errln("'abcd e' == 'abcd f'");
\r
430 // Unicode characters need to be recursively decomposed to get the
\r
431 // correct result. For example,
\r
432 // u1EB1 -> \u0103 + \u0300 -> a + \u0306 + \u0300.
\r
434 public void Test4066189(/* char* par */) {
\r
435 final String test1 = "\u1EB1";
\r
436 final String test2 = "\u0061\u0306\u0300";
\r
438 // NOTE: The java code used en_us to create the
\r
439 // CollationElementIterator's. I'm pretty sure that
\r
440 // was wrong, so I've changed the code to use c1 and c2
\r
441 RuleBasedCollator c1 = (RuleBasedCollator) Collator.getInstance(Locale.US);
\r
442 c1.setDecomposition(Collator.CANONICAL_DECOMPOSITION);
\r
443 CollationElementIterator i1 = c1.getCollationElementIterator(test1);
\r
445 RuleBasedCollator c2 = (RuleBasedCollator) Collator.getInstance(Locale.US);
\r
446 c2.setDecomposition(Collator.NO_DECOMPOSITION);
\r
447 CollationElementIterator i2 = c2.getCollationElementIterator(test2);
\r
449 assertEqual(i1, i2);
\r
454 // French secondary collation checking at the end of compare iteration fails
\r
456 public void Test4066696(/* char* par */) {
\r
457 RuleBasedCollator c = null;
\r
459 c = (RuleBasedCollator)Collator.getInstance(Locale.FRANCE);
\r
460 } catch(Exception e) {
\r
461 errln("Failure creating collator for Locale::getFrance()");
\r
464 c.setStrength(Collator.SECONDARY);
\r
467 "\u00e0", ">", "\u01fa", // a-grave < A-ring-acute
\r
469 compareArray(c, tests);
\r
474 // Bad canonicalization of same-class combining characters
\r
476 public void Test4076676(/* char* par */) {
\r
477 // These combining characters are all in the same class, so they should not
\r
478 // be reordered, and they should compare as unequal.
\r
479 final String s1 = "\u0041\u0301\u0302\u0300";
\r
480 final String s2 = "\u0041\u0302\u0300\u0301";
\r
482 RuleBasedCollator c = (RuleBasedCollator) Collator.getInstance(Locale.US);
\r
483 c.setStrength(Collator.TERTIARY);
\r
485 if (c.compare(s1,s2) == 0) {
\r
486 errln("Same-class combining chars were reordered");
\r
492 // RuleBasedCollator breaks on "< a < bb" rule
\r
494 public void Test4078588(/* char *par */) {
\r
495 RuleBasedCollator rbc = null;
\r
497 rbc = new RuleBasedCollator("< a < bb");
\r
498 } catch (Exception e) {
\r
499 errln("Failed to create RuleBasedCollator.");
\r
503 int result = rbc.compare("a","bb");
\r
506 errln("Compare(a,bb) returned " + result + "; expected -1");
\r
512 // RuleBasedCollator::operator==(NULL) throws NullPointerException
\r
514 public void Test4079231(/* char* par */) {
\r
515 RuleBasedCollator en_us = (RuleBasedCollator) Collator.getInstance(Locale.US);
\r
517 if (en_us.equals(null)) {
\r
518 errln("en_us.equals(null) returned true");
\r
520 } catch (Exception e) {
\r
521 errln("en_us.equals(null) threw " + e.toString());
\r
527 // Combining characters in different classes not reordered properly.
\r
529 public void Test4081866(/* char* par */) {
\r
530 // These combining characters are all in different classes,
\r
531 // so they should be reordered and the strings should compare as equal.
\r
532 String s1 = "\u0041\u0300\u0316\u0327\u0315";
\r
533 String s2 = "\u0041\u0327\u0316\u0315\u0300";
\r
535 RuleBasedCollator c = (RuleBasedCollator) Collator.getInstance(Locale.US);
\r
536 c.setStrength(Collator.TERTIARY);
\r
538 // Now that the default collators are set to NO_DECOMPOSITION
\r
539 // (as a result of fixing bug 4114077), we must set it explicitly
\r
540 // when we're testing reordering behavior. -- lwerner, 5/5/98
\r
541 c.setDecomposition(Collator.CANONICAL_DECOMPOSITION);
\r
542 if (c.compare(s1,s2) != 0) {
\r
543 errln("Combining chars were not reordered");
\r
549 // string comparison errors in Scandinavian collators
\r
551 public void Test4087241(/* char* par */) {
\r
552 Locale da_DK = new Locale("da", "DK");
\r
553 RuleBasedCollator c = null;
\r
555 c = (RuleBasedCollator) Collator.getInstance(da_DK);
\r
556 } catch (Exception e) {
\r
557 errln("Failed to create collator for da_DK locale");
\r
560 c.setStrength(Collator.SECONDARY);
\r
562 "\u007a", "\u003c", "\u00E6", // z < ae
\r
563 "\u0061\u0308", "\u003c", "\u0061\u030A", // a-unlaut < a-ring
\r
564 "\u0059", "\u003c", "\u0075\u0308", // Y < u-umlaut
\r
566 compareArray(c, tests);
\r
571 // CollationKey takes ignorable strings into account when it shouldn't
\r
573 public void Test4087243(/* char* par */) {
\r
574 RuleBasedCollator c = (RuleBasedCollator) Collator.getInstance(Locale.US);
\r
575 c.setStrength(Collator.TERTIARY);
\r
577 "\u0031\u0032\u0033", "\u003d", "\u0031\u0032\u0033\u0001" // 1 2 3 = 1 2 3 ctrl-A
\r
579 compareArray(c, tests);
\r
584 // Mu/micro conflict
\r
585 // Micro symbol and greek lowercase letter Mu should sort identically
\r
587 public void Test4092260(/* char* par */) {
\r
588 Locale el = new Locale("el", "");
\r
591 c = Collator.getInstance(el);
\r
592 } catch (Exception e) {
\r
593 errln("Failed to create collator for el locale.");
\r
596 // These now have tertiary differences in UCA
\r
597 c.setStrength(Collator.SECONDARY);
\r
599 "\u00B5", "\u003d", "\u03BC",
\r
601 compareArray(c, tests);
\r
606 public void Test4095316(/* char* par */) {
\r
607 Locale el_GR = new Locale("el", "GR");
\r
610 c = Collator.getInstance(el_GR);
\r
611 } catch (Exception e) {
\r
612 errln("Failed to create collator for el_GR locale");
\r
615 // These now have tertiary differences in UCA
\r
616 //c->setStrength(Collator::TERTIARY);
\r
617 //c->setAttribute(UCOL_STRENGTH, UCOL_SECONDARY, status);
\r
618 c.setStrength(Collator.SECONDARY);
\r
620 "\u03D4", "\u003d", "\u03AB",
\r
622 compareArray(c, tests);
\r
627 public void Test4101940(/* char* par */) {
\r
628 RuleBasedCollator c = null;
\r
629 String rules = "< a < b";
\r
630 String nothing = "";
\r
632 c = new RuleBasedCollator(rules);
\r
633 } catch (Exception e) {
\r
634 errln("Failed to create RuleBasedCollator");
\r
637 CollationElementIterator i = c.getCollationElementIterator(nothing);
\r
639 if (i.next() != CollationElementIterator.NULLORDER) {
\r
640 errln("next did not return NULLORDER");
\r
646 // Collator::compare not handling spaces properly
\r
648 public void Test4103436(/* char* par */) {
\r
649 RuleBasedCollator c = (RuleBasedCollator) Collator.getInstance(Locale.US);
\r
650 c.setStrength(Collator.TERTIARY);
\r
652 "\u0066\u0069\u006c\u0065", "\u003c", "\u0066\u0069\u006c\u0065\u0020\u0061\u0063\u0063\u0065\u0073\u0073",
\r
653 "\u0066\u0069\u006c\u0065", "\u003c", "\u0066\u0069\u006c\u0065\u0061\u0063\u0063\u0065\u0073\u0073",
\r
655 compareArray(c, tests);
\r
660 // Collation not Unicode conformant with Hangul syllables
\r
662 public void Test4114076(/* char* par */) {
\r
663 RuleBasedCollator c = (RuleBasedCollator) Collator.getInstance(Locale.US);
\r
664 c.setStrength(Collator.TERTIARY);
\r
667 // With Canonical decomposition, Hangul syllables should get decomposed
\r
668 // into Jamo, but Jamo characters should not be decomposed into
\r
672 "\ud4db", "\u003d", "\u1111\u1171\u11b6"
\r
675 c.setDecomposition(Collator.CANONICAL_DECOMPOSITION);
\r
676 compareArray(c, test1);
\r
679 // *In earlier versions of Unicode, jamo characters like ksf
\r
680 // had compatibility mappings to kf + sf. These mappings were
\r
681 // removed in Unicode 2.1.9 to ensure that Hangul syllables are maintained.)
\r
682 // That is, the following test is obsolete as of 2.1.9
\r
684 //obsolete- // With Full decomposition, it should go all the way down to
\r
685 //obsolete- // conjoining Jamo characters.
\r
687 //obsolete- static const UChar test2[][CollationRegressionTest::MAX_TOKEN_LEN] =
\r
689 //obsolete- {0xd4db, 0}, {0x3d, 0}, {0x1111, 0x116e, 0x1175, 0x11af, 0x11c2, 0}
\r
692 //obsolete- c->setDecomposition(Normalizer::DECOMP_COMPAT);
\r
693 //obsolete- compareArray(*c, test2, ARRAY_LENGTH(test2));
\r
698 // Collation with decomposition off doesn't work for Europe
\r
700 public void Test4114077(/* char* par */) {
\r
701 // Ensure that we get the same results with decomposition off
\r
702 // as we do with it on....
\r
703 RuleBasedCollator c = (RuleBasedCollator) Collator.getInstance(Locale.US);
\r
704 c.setStrength(Collator.TERTIARY);
\r
706 "\u00C0", "\u003d", "\u0041\u0300", // Should be equivalent
\r
707 "\u0070\u00ea\u0063\u0068\u0065", "\u003e", "\u0070\u00e9\u0063\u0068\u00e9",
\r
708 "\u0204", "\u003d", "\u0045\u030F",
\r
709 "\u01fa", "\u003d", "\u0041\u030a\u0301", // a-ring-acute -> a-ring, acute
\r
710 // -> a, ring, acute
\r
711 "\u0041\u0300\u0316", "\u003c", "\u0041\u0316\u0300" // No reordering --> unequal
\r
714 c.setDecomposition(Collator.NO_DECOMPOSITION);
\r
715 compareArray(c, test1);
\r
718 "\u0041\u0300\u0316", "\u003d", "\u0041\u0316\u0300" // Reordering --> equal
\r
721 c.setDecomposition(Collator.CANONICAL_DECOMPOSITION);
\r
722 compareArray(c, test2);
\r
727 // Collator::getCollationKey was hanging on certain character sequences
\r
729 public void Test4124632(/* char* par */) {
\r
730 Collator coll = null;
\r
732 coll = Collator.getInstance(Locale.JAPAN);
\r
733 } catch (Exception e) {
\r
734 errln("Failed to create collator for Locale::JAPAN");
\r
737 String test = "\u0041\u0308\u0062\u0063";
\r
740 key = coll.getCollationKey(test);
\r
741 logln(key.getSourceString());
\r
742 } catch (Exception e) {
\r
743 errln("CollationKey creation failed.");
\r
749 // sort order of french words with multiple accents has errors
\r
751 public void Test4132736(/* char* par */) {
\r
754 c = Collator.getInstance(Locale.FRANCE);
\r
755 c.setStrength(Collator.TERTIARY);
\r
756 } catch (Exception e) {
\r
757 errln("Failed to create a collator for Locale::getFrance()");
\r
761 "\u0065\u0300\u0065\u0301", "\u003c", "\u0065\u0301\u0065\u0300",
\r
762 "\u0065\u0300\u0301", "\u003c", "\u0065\u0301\u0300",
\r
764 compareArray(c, test1);
\r
769 // The sorting using java.text.CollationKey is not in the exact order
\r
771 public void Test4133509(/* char* par */) {
\r
772 RuleBasedCollator en_us = (RuleBasedCollator) Collator.getInstance(Locale.US);
\r
774 "\u0045\u0078\u0063\u0065\u0070\u0074\u0069\u006f\u006e", "\u003c", "\u0045\u0078\u0063\u0065\u0070\u0074\u0069\u006f\u006e\u0049\u006e\u0049\u006e\u0069\u0074\u0069\u0061\u006c\u0069\u007a\u0065\u0072\u0045\u0072\u0072\u006f\u0072",
\r
775 "\u0047\u0072\u0061\u0070\u0068\u0069\u0063\u0073", "\u003c", "\u0047\u0072\u0061\u0070\u0068\u0069\u0063\u0073\u0045\u006e\u0076\u0069\u0072\u006f\u006e\u006d\u0065\u006e\u0074",
\r
776 "\u0053\u0074\u0072\u0069\u006e\u0067", "\u003c", "\u0053\u0074\u0072\u0069\u006e\u0067\u0042\u0075\u0066\u0066\u0065\u0072",
\r
779 compareArray(en_us, test1);
\r
784 // getCollationKey throws exception for spanish text
\r
785 // Cannot reproduce this bug on 1.2, however it DOES fail on 1.1.6
\r
787 public void Test4139572(/* char* par */) {
\r
789 // Code pasted straight from the bug report
\r
790 // (and then translated to C++ ;-)
\r
792 // create spanish locale and collator
\r
793 Locale l = new Locale("es", "es");
\r
794 Collator col = null;
\r
796 col = Collator.getInstance(l);
\r
797 } catch (Exception e) {
\r
798 errln("Failed to create a collator for es_es locale.");
\r
801 CollationKey key = null;
\r
802 // this spanish phrase kills it!
\r
804 key = col.getCollationKey("Nombre De Objeto");
\r
805 logln("source:" + key.getSourceString());
\r
806 } catch (Exception e) {
\r
807 errln("Error creating CollationKey for \"Nombre De Ojbeto\"");
\r
813 // Support for Swedish gone in 1.1.6 (Can't create Swedish collator)
\r
815 public void Test4141640(/* char* par */) {
\r
817 // Rather than just creating a Swedish collator, we might as well
\r
818 // try to instantiate one for every locale available on the system
\r
819 // in order to prevent this sort of bug from cropping up in the future
\r
821 Locale locales[] = Collator.getAvailableLocales();
\r
823 for (int i = 0; i < locales.length; i += 1)
\r
827 c = Collator.getInstance(locales[i]);
\r
828 logln("source: " + c.getStrength());
\r
829 } catch (Exception e) {
\r
831 msg += "Could not create collator for locale ";
\r
832 msg += locales[i].getDisplayName();
\r
838 private void checkListOrder(String[] sortedList, Collator c) {
\r
839 // this function uses the specified Collator to make sure the
\r
840 // passed-in list is already sorted into ascending order
\r
841 for (int i = 0; i < sortedList.length - 1; i++) {
\r
842 if (c.compare(sortedList[i], sortedList[i + 1]) >= 0) {
\r
843 errln("List out of order at element #" + i + ": "
\r
844 + sortedList[i] + " >= "
\r
845 + sortedList[i + 1]);
\r
850 public void Test4171974() {
\r
851 // test French accent ordering more thoroughly
\r
852 /*String[] frenchList = {
\r
853 "\u0075\u0075", // u u
\r
854 "\u00fc\u0075", // u-umlaut u
\r
855 "\u01d6\u0075", // u-umlaut-macron u
\r
856 "\u016b\u0075", // u-macron u
\r
857 "\u1e7b\u0075", // u-macron-umlaut u
\r
858 "\u0075\u00fc", // u u-umlaut
\r
859 "\u00fc\u00fc", // u-umlaut u-umlaut
\r
860 "\u01d6\u00fc", // u-umlaut-macron u-umlaut
\r
861 "\u016b\u00fc", // u-macron u-umlaut
\r
862 "\u1e7b\u00fc", // u-macron-umlaut u-umlaut
\r
863 "\u0075\u01d6", // u u-umlaut-macron
\r
864 "\u00fc\u01d6", // u-umlaut u-umlaut-macron
\r
865 "\u01d6\u01d6", // u-umlaut-macron u-umlaut-macron
\r
866 "\u016b\u01d6", // u-macron u-umlaut-macron
\r
867 "\u1e7b\u01d6", // u-macron-umlaut u-umlaut-macron
\r
868 "\u0075\u016b", // u u-macron
\r
869 "\u00fc\u016b", // u-umlaut u-macron
\r
870 "\u01d6\u016b", // u-umlaut-macron u-macron
\r
871 "\u016b\u016b", // u-macron u-macron
\r
872 "\u1e7b\u016b", // u-macron-umlaut u-macron
\r
873 "\u0075\u1e7b", // u u-macron-umlaut
\r
874 "\u00fc\u1e7b", // u-umlaut u-macron-umlaut
\r
875 "\u01d6\u1e7b", // u-umlaut-macron u-macron-umlaut
\r
876 "\u016b\u1e7b", // u-macron u-macron-umlaut
\r
877 "\u1e7b\u1e7b" // u-macron-umlaut u-macron-umlaut
\r
879 Collator french = Collator.getInstance(Locale.FRENCH);
\r
881 logln("Testing French order...");
\r
882 checkListOrder(frenchList, french);
\r
884 logln("Testing French order without decomposition...");
\r
885 french.setDecomposition(Collator.NO_DECOMPOSITION);
\r
886 checkListOrder(frenchList, french);*/
\r
888 String[] englishList = {
\r
889 "\u0075\u0075", // u u
\r
890 "\u0075\u00fc", // u u-umlaut
\r
891 "\u0075\u01d6", // u u-umlaut-macron
\r
892 "\u0075\u016b", // u u-macron
\r
893 "\u0075\u1e7b", // u u-macron-umlaut
\r
894 "\u00fc\u0075", // u-umlaut u
\r
895 "\u00fc\u00fc", // u-umlaut u-umlaut
\r
896 "\u00fc\u01d6", // u-umlaut u-umlaut-macron
\r
897 "\u00fc\u016b", // u-umlaut u-macron
\r
898 "\u00fc\u1e7b", // u-umlaut u-macron-umlaut
\r
899 "\u01d6\u0075", // u-umlaut-macron u
\r
900 "\u01d6\u00fc", // u-umlaut-macron u-umlaut
\r
901 "\u01d6\u01d6", // u-umlaut-macron u-umlaut-macron
\r
902 "\u01d6\u016b", // u-umlaut-macron u-macron
\r
903 "\u01d6\u1e7b", // u-umlaut-macron u-macron-umlaut
\r
904 "\u016b\u0075", // u-macron u
\r
905 "\u016b\u00fc", // u-macron u-umlaut
\r
906 "\u016b\u01d6", // u-macron u-umlaut-macron
\r
907 "\u016b\u016b", // u-macron u-macron
\r
908 "\u016b\u1e7b", // u-macron u-macron-umlaut
\r
909 "\u1e7b\u0075", // u-macron-umlaut u
\r
910 "\u1e7b\u00fc", // u-macron-umlaut u-umlaut
\r
911 "\u1e7b\u01d6", // u-macron-umlaut u-umlaut-macron
\r
912 "\u1e7b\u016b", // u-macron-umlaut u-macron
\r
913 "\u1e7b\u1e7b" // u-macron-umlaut u-macron-umlaut
\r
915 Collator english = Collator.getInstance(Locale.ENGLISH);
\r
917 logln("Testing English order...");
\r
918 checkListOrder(englishList, english);
\r
920 logln("Testing English order without decomposition...");
\r
921 english.setDecomposition(Collator.NO_DECOMPOSITION);
\r
922 checkListOrder(englishList, english);
\r
925 public void Test4179216() throws Exception {
\r
926 // you can position a CollationElementIterator in the middle of
\r
927 // a contracting character sequence, yielding a bogus collation
\r
929 RuleBasedCollator coll = (RuleBasedCollator)Collator.getInstance(Locale.US);
\r
930 coll = new RuleBasedCollator(coll.getRules()
\r
931 + " & C < ch , cH , Ch , CH < cat < crunchy");
\r
932 String testText = "church church catcatcher runcrunchynchy";
\r
933 CollationElementIterator iter = coll.getCollationElementIterator(
\r
936 // test that the "ch" combination works properly
\r
938 int elt4 = CollationElementIterator.primaryOrder(iter.next());
\r
941 int elt0 = CollationElementIterator.primaryOrder(iter.next());
\r
944 int elt5 = CollationElementIterator.primaryOrder(iter.next());
\r
946 if (elt4 != elt0 || elt5 != elt0)
\r
947 errln("The collation elements at positions 0 (" + elt0 + "), 4 ("
\r
948 + elt4 + "), and 5 (" + elt5 + ") don't match.");
\r
950 // test that the "cat" combination works properly
\r
951 iter.setOffset(14);
\r
952 int elt14 = CollationElementIterator.primaryOrder(iter.next());
\r
954 iter.setOffset(15);
\r
955 int elt15 = CollationElementIterator.primaryOrder(iter.next());
\r
957 iter.setOffset(16);
\r
958 int elt16 = CollationElementIterator.primaryOrder(iter.next());
\r
960 iter.setOffset(17);
\r
961 int elt17 = CollationElementIterator.primaryOrder(iter.next());
\r
963 iter.setOffset(18);
\r
964 int elt18 = CollationElementIterator.primaryOrder(iter.next());
\r
966 iter.setOffset(19);
\r
967 int elt19 = CollationElementIterator.primaryOrder(iter.next());
\r
969 if (elt14 != elt15 || elt14 != elt16 || elt14 != elt17
\r
970 || elt14 != elt18 || elt14 != elt19)
\r
971 errln("\"cat\" elements don't match: elt14 = " + elt14 + ", elt15 = "
\r
972 + elt15 + ", elt16 = " + elt16 + ", elt17 = " + elt17
\r
973 + ", elt18 = " + elt18 + ", elt19 = " + elt19);
\r
975 // now generate a complete list of the collation elements,
\r
976 // first using next() and then using setOffset(), and
\r
977 // make sure both interfaces return the same set of elements
\r
980 int elt = iter.next();
\r
982 while (elt != CollationElementIterator.NULLORDER) {
\r
987 String[] nextElements = new String[count];
\r
988 String[] setOffsetElements = new String[count];
\r
994 while (elt != CollationElementIterator.NULLORDER) {
\r
995 nextElements[count++] = testText.substring(lastPos, iter.getOffset());
\r
996 lastPos = iter.getOffset();
\r
1000 for (int i = 0; i < testText.length(); ) {
\r
1001 iter.setOffset(i);
\r
1002 lastPos = iter.getOffset();
\r
1003 elt = iter.next();
\r
1004 setOffsetElements[count++] = testText.substring(lastPos, iter.getOffset());
\r
1005 i = iter.getOffset();
\r
1007 for (int i = 0; i < nextElements.length; i++) {
\r
1008 if (nextElements[i].equals(setOffsetElements[i])) {
\r
1009 logln(nextElements[i]);
\r
1011 errln("Error: next() yielded " + nextElements[i] + ", but setOffset() yielded "
\r
1012 + setOffsetElements[i]);
\r
1017 public void Test4216006() throws Exception {
\r
1018 // rule parser barfs on "<\u00e0=a\u0300", and on other cases
\r
1019 // where the same token (after normalization) appears twice in a row
\r
1020 boolean caughtException = false;
\r
1022 new RuleBasedCollator("\u00e0<a\u0300");
\r
1024 catch (ParseException e) {
\r
1025 caughtException = true;
\r
1027 if (!caughtException) {
\r
1028 throw new Exception("\"a<a\" collation sequence didn't cause parse error!");
\r
1031 RuleBasedCollator collator = new RuleBasedCollator("<\u00e0=a\u0300");
\r
1032 //commented by Kevin 2003/10/21
\r
1033 //for "FULL_DECOMPOSITION is not supported here." in ICU4J DOC
\r
1034 //collator.setDecomposition(Collator.FULL_DECOMPOSITION);
\r
1035 collator.setStrength(Collator.IDENTICAL);
\r
1037 String[] tests = {
\r
1038 "a\u0300", "=", "\u00e0",
\r
1039 "\u00e0", "=", "a\u0300"
\r
1042 compareArray(collator, tests);
\r
1045 // CollationElementIterator.previous broken for expanding char sequences
\r
1047 public void Test4179686() throws Exception {
\r
1048 RuleBasedCollator en_us = (RuleBasedCollator) Collator.getInstance(Locale.US);
\r
1049 // Create a collator with a few expanding character sequences in it....
\r
1050 RuleBasedCollator coll = new RuleBasedCollator(en_us.getRules()
\r
1051 + " & ae ; \u00e4 & AE ; \u00c4"
\r
1052 + " & oe ; \u00f6 & OE ; \u00d6"
\r
1053 + " & ue ; \u00fc & UE ; \u00dc");
\r
1055 String text = "T\u00f6ne"; // o-umlaut
\r
1057 CollationElementIterator iter = coll.getCollationElementIterator(text);
\r
1058 Vector elements = new Vector();
\r
1061 // Iterate forward and collect all of the elements into a Vector
\r
1062 while ((elem = iter.next()) != CollationElementIterator.NULLORDER) {
\r
1063 elements.addElement(new Integer(elem));
\r
1066 // Now iterate backward and make sure they're the same
\r
1067 int index = elements.size() - 1;
\r
1068 while ((elem = iter.previous()) != CollationElementIterator.NULLORDER) {
\r
1069 int expect = ((Integer)elements.elementAt(index)).intValue();
\r
1071 if (elem != expect) {
\r
1072 errln("Mismatch at index " + index
\r
1073 + ": got " + Integer.toString(elem,16)
\r
1074 + ", expected " + Integer.toString(expect,16));
\r
1080 protected void init()throws Exception{
\r
1083 public void Test4244884() throws Exception {
\r
1084 RuleBasedCollator coll = (RuleBasedCollator)Collator.getInstance(Locale.US);
\r
1085 coll = new RuleBasedCollator(coll.getRules()
\r
1086 + " & C < ch , cH , Ch , CH < cat < crunchy");
\r
1088 String[] testStrings = new String[] {
\r
1100 for (int i = 1; i < testStrings.length; i++) {
\r
1101 if (coll.compare(testStrings[i - 1], testStrings[i]) >= 0) {
\r
1102 errln("error: \"" + testStrings[i - 1]
\r
1103 + "\" is greater than or equal to \"" + testStrings[i]
\r
1109 // CollationElementIterator set doesn't work properly with next/prev
\r
1110 public void Test4663220() {
\r
1111 RuleBasedCollator collator = (RuleBasedCollator)Collator.getInstance(Locale.US);
\r
1112 java.text.StringCharacterIterator stringIter = new java.text.StringCharacterIterator("fox");
\r
1113 CollationElementIterator iter = collator.getCollationElementIterator(stringIter);
\r
1115 int[] elements_next = new int[3];
\r
1116 logln("calling next:");
\r
1117 for (int i = 0; i < 3; ++i) {
\r
1118 logln("[" + i + "] " + (elements_next[i] = iter.next()));
\r
1121 int[] elements_fwd = new int[3];
\r
1122 logln("calling set/next:");
\r
1123 for (int i = 0; i < 3; ++i) {
\r
1124 iter.setOffset(i);
\r
1125 logln("[" + i + "] " + (elements_fwd[i] = iter.next()));
\r
1128 for (int i = 0; i < 3; ++i) {
\r
1129 if (elements_next[i] != elements_fwd[i]) {
\r
1130 errln("mismatch at position " + i +
\r
1131 ": " + elements_next[i] +
\r
1132 " != " + elements_fwd[i]);
\r
1137 /* RuleBasedCollator not subclassable
\r
1140 // RuleBasedCollator doesn't use createCollationElementIterator internally
\r
1142 public void Test4146160() {
\r
1144 // Use a custom collator class whose createCollationElementIterator
\r
1145 // methods increment a count....
\r
1147 RuleBasedCollator en_us = (RuleBasedCollator) Collator.getInstance(Locale.US);
\r
1148 My4146160Collator.count = 0;
\r
1149 My4146160Collator mc = null;
\r
1151 mc = new My4146160Collator(en_us);
\r
1152 } catch (Exception e) {
\r
1153 errln("Failed to create a My4146160Collator.");
\r
1157 CollationKey key = null;
\r
1159 key = mc.getCollationKey("1");
\r
1160 } catch (Exception e) {
\r
1161 errln("Failure to get a CollationKey from a My4146160Collator.");
\r
1165 if (My4146160Collator.count < 1) {
\r
1166 errln("My4146160Collator.getCollationElementIterator not called for getCollationKey");
\r
1169 My4146160Collator.count = 0;
\r
1170 mc.compare("1", "2");
\r
1172 if (My4146160Collator.count < 1) {
\r
1173 errln("My4146160Collator.getCollationElementIterator not called for compare");
\r
1178 /* RuleBasedCollator not subclassable
\r
1179 * class My4146160Collator extends RuleBasedCollator {
\r
1180 static int count = 0;
\r
1182 public My4146160Collator(RuleBasedCollator rbc) throws Exception {
\r
1183 super(rbc.getRules());
\r
1186 public CollationElementIterator getCollationElementIterator(String text) {
\r
1188 return super.getCollationElementIterator(text);
\r
1191 public CollationElementIterator getCollationElementIterator(java.text.CharacterIterator text) {
\r
1193 return super.getCollationElementIterator(text);
\r