2 *******************************************************************************
3 * Copyright (C) 1996-2012, International Business Machines Corporation and
4 * others. All Rights Reserved.
5 *******************************************************************************
7 package com.ibm.icu.dev.test.lang;
9 import java.text.NumberFormat;
10 import java.text.ParsePosition;
11 import java.util.ArrayList;
12 import java.util.Arrays;
13 import java.util.Collection;
14 import java.util.Comparator;
15 import java.util.HashMap;
16 import java.util.HashSet;
17 import java.util.Iterator;
18 import java.util.LinkedHashSet;
19 import java.util.List;
21 import java.util.SortedSet;
22 import java.util.TreeSet;
24 import com.ibm.icu.dev.test.TestFmwk;
25 import com.ibm.icu.impl.SortedSetRelation;
26 import com.ibm.icu.impl.Utility;
27 import com.ibm.icu.lang.UCharacter;
28 import com.ibm.icu.lang.UCharacterEnums.ECharacterCategory;
29 import com.ibm.icu.lang.UProperty;
30 import com.ibm.icu.lang.UScript;
31 import com.ibm.icu.text.SymbolTable;
32 import com.ibm.icu.text.UTF16;
33 import com.ibm.icu.text.UnicodeMatcher;
34 import com.ibm.icu.text.UnicodeSet;
35 import com.ibm.icu.text.UnicodeSet.ComparisonStyle;
36 import com.ibm.icu.text.UnicodeSetIterator;
40 * @summary General test of UnicodeSet
42 public class UnicodeSetTest extends TestFmwk {
// Sentinel entry placed inside the String[] arguments given to expectToPattern() in
// TestToPattern(). Presumably it separates the strings expected to be in the set from
// those expected to be absent -- TODO confirm against expectToPattern().
44 static final String NOT = "%%%%";
// Command-line entry point: creates a UnicodeSetTest and passes the arguments to run().
46 public static void main(String[] args) throws Exception {
47 new UnicodeSetTest().run(args);
50 private static final boolean isCccValue(int ccc) {
// Walks the property numbers from UProperty.BINARY_START up to UProperty.INT_LIMIT and,
// for the SHORT and LONG name choices, checks that:
//   1. the property name and each of its value names can be fetched,
//   2. "[:name=value:]" can be parsed into a UnicodeSet, and
//   3. every code point in that set reports exactly that property value.
77 public void TestPropertyAccess() {
79 // test to see that all of the names work
80 for (int propNum = UProperty.BINARY_START; propNum < UProperty.INT_LIMIT; ++propNum) {
82 //Skipping tests in the non-exhaustive mode to shorten the test time ticket#6475
83 if(getInclusion()<=5 && count%5!=0){
// Binary and integer properties occupy two separate numeric ranges; jump over the unused numbers between them.
86 if (propNum >= UProperty.BINARY_LIMIT && propNum < UProperty.INT_START) { // skip the gap
87 propNum = UProperty.INT_START;
89 for (int nameChoice = UProperty.NameChoice.SHORT; nameChoice <= UProperty.NameChoice.LONG; ++nameChoice) {
92 propName = UCharacter.getPropertyName(propNum, nameChoice);
93 if (propName == null) {
94 if (nameChoice == UProperty.NameChoice.SHORT) continue; // allow non-existent short names
// A missing long name is an error; it is raised as an exception so the catch below reports it.
95 throw new NullPointerException();
97 } catch (RuntimeException e1) {
98 errln("Can't get property name for: "
99 + "Property (" + propNum + ")"
100 + ", NameChoice: " + nameChoice + ", "
101 + e1.getClass().getName());
104 logln("Property (" + propNum + "): " + propName);
// Try every value between the minimum and maximum the property reports.
105 for (int valueNum = UCharacter.getIntPropertyMinValue(propNum); valueNum <= UCharacter.getIntPropertyMaxValue(propNum); ++valueNum) {
108 valueName = UCharacter.getPropertyValueName(propNum, valueNum, nameChoice);
109 if (valueName == null) {
110 if (nameChoice == UProperty.NameChoice.SHORT) continue; // allow non-existent short names
111 if ((propNum == UProperty.CANONICAL_COMBINING_CLASS ||
112 propNum == UProperty.LEAD_CANONICAL_COMBINING_CLASS ||
113 propNum == UProperty.TRAIL_CANONICAL_COMBINING_CLASS) &&
114 !isCccValue(valueNum)) {
115 // Only a few of the canonical combining classes have names.
116 // Otherwise they are just integer values.
119 throw new NullPointerException();
122 } catch (RuntimeException e1) {
123 errln("Can't get property value name for: "
124 + "Property (" + propNum + "): " + propName + ", "
125 + "Value (" + valueNum + ") "
126 + ", NameChoice: " + nameChoice + ", "
127 + e1.getClass().getName());
130 logln("Value (" + valueNum + "): " + valueName);
// Build the set through the pattern syntax using the names just retrieved.
133 testSet = new UnicodeSet("[:" + propName + "=" + valueName + ":]");
134 } catch (RuntimeException e) {
135 errln("Can't create UnicodeSet for: "
136 + "Property (" + propNum + "): " + propName + ", "
137 + "Value (" + valueNum + "): " + valueName + ", "
138 + e.getClass().getName());
// Gather every code point whose real property value disagrees, so a single errln can list them all.
141 UnicodeSet collectedErrors = new UnicodeSet();
142 for (UnicodeSetIterator it = new UnicodeSetIterator(testSet); it.next();) {
143 int value = UCharacter.getIntPropertyValue(it.codepoint, propNum);
144 if (value != valueNum) {
145 collectedErrors.add(it.codepoint);
148 if (collectedErrors.size() != 0) {
149 errln("Property Value Differs: "
150 + "Property (" + propNum + "): " + propName + ", "
151 + "Value (" + valueNum + "): " + valueName + ", "
152 + "Differing values: " + collectedErrors.toPattern(true));
// Exercises UnicodeSet.toPattern(): round trips for syntax characters and selected code
// points, the pattern form of multi-character strings, two-element ranges, the
// applyPattern(String, boolean) overload, applyPropertyAlias(), and removeAllStrings().
163 public void TestToPattern() throws Exception {
164 // Test that toPattern() round trips with syntax characters
166 for (int i = 0; i < OTHER_TOPATTERN_TESTS.length; ++i) {
167 checkPat(OTHER_TOPATTERN_TESTS[i], new UnicodeSet(OTHER_TOPATTERN_TESTS[i]));
// Only non-letters up to U+00FF and whitespace code points are exercised. Each is tried
// as a single code point, as the end of a range starting at 0, and as the start of a
// range ending at 0xFFFF; the first failing combination skips the rest for that i.
169 for (int i = 0; i <= 0x10FFFF; ++i) {
170 if ((i <= 0xFF && !UCharacter.isLetter(i)) || UCharacter.isWhitespace(i)) {
171 // check various combinations to make sure they all work.
172 if (i != 0 && !toPatternAux(i, i)) continue;
173 if (!toPatternAux(0, i)) continue;
174 if (!toPatternAux(i, 0xFFFF)) continue;
178 // Test pattern behavior of multicharacter strings.
179 UnicodeSet s = new UnicodeSet("[a-z {aa} {ab}]");
180 expectToPattern(s, "[a-z{aa}{ab}]",
181 new String[] {"aa", "ab", NOT, "ac"});
183 expectToPattern(s, "[a-z{aa}{ab}{ac}]",
184 new String[] {"aa", "ab", "ac", NOT, "xy"});
// Strings containing the brace syntax characters themselves must come back escaped.
186 s.applyPattern("[a-z {\\{l} {r\\}}]");
187 expectToPattern(s, "[a-z{r\\}}{\\{l}]",
188 new String[] {"{l", "r}", NOT, "xy"});
190 expectToPattern(s, "[a-z{\\[\\]}{r\\}}{\\{l}]",
191 new String[] {"{l", "r}", "[]", NOT, "xy"});
193 s.applyPattern("[a-z {\u4E01\u4E02}{\\n\\r}]");
194 expectToPattern(s, "[a-z{\\u000A\\u000D}{\\u4E01\\u4E02}]",
195 new String[] {"\u4E01\u4E02", "\n\r"});
200 expectToPattern(s, "[{abc}]",
201 new String[] {"abc", NOT, "ab"});
203 // JB#3400: For 2 character ranges prefer [ab] to [a-b]
206 expectToPattern(s, "[ab]", null);
208 // Cover applyPattern, applyPropertyAlias
// With the boolean set to true the space in the pattern is dropped; with false it becomes a member (see expected patterns).
210 s.applyPattern("[ab ]", true);
211 expectToPattern(s, "[ab]", new String[] {"a", NOT, "ab", " "});
213 s.applyPattern("[ab ]", false);
214 expectToPattern(s, "[\\ ab]", new String[] {"a", "\u0020", NOT, "ab"});
// The expected list below depends on the Unicode data version; see the notes that follow it.
217 s.applyPropertyAlias("nv", "0.5");
218 expectToPattern(s, "[\\u00BD\\u0B73\\u0D74\\u0F2A\\u2CFD\\uA831\\U00010141\\U00010175\\U00010176\\U00010E7B]", null);
219 // Unicode 5.1 adds Malayalam 1/2 (\u0D74)
220 // Unicode 5.2 adds U+A831 NORTH INDIC FRACTION ONE HALF and U+10E7B RUMI FRACTION ONE HALF
221 // Unicode 6.0 adds U+0B73 ORIYA FRACTION ONE HALF
224 s.applyPropertyAlias("gc", "Lu");
225 // TODO expectToPattern(s, what?)
227 // RemoveAllStrings()
229 s.applyPattern("[a-z{abc}{def}]");
230 expectToPattern(s, "[a-z{abc}{def}]", null);
231 s.removeAllStrings();
232 expectToPattern(s, "[a-z]", null);
// Extra patterns that TestToPattern() parses and then passes to checkPat() to verify
// that toPattern() output can be parsed back into an equal set.
235 static String[] OTHER_TOPATTERN_TESTS = {
236 "[[:latin:]&[:greek:]]",
237 "[[:latin:]-[:greek:]]",
238 "[:nonspacing mark:]"
// Builds the set containing the range start..end and round-trips it through checkPat().
// The hex description assembled in 'source' is passed to checkPat() as the label for its
// log and error output. Returns whatever checkPat() returns.
242 public boolean toPatternAux(int start, int end) {
243 // use Integer.toString because Utility.hex doesn't handle ints
244 String source = "0x" + Integer.toString(start,16).toUpperCase();
245 if (start != end) source += "..0x" + Integer.toString(end,16).toUpperCase();
246 UnicodeSet testSet = new UnicodeSet();
247 testSet.add(start, end);
248 return checkPat(source, testSet);
// Round-trips testSet through both toPattern(true) and toPattern(false), re-parsing each
// result with the three-argument checkPat(). Returns false as soon as one form fails to
// reproduce the set; 'source' is only a label for the log and error messages.
251 boolean checkPat (String source, UnicodeSet testSet) {
254 // What we want to make sure of is that a pattern generated
255 // by toPattern(), with or without escaped unprintables, can
256 // be passed back into the UnicodeSet constructor.
257 String pat0 = testSet.toPattern(true);
258 if (!checkPat(source + " (escaped)", testSet, pat0)) return false;
260 //String pat1 = unescapeLeniently(pat0);
261 //if (!checkPat(source + " (in code)", testSet, pat1)) return false;
263 String pat2 = testSet.toPattern(false);
264 if (!checkPat(source, testSet, pat2)) return false;
266 //String pat3 = unescapeLeniently(pat2);
267 //if (!checkPat(source + " (in code)", testSet, pat3)) return false;
269 //logln(source + " => " + pat0 + ", " + pat1 + ", " + pat2 + ", " + pat3);
270 logln(source + " => " + pat0 + ", " + pat2);
// Any exception thrown while generating or re-parsing a pattern is reported as a test error.
271 } catch (Exception e) {
272 errln("EXCEPTION in toPattern: " + source + " => " + pat);
// Parses 'pat' into a fresh UnicodeSet and reports an error, showing both sets in
// unescaped pattern form, when the result is not equal to testSet.
278 boolean checkPat (String source, UnicodeSet testSet, String pat) {
279 UnicodeSet testSet2 = new UnicodeSet(pat);
280 if (!testSet2.equals(testSet)) {
281 errln("Fail toPattern: " + source + "; " + pat + " => " +
282 testSet2.toPattern(false) + ", expected " +
283 testSet.toPattern(false));
289 // NOTE: copied the following from Utility. There ought to be a version in there with a flag
290 // that does the Java stuff
// Decodes one escape sequence in s. offset16[0] is an in/out parameter: on entry it is
// the index of the character following the backslash, and on each successful path
// visible below it is updated to the index just past the consumed sequence.
// Handles numeric (hexadecimal/octal) escapes, the C-style escapes listed in
// UNESCAPE_MAP, and otherwise treats the backslash as escaping the next character.
292 public static int unescapeAt(String s, int[] offset16) {
// Bits contributed by each digit of a numeric escape; the digit loop below treats 3 as octal and anything else as hex.
298 int bitsPerDigit = 4;
302 /* Check that offset is in range */
303 int offset = offset16[0];
304 int length = s.length();
305 if (offset < 0 || offset >= length) {
309 /* Fetch first UChar after '\\' */
310 c = UTF16.charAt(s, offset);
311 offset += UTF16.getCharCount(c);
313 /* Convert hexadecimal and octal escapes */
328 dig = UCharacter.digit(c, 8);
332 n = 1; /* Already have first octal digit */
// Accumulate further digits until the input ends or the maximum digit count is reached.
339 while (offset < length && n < maxDig) {
341 // TODO: Restore the char32-based code when UCharacter.digit
342 // is working (Bug 66).
344 //c = UTF16.charAt(s, offset);
345 //dig = UCharacter.digit(c, (bitsPerDigit == 3) ? 8 : 16);
346 c = s.charAt(offset);
347 dig = Character.digit((char)c, (bitsPerDigit == 3) ? 8 : 16);
351 result = (result << bitsPerDigit) | dig;
352 //offset += UTF16.getCharCount(c);
359 offset16[0] = offset;
363 /* Convert C-style escapes in table */
// The table is laid out as (escape char, replacement) pairs, hence the step of two.
364 for (i=0; i<UNESCAPE_MAP.length; i+=2) {
365 if (c == UNESCAPE_MAP[i]) {
366 offset16[0] = offset;
367 return UNESCAPE_MAP[i+1];
// The map is in ascending order, so once c is below an entry no later entry can match.
368 } else if (c < UNESCAPE_MAP[i]) {
373 /* If no special forms are recognized, then consider
374 * the backslash to generically escape the next character. */
375 offset16[0] = offset;
379 /* This map must be in ASCENDING ORDER OF THE ESCAPE CODE */
// Flat lookup table read by unescapeAt() two entries at a time: element i is the escape
// character and element i+1 is the value returned for it. unescapeAt() also compares
// c < UNESCAPE_MAP[i] while scanning, which is why the escape codes must stay sorted.
380 static private final char[] UNESCAPE_MAP = {
395 * Convert all escapes in a given string using unescapeAt().
396 * Leave invalid escape sequences unchanged.
// Scans s character by character, decoding escapes with unescapeAt() and collecting the
// output in a StringBuffer, which is returned as a String.
398 public static String unescapeLeniently(String s) {
399 StringBuffer buf = new StringBuffer();
// One-element array used as the in/out offset argument of unescapeAt().
400 int[] pos = new int[1];
401 for (int i=0; i<s.length(); ) {
402 char c = s.charAt(i++);
405 int e = unescapeAt(s, pos);
// e is a code point, so it is appended via UTF16.append rather than as a single char.
409 UTF16.append(buf, e);
416 return buf.toString();
// Checks pattern parsing for intersection (&), difference (-) and the positions in which
// '-' is taken literally. The last argument of expectPattern()/expectPairs() appears to
// be the expected set written as start/end character pairs, one pair per range
// (e.g. "km" for k-m) -- TODO confirm against those helpers.
419 public void TestPatterns() {
420 UnicodeSet set = new UnicodeSet();
421 expectPattern(set, "[[a-m]&[d-z]&[k-y]]", "km");
422 expectPattern(set, "[[a-z]-[m-y]-[d-r]]", "aczz");
423 expectPattern(set, "[a\\-z]", "--aazz");
424 expectPattern(set, "[-az]", "--aazz");
425 expectPattern(set, "[az-]", "--aazz");
426 expectPattern(set, "[[[a-z]-[aeiou]i]]", "bdfnptvz");
428 // Throw in a test of complement
// Expected pairs: U+0000..'a', 'e'..'e', 'o'..'o', 'u'..'u', and the character after 'z'..U+FFFF.
430 String exp = '\u0000' + "aeeoouu" + (char)('z'+1) + '\uFFFF';
431 expectPairs(set, exp);
// Checks general-category sets: [:Lu:] contains "ABC" but not "abc", and for the code
// points below 0x200 both [:L:] and [:Lu:] agree with UCharacter's own answers.
434 public void TestCategories() {
436 UnicodeSet set = new UnicodeSet("[:Lu:]");
437 expectContainment(set, "ABC", "abc");
439 // Make sure generation of L doesn't pollute cached Lu set
440 // First generate L, then Lu
441 // not used int TOP = 0x200; // Don't need to go over the whole range:
442 set = new UnicodeSet("[:L:]");
443 for (int i=0; i<0x200; ++i) {
444 boolean l = UCharacter.isLetter(i);
445 if (l != set.contains((char)i)) {
446 errln("FAIL: L contains " + (char)i + " = " +
447 set.contains((char)i));
// Stop early so a systematic mismatch does not flood the log.
448 if (++failures == 10) break;
// Lu is built after L on purpose (see the note above about the cached Lu set).
452 set = new UnicodeSet("[:Lu:]");
453 for (int i=0; i<0x200; ++i) {
454 boolean lu = (UCharacter.getType(i) == ECharacterCategory.UPPERCASE_LETTER);
455 if (lu != set.contains((char)i)) {
456 errln("FAIL: Lu contains " + (char)i + " = " +
457 set.contains((char)i));
458 if (++failures == 20) break;
// Exercises range add/remove on a UnicodeSet and whole-set operations between two sets,
// checking the resulting ranges after every step with expectPairs()/expectPattern().
463 public void TestAddRemove() {
464 UnicodeSet set = new UnicodeSet();
466 expectPairs(set, "az");
// Each removal below changes the remaining ranges; the expected string lists them as start/end pairs.
467 set.remove('m', 'p');
468 expectPairs(set, "alqz");
469 set.remove('e', 'g');
470 expectPairs(set, "adhlqz");
471 set.remove('d', 'i');
472 expectPairs(set, "acjlqz");
473 set.remove('c', 'r');
474 expectPairs(set, "absz");
476 expectPairs(set, "abfqsz");
477 set.remove('a', 'g');
478 expectPairs(set, "hqsz");
479 set.remove('a', 'z');
480 expectPairs(set, "");
482 // Try removing an entire set from another set
483 expectPattern(set, "[c-x]", "cx");
484 UnicodeSet set2 = new UnicodeSet();
485 expectPattern(set2, "[f-ky-za-bc[vw]]", "acfkvwyz");
487 expectPairs(set, "deluxx");
489 // Try adding an entire set to another set
490 expectPattern(set, "[jackiemclean]", "aacceein");
491 expectPattern(set2, "[hitoshinamekatajamesanderson]", "aadehkmort");
493 expectPairs(set, "aacehort");
495 // Test commutativity
// Same two patterns as above with the roles of set and set2 swapped; the expected result is unchanged.
496 expectPattern(set, "[hitoshinamekatajamesanderson]", "aadehkmort");
497 expectPattern(set2, "[jackiemclean]", "aacceein");
499 expectPairs(set, "aacehort");
503 * Make sure minimal representation is maintained.
// Spot checks that combining overlapping sets leaves a single merged range: each
// expectPairs() call below expects exactly one start/end pair.
505 public void TestMinimalRep() {
506 // This is pretty thoroughly tested by checkCanonicalRep()
507 // run against the exhaustive operation results. Use the code
508 // here for debugging specific spot problems.
510 // 1 overlap against 2
511 UnicodeSet set = new UnicodeSet("[h-km-q]");
512 UnicodeSet set2 = new UnicodeSet("[i-o]");
514 expectPairs(set, "hq");
// Overlap with the second set extending past the end of the first...
516 set.applyPattern("[a-m]");
517 set2.applyPattern("[e-o]");
519 expectPairs(set, "ao");
// ...and the same two ranges with the operands swapped.
521 set.applyPattern("[e-o]");
522 set2.applyPattern("[a-m]");
524 expectPairs(set, "ao");
525 // 1 overlap against 3
526 set.applyPattern("[a-eg-mo-w]");
527 set2.applyPattern("[d-q]");
529 expectPairs(set, "aw");
// Broad API coverage test for UnicodeSet: emptiness and size, contains(first, last),
// the ported InversionList set-algebra checks, the String-based complement/removeAll/
// retainAll family, containsNone/containsSome, retain/remove, hashCode(), and
// addAll(Collection)/addAllTo(Collection). Several steps return early on failure
// because later steps reuse the same 'set' and 'exp' objects.
532 public void TestAPI() {
534 UnicodeSet set = new UnicodeSet();
535 if (!set.isEmpty() || set.getRangeCount() != 0) {
536 errln("FAIL, set should be empty but isn't: " +
540 // clear(), isEmpty()
543 errln("FAIL, set shouldn't be empty but is: " +
547 if (!set.isEmpty()) {
548 errln("FAIL, set should be empty but isn't: " +
554 if (set.size() != 0) {
555 errln("FAIL, size should be 0, but is " + set.size() +
559 if (set.size() != 1) {
560 errln("FAIL, size should be 1, but is " + set.size() +
564 if (set.size() != 10) {
565 errln("FAIL, size should be 10, but is " + set.size() +
// 0x110000 is the number of code points from U+0000 to U+10FFFF.
// NOTE(review): the message below has no space after "but is", unlike the sibling messages above.
570 if (set.size() != 0x110000) {
571 errln("FAIL, size should be 0x110000, but is" + set.size());
574 // contains(first, last)
576 set.applyPattern("[A-Y 1-8 b-d l-y]");
// For every range: the exact range must be contained, while the range widened by one
// code point on either side must not be.
577 for (int i = 0; i<set.getRangeCount(); ++i) {
578 int a = set.getRangeStart(i);
579 int b = set.getRangeEnd(i);
580 if (!set.contains(a, b)) {
581 errln("FAIL, should contain " + (char)a + '-' + (char)b +
582 " but doesn't: " + set);
584 if (set.contains((char)(a-1), b)) {
585 errln("FAIL, shouldn't contain " +
586 (char)(a-1) + '-' + (char)b +
587 " but does: " + set);
589 if (set.contains(a, (char)(b+1))) {
590 errln("FAIL, shouldn't contain " +
591 (char)a + '-' + (char)(b+1) +
592 " but does: " + set);
596 // Ported InversionList test.
597 UnicodeSet a = new UnicodeSet((char)3,(char)10);
598 UnicodeSet b = new UnicodeSet((char)7,(char)15);
599 UnicodeSet c = new UnicodeSet();
601 logln("a [3-10]: " + a);
602 logln("b [7-15]: " + b);
// Union of [3-10] and [7-15] is expected to be [3-15].
603 c.set(a); c.addAll(b);
604 UnicodeSet exp = new UnicodeSet((char)3,(char)15);
606 logln("c.set(a).add(b): " + c);
608 errln("FAIL: c.set(a).add(b) = " + c + ", expect " + exp);
// Expected complement of [3-15]: [0-2] plus [16-MAX_VALUE].
611 exp.set((char)0, (char)2);
612 exp.add((char)16, UnicodeSet.MAX_VALUE);
614 logln("c.complement(): " + c);
616 errln(Utility.escape("FAIL: c.complement() = " + c + ", expect " + exp));
619 exp.set((char)3, (char)15);
621 logln("c.complement(): " + c);
623 errln("FAIL: c.complement() = " + c + ", expect " + exp);
// complementAll is a symmetric difference: [3-10] against [7-15] leaves [3-6] and [11-15].
625 c.set(a); c.complementAll(b);
626 exp.set((char)3,(char)6);
627 exp.add((char)11,(char) 15);
629 logln("c.set(a).complement(b): " + c);
631 errln("FAIL: c.set(a).complement(b) = " + c + ", expect " + exp);
// Converting to the bit representation and back must not change the set.
635 c = bitsToSet(setToBits(c));
637 logln("bitsToSet(setToBits(c)): " + c);
639 errln("FAIL: bitsToSet(setToBits(c)) = " + c + ", expect " + exp);
642 // Additional tests for coverage JB#2118
643 //UnicodeSet::complement(class UnicodeString const &)
644 //UnicodeSet::complementAll(class UnicodeString const &)
645 //UnicodeSet::containsNone(class UnicodeSet const &)
646 //UnicodeSet::containsNone(long,long)
647 //UnicodeSet::containsSome(class UnicodeSet const &)
648 //UnicodeSet::containsSome(long,long)
649 //UnicodeSet::removeAll(class UnicodeString const &)
650 //UnicodeSet::retain(long)
651 //UnicodeSet::retainAll(class UnicodeString const &)
652 //UnicodeSet::serialize(unsigned short *,long,enum UErrorCode &)
653 //UnicodeSetIterator::getString(void)
// complement(String) toggles the multi-character string "ab" as a single element.
655 set.complement("ab");
656 exp.applyPattern("[{ab}]");
657 if (!set.equals(exp)) { errln("FAIL: complement(\"ab\")"); return; }
// The iterator reports a string element by setting codepoint to IS_STRING and filling 'string'.
659 UnicodeSetIterator iset = new UnicodeSetIterator(set);
660 if (!iset.next() || iset.codepoint != UnicodeSetIterator.IS_STRING) {
661 errln("FAIL: UnicodeSetIterator.next/IS_STRING");
662 } else if (!iset.string.equals("ab")) {
663 errln("FAIL: UnicodeSetIterator.string");
// complementAll(String) works on the individual characters of "alan": a, l and n are removed from a-z.
666 set.add((char)0x61, (char)0x7A);
667 set.complementAll("alan");
668 exp.applyPattern("[{ab}b-kmo-z]");
669 if (!set.equals(exp)) { errln("FAIL: complementAll(\"alan\")"); return; }
671 exp.applyPattern("[a-z]");
672 if (set.containsNone(exp)) { errln("FAIL: containsNone(UnicodeSet)"); }
673 if (!set.containsSome(exp)) { errln("FAIL: containsSome(UnicodeSet)"); }
674 exp.applyPattern("[aln]");
675 if (!set.containsNone(exp)) { errln("FAIL: containsNone(UnicodeSet)"); }
676 if (set.containsSome(exp)) { errln("FAIL: containsSome(UnicodeSet)"); }
// 0x61-0x7A is a-z (overlaps the set); 0x41-0x5A is A-Z (disjoint from it).
678 if (set.containsNone((char)0x61, (char)0x7A)) {
679 errln("FAIL: containsNone(char, char)");
681 if (!set.containsSome((char)0x61, (char)0x7A)) {
682 errln("FAIL: containsSome(char, char)");
684 if (!set.containsNone((char)0x41, (char)0x5A)) {
685 errln("FAIL: containsNone(char, char)");
687 if (set.containsSome((char)0x41, (char)0x5A)) {
688 errln("FAIL: containsSome(char, char)");
691 set.removeAll("liu");
692 exp.applyPattern("[{ab}b-hj-kmo-tv-z]");
693 if (!set.equals(exp)) { errln("FAIL: removeAll(\"liu\")"); return; }
695 set.retainAll("star");
696 exp.applyPattern("[rst]");
697 if (!set.equals(exp)) { errln("FAIL: retainAll(\"star\")"); return; }
// 0x73 is 's'.
699 set.retain((char)0x73);
700 exp.applyPattern("[s]");
701 if (!set.equals(exp)) { errln("FAIL: retain('s')"); return; }
703 // ICU 2.6 coverage tests
704 // public final UnicodeSet retain(String s);
705 // public final UnicodeSet remove(int c);
706 // public final UnicodeSet remove(String s);
707 // public int hashCode();
708 set.applyPattern("[a-z{ab}{cd}]");
710 exp.applyPattern("[{cd}]");
711 if (!set.equals(exp)) { errln("FAIL: retain(\"cd\")"); return; }
// 0x63 is 'c'; removing it must leave the strings {ab} and {cd} in place.
713 set.applyPattern("[a-z{ab}{cd}]");
714 set.remove((char)0x63);
715 exp.applyPattern("[abd-z{ab}{cd}]");
716 if (!set.equals(exp)) { errln("FAIL: remove('c')"); return; }
719 exp.applyPattern("[abd-z{ab}]");
720 if (!set.equals(exp)) { errln("FAIL: remove(\"cd\")"); return; }
// Equal sets must hash equally; the second check expects differing sets to hash differently.
722 if (set.hashCode() != exp.hashCode()) {
723 errln("FAIL: hashCode() unequal");
726 if (set.hashCode() == exp.hashCode()) {
727 errln("FAIL: hashCode() equal");
731 //Cover addAll(Collection) and addAllTo(Collection)
732 // Seems that there is a bug in addAll(Collection) operation
733 // Ram also add a similar test to UtilityTest.java
734 logln("Testing addAll(Collection) ... ");
735 String[] array = {"a", "b", "c", "de"};
736 List list = Arrays.asList(array);
737 Set aset = new HashSet(list);
738 logln(" *** The source set's size is: " + aset.size());
// The UnicodeSet should end up with one element per source element.
742 if (set.size() != aset.size()) {
743 errln("FAIL: After addAll, the UnicodeSet size expected " + aset.size() +
744 ", " + set.size() + " seen instead!");
746 logln("OK: After addAll, the UnicodeSet size got " + set.size());
749 List list2 = new ArrayList();
753 log(" *** The elements are: ");
754 String s = set.toPattern(true);
756 Iterator myiter = list2.iterator();
757 while(myiter.hasNext()) {
758 log(myiter.next().toString() + " ");
760 logln(""); // a new line
// Builds sets through the UnicodeSet factory and chaining API (fromAll, from, add,
// retain, complement) and checks each against the same set written as a pattern.
// Row i of testList holds the two sets that must be equal.
765 public void TestStrings() {
766 // Object[][] testList = {
767 // {I_EQUALS, UnicodeSet.fromAll("abc"),
768 // new UnicodeSet("[a-c]")},
770 // {I_EQUALS, UnicodeSet.from("ch").add('a','z').add("ll"),
771 // new UnicodeSet("[{ll}{ch}a-z]")},
773 // {I_EQUALS, UnicodeSet.from("ab}c"),
774 // new UnicodeSet("[{ab\\}c}]")},
776 // {I_EQUALS, new UnicodeSet('a','z').add('A', 'Z').retain('M','m').complement('X'),
777 // new UnicodeSet("[[a-zA-Z]&[M-m]-[X]]")},
780 // for (int i = 0; i < testList.length; ++i) {
781 // expectRelation(testList[i][0], testList[i][1], testList[i][2], "(" + i + ")");
784 UnicodeSet[][] testList = {
785 {UnicodeSet.fromAll("abc"),
786 new UnicodeSet("[a-c]")},
788 {UnicodeSet.from("ch").add('a','z').add("ll"),
789 new UnicodeSet("[{ll}{ch}a-z]")},
791 {UnicodeSet.from("ab}c"),
792 new UnicodeSet("[{ab\\}c}]")},
794 {new UnicodeSet('a','z').add('A', 'Z').retain('M','m').complement('X'),
795 new UnicodeSet("[[a-zA-Z]&[M-m]-[X]]")},
// The failure message only carries the row index; the row itself has to be looked up in testList above.
798 for (int i = 0; i < testList.length; ++i) {
799 if (!testList[i][0].equals(testList[i][1])) {
800 errln("FAIL: sets unequal; see source code (" + i + ")");
// Integer-wrapped copies of the SortedSetRelation filter constants. Within the code shown
// they are referenced only by the commented-out table in TestStrings().
806 I_ANY = new Integer(SortedSetRelation.ANY),
807 I_CONTAINS = new Integer(SortedSetRelation.CONTAINS),
808 I_DISJOINT = new Integer(SortedSetRelation.DISJOINT),
809 I_NO_B = new Integer(SortedSetRelation.NO_B),
810 I_ISCONTAINED = new Integer(SortedSetRelation.ISCONTAINED),
811 I_EQUALS = new Integer(SortedSetRelation.EQUALS),
812 I_NO_A = new Integer(SortedSetRelation.NO_A),
813 I_NONE = new Integer(SortedSetRelation.NONE);
// Cross-checks SortedSetRelation on every ordered pair of subsets of 'choices'.
// Subsets are selected by bit mask: i and j each run over all 2^choices.length masks.
815 public void TestSetRelation() {
817 String[] choices = {"a", "b", "cd", "ef"};
// Number of distinct subsets (one bit per element of choices).
818 int limit = 1 << choices.length;
820 SortedSet iset = new TreeSet();
821 SortedSet jset = new TreeSet();
823 for (int i = 0; i < limit; ++i) {
824 pick(i, choices, iset);
825 for (int j = 0; j < limit; ++j) {
826 pick(j, choices, jset);
827 checkSetRelation(iset, jset, "(" + i + ")");
// Timing comparison; does nothing unless the test run is in verbose mode.
832 public void TestSetSpeed() {
833 // skip unless verbose
834 if (!isVerbose()) return;
// Builds two sorted sets holding the same 'size' even Integers, then times the Java
// collection operations against SortedSetRelation (via CheckSpeed) while odd values are
// added to change how the two sets relate.
840 public void SetSpeed2(int size) {
842 SortedSet iset = new TreeSet();
843 SortedSet jset = new TreeSet();
845 for (int i = 0; i < size*2; i += 2) { // only even values
846 iset.add(new Integer(i));
847 jset.add(new Integer(i));
// Fewer repetitions for larger sets, so the total amount of work stays roughly constant.
850 int iterations = 1000000 / size;
852 logln("Timing comparison of Java vs Utility");
853 logln("For about " + size + " objects that are almost all the same.");
855 CheckSpeed(iset, jset, "when a = b", iterations);
857 iset.add(new Integer(size + 1)); // add odd value in middle
859 CheckSpeed(iset, jset, "when a contains b", iterations);
860 CheckSpeed(jset, iset, "when b contains a", iterations);
862 jset.add(new Integer(size - 1)); // add different odd value in middle
864 CheckSpeed(jset, iset, "when a, b are disjoint", iterations);
// Runs both timing comparisons (contains via CheckSpeed2, equals via CheckSpeed3) on the same pair of sets.
867 void CheckSpeed(SortedSet iset, SortedSet jset, String message, int iterations) {
868 CheckSpeed2(iset, jset, message, iterations);
869 CheckSpeed3(iset, jset, message, iterations);
// Times iset.containsAll(jset) against SortedSetRelation.hasRelation(iset, CONTAINS, jset)
// over 'iterations' calls each, and logs the per-call times in milliseconds plus the
// Utility-to-Java ratio as a percentage. Also reports an error if the two disagree.
872 void CheckSpeed2(SortedSet iset, SortedSet jset, String message, int iterations) {
876 // make sure code is loaded:
877 x = iset.containsAll(jset);
878 y = SortedSetRelation.hasRelation(iset, SortedSetRelation.CONTAINS, jset);
879 if (x != y) errln("FAIL contains comparison");
881 double start = System.currentTimeMillis();
882 for (int i = 0; i < iterations; ++i) {
// Results are OR-ed into x / y so each call's result is consumed.
883 x |= iset.containsAll(jset);
885 double middle = System.currentTimeMillis();
886 for (int i = 0; i < iterations; ++i) {
887 y |= SortedSetRelation.hasRelation(iset, SortedSetRelation.CONTAINS, jset);
889 double end = System.currentTimeMillis();
891 double jtime = (middle - start)/iterations;
892 double utime = (end - middle)/iterations;
894 NumberFormat nf = NumberFormat.getPercentInstance();
895 logln("Test contains: " + message + ": Java: " + jtime
896 + ", Utility: " + utime + ", u:j: " + nf.format(utime/jtime));
// Same measurement as CheckSpeed2, but for equality: iset.equals(jset) against
// SortedSetRelation.hasRelation(iset, EQUALS, jset). Logs per-call times in milliseconds
// and their ratio, and reports an error if the two disagree.
899 void CheckSpeed3(SortedSet iset, SortedSet jset, String message, int iterations) {
903 // make sure code is loaded:
904 x = iset.equals(jset);
905 y = SortedSetRelation.hasRelation(iset, SortedSetRelation.EQUALS, jset);
906 if (x != y) errln("FAIL equality comparison");
909 double start = System.currentTimeMillis();
910 for (int i = 0; i < iterations; ++i) {
// Results are OR-ed into x / y so each call's result is consumed.
911 x |= iset.equals(jset);
913 double middle = System.currentTimeMillis();
914 for (int i = 0; i < iterations; ++i) {
915 y |= SortedSetRelation.hasRelation(iset, SortedSetRelation.EQUALS, jset);
917 double end = System.currentTimeMillis();
919 double jtime = (middle - start)/iterations;
920 double utime = (end - middle)/iterations;
922 NumberFormat nf = NumberFormat.getPercentInstance();
923 logln("Test equals: " + message + ": Java: " + jtime
924 + ", Utility: " + utime + ", u:j: " + nf.format(utime/jtime));
// Adds examples[k] to 'output' for every bit k that is set in 'bits'.
// All 32 bit positions are probed, so 'bits' must not have a bit set at or beyond
// examples.length; TestSetRelation() only passes masks below 1 << choices.length.
927 void pick(int bits, Object[] examples, SortedSet output) {
929 for (int k = 0; k < 32; ++k) {
930 if (((1<<k) & bits) != 0) output.add(examples[k]);
// Display names for the relation filters; checkSetRelation() indexes this array with
// the filter values 0 through 7 when building its log and error messages.
934 public static final String[] RELATION_NAME = {
// Straightforward reference implementation that checkSetRelation() compares against
// SortedSetRelation.hasRelation(). Per the comments below, 'ab' stands for the
// intersection A&B; the relation is rejected as soon as a region that is non-empty
// (A&B, A-B or B-A) does not have its bit set in 'filter'.
944 boolean dumbHasRelation(Collection A, int filter, Collection B) {
945 Collection ab = new TreeSet(A);
947 if (ab.size() > 0 && (filter & SortedSetRelation.A_AND_B) == 0) return false;
949 // A - B size == A.size - A&B.size
950 if (A.size() > ab.size() && (filter & SortedSetRelation.A_NOT_B) == 0) return false;
952 // B - A size == B.size - A&B.size
953 if (B.size() > ab.size() && (filter & SortedSetRelation.B_NOT_A) == 0) return false;
// For each of the eight filter values 0..7, compares SortedSetRelation.hasRelation()
// with the reference dumbHasRelation() on the same pair of sets and logs the outcome.
959 void checkSetRelation(SortedSet a, SortedSet b, String message) {
960 for (int i = 0; i < 8; ++i) {
962 boolean hasRelation = SortedSetRelation.hasRelation(a, i, b);
963 boolean dumbHasRelation = dumbHasRelation(a, i, b);
965 logln(message + " " + hasRelation + ":\t" + a + "\t" + RELATION_NAME[i] + "\t" + b);
// A disagreement between the two implementations is the failure condition.
967 if (hasRelation != dumbHasRelation) {
969 message + " " + dumbHasRelation + ":\t" + a + "\t" + RELATION_NAME[i] + "\t" + b);
976 * Test the [:Latin:] syntax.
// Checks script-name patterns with expectContainment(pattern, charsIn, charsOut):
// Latin holds "aA" but not Greek alpha, Greek the reverse, and Common+Inherited holds
// U+3099, U+1D169 and U+0000 but not "aA".
978 public void TestScriptSet() {
980 expectContainment("[:Latin:]", "aA", CharsToUnicodeString("\\u0391\\u03B1"));
982 expectContainment("[:Greek:]", CharsToUnicodeString("\\u0391\\u03B1"), "aA");
985 expectContainment("[[:Common:][:Inherited:]]", CharsToUnicodeString("\\U00003099\\U0001D169\\u0000"), "aA");
990 * Test the property pattern syntaxes: [:name=value:], \p{...}, \P{...} and \N{...}.
// Table-driven test of property and regex-compatible set patterns. The table is read in
// groups of three (pattern, characters expected in the set, characters expected outside
// it) and each group is passed to expectContainment().
992 public void TestPropertySet() {
994 // Pattern, Chars IN, Chars NOT in
1004 "\\P{ GENERAL Category = upper case letter }",
1008 // Combining class: @since ICU 2.2
1009 // Check both symbolic and numeric
1014 "\\p{Canonical Combining Class = 11}",
1018 "[:c c c = iota subscript :]",
1022 // Bidi class: @since ICU 2.2
1023 "\\p{bidiclass=lefttoright}",
1027 // Binary properties: @since ICU 2.2
1033 "q)*(", // )(and * were removed from math in Unicode 4.0.1
1036 // JB#1767 \N{}, \p{ASCII}
1041 "[\\N{ latin small letter a }[:name= latin small letter z:]]",
1057 "\u03D8\u03D9", // 3.2
1060 "\\u1800\\u3400\\U0002f800",
1061 "\\u0220\\u034f\\u30ff\\u33ff\\ufe73\\U00010000\\U00050000",
1063 // JB#2350: Case_Sensitive
1064 "[:Case Sensitive:]",
1065 "A\u1FFC\\U00010410",
1066 ";\u00B4\\U00010500",
1069 // Regex compatibility test
1070 "[-b]", // leading '-' is literal
1074 "[^-b]", // leading '-' is literal
1078 "[b-]", // trailing '-' is literal
1082 "[^b-]", // trailing '-' is literal
1086 "[a-b-]", // trailing '-' is literal
1090 "[[a-q]&[p-z]-]", // trailing '-' is literal
1094 "[\\s|\\)|:|$|\\>]", // from regex tests
1098 "[\uDC00cd]", // JB#2906: isolated trail at start
1100 "ab\uD800\\U00010000",
1102 "[ab\uD800]", // JB#2906: isolated lead at end
1104 "cd\uDC00\\U00010000",
1106 "[ab\uD800cd]", // JB#2906: isolated lead in middle
1108 "ef\uDC00\\U00010000",
1110 "[ab\uDC00cd]", // JB#2906: isolated trail in middle
1112 "ef\uD800\\U00010000",
1114 "[:^lccc=0:]", // Lead canonical class
1118 "[:^tccc=0:]", // Trail canonical class
1119 "\u0300\u0301\u00c0\u00c5",
1122 "[[:^lccc=0:][:^tccc=0:]]", // Lead and trail canonical class
1123 "\u0300\u0301\u00c0\u00c5",
1126 "[[:^lccc=0:]-[:^tccc=0:]]", // Stuff that starts with an accent but ends with a base (none right now)
1128 "abcd\u0300\u0301\u00c0\u00c5",
1130 "[[:ccc=0:]-[:lccc=0:]-[:tccc=0:]]", // Weirdos. Complete canonical class is zero, but both lead and trail are not
1131 "\u0F73\u0F75\u0F81",
1132 "abcd\u0300\u0301\u00c0\u00c5",
1135 "A\\uE000\\uF8FF\\uFDC7\\U00010000\\U0010FFFD",
1136 "\\u0888\\uFDD3\\uFFFE\\U00050005",
1138 // Script_Extensions, new in Unicode 6.0
1140 "\\u061E\\u061F\\u0620\\u0621\\u063F\\u0640\\u0650\\u065E\\uFDF1\\uFDF2\\uFDF3",
1141 "\\u061D\\uFDEF\\uFDFE",
1143 // U+FDF2 has Script=Arabic and also Arab in its Script_Extensions,
1144 // so scx-sc is missing U+FDF2.
1145 "[[:Script_Extensions=Arabic:]-[:Arab:]]",
1146 "\\u0640\\u064B\\u0650\\u0655\\uFDFD",
// Step of three: DATA[i] is the pattern, DATA[i+1] the characters in, DATA[i+2] the characters out.
1150 for (int i=0; i<DATA.length; i+=3) {
1151 expectContainment(DATA[i], DATA[i+1], DATA[i+2]);
// Checks containsAll(String)/containsNone(String) on a set that mixes single characters
// with multi-character strings. Each testStrings row is {input string, expected status}
// and is verified by check().
1155 public void TestUnicodeSetStrings() {
1156 UnicodeSet uset = new UnicodeSet("[a{bc}{cd}pqr\u0000]");
1157 logln(uset + " ~ " + uset.getRegexEquivalent());
1158 String[][] testStrings = {{"x", "none"},
1168 for (int i = 0; i < testStrings.length; ++i) {
1169 check(uset, testStrings[i][0], testStrings[i][1]);
// Verifies uset.containsAll(string) and uset.containsNone(string) against desiredStatus:
// "all" means containsAll must be true, "none" means containsNone must be true, and any
// other status means both must be false.
1174 private void check(UnicodeSet uset, String string, String desiredStatus) {
1175 boolean shouldContainAll = desiredStatus.equals("all");
1176 boolean shouldContainNone = desiredStatus.equals("none");
1177 if (uset.containsAll(string) != shouldContainAll) {
1178 errln("containsAll " + string + " should be " + shouldContainAll);
1180 logln("containsAll " + string + " = " + shouldContainAll);
1182 if (uset.containsNone(string) != shouldContainNone) {
1183 errln("containsNone " + string + " should be " + shouldContainNone);
1185 logln("containsNone " + string + " = " + shouldContainNone);
1190 * Test cloning of UnicodeSet
// Clones a set and checks that the clone contains "abc" and does not contain "def".
1192 public void TestClone() {
1193 UnicodeSet s = new UnicodeSet("[abcxyz]");
1194 UnicodeSet t = (UnicodeSet) s.clone();
1195 expectContainment(t, "abc", "def");
1199 * Test the indexOf() and charAt() methods.
// Checks that indexOf() inverts charAt() for every valid index, and probes the
// out-of-range cases: charAt(size()) and indexOf() of a character not in the set.
1201 public void TestIndexOf() {
1202 UnicodeSet set = new UnicodeSet("[a-cx-y3578]");
1203 for (int i=0; i<set.size(); ++i) {
1204 int c = set.charAt(i);
1205 if (set.indexOf(c) != i) {
1206 errln("FAIL: charAt(" + i + ") = " + c +
1207 " => indexOf() => " + set.indexOf(c));
// size() is one past the last valid index.
1210 int c = set.charAt(set.size());
1212 errln("FAIL: charAt(<out of range>) = " +
1213 Utility.escape(String.valueOf(c)));
// 'q' is not a member of the set built above.
1215 int j = set.indexOf('q');
1217 errln("FAIL: indexOf('q') = " + j);
// The set holds 'a' and the string "bc" as separate elements, so contains("abc") must be false.
1221 public void TestContainsString() {
1222 UnicodeSet x = new UnicodeSet("[a{bc}]");
1223 if (x.contains("abc")) errln("FAIL");
// Exhaustive test over small sets: each value i below 'limit' is turned into a
// UnicodeSet with bitsToSet(i), round-trip checked, and then paired with every j below
// 'limit' in the inner loop.
1226 public void TestExhaustive() {
1227 // exhaustive tests. Simulate UnicodeSets with integers.
1228 // That gives us very solid tests (except for large memory tests).
1230 char limit = (char)128;
1232 for (char i = 0; i < limit; ++i) {
1233 logln("Testing " + i + ", " + bitsToSet(i));
1236 // AS LONG AS WE ARE HERE, check roundtrip
1237 checkRoundTrip(bitsToSet(i));
1239 for (char j = 0; j < limit; ++j) {
1249 * Make sure each script name and abbreviated name can be used
1250 * to construct a UnicodeSet.
// For every script code below UScript.CODE_LIMIT, builds "[:name:]" from both the long
// name (j == 0) and the short name (j == 1) and checks that it parses as a UnicodeSet.
1252 public void TestScriptNames() {
1253 for (int i=0; i<UScript.CODE_LIMIT; ++i) {
1254 for (int j=0; j<2; ++j) {
1258 (j==0) ? UScript.getName(i) : UScript.getShortName(i);
1259 pat = "[:" + name + ":]";
1260 UnicodeSet set = new UnicodeSet(pat);
1261 logln("Ok: " + pat + " -> " + set.toPattern(false));
1262 } catch (IllegalArgumentException e) {
// An empty pattern here means the exception came before 'pat' was assigned, i.e. from the UScript name lookup.
1263 if (pat.length() == 0) {
1264 errln("FAIL (in UScript): No name for script " + i);
1266 errln("FAIL: Couldn't create " + pat);
// Table-driven test of UnicodeSet.closeOver(): the table is read in groups of three
// (selector as a decimal string, input pattern, expected pattern). Afterwards the
// pattern-based API is checked with the UnicodeSet.CASE option.
1276 public void TestCloseOver() {
// Selectors are stored as strings in the table, so the CASE constant is converted once here.
1277 String CASE = String.valueOf(UnicodeSet.CASE);
1279 // selector, input, output
1281 "[aq\u00DF{Bc}{bC}{Fi}]",
1282 "[aAqQ\u00DF\u1E9E\uFB01{ss}{bc}{fi}]", // U+1E9E LATIN CAPITAL LETTER SHARP S is new in Unicode 5.1
1286 "[\u01F1\u01F2\u01F3]",
1290 "[\u1FB4{\u03AC\u03B9}]",
1297 "[a-z]","[A-Za-z\u017F\u212A]",
1304 UnicodeSet s = new UnicodeSet();
1305 UnicodeSet t = new UnicodeSet();
1306 for (int i=0; i<DATA.length; i+=3) {
1307 int selector = Integer.parseInt(DATA[i]);
1308 String pat = DATA[i+1];
1309 String exp = DATA[i+2];
// s is the input closed over with the selector; t is the expected result.
1310 s.applyPattern(pat);
1311 s.closeOver(selector);
1312 t.applyPattern(exp);
1314 logln("Ok: " + pat + ".closeOver(" + selector + ") => " + exp);
1316 errln("FAIL: " + pat + ".closeOver(" + selector + ") => " +
1317 s.toPattern(true) + ", expected " + exp);
1321 // Test the pattern API
1322 s.applyPattern("[abc]", UnicodeSet.CASE);
1323 expectContainment(s, "abcABC", "defDEF");
1324 s = new UnicodeSet("[^abc]", UnicodeSet.CASE);
1325 expectContainment(s, "defDEF", "abcABC");
// Builds a UnicodeSet from a pattern written with escapes (pass 1) and from the same pattern
// after Utility.unescape (pass 2); only pass 1 is expected to produce a set.
1328 public void TestEscapePattern() {
1329 // The following pattern must contain at least one range "c-d"
1330 // where c or d is a Pattern_White_Space.
1332 "[\\uFEFF \\u200E-\\u20FF \\uFFF9-\\uFFFC \\U0001D173-\\U0001D17A \\U000F0000-\\U000FFFFD ]";
1334 "[\\u200E-\\u20FF\\uFEFF\\uFFF9-\\uFFFC\\U0001D173-\\U0001D17A\\U000F0000-\\U000FFFFD]";
1335 // We test this with two passes; in the second pass we
1336 // pre-unescape the pattern. Since U+200E is Pattern_White_Space,
1337 // this fails -- which is what we expect.
1338 for (int pass=1; pass<=2; ++pass) {
1339 String pat = pattern;
1341 pat = Utility.unescape(pat);
1343 // Pattern is only good for pass 1
1344 boolean isPatternValid = (pass==1);
// set stays null when the constructor throws IllegalArgumentException.
1346 UnicodeSet set = null;
1348 set = new UnicodeSet(pat);
1349 } catch (IllegalArgumentException e) {
// A set must have been created exactly when the pattern is expected to be valid.
1352 if ((set != null) != isPatternValid){
1353 errln("FAIL: applyPattern(" +
1354 Utility.escape(pat) + ") => " + set);
// NOTE(review): the code tests 0x0644 but the message reports U+0664 -- confirm which is intended.
1360 if (set.contains((char)0x0644)){
1361 errln("FAIL: " + Utility.escape(pat) + " contains(U+0664)");
// The escaped pattern regenerated from the set must equal the expected pattern string exp.
1364 String newpat = set.toPattern(true);
1365 if (newpat.equals(exp)) {
1366 logln(Utility.escape(pat) + " => " + newpat);
1368 errln("FAIL: " + Utility.escape(pat) + " => " + newpat);
// Build a description of every range and report any range whose start is negative.
1371 for (int i=0; i<set.getRangeCount(); ++i) {
1372 StringBuffer str = new StringBuffer("Range ");
// 0x30 is the digit '0', so this appends the range index as a single character.
1373 str.append((char)(0x30 + i))
1375 UTF16.append(str, set.getRangeStart(i));
1377 UTF16.append(str, set.getRangeEnd(i));
1378 String s = Utility.escape(str.toString() + " (" + set.getRangeStart(i) + " - " +
1379 set.getRangeEnd(i) + ")");
1380 if (set.getRangeStart(i) < 0) {
1381 errln("FAIL: " + s);
// Parses patterns containing $variable references that are resolved through a TokenSymbolTable
// and compares the resulting sets with sets built from the expected patterns.
1389 public void TestSymbolTable() {
1390 // Multiple test cases can be set up here. Each test case
1391 // is terminated by null:
1392 // var, value, var, value,..., input pat., exp. output pat., null
1394 "us", "a-z", "[0-1$us]", "[0-1a-z]", null,
1395 "us", "[a-z]", "[0-1$us]", "[0-1[a-z]]", null,
1396 "us", "\\[a\\-z\\]", "[0-1$us]", "[-01\\[\\]az]", null
1399 for (int i=0; i<DATA.length; ++i) {
// A fresh symbol table is used for every test case.
1400 TokenSymbolTable sym = new TokenSymbolTable();
// Register var/value pairs; the loop ends once the entry two positions ahead is the null
// terminator, i.e. when DATA[i] and DATA[i+1] are the input and expected patterns.
1403 while (DATA[i+2] != null) {
1404 sym.add(DATA[i], DATA[i+1]);
1408 // Input pattern and expected output pattern
1409 String inpat = DATA[i], exppat = DATA[i+1];
// Parse the input pattern from index 0 with the symbol table supplying variable values.
1412 ParsePosition pos = new ParsePosition(0);
1413 UnicodeSet us = new UnicodeSet(inpat, pos, sym);
// The parse must have consumed the entire input pattern.
1416 if (pos.getIndex() != inpat.length()) {
1417 errln("Failed to read to end of string \""
1418 + inpat + "\": read to "
1419 + pos.getIndex() + ", length is "
// The parsed set must equal the set built directly from the expected pattern.
1423 UnicodeSet us2 = new UnicodeSet(exppat);
1424 if (!us.equals(us2)) {
1425 errln("Failed, got " + us + ", expected " + us2);
1427 logln("Ok, got " + us);
1430 //cover UnicodeSet(String,ParsePosition,SymbolTable,int)
1431 ParsePosition inpos = new ParsePosition(0);
1432 UnicodeSet inSet = new UnicodeSet(inpat, inpos, sym, UnicodeSet.IGNORE_SPACE);
1433 UnicodeSet expSet = new UnicodeSet(exppat);
1434 if (!inSet.equals(expSet)) {
1435 errln("FAIL: Failed, got " + inSet + ", expected " + expSet);
1437 logln("OK: got " + inSet);
1443 * Test that Posix style character classes [:digit:], etc.
1444 * have the Unicode definitions from TR 18.
1446 public void TestPosixClasses() {
// Each call names the class under test, then gives the POSIX-style pattern and the
// property-based pattern that must produce an equal set (compared by expectEqual).
1447 expectEqual("POSIX alpha", "[:alpha:]", "\\p{Alphabetic}");
1448 expectEqual("POSIX lower", "[:lower:]", "\\p{lowercase}");
1449 expectEqual("POSIX upper", "[:upper:]", "\\p{Uppercase}");
1450 expectEqual("POSIX punct", "[:punct:]", "\\p{gc=Punctuation}");
1451 expectEqual("POSIX digit", "[:digit:]", "\\p{gc=DecimalNumber}");
1452 expectEqual("POSIX xdigit", "[:xdigit:]", "[\\p{DecimalNumber}\\p{HexDigit}]");
1453 expectEqual("POSIX alnum", "[:alnum:]", "[\\p{Alphabetic}\\p{DecimalNumber}]");
1454 expectEqual("POSIX space", "[:space:]", "\\p{Whitespace}");
// blank is written here as Whitespace minus the listed line-breaking characters and separators.
1455 expectEqual("POSIX blank", "[:blank:]", "[\\p{Whitespace}-[\\u000a\\u000B\\u000c\\u000d\\u0085\\p{LineSeparator}\\p{ParagraphSeparator}]]");
1456 expectEqual("POSIX cntrl", "[:cntrl:]", "\\p{Control}");
1457 expectEqual("POSIX graph", "[:graph:]", "[^\\p{Whitespace}\\p{Control}\\p{Surrogate}\\p{Unassigned}]");
// print is written in terms of the graph and blank classes, minus Control.
1458 expectEqual("POSIX print", "[:print:]", "[[:graph:][:blank:]-[\\p{Control}]]");
// Checks that the LVT_Syllable and LV_Syllable values of Hangul_Syllable_Type each
// produce a set that differs from the empty UnicodeSet.
1461 public void TestHangulSyllable() {
1462 final UnicodeSet lvt = new UnicodeSet("[:Hangul_Syllable_Type=LVT_Syllable:]");
// Comparing against new UnicodeSet() asserts that the property set is not empty.
1463 assertNotEquals("LVT count", new UnicodeSet(), lvt);
1464 logln(lvt + ": " + lvt.size());
1465 final UnicodeSet lv = new UnicodeSet("[:Hangul_Syllable_Type=LV_Syllable:]");
1466 assertNotEquals("LV count", new UnicodeSet(), lv);
1467 logln(lv + ": " + lv.size());
1471 * Test that frozen classes disallow changes. For 4217
1473 public void TestFrozen() {
// The set under test: whitespace characters plus the letter A.
1474 UnicodeSet test = new UnicodeSet("[[:whitespace:]A]");
// checkModification runs once treating the set as frozen and once as thawed.
1476 checkModification(test, true);
1477 checkModification(test, false);
1481 * Test Generic support
1483 public void TestGenerics() {
// Exercises the Iterable/Collection-based UnicodeSet methods (iteration, addAllTo, containsAll,
// containsSome, containsNone, addAll, removeAll, retainAll) on three frozen sets.
1484 UnicodeSet set1 = new UnicodeSet("[a-b d-g {ch} {zh}]").freeze();
1485 UnicodeSet set2 = new UnicodeSet("[e-f {ch}]").freeze();
1486 UnicodeSet set3 = new UnicodeSet("[d m-n {dh}]").freeze();
1487 // A useful range of sets for testing, including both characters and strings
1488 // set 1 contains set2
1489 // set 1 overlaps with set 3
1490 // set 2 is disjoint with set 3
1492 //public Iterator<String> iterator() {
// Reference contents, collected with the explicit UnicodeSetIterator API.
1494 ArrayList<String> oldList = new ArrayList<String>();
1495 for (UnicodeSetIterator it = new UnicodeSetIterator(set1); it.next();) {
1496 oldList.add(it.getString());
// The same contents, collected with the for-each form, must match the reference list.
1499 ArrayList<String> list1 = new ArrayList<String>();
1500 for (String s : set1) {
1503 assertEquals("iteration test", oldList, list1);
1505 //addAllTo(Iterable<T>, U)
1507 set1.addAllTo(list1);
1508 assertEquals("iteration test", oldList, list1);
// addAllTo returns the collection it was given, so the result can be assigned directly.
1510 list1 = set1.addAllTo(new ArrayList<String>());
1511 assertEquals("addAllTo", oldList, list1);
1513 ArrayList<String> list2 = set2.addAllTo(new ArrayList<String>());
1514 ArrayList<String> list3 = set3.addAllTo(new ArrayList<String>());
1516 // put them into different order, to check that order doesn't matter
// NOTE(review): these three declarations use the raw TreeSet type although a TreeSet<String> is created.
1517 TreeSet sorted1 = set1.addAllTo(new TreeSet<String>());
1518 TreeSet sorted2 = set2.addAllTo(new TreeSet<String>());
1519 TreeSet sorted3 = set3.addAllTo(new TreeSet<String>());
1521 //containsAll(Collection<String> collection)
1522 assertTrue("containsAll", set1.containsAll(list1));
1523 assertTrue("containsAll", set1.containsAll(sorted1));
1524 assertTrue("containsAll", set1.containsAll(list2));
1525 assertTrue("containsAll", set1.containsAll(sorted2));
1526 assertFalse("containsAll", set1.containsAll(list3));
1527 assertFalse("containsAll", set1.containsAll(sorted3));
1528 assertFalse("containsAll", set2.containsAll(list3));
1529 assertFalse("containsAll", set2.containsAll(sorted3));
1531 //containsSome(Collection<String>)
1532 assertTrue("containsSome", set1.containsSome(list1));
1533 assertTrue("containsSome", set1.containsSome(sorted1));
1534 assertTrue("containsSome", set1.containsSome(list2));
1535 assertTrue("containsSome", set1.containsSome(sorted2));
1536 assertTrue("containsSome", set1.containsSome(list3));
1537 assertTrue("containsSome", set1.containsSome(sorted3));
1538 assertFalse("containsSome", set2.containsSome(list3));
1539 assertFalse("containsSome", set2.containsSome(sorted3));
1541 //containsNone(Collection<String>)
1542 assertFalse("containsNone", set1.containsNone(list1));
1543 assertFalse("containsNone", set1.containsNone(sorted1));
1544 assertFalse("containsNone", set1.containsNone(list2));
1545 assertFalse("containsNone", set1.containsNone(sorted2));
1546 assertFalse("containsNone", set1.containsNone(list3));
1547 assertFalse("containsNone", set1.containsNone(sorted3));
1548 assertTrue("containsNone", set2.containsNone(list3));
1549 assertTrue("containsNone", set2.containsNone(sorted3));
// Adding the individual items "d", "m", "n", "dh" must reproduce set3.
1552 UnicodeSet other3 = new UnicodeSet().addAll("d", "m", "n", "dh");
1553 assertEquals("addAll", set3, other3);
1555 //removeAll(Collection<String>)
// Removing a UnicodeSet and removing the equivalent list of strings must give the same result.
1556 UnicodeSet mod1 = new UnicodeSet(set1).removeAll(set2);
1557 UnicodeSet mod2 = new UnicodeSet(set1).removeAll(list2);
1558 assertEquals("remove all", mod1, mod2);
1560 //retainAll(Collection<String>)
1561 mod1 = new UnicodeSet(set1).retainAll(set2);
1562 mod2 = new UnicodeSet(set1).retainAll(set2.addAllTo(new LinkedHashSet<String>()));
// NOTE(review): this assertion checks retainAll but is labelled "remove all" -- the label looks copied from above.
1563 assertEquals("remove all", mod1, mod2);
// Checks the ordering produced by UnicodeSet.compareTo (default, LONGER_FIRST, LEXICOGRAPHIC),
// the static UnicodeSet.compare overloads mixing a code point with a string, and a sorted
// comparison of two lists holding the same strings in opposite order.
1566 public void TestComparison() {
1567 UnicodeSet set1 = new UnicodeSet("[a-b d-g {ch} {zh}]").freeze();
1568 UnicodeSet set2 = new UnicodeSet("[c-e {ch}]").freeze();
1569 UnicodeSet set3 = new UnicodeSet("[d m-n z {dh}]").freeze();
1571 //compareTo(UnicodeSet)
1572 // do indirectly, by sorting
// Expected orders for each comparison style; unsorted is the common starting order.
1573 List<UnicodeSet> unsorted = Arrays.asList(set3, set2, set1);
1574 List<UnicodeSet> goalShortest = Arrays.asList(set2, set3, set1);
1575 List<UnicodeSet> goalLongest = Arrays.asList(set1, set3, set2);
1576 List<UnicodeSet> goalLex = Arrays.asList(set1, set2, set3);
// A TreeSet without a comparator sorts by the natural compareTo order.
1578 List<UnicodeSet> sorted = new ArrayList(new TreeSet<UnicodeSet>(unsorted));
1579 assertNotEquals("compareTo-shorter-first", unsorted, sorted);
1580 assertEquals("compareTo-shorter-first", goalShortest, sorted);
1582 TreeSet<UnicodeSet> sorted1 = new TreeSet<UnicodeSet>(new Comparator<UnicodeSet>(){
1583 public int compare(UnicodeSet o1, UnicodeSet o2) {
1584 // Delegate to compareTo with the LONGER_FIRST style
1585 return o1.compareTo(o2, ComparisonStyle.LONGER_FIRST);
1587 sorted1.addAll(unsorted);
1588 sorted = new ArrayList(sorted1);
1589 assertNotEquals("compareTo-longer-first", unsorted, sorted);
1590 assertEquals("compareTo-longer-first", goalLongest, sorted);
1592 sorted1 = new TreeSet<UnicodeSet>(new Comparator<UnicodeSet>(){
1593 public int compare(UnicodeSet o1, UnicodeSet o2) {
1594 // Delegate to compareTo with the LEXICOGRAPHIC style
1595 return o1.compareTo(o2, ComparisonStyle.LEXICOGRAPHIC);
1597 sorted1.addAll(unsorted);
1598 sorted = new ArrayList(sorted1);
1599 assertNotEquals("compareTo-lex", unsorted, sorted);
1600 assertEquals("compareTo-lex", goalLex, sorted);
1602 //compare(String, int)
1603 // make a list of interesting combinations
// The sources include boundary code units around the surrogate ranges.
1604 List<String> sources = Arrays.asList("\u0000", "a", "b", "\uD7FF", "\uD800", "\uDBFF", "\uDC00", "\uDFFF", "\uE000", "\uFFFD", "\uFFFF");
1605 TreeSet<String> target = new TreeSet<String>();
1606 for (String s : sources) {
1608 for (String t : sources) {
1610 for (String u : sources) {
// Three-element concatenations of the source strings.
1611 target.add(s + t + u);
1615 // now compare all the combinations. If any of them is a code point, use it.
1616 int maxErrorCount = 0;
1618 for (String last : target) {
1619 for (String curr : target) {
1620 int lastCount = Character.codePointCount(last, 0, last.length());
1621 int currCount = Character.codePointCount(curr, 0, curr.length());
// Use the (int, String) overload when last is a single code point, otherwise the
// (String, int) overload when curr is a single code point.
1623 if (lastCount == 1) {
1624 comparison = UnicodeSet.compare(last.codePointAt(0), curr);
1625 } else if (currCount == 1) {
1626 comparison = UnicodeSet.compare(last, curr.codePointAt(0));
// The result is checked for exact equality with String.compareTo.
1630 if (comparison != last.compareTo(curr)) {
1631 // repeat for debugging
1632 if (lastCount == 1) {
1633 comparison = UnicodeSet.compare(last.codePointAt(0), curr);
1634 } else if (currCount == 1) {
1635 comparison = UnicodeSet.compare(last, curr.codePointAt(0));
// After more than ten failures, report that the remaining ones are omitted.
1637 if (maxErrorCount++ > 10) {
1638 errln(maxErrorCount + " Failure in comparing " + last + " & " + curr + "\tOmitting others...");
1641 errln(maxErrorCount + " Failure in comparing " + last + " & " + curr);
1646 //compare(Iterable<T>, Iterable<T>)
// max is only the initial capacity here; the loop below runs max + 1 times.
1648 List<String> test1 = new ArrayList<String>(max);
1649 List<String> test2 = new ArrayList<String>(max);
1650 for (int i = 0; i <= max; ++i) {
1652 test2.add("a" + (max - i)); // add in reverse order
// The lists differ in order, but their sorted forms are expected to be equal.
1654 assertNotEquals("compare iterable test", test1, test2);
1655 TreeSet<CharSequence> sortedTest1 = new TreeSet<CharSequence>(test1);
1656 TreeSet<CharSequence> sortedTest2 = new TreeSet<CharSequence>(test2);
1657 assertEquals("compare iterable test", sortedTest1, sortedTest2);
// Checks that the range-pair constructor builds the same sets as addAll(start, end), that it
// rejects malformed argument lists with IllegalArgumentException, and then runs CheckRangeSpeed.
1660 public void TestRangeConstructor() {
1661 UnicodeSet w = new UnicodeSet().addAll(3,5);
1662 UnicodeSet s = new UnicodeSet(3,5);
1663 assertEquals("new constructor", w, s);
// Two ranges: 3-5 and the single code point 7.
1665 w = new UnicodeSet().addAll(3,5).addAll(7,7);
1666 UnicodeSet t = new UnicodeSet(3,5, 7,7);
1667 assertEquals("new constructor", w, t);
1668 // check to make sure right exceptions are thrown
1669 Class expected = IllegalArgumentException.class;
// Case 1: a single argument, i.e. an odd number of range endpoints.
1674 @SuppressWarnings("unused")
1675 UnicodeSet u = new UnicodeSet(5);
1676 } catch (IllegalArgumentException e) {
1677 actual = e.getClass();
1679 assertEquals("exception if odd", expected, actual);
// Case 2: the first pair has its start (3) above its end (2).
1683 @SuppressWarnings("unused")
1684 UnicodeSet u = new UnicodeSet(3, 2, 7, 9);
1685 } catch (IllegalArgumentException e) {
1686 actual = e.getClass();
1688 assertEquals("exception for start/end problem", expected, actual);
// Case 3: the second pair starts at 6, directly after the first pair ends at 5.
1692 @SuppressWarnings("unused")
1693 UnicodeSet u = new UnicodeSet(3, 5, 6, 9);
1694 } catch (IllegalArgumentException e) {
1695 actual = e.getClass();
1697 assertEquals("exception for end/start problem", expected, actual);
// Timing comparison: 10000 iterations for the whitespace set, 1000 for the letter set.
1699 CheckRangeSpeed(10000, new UnicodeSet("[:whitespace:]"));
1700 CheckRangeSpeed(1000, new UnicodeSet("[:letter:]"));
// Times `iterations` constructions of a set from an existing set against `iterations`
// constructions from its pattern string, and builds a message with both per-iteration
// averages (milliseconds) and their relative difference.
1707 private void CheckRangeSpeed(int iterations, UnicodeSet testSet) {
// NOTE(review): a double complement leaves membership unchanged; presumably done to
// normalise the set before timing -- confirm the intent.
1708 testSet.complement().complement();
1709 String testPattern = testSet.toString();
1710 // fill a set of pairs from the pattern
1711 int[] pairs = new int[testSet.getRangeCount()*2];
1713 for (UnicodeSetIterator it = new UnicodeSetIterator(testSet); it.nextRange();) {
1714 pairs[j++] = it.codepoint;
1715 pairs[j++] = it.codepointEnd;
// NOTE(review): pairs is filled above, but the visible code builds fromRange with the copy
// constructor, here and in the timed loop, so the "range constructor" figure does not use
// pairs -- confirm whether new UnicodeSet(pairs) was intended.
1717 UnicodeSet fromRange = new UnicodeSet(testSet);
1718 assertEquals("from range vs pattern", testSet, fromRange);
// First timed loop: construction from the existing set.
1720 double start = System.currentTimeMillis();
1721 for (int i = 0; i < iterations; ++i) {
1722 fromRange = new UnicodeSet(testSet);
// Second timed loop: construction by parsing the pattern string.
1724 double middle = System.currentTimeMillis();
1725 for (int i = 0; i < iterations; ++i) {
1726 new UnicodeSet(testPattern);
1728 double end = System.currentTimeMillis();
// Average wall-clock milliseconds per construction for each loop.
1730 double rangeConstructorTime = (middle - start)/iterations;
1731 double patternConstructorTime = (end - middle)/iterations;
1732 String message = "Range constructor:\t" + rangeConstructorTime + ";\tPattern constructor:\t" + patternConstructorTime + "\t\t"
1733 + percent.format(rangeConstructorTime/patternConstructorTime-1);
// The comparison threshold is twice the pattern-constructor time.
1734 if (rangeConstructorTime < 2*patternConstructorTime) {
// Percent formatter used by CheckRangeSpeed to render the relative timing difference.
1741 NumberFormat percent = NumberFormat.getPercentInstance();
// Limit the rendered percentage to at most two fraction digits.
1743 percent.setMaximumFractionDigits(2);
1745 // ****************************************
1747 // ****************************************
// Applies each mutating UnicodeSet operation (selected by the loop index) to a copy of
// `original` and checks the outcome: a frozen copy must throw UnsupportedOperationException
// and stay equal to the original; a thawed copy must not throw and must change.
1749 public void checkModification(UnicodeSet original, boolean isFrozen) {
1751 for (int i = 0; ;++i) {
// clone() is used for the frozen case, cloneAsThawed() for the modifiable case.
1752 UnicodeSet test = (UnicodeSet) (isFrozen ? original.clone() : original.cloneAsThawed());
// Starts as true and is cleared only after the selected operation completes without throwing.
1753 boolean gotException = true;
1754 boolean checkEquals = true;
1757 case 0: test.add(0); break;
1758 case 1: test.add(0,1); break;
1759 case 2: test.add("a"); break;
1760 case 3: List a = new ArrayList(); a.add("a"); test.addAll(a); break;
1761 case 4: test.addAll("ab"); break;
1762 case 5: test.addAll(new UnicodeSet("[ab]")); break;
1763 case 6: test.applyIntPropertyValue(0,0); break;
1764 case 7: test.applyPattern("[ab]"); break;
1765 case 8: test.applyPattern("[ab]", true); break;
1766 case 9: test.applyPattern("[ab]", 0); break;
1767 case 10: test.applyPropertyAlias("hex","true"); break;
1768 case 11: test.applyPropertyAlias("hex", "true", null); break;
1769 case 12: test.closeOver(UnicodeSet.CASE); break;
// compact() is the one operation for which the equality check is switched off.
1770 case 13: test.compact(); checkEquals = false; break;
1771 case 14: test.complement(0); break;
1772 case 15: test.complement(0,0); break;
1773 case 16: test.complement("ab"); break;
1774 case 17: test.complementAll("ab"); break;
1775 case 18: test.complementAll(new UnicodeSet("[ab]")); break;
1776 case 19: test.remove(' '); break;
1777 case 20: test.remove(' ','a'); break;
1778 case 21: test.remove(" "); break;
1779 case 22: test.removeAll(" a"); break;
1780 case 23: test.removeAll(new UnicodeSet("[\\ a]")); break;
1781 case 24: test.retain(' '); break;
1782 case 25: test.retain(' ','a'); break;
1783 case 26: test.retain(" "); break;
1784 case 27: test.retainAll(" a"); break;
1785 case 28: test.retainAll(new UnicodeSet("[\\ a]")); break;
1786 case 29: test.set(0,1); break;
1787 case 30: test.set(new UnicodeSet("[ab]")); break;
1789 default: continue main; // so we don't keep having to change the endpoint, and gaps are not skipped.
1792 gotException = false;
1793 } catch (UnsupportedOperationException e) {
// The exception must occur exactly when the copy is frozen.
1796 if (isFrozen && !gotException) errln(i + ") attempt to modify frozen object didn't result in an exception");
1797 if (!isFrozen && gotException) errln(i + ") attempt to modify thawed object did result in an exception");
// Content check: a thawed copy must differ from the original, a frozen copy must not.
1799 if (test.equals(original)) {
1800 if (!isFrozen) errln(i + ") attempt to modify thawed object didn't change the object");
1802 if (isFrozen) errln(i + ") attempt to modify frozen object changed the object");
1808 // The following code block is commented out to eliminate PrettyPrinter dependencies
1810 // String[] prettyData = {
1811 // "[\\uD7DE-\\uD90C \\uDCB5-\\uDD9F]", // special case
1813 // "[:whitespace:]",
1814 // "[:linebreak=AL:]",
1817 // public void TestPrettyPrinting() {
1819 // PrettyPrinter pp = new PrettyPrinter();
1822 // for (; i < prettyData.length; ++i) {
1823 // UnicodeSet test = new UnicodeSet(prettyData[i]);
1824 // checkPrettySet(pp, i, test);
1826 // Random random = new Random(0);
1827 // UnicodeSet test = new UnicodeSet();
1829 // // To keep runtimes under control, make the number of random test cases
1830 // // to try depends on the test framework exhaustive setting.
1831 // // params.inclusions = 5: default exhaustive value
1832 // // params.inclusions = 10: max exhaustive value.
1833 // int iterations = 50;
1834 // if (params.inclusion > 5) {
1835 // iterations = (params.inclusion-5) * 200;
1837 // for (; i < iterations; ++i) {
1838 // double start = random.nextGaussian() * 0x10000;
1839 // if (start < 0) start = - start;
1840 // if (start > 0x10FFFF) {
1841 // start = 0x10FFFF;
1843 // double end = random.nextGaussian() * 0x100;
1844 // if (end < 0) end = -end;
1845 // end = start + end;
1846 // if (end > 0x10FFFF) {
1849 // test.complement((int)start, (int)end);
1850 // checkPrettySet(pp, i, test);
1852 // }catch(RuntimeException ex){
1853 // warnln("Could not load Collator");
1857 // private void checkPrettySet(PrettyPrinter pp, int i, UnicodeSet test) {
1858 // String pretty = pp.toPattern(test);
1859 // UnicodeSet retry = new UnicodeSet(pretty);
1860 // if (!test.equals(retry)) {
1861 // errln(i + ". Failed test: " + test + " != " + pretty);
1863 // logln(i + ". Worked for " + truncate(test.toString()) + " => " + truncate(pretty));
1867 // private String truncate(String string) {
1868 // if (string.length() <= 100) return string;
1869 // return string.substring(0,97) + "...";
// Minimal SymbolTable implementation for the tests: variables are stored in a map from
// name to char[] value, and every add/lookup/parse is logged through logln.
1872 public class TokenSymbolTable implements SymbolTable {
// Maps variable name (String) to its value (char[]).
// NOTE(review): raw HashMap; the casts in lookup depend on only char[] values being stored.
1873 HashMap contents = new HashMap();
1876 * (Non-SymbolTable API) Add the given variable and value to
1877 * the table. Variable should NOT contain leading '$'.
1879 public void add(String var, String value) {
// Copy the characters of value into a fresh char[] buffer.
1880 char[] buffer = new char[value.length()];
1881 value.getChars(0, value.length(), buffer, 0);
1886 * (Non-SymbolTable API) Add the given variable and value to
1887 * the table. Variable should NOT contain leading '$'.
1889 public void add(String var, char[] body) {
1890 logln("TokenSymbolTable: add \"" + var + "\" => \"" +
1891 new String(body) + "\"");
// The array is stored as given, without copying.
1892 contents.put(var, body);
1896 * @see com.ibm.icu.text.SymbolTable#lookup(java.lang.String)
1898 public char[] lookup(String s) {
// NOTE(review): for a name that is not in the table, contents.get(s) is null and
// new String((char[]) null) throws while building the log message, before the return.
1899 logln("TokenSymbolTable: lookup \"" + s + "\" => \"" +
1900 new String((char[]) contents.get(s)) + "\"");
1901 return (char[])contents.get(s);
1905 * @see com.ibm.icu.text.SymbolTable#lookupMatcher(int)
1907 public UnicodeMatcher lookupMatcher(int ch) {
1912 * @see com.ibm.icu.text.SymbolTable#parseReference(java.lang.String,
1913 java.text.ParsePosition, int)
1915 public String parseReference(String text, ParsePosition pos, int
1918 int start = pos.getIndex();
// Advance one code point at a time from start, up to limit.
1920 for (i = start; i < limit; i += UTF16.getCharCount(cp)) {
1921 cp = UTF16.charAt(text, i);
// The scan tests each code point for being a Unicode identifier part.
1922 if (!com.ibm.icu.lang.UCharacter.isUnicodeIdentifierPart(cp)) {
1926 logln("TokenSymbolTable: parse \"" + text + "\" from " +
1927 start + " to " + i +
1928 " => \"" + text.substring(start,i) + "\"");
// The returned name is the scanned stretch of text between start and i.
1930 return text.substring(start,i);
// Checks that patterns spelling a supplementary code point in different ways all produce
// a four-element set holding a, b, c and that code point, but neither lone surrogate.
1934 public void TestSurrogate() {
1936 // These should all behave identically
1937 "[abc\\uD800\\uDC00]",
1938 "[abc\uD800\uDC00]",
1941 for (int i=0; i<DATA.length; ++i) {
1942 logln("Test pattern " + i + " :" + Utility.escape(DATA[i]));
1943 UnicodeSet set = new UnicodeSet(DATA[i]);
// Expected in: a, b, c and the code point written as an eight-digit escape.
// Expected out: each surrogate half on its own (and the ';' that separates them).
1944 expectContainment(set,
1945 CharsToUnicodeString("abc\\U00010000"),
1946 "\uD800;\uDC00"); // split apart surrogate-pair
1947 if (set.size() != 4) {
1948 errln(Utility.escape("FAIL: " + DATA[i] + ".size() == " +
1949 set.size() + ", expected 4"));
// Cross-checks containsNone, containsAll and equals for every pair of sets built from the
// bitmasks 0..255 against the same relations computed directly on the integers.
1954 public void TestContains() {
1955 int limit = 256; // combinations to test
1956 for (int i = 0; i < limit; ++i) {
1957 logln("Trying: " + i);
1958 UnicodeSet x = bitsToSet(i);
1959 for (int j = 0; j < limit; ++j) {
1960 UnicodeSet y = bitsToSet(j);
// Expected answers from bit arithmetic: no shared bits, all of j's bits inside i, same value.
1961 boolean containsNone = (i & j) == 0;
1962 boolean containsAll = (i & j) == j;
1963 boolean equals = i == j;
1964 if (containsNone != x.containsNone(y)) {
1965 x.containsNone(y); // repeat for debugging
// NOTE(review): this branch checks containsNone but the message says "containsSome".
1966 errln("FAILED: " + x + " containsSome " + y);
1968 if (containsAll != x.containsAll(y)) {
1969 x.containsAll(y); // repeat for debugging
1970 errln("FAILED: " + x + " containsAll " + y);
1972 if (equals != x.equals(y)) {
1973 x.equals(y); // repeat for debugging
1974 errln("FAILED: " + x + " equals " + y);
// Helper for the bitmask-based tests: builds two sets from bitmask a (x is kept as the
// reference for messages, z is the working copy), reads z back as bits with setToBits
// and verifies z's canonical representation.
1980 void _testComplement(int a) {
1981 UnicodeSet x = bitsToSet(a);
1982 UnicodeSet z = bitsToSet(a);
1984 int c = setToBits(z);
// NOTE(review): both messages say "add:" although this helper is the complement check.
1986 errln("FAILED: add: ~" + x + " != " + z);
1987 errln("FAILED: add: ~" + a + " != " + c);
1989 checkCanonicalRep(z, "complement " + a);
// Helper for the bitmask-based tests: x and y are the operand sets for bitmasks a and b,
// z starts as a copy of x and is read back as bits; failures are reported as "a | b".
1992 void _testAdd(int a, int b) {
1993 UnicodeSet x = bitsToSet(a);
1994 UnicodeSet y = bitsToSet(b);
1995 UnicodeSet z = bitsToSet(a);
1997 int c = setToBits(z);
// Report both the set form and the integer form of the mismatch.
1999 errln(Utility.escape("FAILED: add: " + x + " | " + y + " != " + z));
2000 errln("FAILED: add: " + a + " | " + b + " != " + c);
2002 checkCanonicalRep(z, "add " + a + "," + b);
// Helper for the bitmask-based tests: x and y are the operand sets for bitmasks a and b,
// z starts as a copy of x and is read back as bits; failures are reported as "a & b".
2005 void _testRetain(int a, int b) {
2006 UnicodeSet x = bitsToSet(a);
2007 UnicodeSet y = bitsToSet(b);
2008 UnicodeSet z = bitsToSet(a);
2010 int c = setToBits(z);
// Report both the set form and the integer form of the mismatch.
2012 errln("FAILED: retain: " + x + " & " + y + " != " + z);
2013 errln("FAILED: retain: " + a + " & " + b + " != " + c);
2015 checkCanonicalRep(z, "retain " + a + "," + b);
// Helper for the bitmask-based tests: x and y are the operand sets for bitmasks a and b,
// z starts as a copy of x; its bit form must equal a with the bits of b cleared.
2018 void _testRemove(int a, int b) {
2019 UnicodeSet x = bitsToSet(a);
2020 UnicodeSet y = bitsToSet(b);
2021 UnicodeSet z = bitsToSet(a);
2023 int c = setToBits(z);
// Expected result computed on the integers: a AND NOT b.
2024 if (c != (a &~ b)) {
2025 errln("FAILED: remove: " + x + " &~ " + y + " != " + z);
2026 errln("FAILED: remove: " + a + " &~ " + b + " != " + c);
2028 checkCanonicalRep(z, "remove " + a + "," + b);
// Helper for the bitmask-based tests: x and y are the operand sets for bitmasks a and b,
// z starts as a copy of x and is read back as bits; failures are reported as "a ^ b".
2031 void _testXor(int a, int b) {
2032 UnicodeSet x = bitsToSet(a);
2033 UnicodeSet y = bitsToSet(b);
2034 UnicodeSet z = bitsToSet(a);
2036 int c = setToBits(z);
// The messages label the operation "complement" while showing the operands joined by ^.
2038 errln("FAILED: complement: " + x + " ^ " + y + " != " + z);
2039 errln("FAILED: complement: " + a + " ^ " + b + " != " + c);
2041 checkCanonicalRep(z, "complement " + a + "," + b);
2045 * Check that ranges are monotonically increasing and non-
2048 void checkCanonicalRep(UnicodeSet set, String msg) {
// msg names the operation that produced the set; it is included in every failure message.
2049 int n = set.getRangeCount();
2051 errln("FAIL result of " + msg +
2052 ": range count should be >= 0 but is " +
2053 n + " for " + Utility.escape(set.toString()));
// Inspect each range in index order.
2057 for (int i=0; i<n; ++i) {
2058 int start = set.getRangeStart(i);
2059 int end = set.getRangeEnd(i);
// Ranges are reported 1-based (i+1) in the messages.
2061 errln("FAIL result of " + msg +
2062 ": range " + (i+1) +
2063 " start > end: " + start + ", " + end +
2064 " for " + Utility.escape(set.toString()));
// From the second range on, a start at or below `last` is reported as an overlap.
2066 if (i > 0 && start <= last) {
2067 errln("FAIL result of " + msg +
2068 ": range " + (i+1) +
2069 " overlaps previous range: " + start + ", " + end +
2070 " for " + Utility.escape(set.toString()));
2077 * Convert a bitmask to a UnicodeSet.
2079 UnicodeSet bitsToSet(int a) {
2080 UnicodeSet result = new UnicodeSet();
// Bit i of the mask stands for the character with code i; all 32 bits are examined.
2081 for (int i = 0; i < 32; ++i) {
2082 if ((a & (1<<i)) != 0) {
// Add the single-character range [i, i].
2083 result.add((char)i,(char)i);
2091 * Convert a UnicodeSet to a bitmask. Only the characters
2092 * U+0000 to U+001F are represented in the bitmask.
2094 static int setToBits(UnicodeSet x) {
// Only the 32 characters with codes 0..31 are tested for membership.
2096 for (int i = 0; i < 32; ++i) {
2097 if (x.contains((char)i)) {
2105 * Return the representation of an inversion list based UnicodeSet
2106 * as a pairs list. Ranges are listed in ascending Unicode order.
2107 * For example, the set [a-zA-M3] is represented as "33AMaz".
2109 static String getPairs(UnicodeSet set) {
2110 StringBuffer pairs = new StringBuffer();
// Visit the ranges in index order, appending two characters (start, end) per range.
2111 for (int i=0; i<set.getRangeCount(); ++i) {
2112 int start = set.getRangeStart(i);
2113 int end = set.getRangeEnd(i);
// Setting i to the range count makes this the last loop iteration.
2116 i = set.getRangeCount(); // Should be unnecessary
// Both endpoints are narrowed to char before being appended.
2118 pairs.append((char)start).append((char)end);
2120 return pairs.toString();
2124 * Test function. Make sure that the sets have the right relation
2127 void expectRelation(Object relationObj, Object set1Obj, Object set2Obj, String message) {
// The arguments arrive as Objects: relationObj is an Integer relation code and the other
// two are UnicodeSets; message is appended to the final failure report.
2128 int relation = ((Integer) relationObj).intValue();
2129 UnicodeSet set1 = (UnicodeSet) set1Obj;
2130 UnicodeSet set2 = (UnicodeSet) set2Obj;
2132 // by-the-by, check the iterator
2133 checkRoundTrip(set1);
2134 checkRoundTrip(set2);
// Answers given by the UnicodeSet predicates themselves.
2136 boolean contains = set1.containsAll(set2);
2137 boolean isContained = set2.containsAll(set1);
2138 boolean disjoint = set1.containsNone(set2);
2139 boolean equals = set1.equals(set2);
// Derived sets used to cross-check those answers; each works on a copy of its operand.
2141 UnicodeSet intersection = new UnicodeSet(set1).retainAll(set2);
2142 UnicodeSet minus12 = new UnicodeSet(set1).removeAll(set2);
2143 UnicodeSet minus21 = new UnicodeSet(set2).removeAll(set1);
2145 // test basic properties
// set1 contains set2 exactly when the intersection is all of set2 (by size, then by equality).
2147 if (contains != (intersection.size() == set2.size())) {
2148 errln("FAIL contains1" + set1.toPattern(true) + ", " + set2.toPattern(true));
2151 if (contains != (intersection.equals(set2))) {
2152 errln("FAIL contains2" + set1.toPattern(true) + ", " + set2.toPattern(true));
// The mirror-image check for set2 containing set1.
2155 if (isContained != (intersection.size() == set1.size())) {
2156 errln("FAIL isContained1" + set1.toPattern(true) + ", " + set2.toPattern(true));
2159 if (isContained != (intersection.equals(set1))) {
2160 errln("FAIL isContained2" + set1.toPattern(true) + ", " + set2.toPattern(true));
// Mutual containment must coincide with equals().
2163 if ((contains && isContained) != equals) {
2164 errln("FAIL equals" + set1.toPattern(true) + ", " + set2.toPattern(true));
// Disjoint must coincide with an empty intersection.
2167 if (disjoint != (intersection.size() == 0)) {
2168 errln("FAIL disjoint" + set1.toPattern(true) + ", " + set2.toPattern(true));
2171 // Now see if the expected relation is true
// Three-bit status: 4 = set1 has members outside set2, 2 = the sets share members,
// 1 = set2 has members outside set1.
2172 int status = (minus12.size() != 0 ? 4 : 0)
2173 | (intersection.size() != 0 ? 2 : 0)
2174 | (minus21.size() != 0 ? 1 : 0);
2176 if (status != relation) {
// RELATION_NAME is indexed by the same code for both the desired and the found relation.
2177 errln("FAIL relation incorrect" + message
2178 + "; desired = " + RELATION_NAME[relation]
2179 + "; found = " + RELATION_NAME[status]
2180 + "; set1 = " + set1.toPattern(true)
2181 + "; set2 = " + set2.toPattern(true)
2187 * Basic consistency check for a few items.
2188 * That the iterator works, and that we can create a pattern and
2189 * get the same thing back
2192 void checkRoundTrip(UnicodeSet s) {
// Captured up front: the pattern produced by toPattern(false).
2193 String pat = s.toPattern(false);
// Copy via the iterator, first without and then with range iteration; both must equal s.
2194 UnicodeSet t = copyWithIterator(s, false);
2195 checkEqual(s, t, "iterator roundtrip");
2197 t = copyWithIterator(s, true); // try range
2198 checkEqual(s, t, "iterator roundtrip");
// Re-parsing the toPattern(false) form must reproduce s.
2200 t = new UnicodeSet(pat);
2201 checkEqual(s, t, "toPattern(false)");
// Likewise for the toPattern(true) form.
2203 pat = s.toPattern(true);
2204 t = new UnicodeSet(pat);
2205 checkEqual(s, t, "toPattern(true)");
// Rebuilds a set from s by walking it with a UnicodeSetIterator; withRange selects between
// the range-based and the element-based walk (per its use in checkRoundTrip).
2208 UnicodeSet copyWithIterator(UnicodeSet s, boolean withRange) {
2209 UnicodeSet t = new UnicodeSet();
2210 UnicodeSetIterator it = new UnicodeSetIterator(s);
// Range walk: each step yields either a string item or a [codepoint, codepointEnd] range.
2212 while (it.nextRange()) {
// IS_STRING in the codepoint field marks a string item rather than a code point.
2213 if (it.codepoint == UnicodeSetIterator.IS_STRING) {
2216 t.add(it.codepoint, it.codepointEnd);
// Element walk: the same string test, but single code points are added one at a time.
2221 if (it.codepoint == UnicodeSetIterator.IS_STRING) {
2224 t.add(it.codepoint);
// Compares source set s with result set t; the failure report carries the caller's
// message and the escaped patterns of both sets.
2231 boolean checkEqual(UnicodeSet s, UnicodeSet t, String message) {
2233 errln("FAIL " + message
2234 + "; source = " + s.toPattern(true)
2235 + "; result = " + t.toPattern(true)
// Builds a UnicodeSet from each of the two patterns and reports a failure, labelled with
// name, if either pattern cannot be parsed or the two sets differ.
2242 void expectEqual(String name, String pat1, String pat2) {
2243 UnicodeSet set1, set2;
2245 set1 = new UnicodeSet(pat1);
2246 set2 = new UnicodeSet(pat2);
2247 } catch (IllegalArgumentException e) {
// A pattern that fails to parse is reported as a test failure with the exception message.
2248 errln("FAIL: Couldn't create UnicodeSet from pattern for \"" + name + "\": " + e.getMessage());
2251 if(!set1.equals(set2)) {
2252 errln("FAIL: Sets built from patterns differ for \"" + name + "\"");
2257 * Expect the given set to contain the characters in charsIn and
2258 * to not contain those in charsOut.
2260 void expectContainment(String pat, String charsIn, String charsOut) {
// Pattern-based overload: parse pat, then delegate to the UnicodeSet-based overload.
2263 set = new UnicodeSet(pat);
2264 } catch (IllegalArgumentException e) {
// A pattern that cannot be parsed is itself reported as a failure.
2265 errln("FAIL: Couldn't create UnicodeSet from pattern \"" +
2266 pat + "\": " + e.getMessage());
2269 expectContainment(set, charsIn, charsOut);
2273 * Expect the given set to contain the characters in charsIn and
2274 * to not contain those in charsOut.
2276 void expectContainment(UnicodeSet set, String charsIn, String charsOut) {
// bad collects the code points that violate the expectation being checked.
2277 StringBuffer bad = new StringBuffer();
// Either argument may be null, in which case that half of the check is skipped.
2278 if (charsIn != null) {
// The argument may contain escape sequences; they are resolved before checking.
2279 charsIn = Utility.unescape(charsIn);
// Walk the string by code point, not by char, so supplementary characters are tested whole.
2280 for (int i=0; i<charsIn.length(); ) {
2281 int c = UTF16.charAt(charsIn,i);
2282 i += UTF16.getCharCount(c);
2283 if (!set.contains(c)) {
2284 UTF16.append(bad,c);
2287 if (bad.length() > 0) {
2288 errln(Utility.escape("FAIL: set " + set + " does not contain " + bad +
2289 ", expected containment of " + charsIn));
2291 logln(Utility.escape("Ok: set " + set + " contains " + charsIn));
2294 if (charsOut != null) {
2295 charsOut = Utility.unescape(charsOut);
// Same code-point walk, this time collecting characters that are unexpectedly present.
2297 for (int i=0; i<charsOut.length(); ) {
2298 int c = UTF16.charAt(charsOut,i);
2299 i += UTF16.getCharCount(c);
2300 if (set.contains(c)) {
2301 UTF16.append(bad, c);
2304 if (bad.length() > 0) {
2305 errln(Utility.escape("FAIL: set " + set + " contains " + bad +
2306 ", expected non-containment of " + charsOut));
2308 logln(Utility.escape("Ok: set " + set + " does not contain " + charsOut));
// Applies pattern to set (overwriting its contents) and compares the resulting range pairs,
// as produced by getPairs, with expectedPairs.
2313 void expectPattern(UnicodeSet set,
2315 String expectedPairs) {
2316 set.applyPattern(pattern);
2317 if (!getPairs(set).equals(expectedPairs)) {
2318 errln("FAIL: applyPattern(\"" + pattern +
2320 Utility.escape(getPairs(set)) + "\", expected \"" +
2321 Utility.escape(expectedPairs) + "\"");
2323 logln("Ok: applyPattern(\"" + pattern +
2325 Utility.escape(getPairs(set)) + "\"");
// Checks that set.toPattern(true) equals expPat and then checks string membership for
// the entries of expStrings.
2329 void expectToPattern(UnicodeSet set,
2331 String[] expStrings) {
2332 String pat = set.toPattern(true);
2333 if (pat.equals(expPat)) {
2334 logln("Ok: toPattern() => \"" + pat + "\"");
2336 errln("FAIL: toPattern() => \"" + pat + "\", expected \"" + expPat + "\"");
// A null expStrings gets its own branch before the loop.
2339 if (expStrings == null) {
2343 for (int i=0; i<expStrings.length; ++i) {
// NOT is a marker entry recognised by identity, so only the constant itself matches.
// NOTE(review): presumably it switches the expectation from "contained" to "not contained"
// for the entries that follow -- confirm against the handling of `in`.
2344 if (expStrings[i] == NOT) { // sic; pointer comparison
2348 boolean contained = set.contains(expStrings[i]);
// `in` is the expected membership; agreement is logged, disagreement is an error.
2349 if (contained == in) {
2350 logln("Ok: " + expPat +
2351 (contained ? " contains {" : " does not contain {") +
2352 Utility.escape(expStrings[i]) + "}");
2354 errln("FAIL: " + expPat +
2355 (contained ? " contains {" : " does not contain {") +
2356 Utility.escape(expStrings[i]) + "}");
// Reports a failure if the set's range pairs, as produced by getPairs, differ from expectedPairs.
2361 void expectPairs(UnicodeSet set, String expectedPairs) {
2362 if (!getPairs(set).equals(expectedPairs)) {
// Both pair strings are escaped so that non-printable range endpoints stay readable.
2363 errln("FAIL: Expected pair list \"" +
2364 Utility.escape(expectedPairs) + "\", got \"" +
2365 Utility.escape(getPairs(set)) + "\"");
// Thin wrapper that returns Utility.unescape(s).
2368 static final String CharsToUnicodeString(String s) {
2369 return Utility.unescape(s);
2372 /* Test the method public UnicodeSet getSet() */
2373 public void TestGetSet() {
// The iterator is created with its no-argument constructor, i.e. without an explicit set.
2374 UnicodeSetIterator us = new UnicodeSetIterator();
// Any exception from the call under test is reported as a failure.
2377 } catch (Exception e) {
2378 errln("UnicodeSetIterator.getSet() was not suppose to given an " + "an exception.");
2382 /* Tests the method public UnicodeSet add(Collection<?> source) */
2383 public void TestAddCollection() {
2384 UnicodeSet us = new UnicodeSet();
// The collection is deliberately null: the call under test is expected to throw.
2385 Collection<?> s = null;
// Reaching this errln means no exception was raised for the null argument.
2388 errln("UnicodeSet.add(Collection<?>) was suppose to return an exception for a null parameter.");
2389 } catch (Exception e) {
2393 public void TestConstants() {
2394 assertEquals("Empty", new UnicodeSet(), UnicodeSet.EMPTY);
2395 assertEquals("All", new UnicodeSet(0,0x10FFFF), UnicodeSet.ALL_CODE_POINTS);