2 *******************************************************************************
\r
3 * Copyright (C) 1996-2009, International Business Machines Corporation and *
\r
4 * others. All Rights Reserved. *
\r
5 *******************************************************************************
\r
7 package com.ibm.icu.dev.test.lang;
\r
9 import java.text.NumberFormat;
\r
10 import java.text.ParsePosition;
\r
11 import java.util.ArrayList;
\r
12 import java.util.Arrays;
\r
13 import java.util.Collection;
\r
14 import java.util.Comparator;
\r
15 import java.util.HashMap;
\r
16 import java.util.HashSet;
\r
17 import java.util.Iterator;
\r
18 import java.util.LinkedHashSet;
\r
19 import java.util.List;
\r
20 import java.util.Set;
\r
21 import java.util.SortedSet;
\r
22 import java.util.TreeSet;
\r
24 import com.ibm.icu.dev.test.TestFmwk;
\r
25 import com.ibm.icu.impl.SortedSetRelation;
\r
26 import com.ibm.icu.impl.Utility;
\r
27 import com.ibm.icu.lang.UCharacter;
\r
28 import com.ibm.icu.lang.UProperty;
\r
29 import com.ibm.icu.lang.UScript;
\r
30 import com.ibm.icu.lang.UCharacterEnums.ECharacterCategory;
\r
31 import com.ibm.icu.text.SymbolTable;
\r
32 import com.ibm.icu.text.UTF16;
\r
33 import com.ibm.icu.text.UnicodeMatcher;
\r
34 import com.ibm.icu.text.UnicodeSet;
\r
35 import com.ibm.icu.text.UnicodeSetIterator;
\r
36 import com.ibm.icu.text.UnicodeSet.ComparisonStyle;
\r
40 * @summary General test of UnicodeSet
\r
42 public class UnicodeSetTest extends TestFmwk {
\r
44 static final String NOT = "%%%%";
\r
46 public static void main(String[] args) throws Exception {
\r
47 new UnicodeSetTest().run(args);
\r
50 private static final boolean isCccValue(int ccc) {
\r
77 public void TestPropertyAccess() {
\r
79 // test to see that all of the names work
\r
80 for (int propNum = UProperty.BINARY_START; propNum < UProperty.INT_LIMIT; ++propNum) {
\r
82 //Skipping tests in the non-exhaustive mode to shorten the test time ticket#6475
\r
83 if(getInclusion()<=5 && count%5!=0){
\r
86 if (propNum >= UProperty.BINARY_LIMIT && propNum < UProperty.INT_START) { // skip the gap
\r
87 propNum = UProperty.INT_START;
\r
89 for (int nameChoice = UProperty.NameChoice.SHORT; nameChoice <= UProperty.NameChoice.LONG; ++nameChoice) {
\r
92 propName = UCharacter.getPropertyName(propNum, nameChoice);
\r
93 if (propName == null) {
\r
94 if (nameChoice == UProperty.NameChoice.SHORT) continue; // allow non-existent short names
\r
95 throw new NullPointerException();
\r
97 } catch (RuntimeException e1) {
\r
98 errln("Can't get property name for: "
\r
99 + "Property (" + propNum + ")"
\r
100 + ", NameChoice: " + nameChoice + ", "
\r
101 + e1.getClass().getName());
\r
104 logln("Property (" + propNum + "): " + propName);
\r
105 for (int valueNum = UCharacter.getIntPropertyMinValue(propNum); valueNum <= UCharacter.getIntPropertyMaxValue(propNum); ++valueNum) {
\r
108 valueName = UCharacter.getPropertyValueName(propNum, valueNum, nameChoice);
\r
109 if (valueName == null) {
\r
110 if (nameChoice == UProperty.NameChoice.SHORT) continue; // allow non-existent short names
\r
111 if ((propNum == UProperty.CANONICAL_COMBINING_CLASS ||
\r
112 propNum == UProperty.LEAD_CANONICAL_COMBINING_CLASS ||
\r
113 propNum == UProperty.TRAIL_CANONICAL_COMBINING_CLASS) &&
\r
114 !isCccValue(valueNum)) {
\r
115 // Only a few of the canonical combining classes have names.
\r
116 // Otherwise they are just integer values.
\r
119 throw new NullPointerException();
\r
122 } catch (RuntimeException e1) {
\r
123 errln("Can't get property value name for: "
\r
124 + "Property (" + propNum + "): " + propName + ", "
\r
125 + "Value (" + valueNum + ") "
\r
126 + ", NameChoice: " + nameChoice + ", "
\r
127 + e1.getClass().getName());
\r
130 logln("Value (" + valueNum + "): " + valueName);
\r
131 UnicodeSet testSet;
\r
133 testSet = new UnicodeSet("[:" + propName + "=" + valueName + ":]");
\r
134 } catch (RuntimeException e) {
\r
135 errln("Can't create UnicodeSet for: "
\r
136 + "Property (" + propNum + "): " + propName + ", "
\r
137 + "Value (" + valueNum + "): " + valueName + ", "
\r
138 + e.getClass().getName());
\r
141 UnicodeSet collectedErrors = new UnicodeSet();
\r
142 for (UnicodeSetIterator it = new UnicodeSetIterator(testSet); it.next();) {
\r
143 int value = UCharacter.getIntPropertyValue(it.codepoint, propNum);
\r
144 if (value != valueNum) {
\r
145 collectedErrors.add(it.codepoint);
\r
148 if (collectedErrors.size() != 0) {
\r
149 errln("Property Value Differs: "
\r
150 + "Property (" + propNum + "): " + propName + ", "
\r
151 + "Value (" + valueNum + "): " + valueName + ", "
\r
152 + "Differing values: " + collectedErrors.toPattern(true));
\r
161 * Test toPattern().
\r
163 public void TestToPattern() throws Exception {
\r
164 // Test that toPattern() round trips with syntax characters
\r
166 for (int i = 0; i < OTHER_TOPATTERN_TESTS.length; ++i) {
\r
167 checkPat(OTHER_TOPATTERN_TESTS[i], new UnicodeSet(OTHER_TOPATTERN_TESTS[i]));
\r
169 for (int i = 0; i <= 0x10FFFF; ++i) {
\r
170 if ((i <= 0xFF && !UCharacter.isLetter(i)) || UCharacter.isWhitespace(i)) {
\r
171 // check various combinations to make sure they all work.
\r
172 if (i != 0 && !toPatternAux(i, i)) continue;
\r
173 if (!toPatternAux(0, i)) continue;
\r
174 if (!toPatternAux(i, 0xFFFF)) continue;
\r
178 // Test pattern behavior of multicharacter strings.
\r
179 UnicodeSet s = new UnicodeSet("[a-z {aa} {ab}]");
\r
180 expectToPattern(s, "[a-z{aa}{ab}]",
\r
181 new String[] {"aa", "ab", NOT, "ac"});
\r
183 expectToPattern(s, "[a-z{aa}{ab}{ac}]",
\r
184 new String[] {"aa", "ab", "ac", NOT, "xy"});
\r
186 s.applyPattern("[a-z {\\{l} {r\\}}]");
\r
187 expectToPattern(s, "[a-z{r\\}}{\\{l}]",
\r
188 new String[] {"{l", "r}", NOT, "xy"});
\r
190 expectToPattern(s, "[a-z{\\[\\]}{r\\}}{\\{l}]",
\r
191 new String[] {"{l", "r}", "[]", NOT, "xy"});
\r
193 s.applyPattern("[a-z {\u4E01\u4E02}{\\n\\r}]");
\r
194 expectToPattern(s, "[a-z{\\u000A\\u000D}{\\u4E01\\u4E02}]",
\r
195 new String[] {"\u4E01\u4E02", "\n\r"});
\r
200 expectToPattern(s, "[{abc}]",
\r
201 new String[] {"abc", NOT, "ab"});
\r
203 // JB#3400: For 2 character ranges prefer [ab] to [a-b]
\r
206 expectToPattern(s, "[ab]", null);
\r
208 // Cover applyPattern, applyPropertyAlias
\r
210 s.applyPattern("[ab ]", true);
\r
211 expectToPattern(s, "[ab]", new String[] {"a", NOT, "ab", " "});
\r
213 s.applyPattern("[ab ]", false);
\r
214 expectToPattern(s, "[\\ ab]", new String[] {"a", "\u0020", NOT, "ab"});
\r
217 s.applyPropertyAlias("nv", "0.5");
\r
218 expectToPattern(s, "[\\u00BD\\u0D74\\u0F2A\\u2CFD\\uA831\\U00010141\\U00010175\\U00010176\\U00010E7B]", null);
\r
219 // Unicode 5.1 adds Malayalam 1/2 (\u0D74)
\r
220 // Unicode 5.2 adds U+A831 NORTH INDIC FRACTION ONE HALF and U+10E7B RUMI FRACTION ONE HALF
\r
223 s.applyPropertyAlias("gc", "Lu");
\r
224 // TODO expectToPattern(s, what?)
\r
226 // RemoveAllStrings()
\r
228 s.applyPattern("[a-z{abc}{def}]");
\r
229 expectToPattern(s, "[a-z{abc}{def}]", null);
\r
230 s.removeAllStrings();
\r
231 expectToPattern(s, "[a-z]", null);
\r
234 static String[] OTHER_TOPATTERN_TESTS = {
\r
235 "[[:latin:]&[:greek:]]",
\r
236 "[[:latin:]-[:greek:]]",
\r
237 "[:nonspacing mark:]"
\r
241 public boolean toPatternAux(int start, int end) {
\r
242 // use Integer.toString because Utility.hex doesn't handle ints
\r
243 String source = "0x" + Integer.toString(start,16).toUpperCase();
\r
244 if (start != end) source += "..0x" + Integer.toString(end,16).toUpperCase();
\r
245 UnicodeSet testSet = new UnicodeSet();
\r
246 testSet.add(start, end);
\r
247 return checkPat(source, testSet);
\r
250 boolean checkPat (String source, UnicodeSet testSet) {
\r
253 // What we want to make sure of is that a pattern generated
\r
254 // by toPattern(), with or without escaped unprintables, can
\r
255 // be passed back into the UnicodeSet constructor.
\r
256 String pat0 = testSet.toPattern(true);
\r
257 if (!checkPat(source + " (escaped)", testSet, pat0)) return false;
\r
259 //String pat1 = unescapeLeniently(pat0);
\r
260 //if (!checkPat(source + " (in code)", testSet, pat1)) return false;
\r
262 String pat2 = testSet.toPattern(false);
\r
263 if (!checkPat(source, testSet, pat2)) return false;
\r
265 //String pat3 = unescapeLeniently(pat2);
\r
266 //if (!checkPat(source + " (in code)", testSet, pat3)) return false;
\r
268 //logln(source + " => " + pat0 + ", " + pat1 + ", " + pat2 + ", " + pat3);
\r
269 logln(source + " => " + pat0 + ", " + pat2);
\r
270 } catch (Exception e) {
\r
271 errln("EXCEPTION in toPattern: " + source + " => " + pat);
\r
277 boolean checkPat (String source, UnicodeSet testSet, String pat) {
\r
278 UnicodeSet testSet2 = new UnicodeSet(pat);
\r
279 if (!testSet2.equals(testSet)) {
\r
280 errln("Fail toPattern: " + source + "; " + pat + " => " +
\r
281 testSet2.toPattern(false) + ", expected " +
\r
282 testSet.toPattern(false));
\r
288 // NOTE: copied the following from Utility. There ought to be a version in there with a flag
\r
289 // that does the Java stuff
\r
291 public static int unescapeAt(String s, int[] offset16) {
\r
297 int bitsPerDigit = 4;
\r
301 /* Check that offset is in range */
\r
302 int offset = offset16[0];
\r
303 int length = s.length();
\r
304 if (offset < 0 || offset >= length) {
\r
308 /* Fetch first UChar after '\\' */
\r
309 c = UTF16.charAt(s, offset);
\r
310 offset += UTF16.getCharCount(c);
\r
312 /* Convert hexadecimal and octal escapes */
\r
315 minDig = maxDig = 4;
\r
319 minDig = maxDig = 8;
\r
327 dig = UCharacter.digit(c, 8);
\r
331 n = 1; /* Already have first octal digit */
\r
338 while (offset < length && n < maxDig) {
\r
340 // TODO: Restore the char32-based code when UCharacter.digit
\r
341 // is working (Bug 66).
\r
343 //c = UTF16.charAt(s, offset);
\r
344 //dig = UCharacter.digit(c, (bitsPerDigit == 3) ? 8 : 16);
\r
345 c = s.charAt(offset);
\r
346 dig = Character.digit((char)c, (bitsPerDigit == 3) ? 8 : 16);
\r
350 result = (result << bitsPerDigit) | dig;
\r
351 //offset += UTF16.getCharCount(c);
\r
358 offset16[0] = offset;
\r
362 /* Convert C-style escapes in table */
\r
363 for (i=0; i<UNESCAPE_MAP.length; i+=2) {
\r
364 if (c == UNESCAPE_MAP[i]) {
\r
365 offset16[0] = offset;
\r
366 return UNESCAPE_MAP[i+1];
\r
367 } else if (c < UNESCAPE_MAP[i]) {
\r
372 /* If no special forms are recognized, then consider
\r
373 * the backslash to generically escape the next character. */
\r
374 offset16[0] = offset;
\r
378 /* This map must be in ASCENDING ORDER OF THE ESCAPE CODE */
\r
379 static private final char[] UNESCAPE_MAP = {
\r
394 * Convert all escapes in a given string using unescapeAt().
\r
395 * Leave invalid escape sequences unchanged.
\r
397 public static String unescapeLeniently(String s) {
\r
398 StringBuffer buf = new StringBuffer();
\r
399 int[] pos = new int[1];
\r
400 for (int i=0; i<s.length(); ) {
\r
401 char c = s.charAt(i++);
\r
404 int e = unescapeAt(s, pos);
\r
408 UTF16.append(buf, e);
\r
415 return buf.toString();
\r
418 public void TestPatterns() {
\r
419 UnicodeSet set = new UnicodeSet();
\r
420 expectPattern(set, "[[a-m]&[d-z]&[k-y]]", "km");
\r
421 expectPattern(set, "[[a-z]-[m-y]-[d-r]]", "aczz");
\r
422 expectPattern(set, "[a\\-z]", "--aazz");
\r
423 expectPattern(set, "[-az]", "--aazz");
\r
424 expectPattern(set, "[az-]", "--aazz");
\r
425 expectPattern(set, "[[[a-z]-[aeiou]i]]", "bdfnptvz");
\r
427 // Throw in a test of complement
\r
429 String exp = '\u0000' + "aeeoouu" + (char)('z'+1) + '\uFFFF';
\r
430 expectPairs(set, exp);
\r
433 public void TestCategories() {
\r
435 UnicodeSet set = new UnicodeSet("[:Lu:]");
\r
436 expectContainment(set, "ABC", "abc");
\r
438 // Make sure generation of L doesn't pollute cached Lu set
\r
439 // First generate L, then Lu
\r
440 // not used int TOP = 0x200; // Don't need to go over the whole range:
\r
441 set = new UnicodeSet("[:L:]");
\r
442 for (int i=0; i<0x200; ++i) {
\r
443 boolean l = UCharacter.isLetter(i);
\r
444 if (l != set.contains((char)i)) {
\r
445 errln("FAIL: L contains " + (char)i + " = " +
\r
446 set.contains((char)i));
\r
447 if (++failures == 10) break;
\r
451 set = new UnicodeSet("[:Lu:]");
\r
452 for (int i=0; i<0x200; ++i) {
\r
453 boolean lu = (UCharacter.getType(i) == ECharacterCategory.UPPERCASE_LETTER);
\r
454 if (lu != set.contains((char)i)) {
\r
455 errln("FAIL: Lu contains " + (char)i + " = " +
\r
456 set.contains((char)i));
\r
457 if (++failures == 20) break;
\r
462 public void TestAddRemove() {
\r
463 UnicodeSet set = new UnicodeSet();
\r
465 expectPairs(set, "az");
\r
466 set.remove('m', 'p');
\r
467 expectPairs(set, "alqz");
\r
468 set.remove('e', 'g');
\r
469 expectPairs(set, "adhlqz");
\r
470 set.remove('d', 'i');
\r
471 expectPairs(set, "acjlqz");
\r
472 set.remove('c', 'r');
\r
473 expectPairs(set, "absz");
\r
475 expectPairs(set, "abfqsz");
\r
476 set.remove('a', 'g');
\r
477 expectPairs(set, "hqsz");
\r
478 set.remove('a', 'z');
\r
479 expectPairs(set, "");
\r
481 // Try removing an entire set from another set
\r
482 expectPattern(set, "[c-x]", "cx");
\r
483 UnicodeSet set2 = new UnicodeSet();
\r
484 expectPattern(set2, "[f-ky-za-bc[vw]]", "acfkvwyz");
\r
485 set.removeAll(set2);
\r
486 expectPairs(set, "deluxx");
\r
488 // Try adding an entire set to another set
\r
489 expectPattern(set, "[jackiemclean]", "aacceein");
\r
490 expectPattern(set2, "[hitoshinamekatajamesanderson]", "aadehkmort");
\r
492 expectPairs(set, "aacehort");
\r
494 // Test commutativity
\r
495 expectPattern(set, "[hitoshinamekatajamesanderson]", "aadehkmort");
\r
496 expectPattern(set2, "[jackiemclean]", "aacceein");
\r
498 expectPairs(set, "aacehort");
\r
502 * Make sure minimal representation is maintained.
\r
504 public void TestMinimalRep() {
\r
505 // This is pretty thoroughly tested by checkCanonicalRep()
\r
506 // run against the exhaustive operation results. Use the code
\r
507 // here for debugging specific spot problems.
\r
509 // 1 overlap against 2
\r
510 UnicodeSet set = new UnicodeSet("[h-km-q]");
\r
511 UnicodeSet set2 = new UnicodeSet("[i-o]");
\r
513 expectPairs(set, "hq");
\r
515 set.applyPattern("[a-m]");
\r
516 set2.applyPattern("[e-o]");
\r
518 expectPairs(set, "ao");
\r
520 set.applyPattern("[e-o]");
\r
521 set2.applyPattern("[a-m]");
\r
523 expectPairs(set, "ao");
\r
524 // 1 overlap against 3
\r
525 set.applyPattern("[a-eg-mo-w]");
\r
526 set2.applyPattern("[d-q]");
\r
528 expectPairs(set, "aw");
\r
531 public void TestAPI() {
\r
533 UnicodeSet set = new UnicodeSet();
\r
534 if (!set.isEmpty() || set.getRangeCount() != 0) {
\r
535 errln("FAIL, set should be empty but isn't: " +
\r
539 // clear(), isEmpty()
\r
541 if (set.isEmpty()) {
\r
542 errln("FAIL, set shouldn't be empty but is: " +
\r
546 if (!set.isEmpty()) {
\r
547 errln("FAIL, set should be empty but isn't: " +
\r
553 if (set.size() != 0) {
\r
554 errln("FAIL, size should be 0, but is " + set.size() +
\r
558 if (set.size() != 1) {
\r
559 errln("FAIL, size should be 1, but is " + set.size() +
\r
563 if (set.size() != 10) {
\r
564 errln("FAIL, size should be 10, but is " + set.size() +
\r
569 if (set.size() != 0x110000) {
\r
570 errln("FAIL, size should be 0x110000, but is" + set.size());
\r
573 // contains(first, last)
\r
575 set.applyPattern("[A-Y 1-8 b-d l-y]");
\r
576 for (int i = 0; i<set.getRangeCount(); ++i) {
\r
577 int a = set.getRangeStart(i);
\r
578 int b = set.getRangeEnd(i);
\r
579 if (!set.contains(a, b)) {
\r
580 errln("FAIL, should contain " + (char)a + '-' + (char)b +
\r
581 " but doesn't: " + set);
\r
583 if (set.contains((char)(a-1), b)) {
\r
584 errln("FAIL, shouldn't contain " +
\r
585 (char)(a-1) + '-' + (char)b +
\r
586 " but does: " + set);
\r
588 if (set.contains(a, (char)(b+1))) {
\r
589 errln("FAIL, shouldn't contain " +
\r
590 (char)a + '-' + (char)(b+1) +
\r
591 " but does: " + set);
\r
595 // Ported InversionList test.
\r
596 UnicodeSet a = new UnicodeSet((char)3,(char)10);
\r
597 UnicodeSet b = new UnicodeSet((char)7,(char)15);
\r
598 UnicodeSet c = new UnicodeSet();
\r
600 logln("a [3-10]: " + a);
\r
601 logln("b [7-15]: " + b);
\r
602 c.set(a); c.addAll(b);
\r
603 UnicodeSet exp = new UnicodeSet((char)3,(char)15);
\r
604 if (c.equals(exp)) {
\r
605 logln("c.set(a).add(b): " + c);
\r
607 errln("FAIL: c.set(a).add(b) = " + c + ", expect " + exp);
\r
610 exp.set((char)0, (char)2);
\r
611 exp.add((char)16, UnicodeSet.MAX_VALUE);
\r
612 if (c.equals(exp)) {
\r
613 logln("c.complement(): " + c);
\r
615 errln(Utility.escape("FAIL: c.complement() = " + c + ", expect " + exp));
\r
618 exp.set((char)3, (char)15);
\r
619 if (c.equals(exp)) {
\r
620 logln("c.complement(): " + c);
\r
622 errln("FAIL: c.complement() = " + c + ", expect " + exp);
\r
624 c.set(a); c.complementAll(b);
\r
625 exp.set((char)3,(char)6);
\r
626 exp.add((char)11,(char) 15);
\r
627 if (c.equals(exp)) {
\r
628 logln("c.set(a).complement(b): " + c);
\r
630 errln("FAIL: c.set(a).complement(b) = " + c + ", expect " + exp);
\r
634 c = bitsToSet(setToBits(c));
\r
635 if (c.equals(exp)) {
\r
636 logln("bitsToSet(setToBits(c)): " + c);
\r
638 errln("FAIL: bitsToSet(setToBits(c)) = " + c + ", expect " + exp);
\r
641 // Additional tests for coverage JB#2118
\r
642 //UnicodeSet::complement(class UnicodeString const &)
\r
643 //UnicodeSet::complementAll(class UnicodeString const &)
\r
644 //UnicodeSet::containsNone(class UnicodeSet const &)
\r
645 //UnicodeSet::containsNone(long,long)
\r
646 //UnicodeSet::containsSome(class UnicodeSet const &)
\r
647 //UnicodeSet::containsSome(long,long)
\r
648 //UnicodeSet::removeAll(class UnicodeString const &)
\r
649 //UnicodeSet::retain(long)
\r
650 //UnicodeSet::retainAll(class UnicodeString const &)
\r
651 //UnicodeSet::serialize(unsigned short *,long,enum UErrorCode &)
\r
652 //UnicodeSetIterator::getString(void)
\r
654 set.complement("ab");
\r
655 exp.applyPattern("[{ab}]");
\r
656 if (!set.equals(exp)) { errln("FAIL: complement(\"ab\")"); return; }
\r
658 UnicodeSetIterator iset = new UnicodeSetIterator(set);
\r
659 if (!iset.next() || iset.codepoint != UnicodeSetIterator.IS_STRING) {
\r
660 errln("FAIL: UnicodeSetIterator.next/IS_STRING");
\r
661 } else if (!iset.string.equals("ab")) {
\r
662 errln("FAIL: UnicodeSetIterator.string");
\r
665 set.add((char)0x61, (char)0x7A);
\r
666 set.complementAll("alan");
\r
667 exp.applyPattern("[{ab}b-kmo-z]");
\r
668 if (!set.equals(exp)) { errln("FAIL: complementAll(\"alan\")"); return; }
\r
670 exp.applyPattern("[a-z]");
\r
671 if (set.containsNone(exp)) { errln("FAIL: containsNone(UnicodeSet)"); }
\r
672 if (!set.containsSome(exp)) { errln("FAIL: containsSome(UnicodeSet)"); }
\r
673 exp.applyPattern("[aln]");
\r
674 if (!set.containsNone(exp)) { errln("FAIL: containsNone(UnicodeSet)"); }
\r
675 if (set.containsSome(exp)) { errln("FAIL: containsSome(UnicodeSet)"); }
\r
677 if (set.containsNone((char)0x61, (char)0x7A)) {
\r
678 errln("FAIL: containsNone(char, char)");
\r
680 if (!set.containsSome((char)0x61, (char)0x7A)) {
\r
681 errln("FAIL: containsSome(char, char)");
\r
683 if (!set.containsNone((char)0x41, (char)0x5A)) {
\r
684 errln("FAIL: containsNone(char, char)");
\r
686 if (set.containsSome((char)0x41, (char)0x5A)) {
\r
687 errln("FAIL: containsSome(char, char)");
\r
690 set.removeAll("liu");
\r
691 exp.applyPattern("[{ab}b-hj-kmo-tv-z]");
\r
692 if (!set.equals(exp)) { errln("FAIL: removeAll(\"liu\")"); return; }
\r
694 set.retainAll("star");
\r
695 exp.applyPattern("[rst]");
\r
696 if (!set.equals(exp)) { errln("FAIL: retainAll(\"star\")"); return; }
\r
698 set.retain((char)0x73);
\r
699 exp.applyPattern("[s]");
\r
700 if (!set.equals(exp)) { errln("FAIL: retain('s')"); return; }
\r
702 // ICU 2.6 coverage tests
\r
703 // public final UnicodeSet retain(String s);
\r
704 // public final UnicodeSet remove(int c);
\r
705 // public final UnicodeSet remove(String s);
\r
706 // public int hashCode();
\r
707 set.applyPattern("[a-z{ab}{cd}]");
\r
709 exp.applyPattern("[{cd}]");
\r
710 if (!set.equals(exp)) { errln("FAIL: retain(\"cd\")"); return; }
\r
712 set.applyPattern("[a-z{ab}{cd}]");
\r
713 set.remove((char)0x63);
\r
714 exp.applyPattern("[abd-z{ab}{cd}]");
\r
715 if (!set.equals(exp)) { errln("FAIL: remove('c')"); return; }
\r
718 exp.applyPattern("[abd-z{ab}]");
\r
719 if (!set.equals(exp)) { errln("FAIL: remove(\"cd\")"); return; }
\r
721 if (set.hashCode() != exp.hashCode()) {
\r
722 errln("FAIL: hashCode() unequal");
\r
725 if (set.hashCode() == exp.hashCode()) {
\r
726 errln("FAIL: hashCode() equal");
\r
730 //Cover addAll(Collection) and addAllTo(Collection)
\r
731 // Seems that there is a bug in addAll(Collection) operation
\r
732 // Ram also add a similar test to UtilityTest.java
\r
733 logln("Testing addAll(Collection) ... ");
\r
734 String[] array = {"a", "b", "c", "de"};
\r
735 List list = Arrays.asList(array);
\r
736 Set aset = new HashSet(list);
\r
737 logln(" *** The source set's size is: " + aset.size());
\r
741 if (set.size() != aset.size()) {
\r
742 errln("FAIL: After addAll, the UnicodeSet size expected " + aset.size() +
\r
743 ", " + set.size() + " seen instead!");
\r
745 logln("OK: After addAll, the UnicodeSet size got " + set.size());
\r
748 List list2 = new ArrayList();
\r
749 set.addAllTo(list2);
\r
751 //verify the result
\r
752 log(" *** The elements are: ");
\r
753 String s = set.toPattern(true);
\r
755 Iterator myiter = list2.iterator();
\r
756 while(myiter.hasNext()) {
\r
757 log(myiter.next().toString() + " ");
\r
759 logln(""); // a new line
\r
764 public void TestStrings() {
\r
765 // Object[][] testList = {
\r
766 // {I_EQUALS, UnicodeSet.fromAll("abc"),
\r
767 // new UnicodeSet("[a-c]")},
\r
769 // {I_EQUALS, UnicodeSet.from("ch").add('a','z').add("ll"),
\r
770 // new UnicodeSet("[{ll}{ch}a-z]")},
\r
772 // {I_EQUALS, UnicodeSet.from("ab}c"),
\r
773 // new UnicodeSet("[{ab\\}c}]")},
\r
775 // {I_EQUALS, new UnicodeSet('a','z').add('A', 'Z').retain('M','m').complement('X'),
\r
776 // new UnicodeSet("[[a-zA-Z]&[M-m]-[X]]")},
\r
779 // for (int i = 0; i < testList.length; ++i) {
\r
780 // expectRelation(testList[i][0], testList[i][1], testList[i][2], "(" + i + ")");
\r
783 UnicodeSet[][] testList = {
\r
784 {UnicodeSet.fromAll("abc"),
\r
785 new UnicodeSet("[a-c]")},
\r
787 {UnicodeSet.from("ch").add('a','z').add("ll"),
\r
788 new UnicodeSet("[{ll}{ch}a-z]")},
\r
790 {UnicodeSet.from("ab}c"),
\r
791 new UnicodeSet("[{ab\\}c}]")},
\r
793 {new UnicodeSet('a','z').add('A', 'Z').retain('M','m').complement('X'),
\r
794 new UnicodeSet("[[a-zA-Z]&[M-m]-[X]]")},
\r
797 for (int i = 0; i < testList.length; ++i) {
\r
798 if (!testList[i][0].equals(testList[i][1])) {
\r
799 errln("FAIL: sets unequal; see source code (" + i + ")");
\r
804 static final Integer
\r
805 I_ANY = new Integer(SortedSetRelation.ANY),
\r
806 I_CONTAINS = new Integer(SortedSetRelation.CONTAINS),
\r
807 I_DISJOINT = new Integer(SortedSetRelation.DISJOINT),
\r
808 I_NO_B = new Integer(SortedSetRelation.NO_B),
\r
809 I_ISCONTAINED = new Integer(SortedSetRelation.ISCONTAINED),
\r
810 I_EQUALS = new Integer(SortedSetRelation.EQUALS),
\r
811 I_NO_A = new Integer(SortedSetRelation.NO_A),
\r
812 I_NONE = new Integer(SortedSetRelation.NONE);
\r
814 public void TestSetRelation() {
\r
816 String[] choices = {"a", "b", "cd", "ef"};
\r
817 int limit = 1 << choices.length;
\r
819 SortedSet iset = new TreeSet();
\r
820 SortedSet jset = new TreeSet();
\r
822 for (int i = 0; i < limit; ++i) {
\r
823 pick(i, choices, iset);
\r
824 for (int j = 0; j < limit; ++j) {
\r
825 pick(j, choices, jset);
\r
826 checkSetRelation(iset, jset, "(" + i + ")");
\r
831 public void TestSetSpeed() {
\r
832 // skip unless verbose
\r
833 if (!isVerbose()) return;
\r
839 public void SetSpeed2(int size) {
\r
841 SortedSet iset = new TreeSet();
\r
842 SortedSet jset = new TreeSet();
\r
844 for (int i = 0; i < size*2; i += 2) { // only even values
\r
845 iset.add(new Integer(i));
\r
846 jset.add(new Integer(i));
\r
849 int iterations = 1000000 / size;
\r
851 logln("Timing comparison of Java vs Utility");
\r
852 logln("For about " + size + " objects that are almost all the same.");
\r
854 CheckSpeed(iset, jset, "when a = b", iterations);
\r
856 iset.add(new Integer(size + 1)); // add odd value in middle
\r
858 CheckSpeed(iset, jset, "when a contains b", iterations);
\r
859 CheckSpeed(jset, iset, "when b contains a", iterations);
\r
861 jset.add(new Integer(size - 1)); // add different odd value in middle
\r
863 CheckSpeed(jset, iset, "when a, b are disjoint", iterations);
\r
866 void CheckSpeed(SortedSet iset, SortedSet jset, String message, int iterations) {
\r
867 CheckSpeed2(iset, jset, message, iterations);
\r
868 CheckSpeed3(iset, jset, message, iterations);
\r
871 void CheckSpeed2(SortedSet iset, SortedSet jset, String message, int iterations) {
\r
875 // make sure code is loaded:
\r
876 x = iset.containsAll(jset);
\r
877 y = SortedSetRelation.hasRelation(iset, SortedSetRelation.CONTAINS, jset);
\r
878 if (x != y) errln("FAIL contains comparison");
\r
880 double start = System.currentTimeMillis();
\r
881 for (int i = 0; i < iterations; ++i) {
\r
882 x |= iset.containsAll(jset);
\r
884 double middle = System.currentTimeMillis();
\r
885 for (int i = 0; i < iterations; ++i) {
\r
886 y |= SortedSetRelation.hasRelation(iset, SortedSetRelation.CONTAINS, jset);
\r
888 double end = System.currentTimeMillis();
\r
890 double jtime = (middle - start)/iterations;
\r
891 double utime = (end - middle)/iterations;
\r
893 NumberFormat nf = NumberFormat.getPercentInstance();
\r
894 logln("Test contains: " + message + ": Java: " + jtime
\r
895 + ", Utility: " + utime + ", u:j: " + nf.format(utime/jtime));
\r
898 void CheckSpeed3(SortedSet iset, SortedSet jset, String message, int iterations) {
\r
902 // make sure code is loaded:
\r
903 x = iset.equals(jset);
\r
904 y = SortedSetRelation.hasRelation(iset, SortedSetRelation.EQUALS, jset);
\r
905 if (x != y) errln("FAIL equality comparison");
\r
908 double start = System.currentTimeMillis();
\r
909 for (int i = 0; i < iterations; ++i) {
\r
910 x |= iset.equals(jset);
\r
912 double middle = System.currentTimeMillis();
\r
913 for (int i = 0; i < iterations; ++i) {
\r
914 y |= SortedSetRelation.hasRelation(iset, SortedSetRelation.EQUALS, jset);
\r
916 double end = System.currentTimeMillis();
\r
918 double jtime = (middle - start)/iterations;
\r
919 double utime = (end - middle)/iterations;
\r
921 NumberFormat nf = NumberFormat.getPercentInstance();
\r
922 logln("Test equals: " + message + ": Java: " + jtime
\r
923 + ", Utility: " + utime + ", u:j: " + nf.format(utime/jtime));
\r
926 void pick(int bits, Object[] examples, SortedSet output) {
\r
928 for (int k = 0; k < 32; ++k) {
\r
929 if (((1<<k) & bits) != 0) output.add(examples[k]);
\r
933 public static final String[] RELATION_NAME = {
\r
939 "is-disjoint_with",
\r
943 boolean dumbHasRelation(Collection A, int filter, Collection B) {
\r
944 Collection ab = new TreeSet(A);
\r
946 if (ab.size() > 0 && (filter & SortedSetRelation.A_AND_B) == 0) return false;
\r
948 // A - B size == A.size - A&B.size
\r
949 if (A.size() > ab.size() && (filter & SortedSetRelation.A_NOT_B) == 0) return false;
\r
951 // B - A size == B.size - A&B.size
\r
952 if (B.size() > ab.size() && (filter & SortedSetRelation.B_NOT_A) == 0) return false;
\r
958 void checkSetRelation(SortedSet a, SortedSet b, String message) {
\r
959 for (int i = 0; i < 8; ++i) {
\r
961 boolean hasRelation = SortedSetRelation.hasRelation(a, i, b);
\r
962 boolean dumbHasRelation = dumbHasRelation(a, i, b);
\r
964 logln(message + " " + hasRelation + ":\t" + a + "\t" + RELATION_NAME[i] + "\t" + b);
\r
966 if (hasRelation != dumbHasRelation) {
\r
968 message + " " + dumbHasRelation + ":\t" + a + "\t" + RELATION_NAME[i] + "\t" + b);
\r
975 * Test the [:Latin:] syntax.
\r
977 public void TestScriptSet() {
\r
979 expectContainment("[:Latin:]", "aA", CharsToUnicodeString("\\u0391\\u03B1"));
\r
981 expectContainment("[:Greek:]", CharsToUnicodeString("\\u0391\\u03B1"), "aA");
\r
983 /* Jitterbug 1423 */
\r
984 expectContainment("[[:Common:][:Inherited:]]", CharsToUnicodeString("\\U00003099\\U0001D169\\u0000"), "aA");
\r
989 * Test property-based set patterns: \p{...}, \P{...}, [:...:] and \N{...}.
\r
991 public void TestPropertySet() {
\r
993 // Pattern, Chars IN, Chars NOT in
\r
1003 "\\P{ GENERAL Category = upper case letter }",
\r
1007 // Combining class: @since ICU 2.2
\r
1008 // Check both symbolic and numeric
\r
1013 "\\p{Canonical Combining Class = 11}",
\r
1017 "[:c c c = iota subscript :]",
\r
1021 // Bidi class: @since ICU 2.2
\r
1022 "\\p{bidiclass=lefttoright}",
\r
1026 // Binary properties: @since ICU 2.2
\r
1027 "\\p{ideographic}",
\r
1032 "q)*(", // )(and * were removed from math in Unicode 4.0.1
\r
1035 // JB#1767 \N{}, \p{ASCII}
\r
1037 "abc\u0000\u007F",
\r
1040 "[\\N{ latin small letter a }[:name= latin small letter z:]]",
\r
1056 "\u03D8\u03D9", // 3.2
\r
1059 "\\u1800\\u3400\\U0002f800",
\r
1060 "\\u0220\\u034f\\u30ff\\u33ff\\ufe73\\U00010000\\U00050000",
\r
1062 // JB#2350: Case_Sensitive
\r
1063 "[:Case Sensitive:]",
\r
1064 "A\u1FFC\\U00010410",
\r
1065 ";\u00B4\\U00010500",
\r
1068 // Regex compatibility test
\r
1069 "[-b]", // leading '-' is literal
\r
1073 "[^-b]", // leading '-' is literal
\r
1077 "[b-]", // trailing '-' is literal
\r
1081 "[^b-]", // trailing '-' is literal
\r
1085 "[a-b-]", // trailing '-' is literal
\r
1089 "[[a-q]&[p-z]-]", // trailing '-' is literal
\r
1093 "[\\s|\\)|:|$|\\>]", // from regex tests
\r
1097 "[\uDC00cd]", // JB#2906: isolated trail at start
\r
1099 "ab\uD800\\U00010000",
\r
1101 "[ab\uD800]", // JB#2906: isolated lead at end
\r
1103 "cd\uDC00\\U00010000",
\r
1105 "[ab\uD800cd]", // JB#2906: isolated lead in middle
\r
1107 "ef\uDC00\\U00010000",
\r
1109 "[ab\uDC00cd]", // JB#2906: isolated trail in middle
\r
1111 "ef\uD800\\U00010000",
\r
1113 "[:^lccc=0:]", // Lead canonical class
\r
1115 "abcd\u00c0\u00c5",
\r
1117 "[:^tccc=0:]", // Trail canonical class
\r
1118 "\u0300\u0301\u00c0\u00c5",
\r
1121 "[[:^lccc=0:][:^tccc=0:]]", // Lead and trail canonical class
\r
1122 "\u0300\u0301\u00c0\u00c5",
\r
1125 "[[:^lccc=0:]-[:^tccc=0:]]", // Stuff that starts with an accent but ends with a base (none right now)
\r
1127 "abcd\u0300\u0301\u00c0\u00c5",
\r
1129 "[[:ccc=0:]-[:lccc=0:]-[:tccc=0:]]", // Weirdos. Complete canonical class is zero, but both lead and trail are not
\r
1130 "\u0F73\u0F75\u0F81",
\r
1131 "abcd\u0300\u0301\u00c0\u00c5",
\r
1134 "A\\uE000\\uF8FF\\uFDC7\\U00010000\\U0010FFFD",
\r
1135 "\\u0888\\uFDD3\\uFFFE\\U00050005",
\r
1139 for (int i=0; i<DATA.length; i+=3) {
\r
1140 expectContainment(DATA[i], DATA[i+1], DATA[i+2]);
\r
1144 public void TestUnicodeSetStrings() {
\r
1145 UnicodeSet uset = new UnicodeSet("[a{bc}{cd}pqr\u0000]");
\r
1146 logln(uset + " ~ " + uset.getRegexEquivalent());
\r
1147 String[][] testStrings = {{"x", "none"},
\r
1155 {"dccbx", "none"},
\r
1157 for (int i = 0; i < testStrings.length; ++i) {
\r
1158 check(uset, testStrings[i][0], testStrings[i][1]);
\r
1163 private void check(UnicodeSet uset, String string, String desiredStatus) {
\r
1164 boolean shouldContainAll = desiredStatus.equals("all");
\r
1165 boolean shouldContainNone = desiredStatus.equals("none");
\r
1166 if (uset.containsAll(string) != shouldContainAll) {
\r
1167 errln("containsAll " + string + " should be " + shouldContainAll);
\r
1169 logln("containsAll " + string + " = " + shouldContainAll);
\r
1171 if (uset.containsNone(string) != shouldContainNone) {
\r
1172 errln("containsNone " + string + " should be " + shouldContainNone);
\r
1174 logln("containsNone " + string + " = " + shouldContainNone);
\r
1179 * Test cloning of UnicodeSet
\r
1181 public void TestClone() {
\r
1182 UnicodeSet s = new UnicodeSet("[abcxyz]");
\r
1183 UnicodeSet t = (UnicodeSet) s.clone();
\r
1184 expectContainment(t, "abc", "def");
\r
1188 * Test the indexOf() and charAt() methods.
\r
1190 public void TestIndexOf() {
\r
1191 UnicodeSet set = new UnicodeSet("[a-cx-y3578]");
\r
1192 for (int i=0; i<set.size(); ++i) {
\r
1193 int c = set.charAt(i);
\r
1194 if (set.indexOf(c) != i) {
\r
1195 errln("FAIL: charAt(" + i + ") = " + c +
\r
1196 " => indexOf() => " + set.indexOf(c));
\r
1199 int c = set.charAt(set.size());
\r
1201 errln("FAIL: charAt(<out of range>) = " +
\r
1202 Utility.escape(String.valueOf(c)));
\r
1204 int j = set.indexOf('q');
\r
1206 errln("FAIL: indexOf('q') = " + j);
\r
1210 public void TestContainsString() {
\r
1211 UnicodeSet x = new UnicodeSet("[a{bc}]");
\r
1212 if (x.contains("abc")) errln("FAIL");
\r
1215 public void TestExhaustive() {
\r
1216 // exhaustive tests. Simulate UnicodeSets with integers.
\r
1217 // That gives us very solid tests (except for large memory tests).
\r
1219 char limit = (char)128;
\r
1221 for (char i = 0; i < limit; ++i) {
\r
1222 logln("Testing " + i + ", " + bitsToSet(i));
\r
1223 _testComplement(i);
\r
1225 // AS LONG AS WE ARE HERE, check roundtrip
\r
1226 checkRoundTrip(bitsToSet(i));
\r
1228 for (char j = 0; j < limit; ++j) {
\r
1238 * Make sure each script name and abbreviated name can be used
\r
1239 * to construct a UnicodeSet.
\r
1241 public void TestScriptNames() {
\r
1242 for (int i=0; i<UScript.CODE_LIMIT; ++i) {
\r
1243 for (int j=0; j<2; ++j) {
\r
1247 (j==0) ? UScript.getName(i) : UScript.getShortName(i);
\r
1248 pat = "[:" + name + ":]";
\r
1249 UnicodeSet set = new UnicodeSet(pat);
\r
1250 logln("Ok: " + pat + " -> " + set.toPattern(false));
\r
1251 } catch (IllegalArgumentException e) {
\r
1252 if (pat.length() == 0) {
\r
1253 errln("FAIL (in UScript): No name for script " + i);
\r
1255 errln("FAIL: Couldn't create " + pat);
\r
1263 * Test closure API.
\r
1265 public void TestCloseOver() {
\r
1266 String CASE = String.valueOf(UnicodeSet.CASE);
\r
1268 // selector, input, output
\r
1270 "[aq\u00DF{Bc}{bC}{Fi}]",
\r
1271 "[aAqQ\u00DF\u1E9E\uFB01{ss}{bc}{fi}]", // U+1E9E LATIN CAPITAL LETTER SHARP S is new in Unicode 5.1
\r
1274 "[\u01F1]", // 'DZ'
\r
1275 "[\u01F1\u01F2\u01F3]",
\r
1279 "[\u1FB4{\u03AC\u03B9}]",
\r
1286 "[a-z]","[A-Za-z\u017F\u212A]",
\r
1288 "[abc]","[A-Ca-c]",
\r
1290 "[ABC]","[A-Ca-c]",
\r
1293 UnicodeSet s = new UnicodeSet();
\r
1294 UnicodeSet t = new UnicodeSet();
\r
1295 for (int i=0; i<DATA.length; i+=3) {
\r
1296 int selector = Integer.parseInt(DATA[i]);
\r
1297 String pat = DATA[i+1];
\r
1298 String exp = DATA[i+2];
\r
1299 s.applyPattern(pat);
\r
1300 s.closeOver(selector);
\r
1301 t.applyPattern(exp);
\r
1302 if (s.equals(t)) {
\r
1303 logln("Ok: " + pat + ".closeOver(" + selector + ") => " + exp);
\r
1305 errln("FAIL: " + pat + ".closeOver(" + selector + ") => " +
\r
1306 s.toPattern(true) + ", expected " + exp);
\r
1310 // Test the pattern API
\r
1311 s.applyPattern("[abc]", UnicodeSet.CASE);
\r
1312 expectContainment(s, "abcABC", "defDEF");
\r
1313 s = new UnicodeSet("[^abc]", UnicodeSet.CASE);
\r
1314 expectContainment(s, "defDEF", "abcABC");
\r
1317 public void TestEscapePattern() {
\r
1318 // The following pattern must contain at least one range "c-d"
\r
1319 // for which isRuleWhiteSpace(c) or isRuleWhiteSpace(d) is true.
\r
1321 "[\\uFEFF \\u200E-\\u20FF \\uFFF9-\\uFFFC \\U0001D173-\\U0001D17A \\U000F0000-\\U000FFFFD ]";
\r
1323 "[\\u200E-\\u20FF\\uFEFF\\uFFF9-\\uFFFC\\U0001D173-\\U0001D17A\\U000F0000-\\U000FFFFD]";
\r
1324 // We test this with two passes; in the second pass we
\r
1325 // pre-unescape the pattern. Since U+200E is rule whitespace,
\r
1326 // this fails -- which is what we expect.
\r
1327 for (int pass=1; pass<=2; ++pass) {
\r
1328 String pat = pattern;
\r
1330 pat = Utility.unescape(pat);
\r
1332 // Pattern is only good for pass 1
\r
1333 boolean isPatternValid = (pass==1);
\r
1335 UnicodeSet set = null;
\r
1337 set = new UnicodeSet(pat);
\r
1338 } catch (IllegalArgumentException e) {
\r
1341 if ((set != null) != isPatternValid){
\r
1342 errln("FAIL: applyPattern(" +
\r
1343 Utility.escape(pat) + ") => " + set);
\r
1346 if (set == null) {
\r
1349 if (set.contains((char)0x0644)){
\r
1350 errln("FAIL: " + Utility.escape(pat) + " contains(U+0664)");
\r
1353 String newpat = set.toPattern(true);
\r
1354 if (newpat.equals(exp)) {
\r
1355 logln(Utility.escape(pat) + " => " + newpat);
\r
1357 errln("FAIL: " + Utility.escape(pat) + " => " + newpat);
\r
1360 for (int i=0; i<set.getRangeCount(); ++i) {
\r
1361 StringBuffer str = new StringBuffer("Range ");
\r
1362 str.append((char)(0x30 + i))
\r
1364 UTF16.append(str, set.getRangeStart(i));
\r
1365 str.append(" - ");
\r
1366 UTF16.append(str, set.getRangeEnd(i));
\r
1367 String s = Utility.escape(str.toString() + " (" + set.getRangeStart(i) + " - " +
\r
1368 set.getRangeEnd(i) + ")");
\r
1369 if (set.getRangeStart(i) < 0) {
\r
1370 errln("FAIL: " + s);
\r
1378 public void TestSymbolTable() {
\r
1379 // Multiple test cases can be set up here. Each test case
\r
1380 // is terminated by null:
\r
1381 // var, value, var, value,..., input pat., exp. output pat., null
\r
1383 "us", "a-z", "[0-1$us]", "[0-1a-z]", null,
\r
1384 "us", "[a-z]", "[0-1$us]", "[0-1[a-z]]", null,
\r
1385 "us", "\\[a\\-z\\]", "[0-1$us]", "[-01\\[\\]az]", null
\r
1388 for (int i=0; i<DATA.length; ++i) {
\r
1389 TokenSymbolTable sym = new TokenSymbolTable();
\r
1391 // Set up variables
\r
1392 while (DATA[i+2] != null) {
\r
1393 sym.add(DATA[i], DATA[i+1]);
\r
1397 // Input pattern and expected output pattern
\r
1398 String inpat = DATA[i], exppat = DATA[i+1];
\r
1401 ParsePosition pos = new ParsePosition(0);
\r
1402 UnicodeSet us = new UnicodeSet(inpat, pos, sym);
\r
1405 if (pos.getIndex() != inpat.length()) {
\r
1406 errln("Failed to read to end of string \""
\r
1407 + inpat + "\": read to "
\r
1408 + pos.getIndex() + ", length is "
\r
1409 + inpat.length());
\r
1412 UnicodeSet us2 = new UnicodeSet(exppat);
\r
1413 if (!us.equals(us2)) {
\r
1414 errln("Failed, got " + us + ", expected " + us2);
\r
1416 logln("Ok, got " + us);
\r
1419 //cover UnicodeSet(String,ParsePosition,SymbolTable,int)
\r
1420 ParsePosition inpos = new ParsePosition(0);
\r
1421 UnicodeSet inSet = new UnicodeSet(inpat, inpos, sym, UnicodeSet.IGNORE_SPACE);
\r
1422 UnicodeSet expSet = new UnicodeSet(exppat);
\r
1423 if (!inSet.equals(expSet)) {
\r
1424 errln("FAIL: Failed, got " + inSet + ", expected " + expSet);
\r
1426 logln("OK: got " + inSet);
\r
1432 * Test that Posix style character classes [:digit:], etc.
\r
1433 * have the Unicode definitions from TR 18.
\r
1435 public void TestPosixClasses() {
\r
1436 expectEqual("POSIX alpha", "[:alpha:]", "\\p{Alphabetic}");
\r
1437 expectEqual("POSIX lower", "[:lower:]", "\\p{lowercase}");
\r
1438 expectEqual("POSIX upper", "[:upper:]", "\\p{Uppercase}");
\r
1439 expectEqual("POSIX punct", "[:punct:]", "\\p{gc=Punctuation}");
\r
1440 expectEqual("POSIX digit", "[:digit:]", "\\p{gc=DecimalNumber}");
\r
1441 expectEqual("POSIX xdigit", "[:xdigit:]", "[\\p{DecimalNumber}\\p{HexDigit}]");
\r
1442 expectEqual("POSIX alnum", "[:alnum:]", "[\\p{Alphabetic}\\p{DecimalNumber}]");
\r
1443 expectEqual("POSIX space", "[:space:]", "\\p{Whitespace}");
\r
1444 expectEqual("POSIX blank", "[:blank:]", "[\\p{Whitespace}-[\\u000a\\u000B\\u000c\\u000d\\u0085\\p{LineSeparator}\\p{ParagraphSeparator}]]");
\r
1445 expectEqual("POSIX cntrl", "[:cntrl:]", "\\p{Control}");
\r
1446 expectEqual("POSIX graph", "[:graph:]", "[^\\p{Whitespace}\\p{Control}\\p{Surrogate}\\p{Unassigned}]");
\r
1447 expectEqual("POSIX print", "[:print:]", "[[:graph:][:blank:]-[\\p{Control}]]");
\r
1450 public void TestHangulSyllable() {
\r
1451 final UnicodeSet lvt = new UnicodeSet("[:Hangul_Syllable_Type=LVT_Syllable:]");
\r
1452 assertNotEquals("LVT count", new UnicodeSet(), lvt);
\r
1453 logln(lvt + ": " + lvt.size());
\r
1454 final UnicodeSet lv = new UnicodeSet("[:Hangul_Syllable_Type=LV_Syllable:]");
\r
1455 assertNotEquals("LV count", new UnicodeSet(), lv);
\r
1456 logln(lv + ": " + lv.size());
\r
1460 * Test that frozen classes disallow changes. For 4217
\r
1462 public void TestFrozen() {
\r
1463 UnicodeSet test = new UnicodeSet("[[:whitespace:]A]");
\r
1465 checkModification(test, true);
\r
1466 checkModification(test, false);
\r
1470 * Test Generic support
\r
1472 public void TestGenerics() {
\r
1473 UnicodeSet set1 = new UnicodeSet("[a-b d-g {ch} {zh}]").freeze();
\r
1474 UnicodeSet set2 = new UnicodeSet("[e-f {ch}]").freeze();
\r
1475 UnicodeSet set3 = new UnicodeSet("[d m-n {dh}]").freeze();
\r
1476 // A useful range of sets for testing, including both characters and strings
\r
1477 // set 1 contains set2
\r
1478 // set 1 overlaps with set 3
\r
1479 // set 2 is disjoint with set 3
\r
1481 //public Iterator<String> iterator() {
\r
1483 ArrayList<String> oldList = new ArrayList<String>();
\r
1484 for (UnicodeSetIterator it = new UnicodeSetIterator(set1); it.next();) {
\r
1485 oldList.add(it.getString());
\r
1488 ArrayList<String> list1 = new ArrayList<String>();
\r
1489 for (String s : set1) {
\r
1492 assertEquals("iteration test", oldList, list1);
\r
1494 //addAllTo(Iterable<T>, U)
\r
1496 set1.addAllTo(list1);
\r
1497 assertEquals("iteration test", oldList, list1);
\r
1499 list1 = set1.addAllTo(new ArrayList<String>());
\r
1500 assertEquals("addAllTo", oldList, list1);
\r
1502 ArrayList<String> list2 = set2.addAllTo(new ArrayList<String>());
\r
1503 ArrayList<String> list3 = set3.addAllTo(new ArrayList<String>());
\r
1505 // put them into different order, to check that order doesn't matter
\r
1506 TreeSet sorted1 = set1.addAllTo(new TreeSet<String>());
\r
1507 TreeSet sorted2 = set2.addAllTo(new TreeSet<String>());
\r
1508 TreeSet sorted3 = set3.addAllTo(new TreeSet<String>());
\r
1510 //containsAll(Collection<String> collection)
\r
1511 assertTrue("containsAll", set1.containsAll(list1));
\r
1512 assertTrue("containsAll", set1.containsAll(sorted1));
\r
1513 assertTrue("containsAll", set1.containsAll(list2));
\r
1514 assertTrue("containsAll", set1.containsAll(sorted2));
\r
1515 assertFalse("containsAll", set1.containsAll(list3));
\r
1516 assertFalse("containsAll", set1.containsAll(sorted3));
\r
1517 assertFalse("containsAll", set2.containsAll(list3));
\r
1518 assertFalse("containsAll", set2.containsAll(sorted3));
\r
1520 //containsSome(Collection<String>)
\r
1521 assertTrue("containsSome", set1.containsSome(list1));
\r
1522 assertTrue("containsSome", set1.containsSome(sorted1));
\r
1523 assertTrue("containsSome", set1.containsSome(list2));
\r
1524 assertTrue("containsSome", set1.containsSome(sorted2));
\r
1525 assertTrue("containsSome", set1.containsSome(list3));
\r
1526 assertTrue("containsSome", set1.containsSome(sorted3));
\r
1527 assertFalse("containsSome", set2.containsSome(list3));
\r
1528 assertFalse("containsSome", set2.containsSome(sorted3));
\r
1530 //containsNone(Collection<String>)
\r
1531 assertFalse("containsNone", set1.containsNone(list1));
\r
1532 assertFalse("containsNone", set1.containsNone(sorted1));
\r
1533 assertFalse("containsNone", set1.containsNone(list2));
\r
1534 assertFalse("containsNone", set1.containsNone(sorted2));
\r
1535 assertFalse("containsNone", set1.containsNone(list3));
\r
1536 assertFalse("containsNone", set1.containsNone(sorted3));
\r
1537 assertTrue("containsNone", set2.containsNone(list3));
\r
1538 assertTrue("containsNone", set2.containsNone(sorted3));
\r
1540 //addAll(String...)
\r
1541 UnicodeSet other3 = new UnicodeSet().addAll("d", "m", "n", "dh");
\r
1542 assertEquals("addAll", set3, other3);
\r
1544 //removeAll(Collection<String>)
\r
1545 UnicodeSet mod1 = new UnicodeSet(set1).removeAll(set2);
\r
1546 UnicodeSet mod2 = new UnicodeSet(set1).removeAll(list2);
\r
1547 assertEquals("remove all", mod1, mod2);
\r
1549 //retainAll(Collection<String>)
\r
1550 mod1 = new UnicodeSet(set1).retainAll(set2);
\r
1551 mod2 = new UnicodeSet(set1).retainAll(set2.addAllTo(new LinkedHashSet<String>()));
\r
1552 assertEquals("remove all", mod1, mod2);
\r
1555 public void TestComparison() {
\r
1556 UnicodeSet set1 = new UnicodeSet("[a-b d-g {ch} {zh}]").freeze();
\r
1557 UnicodeSet set2 = new UnicodeSet("[c-e {ch}]").freeze();
\r
1558 UnicodeSet set3 = new UnicodeSet("[d m-n z {dh}]").freeze();
\r
1560 //compareTo(UnicodeSet)
\r
1561 // do indirectly, by sorting
\r
1562 List<UnicodeSet> unsorted = Arrays.asList(set3, set2, set1);
\r
1563 List<UnicodeSet> goalShortest = Arrays.asList(set2, set3, set1);
\r
1564 List<UnicodeSet> goalLongest = Arrays.asList(set1, set3, set2);
\r
1565 List<UnicodeSet> goalLex = Arrays.asList(set1, set2, set3);
\r
1567 List<UnicodeSet> sorted = new ArrayList(new TreeSet<UnicodeSet>(unsorted));
\r
1568 assertNotEquals("compareTo-shorter-first", unsorted, sorted);
\r
1569 assertEquals("compareTo-shorter-first", goalShortest, sorted);
\r
1571 TreeSet<UnicodeSet> sorted1 = new TreeSet<UnicodeSet>(new Comparator<UnicodeSet>(){
\r
1572 public int compare(UnicodeSet o1, UnicodeSet o2) {
\r
1573 // Order the sets using the LONGER_FIRST comparison style
\r
1574 return o1.compareTo(o2, ComparisonStyle.LONGER_FIRST);
\r
1576 sorted1.addAll(unsorted);
\r
1577 sorted = new ArrayList(sorted1);
\r
1578 assertNotEquals("compareTo-longer-first", unsorted, sorted);
\r
1579 assertEquals("compareTo-longer-first", goalLongest, sorted);
\r
1581 sorted1 = new TreeSet<UnicodeSet>(new Comparator<UnicodeSet>(){
\r
1582 public int compare(UnicodeSet o1, UnicodeSet o2) {
\r
1583 // Order the sets using the LEXICOGRAPHIC comparison style
\r
1584 return o1.compareTo(o2, ComparisonStyle.LEXICOGRAPHIC);
\r
1586 sorted1.addAll(unsorted);
\r
1587 sorted = new ArrayList(sorted1);
\r
1588 assertNotEquals("compareTo-lex", unsorted, sorted);
\r
1589 assertEquals("compareTo-lex", goalLex, sorted);
\r
1591 //compare(String, int)
\r
1592 // make a list of interesting combinations
\r
1593 List<String> sources = Arrays.asList("\u0000", "a", "b", "\uD7FF", "\uD800", "\uDBFF", "\uDC00", "\uDFFF", "\uE000", "\uFFFD", "\uFFFF");
\r
1594 TreeSet<String> target = new TreeSet<String>();
\r
1595 for (String s : sources) {
\r
1597 for (String t : sources) {
\r
1598 target.add(s + t);
\r
1599 for (String u : sources) {
\r
1600 target.add(s + t + u);
\r
1604 // now compare all the combinations. If any of them is a code point, use it.
\r
1605 for (String last : target) {
\r
1606 for (String curr : target) {
\r
1607 int lastCount = Character.codePointCount(last, 0, last.length());
\r
1608 int currCount = Character.codePointCount(curr, 0, curr.length());
\r
1610 if (lastCount == 1) {
\r
1611 comparison = UnicodeSet.compare(last.codePointAt(0), curr);
\r
1612 } else if (currCount == 1) {
\r
1613 comparison = UnicodeSet.compare(last, curr.codePointAt(0));
\r
1617 if (comparison != last.compareTo(curr)) {
\r
1618 // repeat for debugging
\r
1619 if (lastCount == 1) {
\r
1620 comparison = UnicodeSet.compare(last.codePointAt(0), curr);
\r
1621 } else if (currCount == 1) {
\r
1622 comparison = UnicodeSet.compare(last, curr.codePointAt(0));
\r
1624 errln("Failure in comparing " + last + " & " + curr);
\r
1629 //compare(Iterable<T>, Iterable<T>)
\r
1631 List<String> test1 = new ArrayList<String>(max);
\r
1632 List<String> test2 = new ArrayList<String>(max);
\r
1633 for (int i = 0; i <= max; ++i) {
\r
1634 test1.add("a" + i);
\r
1635 test2.add("a" + (max - i)); // add in reverse order
\r
1637 assertNotEquals("compare iterable test", test1, test2);
\r
1638 TreeSet<CharSequence> sortedTest1 = new TreeSet<CharSequence>(test1);
\r
1639 TreeSet<CharSequence> sortedTest2 = new TreeSet<CharSequence>(test2);
\r
1640 assertEquals("compare iterable test", sortedTest1, sortedTest2);
\r
1643 public void TestRangeConstructor() {
\r
1644 UnicodeSet w = new UnicodeSet().addAll(3,5);
\r
1645 UnicodeSet s = new UnicodeSet(3,5);
\r
1646 assertEquals("new constructor", w, s);
\r
1648 w = new UnicodeSet().addAll(3,5).addAll(7,7);
\r
1649 UnicodeSet t = new UnicodeSet(3,5, 7,7);
\r
1650 assertEquals("new constructor", w, t);
\r
1651 // check to make sure right exceptions are thrown
\r
1652 Class expected = IllegalArgumentException.class;
\r
1657 @SuppressWarnings("unused")
\r
1658 UnicodeSet u = new UnicodeSet(5);
\r
1659 } catch (IllegalArgumentException e) {
\r
1660 actual = e.getClass();
\r
1662 assertEquals("exception if odd", expected, actual);
\r
1666 @SuppressWarnings("unused")
\r
1667 UnicodeSet u = new UnicodeSet(3, 2, 7, 9);
\r
1668 } catch (IllegalArgumentException e) {
\r
1669 actual = e.getClass();
\r
1671 assertEquals("exception for start/end problem", expected, actual);
\r
1675 @SuppressWarnings("unused")
\r
1676 UnicodeSet u = new UnicodeSet(3, 5, 6, 9);
\r
1677 } catch (IllegalArgumentException e) {
\r
1678 actual = e.getClass();
\r
1680 assertEquals("exception for end/start problem", expected, actual);
\r
1682 CheckRangeSpeed(10000, new UnicodeSet("[:whitespace:]"));
\r
1683 CheckRangeSpeed(1000, new UnicodeSet("[:letter:]"));
\r
1687 * @param iterations
\r
1690 private void CheckRangeSpeed(int iterations, UnicodeSet testSet) {
\r
1691 testSet.complement().complement();
\r
1692 String testPattern = testSet.toString();
\r
1693 // fill a set of pairs from the pattern
\r
1694 int[] pairs = new int[testSet.getRangeCount()*2];
\r
1696 for (UnicodeSetIterator it = new UnicodeSetIterator(testSet); it.nextRange();) {
\r
1697 pairs[j++] = it.codepoint;
\r
1698 pairs[j++] = it.codepointEnd;
\r
1700 UnicodeSet fromRange = new UnicodeSet(testSet);
\r
1701 assertEquals("from range vs pattern", testSet, fromRange);
\r
1703 double start = System.currentTimeMillis();
\r
1704 for (int i = 0; i < iterations; ++i) {
\r
1705 fromRange = new UnicodeSet(testSet);
\r
1707 double middle = System.currentTimeMillis();
\r
1708 for (int i = 0; i < iterations; ++i) {
\r
1709 new UnicodeSet(testPattern);
\r
1711 double end = System.currentTimeMillis();
\r
1713 double rangeConstructorTime = (middle - start)/iterations;
\r
1714 double patternConstructorTime = (end - middle)/iterations;
\r
1715 String message = "Range constructor:\t" + rangeConstructorTime + ";\tPattern constructor:\t" + patternConstructorTime + "\t\t"
\r
1716 + percent.format(rangeConstructorTime/patternConstructorTime-1);
\r
1717 if (rangeConstructorTime < 2*patternConstructorTime) {
\r
1724 NumberFormat percent = NumberFormat.getPercentInstance();
\r
1726 percent.setMaximumFractionDigits(2);
\r
1728 // ****************************************
\r
1730 // ****************************************
\r
1732 public void checkModification(UnicodeSet original, boolean isFrozen) {
\r
1734 for (int i = 0; ;++i) {
\r
1735 UnicodeSet test = (UnicodeSet) (isFrozen ? original.clone() : original.cloneAsThawed());
\r
1736 boolean gotException = true;
\r
1737 boolean checkEquals = true;
\r
1740 case 0: test.add(0); break;
\r
1741 case 1: test.add(0,1); break;
\r
1742 case 2: test.add("a"); break;
\r
1743 case 3: List a = new ArrayList(); a.add("a"); test.addAll(a); break;
\r
1744 case 4: test.addAll("ab"); break;
\r
1745 case 5: test.addAll(new UnicodeSet("[ab]")); break;
\r
1746 case 6: test.applyIntPropertyValue(0,0); break;
\r
1747 case 7: test.applyPattern("[ab]"); break;
\r
1748 case 8: test.applyPattern("[ab]", true); break;
\r
1749 case 9: test.applyPattern("[ab]", 0); break;
\r
1750 case 10: test.applyPropertyAlias("hex","true"); break;
\r
1751 case 11: test.applyPropertyAlias("hex", "true", null); break;
\r
1752 case 12: test.closeOver(UnicodeSet.CASE); break;
\r
1753 case 13: test.compact(); checkEquals = false; break;
\r
1754 case 14: test.complement(0); break;
\r
1755 case 15: test.complement(0,0); break;
\r
1756 case 16: test.complement("ab"); break;
\r
1757 case 17: test.complementAll("ab"); break;
\r
1758 case 18: test.complementAll(new UnicodeSet("[ab]")); break;
\r
1759 case 19: test.remove(' '); break;
\r
1760 case 20: test.remove(' ','a'); break;
\r
1761 case 21: test.remove(" "); break;
\r
1762 case 22: test.removeAll(" a"); break;
\r
1763 case 23: test.removeAll(new UnicodeSet("[\\ a]")); break;
\r
1764 case 24: test.retain(' '); break;
\r
1765 case 25: test.retain(' ','a'); break;
\r
1766 case 26: test.retain(" "); break;
\r
1767 case 27: test.retainAll(" a"); break;
\r
1768 case 28: test.retainAll(new UnicodeSet("[\\ a]")); break;
\r
1769 case 29: test.set(0,1); break;
\r
1770 case 30: test.set(new UnicodeSet("[ab]")); break;
\r
1772 default: continue main; // so we don't keep having to change the endpoint, and gaps are not skipped.
\r
1775 gotException = false;
\r
1776 } catch (UnsupportedOperationException e) {
\r
1779 if (isFrozen && !gotException) errln(i + ") attempt to modify frozen object didn't result in an exception");
\r
1780 if (!isFrozen && gotException) errln(i + ") attempt to modify thawed object did result in an exception");
\r
1781 if (checkEquals) {
\r
1782 if (test.equals(original)) {
\r
1783 if (!isFrozen) errln(i + ") attempt to modify thawed object didn't change the object");
\r
1784 } else { // unequal
\r
1785 if (isFrozen) errln(i + ") attempt to modify frozen object changed the object");
\r
1791 // The following code block is commented out to eliminate PrettyPrinter dependencies
\r
1793 // String[] prettyData = {
\r
1794 // "[\\uD7DE-\\uD90C \\uDCB5-\\uDD9F]", // special case
\r
1796 // "[:whitespace:]",
\r
1797 // "[:linebreak=AL:]",
\r
1800 // public void TestPrettyPrinting() {
\r
1802 // PrettyPrinter pp = new PrettyPrinter();
\r
1805 // for (; i < prettyData.length; ++i) {
\r
1806 // UnicodeSet test = new UnicodeSet(prettyData[i]);
\r
1807 // checkPrettySet(pp, i, test);
\r
1809 // Random random = new Random(0);
\r
1810 // UnicodeSet test = new UnicodeSet();
\r
1812 // // To keep runtimes under control, make the number of random test cases
\r
1813 // // to try depend on the test framework exhaustive setting.
\r
1814 // // params.inclusions = 5: default exhaustive value
\r
1815 // // params.inclusions = 10: max exhaustive value.
\r
1816 // int iterations = 50;
\r
1817 // if (params.inclusion > 5) {
\r
1818 // iterations = (params.inclusion-5) * 200;
\r
1820 // for (; i < iterations; ++i) {
\r
1821 // double start = random.nextGaussian() * 0x10000;
\r
1822 // if (start < 0) start = - start;
\r
1823 // if (start > 0x10FFFF) {
\r
1824 // start = 0x10FFFF;
\r
1826 // double end = random.nextGaussian() * 0x100;
\r
1827 // if (end < 0) end = -end;
\r
1828 // end = start + end;
\r
1829 // if (end > 0x10FFFF) {
\r
1830 // end = 0x10FFFF;
\r
1832 // test.complement((int)start, (int)end);
\r
1833 // checkPrettySet(pp, i, test);
\r
1835 // }catch(RuntimeException ex){
\r
1836 // warnln("Could not load Collator");
\r
1840 // private void checkPrettySet(PrettyPrinter pp, int i, UnicodeSet test) {
\r
1841 // String pretty = pp.toPattern(test);
\r
1842 // UnicodeSet retry = new UnicodeSet(pretty);
\r
1843 // if (!test.equals(retry)) {
\r
1844 // errln(i + ". Failed test: " + test + " != " + pretty);
\r
1846 // logln(i + ". Worked for " + truncate(test.toString()) + " => " + truncate(pretty));
\r
1850 // private String truncate(String string) {
\r
1851 // if (string.length() <= 100) return string;
\r
1852 // return string.substring(0,97) + "...";
\r
1855 public class TokenSymbolTable implements SymbolTable {
\r
1856 HashMap contents = new HashMap();
\r
1859 * (Non-SymbolTable API) Add the given variable and value to
\r
1860 * the table. Variable should NOT contain leading '$'.
\r
1862 public void add(String var, String value) {
\r
1863 char[] buffer = new char[value.length()];
\r
1864 value.getChars(0, value.length(), buffer, 0);
\r
1869 * (Non-SymbolTable API) Add the given variable and value to
\r
1870 * the table. Variable should NOT contain leading '$'.
\r
1872 public void add(String var, char[] body) {
\r
1873 logln("TokenSymbolTable: add \"" + var + "\" => \"" +
\r
1874 new String(body) + "\"");
\r
1875 contents.put(var, body);
\r
1879 * @see com.ibm.icu.text.SymbolTable#lookup(java.lang.String)
\r
1881 public char[] lookup(String s) {
\r
1882 logln("TokenSymbolTable: lookup \"" + s + "\" => \"" +
\r
1883 new String((char[]) contents.get(s)) + "\"");
\r
1884 return (char[])contents.get(s);
\r
1888 * @see com.ibm.icu.text.SymbolTable#lookupMatcher(int)
\r
1890 public UnicodeMatcher lookupMatcher(int ch) {
\r
1895 * @see com.ibm.icu.text.SymbolTable#parseReference(java.lang.String,
\r
1896 java.text.ParsePosition, int)
\r
1898 public String parseReference(String text, ParsePosition pos, int
\r
1901 int start = pos.getIndex();
\r
1903 for (i = start; i < limit; i += UTF16.getCharCount(cp)) {
\r
1904 cp = UTF16.charAt(text, i);
\r
1905 if (!com.ibm.icu.lang.UCharacter.isUnicodeIdentifierPart(cp)) {
\r
1909 logln("TokenSymbolTable: parse \"" + text + "\" from " +
\r
1910 start + " to " + i +
\r
1911 " => \"" + text.substring(start,i) + "\"");
\r
1913 return text.substring(start,i);
\r
1917 public void TestSurrogate() {
\r
1919 // These should all behave identically
\r
1920 "[abc\\uD800\\uDC00]",
\r
1921 "[abc\uD800\uDC00]",
\r
1922 "[abc\\U00010000]",
\r
1924 for (int i=0; i<DATA.length; ++i) {
\r
1925 logln("Test pattern " + i + " :" + Utility.escape(DATA[i]));
\r
1926 UnicodeSet set = new UnicodeSet(DATA[i]);
\r
1927 expectContainment(set,
\r
1928 CharsToUnicodeString("abc\\U00010000"),
\r
1929 "\uD800;\uDC00"); // split apart surrogate-pair
\r
1930 if (set.size() != 4) {
\r
1931 errln(Utility.escape("FAIL: " + DATA[i] + ".size() == " +
\r
1932 set.size() + ", expected 4"));
\r
1937 public void TestContains() {
\r
1938 int limit = 256; // combinations to test
\r
1939 for (int i = 0; i < limit; ++i) {
\r
1940 logln("Trying: " + i);
\r
1941 UnicodeSet x = bitsToSet(i);
\r
1942 for (int j = 0; j < limit; ++j) {
\r
1943 UnicodeSet y = bitsToSet(j);
\r
1944 boolean containsNone = (i & j) == 0;
\r
1945 boolean containsAll = (i & j) == j;
\r
1946 boolean equals = i == j;
\r
1947 if (containsNone != x.containsNone(y)) {
\r
1948 x.containsNone(y); // repeat for debugging
\r
1949 errln("FAILED: " + x + " containsSome " + y);
\r
1951 if (containsAll != x.containsAll(y)) {
\r
1952 x.containsAll(y); // repeat for debugging
\r
1953 errln("FAILED: " + x + " containsAll " + y);
\r
1955 if (equals != x.equals(y)) {
\r
1956 x.equals(y); // repeat for debugging
\r
1957 errln("FAILED: " + x + " equals " + y);
\r
1963 void _testComplement(int a) {
\r
1964 UnicodeSet x = bitsToSet(a);
\r
1965 UnicodeSet z = bitsToSet(a);
\r
1967 int c = setToBits(z);
\r
1969 errln("FAILED: add: ~" + x + " != " + z);
\r
1970 errln("FAILED: add: ~" + a + " != " + c);
\r
1972 checkCanonicalRep(z, "complement " + a);
\r
1975 void _testAdd(int a, int b) {
\r
1976 UnicodeSet x = bitsToSet(a);
\r
1977 UnicodeSet y = bitsToSet(b);
\r
1978 UnicodeSet z = bitsToSet(a);
\r
1980 int c = setToBits(z);
\r
1981 if (c != (a | b)) {
\r
1982 errln(Utility.escape("FAILED: add: " + x + " | " + y + " != " + z));
\r
1983 errln("FAILED: add: " + a + " | " + b + " != " + c);
\r
1985 checkCanonicalRep(z, "add " + a + "," + b);
\r
1988 void _testRetain(int a, int b) {
\r
1989 UnicodeSet x = bitsToSet(a);
\r
1990 UnicodeSet y = bitsToSet(b);
\r
1991 UnicodeSet z = bitsToSet(a);
\r
1993 int c = setToBits(z);
\r
1994 if (c != (a & b)) {
\r
1995 errln("FAILED: retain: " + x + " & " + y + " != " + z);
\r
1996 errln("FAILED: retain: " + a + " & " + b + " != " + c);
\r
1998 checkCanonicalRep(z, "retain " + a + "," + b);
\r
2001 void _testRemove(int a, int b) {
\r
2002 UnicodeSet x = bitsToSet(a);
\r
2003 UnicodeSet y = bitsToSet(b);
\r
2004 UnicodeSet z = bitsToSet(a);
\r
2006 int c = setToBits(z);
\r
2007 if (c != (a &~ b)) {
\r
2008 errln("FAILED: remove: " + x + " &~ " + y + " != " + z);
\r
2009 errln("FAILED: remove: " + a + " &~ " + b + " != " + c);
\r
2011 checkCanonicalRep(z, "remove " + a + "," + b);
\r
2014 void _testXor(int a, int b) {
\r
2015 UnicodeSet x = bitsToSet(a);
\r
2016 UnicodeSet y = bitsToSet(b);
\r
2017 UnicodeSet z = bitsToSet(a);
\r
2018 z.complementAll(y);
\r
2019 int c = setToBits(z);
\r
2020 if (c != (a ^ b)) {
\r
2021 errln("FAILED: complement: " + x + " ^ " + y + " != " + z);
\r
2022 errln("FAILED: complement: " + a + " ^ " + b + " != " + c);
\r
2024 checkCanonicalRep(z, "complement " + a + "," + b);
\r
2028 * Check that ranges are monotonically increasing and non-
\r
2031 void checkCanonicalRep(UnicodeSet set, String msg) {
\r
2032 int n = set.getRangeCount();
\r
2034 errln("FAIL result of " + msg +
\r
2035 ": range count should be >= 0 but is " +
\r
2036 n + " for " + Utility.escape(set.toString()));
\r
2040 for (int i=0; i<n; ++i) {
\r
2041 int start = set.getRangeStart(i);
\r
2042 int end = set.getRangeEnd(i);
\r
2043 if (start > end) {
\r
2044 errln("FAIL result of " + msg +
\r
2045 ": range " + (i+1) +
\r
2046 " start > end: " + start + ", " + end +
\r
2047 " for " + Utility.escape(set.toString()));
\r
2049 if (i > 0 && start <= last) {
\r
2050 errln("FAIL result of " + msg +
\r
2051 ": range " + (i+1) +
\r
2052 " overlaps previous range: " + start + ", " + end +
\r
2053 " for " + Utility.escape(set.toString()));
\r
2060 * Convert a bitmask to a UnicodeSet.
\r
2062 UnicodeSet bitsToSet(int a) {
\r
2063 UnicodeSet result = new UnicodeSet();
\r
2064 for (int i = 0; i < 32; ++i) {
\r
2065 if ((a & (1<<i)) != 0) {
\r
2066 result.add((char)i,(char)i);
\r
2074 * Convert a UnicodeSet to a bitmask. Only the characters
\r
2075 * U+0000 to U+001F are represented in the bitmask.
\r
2077 static int setToBits(UnicodeSet x) {
\r
2079 for (int i = 0; i < 32; ++i) {
\r
2080 if (x.contains((char)i)) {
\r
2088 * Return the representation of an inversion list based UnicodeSet
\r
2089 * as a pairs list. Ranges are listed in ascending Unicode order.
\r
2090 * For example, the set [a-zA-M3] is represented as "33AMaz".
\r
2092 static String getPairs(UnicodeSet set) {
\r
2093 StringBuffer pairs = new StringBuffer();
\r
2094 for (int i=0; i<set.getRangeCount(); ++i) {
\r
2095 int start = set.getRangeStart(i);
\r
2096 int end = set.getRangeEnd(i);
\r
2097 if (end > 0xFFFF) {
\r
2099 i = set.getRangeCount(); // Should be unnecessary
\r
2101 pairs.append((char)start).append((char)end);
\r
2103 return pairs.toString();
\r
2107 * Test function. Make sure that the sets have the right relation
\r
2110 void expectRelation(Object relationObj, Object set1Obj, Object set2Obj, String message) {
\r
2111 int relation = ((Integer) relationObj).intValue();
\r
2112 UnicodeSet set1 = (UnicodeSet) set1Obj;
\r
2113 UnicodeSet set2 = (UnicodeSet) set2Obj;
\r
2115 // by-the-by, check the iterator
\r
2116 checkRoundTrip(set1);
\r
2117 checkRoundTrip(set2);
\r
2119 boolean contains = set1.containsAll(set2);
\r
2120 boolean isContained = set2.containsAll(set1);
\r
2121 boolean disjoint = set1.containsNone(set2);
\r
2122 boolean equals = set1.equals(set2);
\r
2124 UnicodeSet intersection = new UnicodeSet(set1).retainAll(set2);
\r
2125 UnicodeSet minus12 = new UnicodeSet(set1).removeAll(set2);
\r
2126 UnicodeSet minus21 = new UnicodeSet(set2).removeAll(set1);
\r
2128 // test basic properties
\r
2130 if (contains != (intersection.size() == set2.size())) {
\r
2131 errln("FAIL contains1" + set1.toPattern(true) + ", " + set2.toPattern(true));
\r
2134 if (contains != (intersection.equals(set2))) {
\r
2135 errln("FAIL contains2" + set1.toPattern(true) + ", " + set2.toPattern(true));
\r
2138 if (isContained != (intersection.size() == set1.size())) {
\r
2139 errln("FAIL isContained1" + set1.toPattern(true) + ", " + set2.toPattern(true));
\r
2142 if (isContained != (intersection.equals(set1))) {
\r
2143 errln("FAIL isContained2" + set1.toPattern(true) + ", " + set2.toPattern(true));
\r
2146 if ((contains && isContained) != equals) {
\r
2147 errln("FAIL equals" + set1.toPattern(true) + ", " + set2.toPattern(true));
\r
2150 if (disjoint != (intersection.size() == 0)) {
\r
2151 errln("FAIL disjoint" + set1.toPattern(true) + ", " + set2.toPattern(true));
\r
2154 // Now see if the expected relation is true
\r
2155 int status = (minus12.size() != 0 ? 4 : 0)
\r
2156 | (intersection.size() != 0 ? 2 : 0)
\r
2157 | (minus21.size() != 0 ? 1 : 0);
\r
2159 if (status != relation) {
\r
2160 errln("FAIL relation incorrect" + message
\r
2161 + "; desired = " + RELATION_NAME[relation]
\r
2162 + "; found = " + RELATION_NAME[status]
\r
2163 + "; set1 = " + set1.toPattern(true)
\r
2164 + "; set2 = " + set2.toPattern(true)
\r
2170 * Basic consistency check for a few items.
\r
2171 * That the iterator works, and that we can create a pattern and
\r
2172 * get the same thing back
\r
// Round-trip check for a single set: s is rebuilt four times (two iterator
// copies, two pattern re-parses) and each rebuilt set is handed to checkEqual
// together with a label naming the route that produced it.
// NOTE(review): the embedded listing numbers skip 2179, 2182, 2185 and
// everything after 2188, so the method's closing brace is not visible here.
2175 void checkRoundTrip(UnicodeSet s) {
\r
// Capture the toPattern(false) text up front; it is re-parsed at 2183.
2176 String pat = s.toPattern(false);
\r
// Route 1: copy through copyWithIterator with withRange == false.
2177 UnicodeSet t = copyWithIterator(s, false);
\r
2178 checkEqual(s, t, "iterator roundtrip");
\r
// Route 2: same copy with withRange == true; the label is identical to route 1.
2180 t = copyWithIterator(s, true); // try range
\r
2181 checkEqual(s, t, "iterator roundtrip");
\r
// Route 3: parse the toPattern(false) text back into a new set.
2183 t = new UnicodeSet(pat);
\r
2184 checkEqual(s, t, "toPattern(false)");
\r
// Route 4: repeat the parse with the toPattern(true) text.
2186 pat = s.toPattern(true);
\r
2187 t = new UnicodeSet(pat);
\r
2188 checkEqual(s, t, "toPattern(true)");
\r
/**
 * Builds a fresh UnicodeSet by walking s with a UnicodeSetIterator and adding
 * what the iterator yields. Two traversals are visible: one driven by
 * nextRange() that adds [codepoint, codepointEnd] ranges, and one driven by
 * next() that adds single code points. Both test the iterator's codepoint
 * field against UnicodeSetIterator.IS_STRING before adding.
 *
 * NOTE(review): this listing omits numbered lines 2194, 2197-2198, 2200-2202,
 * 2205-2206 and 2208-2213. Presumably withRange chooses between the two
 * loops, the IS_STRING branches add the iterator's current string, and t is
 * returned -- confirm against the complete file.
 */
2191 UnicodeSet copyWithIterator(UnicodeSet s, boolean withRange) {
\r
2192 UnicodeSet t = new UnicodeSet();
\r
2193 UnicodeSetIterator it = new UnicodeSetIterator(s);
\r
// Range traversal: each step exposes it.codepoint .. it.codepointEnd.
2195 while (it.nextRange()) {
\r
2196 if (it.codepoint == UnicodeSetIterator.IS_STRING) {
\r
2199 t.add(it.codepoint, it.codepointEnd);
\r
// Element traversal: each step exposes a single it.codepoint.
2203 while (it.next()) {
\r
2204 if (it.codepoint == UnicodeSetIterator.IS_STRING) {
\r
2207 t.add(it.codepoint);
\r
/**
 * Compares s and t with equals() and, when they differ, reports a failure
 * through errln. The report contains the caller's message followed by
 * toPattern(true) of both sets ("source" is s, "result" is t).
 *
 * NOTE(review): lines 2219-2224 are missing from this listing, so the return
 * statements are not visible; presumably false after the errln and true
 * otherwise -- confirm against the complete file.
 */
2214 boolean checkEqual(UnicodeSet s, UnicodeSet t, String message) {
\r
2215 if (!s.equals(t)) {
\r
2216 errln("FAIL " + message
\r
2217 + "; source = " + s.toPattern(true)
\r
2218 + "; result = " + t.toPattern(true)
\r
/**
 * Parses pat1 and pat2 into two UnicodeSets and reports a failure through
 * errln when the resulting sets are not equal. An IllegalArgumentException
 * raised while constructing either set is caught and reported through errln
 * as well. name is only used to label the failure messages.
 *
 * NOTE(review): lines 2227 and 2232-2233 are missing from this listing;
 * presumably a "try {" opener and an early exit from the catch block (set1
 * and set2 would otherwise be unassigned at 2234) -- confirm.
 */
2225 void expectEqual(String name, String pat1, String pat2) {
\r
2226 UnicodeSet set1, set2;
\r
2228 set1 = new UnicodeSet(pat1);
\r
2229 set2 = new UnicodeSet(pat2);
\r
2230 } catch (IllegalArgumentException e) {
\r
2231 errln("FAIL: Couldn't create UnicodeSet from pattern for \"" + name + "\": " + e.getMessage());
\r
// Both patterns parsed; the expectation itself is plain set equality.
2234 if(!set1.equals(set2)) {
\r
2235 errln("FAIL: Sets built from patterns differ for \"" + name + "\"");
\r
2240 * Expect the given set to contain the characters in charsIn and
\r
2241 * to not contain those in charsOut.
\r
// Pattern-based overload: constructs a UnicodeSet from pat and forwards it,
// with charsIn and charsOut unchanged, to the UnicodeSet overload below.
// An IllegalArgumentException from the constructor is reported through errln.
// NOTE(review): lines 2244-2245 and 2250-2251 are missing from this listing;
// presumably the declaration of set, a "try {" opener and an early exit from
// the catch block -- confirm against the complete file.
2243 void expectContainment(String pat, String charsIn, String charsOut) {
\r
2246 set = new UnicodeSet(pat);
\r
2247 } catch (IllegalArgumentException e) {
\r
2248 errln("FAIL: Couldn't create UnicodeSet from pattern \"" +
\r
2249 pat + "\": " + e.getMessage());
\r
// Delegate the actual membership checks.
2252 expectContainment(set, charsIn, charsOut);
\r
2256 * Expect the given set to contain the characters in charsIn and
\r
2257 * to not contain those in charsOut.
\r
// Set-based overload. charsIn and charsOut are each optional (null skips that
// half); when present they are first passed through Utility.unescape and then
// scanned one code point at a time. Offending code points are collected in
// `bad` and reported with a single errln per half; logln messages for the
// passing case are also present.
// NOTE(review): lines 2268-2269, 2273, 2275-2276, 2279, 2285-2286, 2290 and
// 2292-2295 are missing from this listing. Presumably 2273/2290 are the
// "else" that separates errln from logln, and 2279 clears `bad` before it is
// reused for charsOut -- confirm against the complete file.
2259 void expectContainment(UnicodeSet set, String charsIn, String charsOut) {
\r
// Accumulates the code points that violate the expectation being checked.
2260 StringBuffer bad = new StringBuffer();
\r
2261 if (charsIn != null) {
\r
2262 charsIn = Utility.unescape(charsIn);
\r
// No increment in the loop header: i is advanced inside the body.
2263 for (int i=0; i<charsIn.length(); ) {
\r
2264 int c = UTF16.charAt(charsIn,i);
\r
// Step by however many chars UTF16 reports for c.
2265 i += UTF16.getCharCount(c);
\r
// Expected member that the set does not contain.
2266 if (!set.contains(c)) {
\r
2267 UTF16.append(bad,c);
\r
2270 if (bad.length() > 0) {
\r
2271 errln(Utility.escape("FAIL: set " + set + " does not contain " + bad +
\r
2272 ", expected containment of " + charsIn));
\r
2274 logln(Utility.escape("Ok: set " + set + " contains " + charsIn));
\r
// Second half: same scan, but now membership is the violation.
2277 if (charsOut != null) {
\r
2278 charsOut = Utility.unescape(charsOut);
\r
2280 for (int i=0; i<charsOut.length(); ) {
\r
2281 int c = UTF16.charAt(charsOut,i);
\r
2282 i += UTF16.getCharCount(c);
\r
// Expected non-member that the set does contain.
2283 if (set.contains(c)) {
\r
2284 UTF16.append(bad, c);
\r
2287 if (bad.length() > 0) {
\r
2288 errln(Utility.escape("FAIL: set " + set + " contains " + bad +
\r
2289 ", expected non-containment of " + charsOut));
\r
2291 logln(Utility.escape("Ok: set " + set + " does not contain " + charsOut));
\r
/**
 * Applies pattern to set (the caller's set is modified by applyPattern) and
 * compares getPairs(set) with expectedPairs. A mismatch is reported through
 * errln showing both pair strings escaped with Utility.escape; a logln
 * message showing the resulting pairs is also present.
 *
 * NOTE(review): lines 2297, 2305 and 2309-2311 are missing from this listing;
 * presumably 2297 declares the pattern parameter and 2305 is the "else"
 * between the errln and logln calls -- confirm against the complete file.
 */
2296 void expectPattern(UnicodeSet set,
\r
2298 String expectedPairs) {
\r
2299 set.applyPattern(pattern);
\r
2300 if (!getPairs(set).equals(expectedPairs)) {
\r
2301 errln("FAIL: applyPattern(\"" + pattern +
\r
2302 "\") => pairs \"" +
\r
2303 Utility.escape(getPairs(set)) + "\", expected \"" +
\r
2304 Utility.escape(expectedPairs) + "\"");
\r
2306 logln("Ok: applyPattern(\"" + pattern +
\r
2307 "\") => pairs \"" +
\r
2308 Utility.escape(getPairs(set)) + "\"");
\r
/**
 * Two-stage check of a set. First, set.toPattern(true) is compared with
 * expPat and the outcome is logged (logln) or reported (errln). Second, when
 * expStrings is non-null, each entry is tested with set.contains and the
 * result is compared with the flag `in`, which starts out true.
 *
 * The class constant NOT acts as a marker inside expStrings and is detected
 * by reference comparison (the source comment marks this as deliberate).
 *
 * NOTE(review): lines 2313, 2318, 2320-2321, 2323-2324, 2328-2330, 2336 and
 * 2340-2343 are missing from this listing. Presumably 2313 declares expPat,
 * the toPattern failure and the null check return early, and the NOT branch
 * flips `in` to false and skips to the next entry -- confirm against the
 * complete file.
 */
2312 void expectToPattern(UnicodeSet set,
\r
2314 String[] expStrings) {
\r
2315 String pat = set.toPattern(true);
\r
2316 if (pat.equals(expPat)) {
\r
2317 logln("Ok: toPattern() => \"" + pat + "\"");
\r
2319 errln("FAIL: toPattern() => \"" + pat + "\", expected \"" + expPat + "\"");
\r
// Membership expectations are optional.
2322 if (expStrings == null) {
\r
// Expected result of set.contains() for the entries currently being scanned.
2325 boolean in = true;
\r
2326 for (int i=0; i<expStrings.length; ++i) {
\r
2327 if (expStrings[i] == NOT) { // sic; pointer comparison
\r
2331 boolean contained = set.contains(expStrings[i]);
\r
// Same wording in both messages; only the Ok/FAIL prefix and the channel differ.
2332 if (contained == in) {
\r
2333 logln("Ok: " + expPat +
\r
2334 (contained ? " contains {" : " does not contain {") +
\r
2335 Utility.escape(expStrings[i]) + "}");
\r
2337 errln("FAIL: " + expPat +
\r
2338 (contained ? " contains {" : " does not contain {") +
\r
2339 Utility.escape(expStrings[i]) + "}");
\r
/**
 * Reports a failure through errln when getPairs(set) is not equal to
 * expectedPairs. Both the expected and the actual pair strings are passed
 * through Utility.escape before being placed in the message.
 * (The closing lines 2349-2350 are not part of this listing.)
 */
2344 void expectPairs(UnicodeSet set, String expectedPairs) {
\r
2345 if (!getPairs(set).equals(expectedPairs)) {
\r
2346 errln("FAIL: Expected pair list \"" +
\r
2347 Utility.escape(expectedPairs) + "\", got \"" +
\r
2348 Utility.escape(getPairs(set)) + "\"");
\r
/**
 * Thin wrapper that returns Utility.unescape(s) unchanged.
 * (The closing brace on line 2353 is not part of this listing.)
 */
2351 static final String CharsToUnicodeString(String s) {
\r
2352 return Utility.unescape(s);
\r
2355 /* Test the method public UnicodeSet getSet() */
\r
// Creates a UnicodeSetIterator with the no-argument constructor and reports a
// failure through errln if the guarded code throws any Exception.
// NOTE(review): lines 2358-2359 are missing from this listing; going by the
// comment that precedes this method they presumably hold "try {" and a call
// to us.getSet() -- confirm against the complete file.
// NOTE(review): the failure message below reads "was not suppose to given an
// an exception."; it is a runtime string, so the wording is left untouched.
2356 public void TestGetSet() {
\r
2357 UnicodeSetIterator us = new UnicodeSetIterator();
\r
2360 } catch (Exception e) {
\r
2361 errln("UnicodeSetIterator.getSet() was not suppose to given an " + "an exception.");
\r
2365 /* Tests the method public UnicodeSet add(Collection<?> source) */
\r
2366 public void TestAddCollection() {
\r
2367 UnicodeSet us = new UnicodeSet();
\r
2368 Collection<?> s = null;
\r
2371 errln("UnicodeSet.add(Collection<?>) was suppose to return an exception for a null parameter.");
\r
2372 } catch (Exception e) {
\r