2 *******************************************************************************
\r
3 * Copyright (C) 1996-2009, International Business Machines Corporation and *
\r
4 * others. All Rights Reserved. *
\r
5 *******************************************************************************
\r
7 package com.ibm.icu.dev.test.translit;
\r
9 import java.text.ParsePosition;
\r
10 import java.util.ArrayList;
\r
11 import java.util.Arrays;
\r
12 import java.util.Collection;
\r
13 import java.util.HashMap;
\r
14 import java.util.HashSet;
\r
15 import java.util.Iterator;
\r
16 import java.util.List;
\r
17 import java.util.Random;
\r
18 import java.util.Set;
\r
19 import java.util.SortedSet;
\r
20 import java.util.TreeSet;
\r
22 import com.ibm.icu.dev.test.TestFmwk;
\r
23 import com.ibm.icu.dev.test.util.PrettyPrinter;
\r
24 import com.ibm.icu.impl.SortedSetRelation;
\r
25 import com.ibm.icu.impl.Utility;
\r
26 import com.ibm.icu.lang.UCharacter;
\r
27 import com.ibm.icu.lang.UProperty;
\r
28 import com.ibm.icu.lang.UScript;
\r
29 import com.ibm.icu.lang.UCharacterEnums.ECharacterCategory;
\r
30 import com.ibm.icu.text.SymbolTable;
\r
31 import com.ibm.icu.text.UTF16;
\r
32 import com.ibm.icu.text.UnicodeMatcher;
\r
33 import com.ibm.icu.text.UnicodeSet;
\r
34 import com.ibm.icu.text.UnicodeSetIterator;
\r
38 * @summary General test of UnicodeSet
\r
40 public class UnicodeSetTest extends TestFmwk {
\r
42 static final String NOT = "%%%%";
\r
44 public static void main(String[] args) throws Exception {
\r
45 new UnicodeSetTest().run(args);
\r
48 private static final boolean isCccValue(int ccc) {
\r
75 public void TestPropertyAccess() {
\r
77 // test to see that all of the names work
\r
78 for (int propNum = UProperty.BINARY_START; propNum < UProperty.INT_LIMIT; ++propNum) {
\r
80 //Skipping tests in the non-exhaustive mode to shorten the test time ticket#6475
\r
81 if(getInclusion()<=5 && count%5!=0){
\r
84 if (propNum >= UProperty.BINARY_LIMIT && propNum < UProperty.INT_START) { // skip the gap
\r
85 propNum = UProperty.INT_START;
\r
87 for (int nameChoice = UProperty.NameChoice.SHORT; nameChoice <= UProperty.NameChoice.LONG; ++nameChoice) {
\r
90 propName = UCharacter.getPropertyName(propNum, nameChoice);
\r
91 if (propName == null) {
\r
92 if (nameChoice == UProperty.NameChoice.SHORT) continue; // allow non-existent short names
\r
93 throw new NullPointerException();
\r
95 } catch (RuntimeException e1) {
\r
96 errln("Can't get property name for: "
\r
97 + "Property (" + propNum + ")"
\r
98 + ", NameChoice: " + nameChoice + ", "
\r
99 + e1.getClass().getName());
\r
102 logln("Property (" + propNum + "): " + propName);
\r
103 for (int valueNum = UCharacter.getIntPropertyMinValue(propNum); valueNum <= UCharacter.getIntPropertyMaxValue(propNum); ++valueNum) {
\r
106 valueName = UCharacter.getPropertyValueName(propNum, valueNum, nameChoice);
\r
107 if (valueName == null) {
\r
108 if (nameChoice == UProperty.NameChoice.SHORT) continue; // allow non-existent short names
\r
109 if ((propNum == UProperty.CANONICAL_COMBINING_CLASS ||
\r
110 propNum == UProperty.LEAD_CANONICAL_COMBINING_CLASS ||
\r
111 propNum == UProperty.TRAIL_CANONICAL_COMBINING_CLASS) &&
\r
112 !isCccValue(valueNum)) {
\r
113 // Only a few of the canonical combining classes have names.
\r
114 // Otherwise they are just integer values.
\r
117 throw new NullPointerException();
\r
120 } catch (RuntimeException e1) {
\r
121 errln("Can't get property value name for: "
\r
122 + "Property (" + propNum + "): " + propName + ", "
\r
123 + "Value (" + valueNum + ") "
\r
124 + ", NameChoice: " + nameChoice + ", "
\r
125 + e1.getClass().getName());
\r
128 logln("Value (" + valueNum + "): " + valueName);
\r
129 UnicodeSet testSet;
\r
131 testSet = new UnicodeSet("[:" + propName + "=" + valueName + ":]");
\r
132 } catch (RuntimeException e) {
\r
133 errln("Can't create UnicodeSet for: "
\r
134 + "Property (" + propNum + "): " + propName + ", "
\r
135 + "Value (" + valueNum + "): " + valueName + ", "
\r
136 + e.getClass().getName());
\r
139 UnicodeSet collectedErrors = new UnicodeSet();
\r
140 for (UnicodeSetIterator it = new UnicodeSetIterator(testSet); it.next();) {
\r
141 int value = UCharacter.getIntPropertyValue(it.codepoint, propNum);
\r
142 if (value != valueNum) {
\r
143 collectedErrors.add(it.codepoint);
\r
146 if (collectedErrors.size() != 0) {
\r
147 errln("Property Value Differs: "
\r
148 + "Property (" + propNum + "): " + propName + ", "
\r
149 + "Value (" + valueNum + "): " + valueName + ", "
\r
150 + "Differing values: " + collectedErrors.toPattern(true));
\r
159 * Test toPattern().
\r
161 public void TestToPattern() throws Exception {
\r
162 // Test that toPattern() round trips with syntax characters
\r
164 for (int i = 0; i < OTHER_TOPATTERN_TESTS.length; ++i) {
\r
165 checkPat(OTHER_TOPATTERN_TESTS[i], new UnicodeSet(OTHER_TOPATTERN_TESTS[i]));
\r
167 for (int i = 0; i <= 0x10FFFF; ++i) {
\r
168 if ((i <= 0xFF && !UCharacter.isLetter(i)) || UCharacter.isWhitespace(i)) {
\r
169 // check various combinations to make sure they all work.
\r
170 if (i != 0 && !toPatternAux(i, i)) continue;
\r
171 if (!toPatternAux(0, i)) continue;
\r
172 if (!toPatternAux(i, 0xFFFF)) continue;
\r
176 // Test pattern behavior of multicharacter strings.
\r
177 UnicodeSet s = new UnicodeSet("[a-z {aa} {ab}]");
\r
178 expectToPattern(s, "[a-z{aa}{ab}]",
\r
179 new String[] {"aa", "ab", NOT, "ac"});
\r
181 expectToPattern(s, "[a-z{aa}{ab}{ac}]",
\r
182 new String[] {"aa", "ab", "ac", NOT, "xy"});
\r
184 s.applyPattern("[a-z {\\{l} {r\\}}]");
\r
185 expectToPattern(s, "[a-z{r\\}}{\\{l}]",
\r
186 new String[] {"{l", "r}", NOT, "xy"});
\r
188 expectToPattern(s, "[a-z{\\[\\]}{r\\}}{\\{l}]",
\r
189 new String[] {"{l", "r}", "[]", NOT, "xy"});
\r
191 s.applyPattern("[a-z {\u4E01\u4E02}{\\n\\r}]");
\r
192 expectToPattern(s, "[a-z{\\u000A\\u000D}{\\u4E01\\u4E02}]",
\r
193 new String[] {"\u4E01\u4E02", "\n\r"});
\r
198 expectToPattern(s, "[{abc}]",
\r
199 new String[] {"abc", NOT, "ab"});
\r
201 // JB#3400: For 2 character ranges prefer [ab] to [a-b]
\r
204 expectToPattern(s, "[ab]", null);
\r
206 // Cover applyPattern, applyPropertyAlias
\r
208 s.applyPattern("[ab ]", true);
\r
209 expectToPattern(s, "[ab]", new String[] {"a", NOT, "ab", " "});
\r
211 s.applyPattern("[ab ]", false);
\r
212 expectToPattern(s, "[\\ ab]", new String[] {"a", "\u0020", NOT, "ab"});
\r
215 s.applyPropertyAlias("nv", "0.5");
\r
216 expectToPattern(s, "[\\u00BD\\u0D74\\u0F2A\\u2CFD\\U00010141\\U00010175\\U00010176]", null);
\r
217 // Unicode 5.1 adds Malayalam 1/2 (\u0D74)
\r
220 s.applyPropertyAlias("gc", "Lu");
\r
221 // TODO expectToPattern(s, what?)
\r
223 // RemoveAllStrings()
\r
225 s.applyPattern("[a-z{abc}{def}]");
\r
226 expectToPattern(s, "[a-z{abc}{def}]", null);
\r
227 s.removeAllStrings();
\r
228 expectToPattern(s, "[a-z]", null);
\r
231 static String[] OTHER_TOPATTERN_TESTS = {
\r
232 "[[:latin:]&[:greek:]]",
\r
233 "[[:latin:]-[:greek:]]",
\r
234 "[:nonspacing mark:]"
\r
238 public boolean toPatternAux(int start, int end) {
\r
239 // use Integer.toString because Utility.hex doesn't handle ints
\r
240 String source = "0x" + Integer.toString(start,16).toUpperCase();
\r
241 if (start != end) source += "..0x" + Integer.toString(end,16).toUpperCase();
\r
242 UnicodeSet testSet = new UnicodeSet();
\r
243 testSet.add(start, end);
\r
244 return checkPat(source, testSet);
\r
247 boolean checkPat (String source, UnicodeSet testSet) {
\r
250 // What we want to make sure of is that a pattern generated
\r
251 // by toPattern(), with or without escaped unprintables, can
\r
252 // be passed back into the UnicodeSet constructor.
\r
253 String pat0 = testSet.toPattern(true);
\r
254 if (!checkPat(source + " (escaped)", testSet, pat0)) return false;
\r
256 //String pat1 = unescapeLeniently(pat0);
\r
257 //if (!checkPat(source + " (in code)", testSet, pat1)) return false;
\r
259 String pat2 = testSet.toPattern(false);
\r
260 if (!checkPat(source, testSet, pat2)) return false;
\r
262 //String pat3 = unescapeLeniently(pat2);
\r
263 //if (!checkPat(source + " (in code)", testSet, pat3)) return false;
\r
265 //logln(source + " => " + pat0 + ", " + pat1 + ", " + pat2 + ", " + pat3);
\r
266 logln(source + " => " + pat0 + ", " + pat2);
\r
267 } catch (Exception e) {
\r
268 errln("EXCEPTION in toPattern: " + source + " => " + pat);
\r
274 boolean checkPat (String source, UnicodeSet testSet, String pat) {
\r
275 UnicodeSet testSet2 = new UnicodeSet(pat);
\r
276 if (!testSet2.equals(testSet)) {
\r
277 errln("Fail toPattern: " + source + "; " + pat + " => " +
\r
278 testSet2.toPattern(false) + ", expected " +
\r
279 testSet.toPattern(false));
\r
285 // NOTE: copied the following from Utility. There ought to be a version in there with a flag
\r
286 // that does the Java stuff
\r
288 public static int unescapeAt(String s, int[] offset16) {
\r
294 int bitsPerDigit = 4;
\r
298 /* Check that offset is in range */
\r
299 int offset = offset16[0];
\r
300 int length = s.length();
\r
301 if (offset < 0 || offset >= length) {
\r
305 /* Fetch first UChar after '\\' */
\r
306 c = UTF16.charAt(s, offset);
\r
307 offset += UTF16.getCharCount(c);
\r
309 /* Convert hexadecimal and octal escapes */
\r
312 minDig = maxDig = 4;
\r
316 minDig = maxDig = 8;
\r
324 dig = UCharacter.digit(c, 8);
\r
328 n = 1; /* Already have first octal digit */
\r
335 while (offset < length && n < maxDig) {
\r
337 // TODO: Restore the char32-based code when UCharacter.digit
\r
338 // is working (Bug 66).
\r
340 //c = UTF16.charAt(s, offset);
\r
341 //dig = UCharacter.digit(c, (bitsPerDigit == 3) ? 8 : 16);
\r
342 c = s.charAt(offset);
\r
343 dig = Character.digit((char)c, (bitsPerDigit == 3) ? 8 : 16);
\r
347 result = (result << bitsPerDigit) | dig;
\r
348 //offset += UTF16.getCharCount(c);
\r
355 offset16[0] = offset;
\r
359 /* Convert C-style escapes in table */
\r
360 for (i=0; i<UNESCAPE_MAP.length; i+=2) {
\r
361 if (c == UNESCAPE_MAP[i]) {
\r
362 offset16[0] = offset;
\r
363 return UNESCAPE_MAP[i+1];
\r
364 } else if (c < UNESCAPE_MAP[i]) {
\r
369 /* If no special forms are recognized, then consider
\r
370 * the backslash to generically escape the next character. */
\r
371 offset16[0] = offset;
\r
375 /* This map must be in ASCENDING ORDER OF THE ESCAPE CODE */
\r
376 static private final char[] UNESCAPE_MAP = {
\r
391 * Convert all escapes in a given string using unescapeAt().
\r
392 * Leave invalid escape sequences unchanged.
\r
394 public static String unescapeLeniently(String s) {
\r
395 StringBuffer buf = new StringBuffer();
\r
396 int[] pos = new int[1];
\r
397 for (int i=0; i<s.length(); ) {
\r
398 char c = s.charAt(i++);
\r
401 int e = unescapeAt(s, pos);
\r
405 UTF16.append(buf, e);
\r
412 return buf.toString();
\r
415 public void TestPatterns() {
\r
416 UnicodeSet set = new UnicodeSet();
\r
417 expectPattern(set, "[[a-m]&[d-z]&[k-y]]", "km");
\r
418 expectPattern(set, "[[a-z]-[m-y]-[d-r]]", "aczz");
\r
419 expectPattern(set, "[a\\-z]", "--aazz");
\r
420 expectPattern(set, "[-az]", "--aazz");
\r
421 expectPattern(set, "[az-]", "--aazz");
\r
422 expectPattern(set, "[[[a-z]-[aeiou]i]]", "bdfnptvz");
\r
424 // Throw in a test of complement
\r
426 String exp = '\u0000' + "aeeoouu" + (char)('z'+1) + '\uFFFF';
\r
427 expectPairs(set, exp);
\r
430 public void TestCategories() {
\r
432 UnicodeSet set = new UnicodeSet("[:Lu:]");
\r
433 expectContainment(set, "ABC", "abc");
\r
435 // Make sure generation of L doesn't pollute cached Lu set
\r
436 // First generate L, then Lu
\r
437 // not used int TOP = 0x200; // Don't need to go over the whole range:
\r
438 set = new UnicodeSet("[:L:]");
\r
439 for (int i=0; i<0x200; ++i) {
\r
440 boolean l = UCharacter.isLetter(i);
\r
441 if (l != set.contains((char)i)) {
\r
442 errln("FAIL: L contains " + (char)i + " = " +
\r
443 set.contains((char)i));
\r
444 if (++failures == 10) break;
\r
448 set = new UnicodeSet("[:Lu:]");
\r
449 for (int i=0; i<0x200; ++i) {
\r
450 boolean lu = (UCharacter.getType(i) == ECharacterCategory.UPPERCASE_LETTER);
\r
451 if (lu != set.contains((char)i)) {
\r
452 errln("FAIL: Lu contains " + (char)i + " = " +
\r
453 set.contains((char)i));
\r
454 if (++failures == 20) break;
\r
459 public void TestAddRemove() {
\r
460 UnicodeSet set = new UnicodeSet();
\r
462 expectPairs(set, "az");
\r
463 set.remove('m', 'p');
\r
464 expectPairs(set, "alqz");
\r
465 set.remove('e', 'g');
\r
466 expectPairs(set, "adhlqz");
\r
467 set.remove('d', 'i');
\r
468 expectPairs(set, "acjlqz");
\r
469 set.remove('c', 'r');
\r
470 expectPairs(set, "absz");
\r
472 expectPairs(set, "abfqsz");
\r
473 set.remove('a', 'g');
\r
474 expectPairs(set, "hqsz");
\r
475 set.remove('a', 'z');
\r
476 expectPairs(set, "");
\r
478 // Try removing an entire set from another set
\r
479 expectPattern(set, "[c-x]", "cx");
\r
480 UnicodeSet set2 = new UnicodeSet();
\r
481 expectPattern(set2, "[f-ky-za-bc[vw]]", "acfkvwyz");
\r
482 set.removeAll(set2);
\r
483 expectPairs(set, "deluxx");
\r
485 // Try adding an entire set to another set
\r
486 expectPattern(set, "[jackiemclean]", "aacceein");
\r
487 expectPattern(set2, "[hitoshinamekatajamesanderson]", "aadehkmort");
\r
489 expectPairs(set, "aacehort");
\r
491 // Test commutativity
\r
492 expectPattern(set, "[hitoshinamekatajamesanderson]", "aadehkmort");
\r
493 expectPattern(set2, "[jackiemclean]", "aacceein");
\r
495 expectPairs(set, "aacehort");
\r
499 * Make sure minimal representation is maintained.
\r
501 public void TestMinimalRep() {
\r
502 // This is pretty thoroughly tested by checkCanonicalRep()
\r
503 // run against the exhaustive operation results. Use the code
\r
504 // here for debugging specific spot problems.
\r
506 // 1 overlap against 2
\r
507 UnicodeSet set = new UnicodeSet("[h-km-q]");
\r
508 UnicodeSet set2 = new UnicodeSet("[i-o]");
\r
510 expectPairs(set, "hq");
\r
512 set.applyPattern("[a-m]");
\r
513 set2.applyPattern("[e-o]");
\r
515 expectPairs(set, "ao");
\r
517 set.applyPattern("[e-o]");
\r
518 set2.applyPattern("[a-m]");
\r
520 expectPairs(set, "ao");
\r
521 // 1 overlap against 3
\r
522 set.applyPattern("[a-eg-mo-w]");
\r
523 set2.applyPattern("[d-q]");
\r
525 expectPairs(set, "aw");
\r
528 public void TestAPI() {
\r
530 UnicodeSet set = new UnicodeSet();
\r
531 if (!set.isEmpty() || set.getRangeCount() != 0) {
\r
532 errln("FAIL, set should be empty but isn't: " +
\r
536 // clear(), isEmpty()
\r
538 if (set.isEmpty()) {
\r
539 errln("FAIL, set shouldn't be empty but is: " +
\r
543 if (!set.isEmpty()) {
\r
544 errln("FAIL, set should be empty but isn't: " +
\r
550 if (set.size() != 0) {
\r
551 errln("FAIL, size should be 0, but is " + set.size() +
\r
555 if (set.size() != 1) {
\r
556 errln("FAIL, size should be 1, but is " + set.size() +
\r
560 if (set.size() != 10) {
\r
561 errln("FAIL, size should be 10, but is " + set.size() +
\r
566 if (set.size() != 0x110000) {
\r
567 errln("FAIL, size should be 0x110000, but is" + set.size());
\r
570 // contains(first, last)
\r
572 set.applyPattern("[A-Y 1-8 b-d l-y]");
\r
573 for (int i = 0; i<set.getRangeCount(); ++i) {
\r
574 int a = set.getRangeStart(i);
\r
575 int b = set.getRangeEnd(i);
\r
576 if (!set.contains(a, b)) {
\r
577 errln("FAIL, should contain " + (char)a + '-' + (char)b +
\r
578 " but doesn't: " + set);
\r
580 if (set.contains((char)(a-1), b)) {
\r
581 errln("FAIL, shouldn't contain " +
\r
582 (char)(a-1) + '-' + (char)b +
\r
583 " but does: " + set);
\r
585 if (set.contains(a, (char)(b+1))) {
\r
586 errln("FAIL, shouldn't contain " +
\r
587 (char)a + '-' + (char)(b+1) +
\r
588 " but does: " + set);
\r
592 // Ported InversionList test.
\r
593 UnicodeSet a = new UnicodeSet((char)3,(char)10);
\r
594 UnicodeSet b = new UnicodeSet((char)7,(char)15);
\r
595 UnicodeSet c = new UnicodeSet();
\r
597 logln("a [3-10]: " + a);
\r
598 logln("b [7-15]: " + b);
\r
599 c.set(a); c.addAll(b);
\r
600 UnicodeSet exp = new UnicodeSet((char)3,(char)15);
\r
601 if (c.equals(exp)) {
\r
602 logln("c.set(a).add(b): " + c);
\r
604 errln("FAIL: c.set(a).add(b) = " + c + ", expect " + exp);
\r
607 exp.set((char)0, (char)2);
\r
608 exp.add((char)16, UnicodeSet.MAX_VALUE);
\r
609 if (c.equals(exp)) {
\r
610 logln("c.complement(): " + c);
\r
612 errln(Utility.escape("FAIL: c.complement() = " + c + ", expect " + exp));
\r
615 exp.set((char)3, (char)15);
\r
616 if (c.equals(exp)) {
\r
617 logln("c.complement(): " + c);
\r
619 errln("FAIL: c.complement() = " + c + ", expect " + exp);
\r
621 c.set(a); c.complementAll(b);
\r
622 exp.set((char)3,(char)6);
\r
623 exp.add((char)11,(char) 15);
\r
624 if (c.equals(exp)) {
\r
625 logln("c.set(a).complement(b): " + c);
\r
627 errln("FAIL: c.set(a).complement(b) = " + c + ", expect " + exp);
\r
631 c = bitsToSet(setToBits(c));
\r
632 if (c.equals(exp)) {
\r
633 logln("bitsToSet(setToBits(c)): " + c);
\r
635 errln("FAIL: bitsToSet(setToBits(c)) = " + c + ", expect " + exp);
\r
638 // Additional tests for coverage JB#2118
\r
639 //UnicodeSet::complement(class UnicodeString const &)
\r
640 //UnicodeSet::complementAll(class UnicodeString const &)
\r
641 //UnicodeSet::containsNone(class UnicodeSet const &)
\r
642 //UnicodeSet::containsNone(long,long)
\r
643 //UnicodeSet::containsSome(class UnicodeSet const &)
\r
644 //UnicodeSet::containsSome(long,long)
\r
645 //UnicodeSet::removeAll(class UnicodeString const &)
\r
646 //UnicodeSet::retain(long)
\r
647 //UnicodeSet::retainAll(class UnicodeString const &)
\r
648 //UnicodeSet::serialize(unsigned short *,long,enum UErrorCode &)
\r
649 //UnicodeSetIterator::getString(void)
\r
651 set.complement("ab");
\r
652 exp.applyPattern("[{ab}]");
\r
653 if (!set.equals(exp)) { errln("FAIL: complement(\"ab\")"); return; }
\r
655 UnicodeSetIterator iset = new UnicodeSetIterator(set);
\r
656 if (!iset.next() || iset.codepoint != UnicodeSetIterator.IS_STRING) {
\r
657 errln("FAIL: UnicodeSetIterator.next/IS_STRING");
\r
658 } else if (!iset.string.equals("ab")) {
\r
659 errln("FAIL: UnicodeSetIterator.string");
\r
662 set.add((char)0x61, (char)0x7A);
\r
663 set.complementAll("alan");
\r
664 exp.applyPattern("[{ab}b-kmo-z]");
\r
665 if (!set.equals(exp)) { errln("FAIL: complementAll(\"alan\")"); return; }
\r
667 exp.applyPattern("[a-z]");
\r
668 if (set.containsNone(exp)) { errln("FAIL: containsNone(UnicodeSet)"); }
\r
669 if (!set.containsSome(exp)) { errln("FAIL: containsSome(UnicodeSet)"); }
\r
670 exp.applyPattern("[aln]");
\r
671 if (!set.containsNone(exp)) { errln("FAIL: containsNone(UnicodeSet)"); }
\r
672 if (set.containsSome(exp)) { errln("FAIL: containsSome(UnicodeSet)"); }
\r
674 if (set.containsNone((char)0x61, (char)0x7A)) {
\r
675 errln("FAIL: containsNone(char, char)");
\r
677 if (!set.containsSome((char)0x61, (char)0x7A)) {
\r
678 errln("FAIL: containsSome(char, char)");
\r
680 if (!set.containsNone((char)0x41, (char)0x5A)) {
\r
681 errln("FAIL: containsNone(char, char)");
\r
683 if (set.containsSome((char)0x41, (char)0x5A)) {
\r
684 errln("FAIL: containsSome(char, char)");
\r
687 set.removeAll("liu");
\r
688 exp.applyPattern("[{ab}b-hj-kmo-tv-z]");
\r
689 if (!set.equals(exp)) { errln("FAIL: removeAll(\"liu\")"); return; }
\r
691 set.retainAll("star");
\r
692 exp.applyPattern("[rst]");
\r
693 if (!set.equals(exp)) { errln("FAIL: retainAll(\"star\")"); return; }
\r
695 set.retain((char)0x73);
\r
696 exp.applyPattern("[s]");
\r
697 if (!set.equals(exp)) { errln("FAIL: retain('s')"); return; }
\r
699 // ICU 2.6 coverage tests
\r
700 // public final UnicodeSet retain(String s);
\r
701 // public final UnicodeSet remove(int c);
\r
702 // public final UnicodeSet remove(String s);
\r
703 // public int hashCode();
\r
704 set.applyPattern("[a-z{ab}{cd}]");
\r
706 exp.applyPattern("[{cd}]");
\r
707 if (!set.equals(exp)) { errln("FAIL: retain(\"cd\")"); return; }
\r
709 set.applyPattern("[a-z{ab}{cd}]");
\r
710 set.remove((char)0x63);
\r
711 exp.applyPattern("[abd-z{ab}{cd}]");
\r
712 if (!set.equals(exp)) { errln("FAIL: remove('c')"); return; }
\r
715 exp.applyPattern("[abd-z{ab}]");
\r
716 if (!set.equals(exp)) { errln("FAIL: remove(\"cd\")"); return; }
\r
718 if (set.hashCode() != exp.hashCode()) {
\r
719 errln("FAIL: hashCode() unequal");
\r
722 if (set.hashCode() == exp.hashCode()) {
\r
723 errln("FAIL: hashCode() equal");
\r
727 //Cover addAll(Collection) and addAllTo(Collection)
\r
728 // Seems that there is a bug in addAll(Collection) operation
\r
729 // Ram also added a similar test to UtilityTest.java
\r
730 logln("Testing addAll(Collection) ... ");
\r
731 String[] array = {"a", "b", "c", "de"};
\r
732 List list = Arrays.asList(array);
\r
733 Set aset = new HashSet(list);
\r
734 logln(" *** The source set's size is: " + aset.size());
\r
738 if (set.size() != aset.size()) {
\r
739 errln("FAIL: After addAll, the UnicodeSet size expected " + aset.size() +
\r
740 ", " + set.size() + " seen instead!");
\r
742 logln("OK: After addAll, the UnicodeSet size got " + set.size());
\r
745 List list2 = new ArrayList();
\r
746 set.addAllTo(list2);
\r
748 //verify the result
\r
749 log(" *** The elements are: ");
\r
750 String s = set.toPattern(true);
\r
752 Iterator myiter = list2.iterator();
\r
753 while(myiter.hasNext()) {
\r
754 log(myiter.next().toString() + " ");
\r
756 logln(""); // a new line
\r
761 public void TestStrings() {
\r
762 // Object[][] testList = {
\r
763 // {I_EQUALS, UnicodeSet.fromAll("abc"),
\r
764 // new UnicodeSet("[a-c]")},
\r
766 // {I_EQUALS, UnicodeSet.from("ch").add('a','z').add("ll"),
\r
767 // new UnicodeSet("[{ll}{ch}a-z]")},
\r
769 // {I_EQUALS, UnicodeSet.from("ab}c"),
\r
770 // new UnicodeSet("[{ab\\}c}]")},
\r
772 // {I_EQUALS, new UnicodeSet('a','z').add('A', 'Z').retain('M','m').complement('X'),
\r
773 // new UnicodeSet("[[a-zA-Z]&[M-m]-[X]]")},
\r
776 // for (int i = 0; i < testList.length; ++i) {
\r
777 // expectRelation(testList[i][0], testList[i][1], testList[i][2], "(" + i + ")");
\r
780 UnicodeSet[][] testList = {
\r
781 {UnicodeSet.fromAll("abc"),
\r
782 new UnicodeSet("[a-c]")},
\r
784 {UnicodeSet.from("ch").add('a','z').add("ll"),
\r
785 new UnicodeSet("[{ll}{ch}a-z]")},
\r
787 {UnicodeSet.from("ab}c"),
\r
788 new UnicodeSet("[{ab\\}c}]")},
\r
790 {new UnicodeSet('a','z').add('A', 'Z').retain('M','m').complement('X'),
\r
791 new UnicodeSet("[[a-zA-Z]&[M-m]-[X]]")},
\r
794 for (int i = 0; i < testList.length; ++i) {
\r
795 if (!testList[i][0].equals(testList[i][1])) {
\r
796 errln("FAIL: sets unequal; see source code (" + i + ")");
\r
801 static final Integer
\r
802 I_ANY = new Integer(SortedSetRelation.ANY),
\r
803 I_CONTAINS = new Integer(SortedSetRelation.CONTAINS),
\r
804 I_DISJOINT = new Integer(SortedSetRelation.DISJOINT),
\r
805 I_NO_B = new Integer(SortedSetRelation.NO_B),
\r
806 I_ISCONTAINED = new Integer(SortedSetRelation.ISCONTAINED),
\r
807 I_EQUALS = new Integer(SortedSetRelation.EQUALS),
\r
808 I_NO_A = new Integer(SortedSetRelation.NO_A),
\r
809 I_NONE = new Integer(SortedSetRelation.NONE);
\r
811 public void TestSetRelation() {
\r
813 String[] choices = {"a", "b", "cd", "ef"};
\r
814 int limit = 1 << choices.length;
\r
816 SortedSet iset = new TreeSet();
\r
817 SortedSet jset = new TreeSet();
\r
819 for (int i = 0; i < limit; ++i) {
\r
820 pick(i, choices, iset);
\r
821 for (int j = 0; j < limit; ++j) {
\r
822 pick(j, choices, jset);
\r
823 checkSetRelation(iset, jset, "(" + i + ")");
\r
828 public void TestSetSpeed() {
\r
829 // skip unless verbose
\r
830 if (!isVerbose()) return;
\r
836 public void SetSpeed2(int size) {
\r
838 SortedSet iset = new TreeSet();
\r
839 SortedSet jset = new TreeSet();
\r
841 for (int i = 0; i < size*2; i += 2) { // only even values
\r
842 iset.add(new Integer(i));
\r
843 jset.add(new Integer(i));
\r
846 int iterations = 1000000 / size;
\r
848 logln("Timing comparison of Java vs Utility");
\r
849 logln("For about " + size + " objects that are almost all the same.");
\r
851 CheckSpeed(iset, jset, "when a = b", iterations);
\r
853 iset.add(new Integer(size + 1)); // add odd value in middle
\r
855 CheckSpeed(iset, jset, "when a contains b", iterations);
\r
856 CheckSpeed(jset, iset, "when b contains a", iterations);
\r
858 jset.add(new Integer(size - 1)); // add different odd value in middle
\r
860 CheckSpeed(jset, iset, "when a, b are disjoint", iterations);
\r
863 void CheckSpeed(SortedSet iset, SortedSet jset, String message, int iterations) {
\r
864 CheckSpeed2(iset, jset, message, iterations);
\r
865 CheckSpeed3(iset, jset, message, iterations);
\r
868 void CheckSpeed2(SortedSet iset, SortedSet jset, String message, int iterations) {
\r
872 // make sure code is loaded:
\r
873 x = iset.containsAll(jset);
\r
874 y = SortedSetRelation.hasRelation(iset, SortedSetRelation.CONTAINS, jset);
\r
875 if (x != y) errln("FAIL contains comparison");
\r
877 double start = System.currentTimeMillis();
\r
878 for (int i = 0; i < iterations; ++i) {
\r
879 x |= iset.containsAll(jset);
\r
881 double middle = System.currentTimeMillis();
\r
882 for (int i = 0; i < iterations; ++i) {
\r
883 y |= SortedSetRelation.hasRelation(iset, SortedSetRelation.CONTAINS, jset);
\r
885 double end = System.currentTimeMillis();
\r
887 double jtime = (middle - start)/iterations;
\r
888 double utime = (end - middle)/iterations;
\r
890 java.text.NumberFormat nf = java.text.NumberFormat.getPercentInstance();
\r
891 logln("Test contains: " + message + ": Java: " + jtime
\r
892 + ", Utility: " + utime + ", u:j: " + nf.format(utime/jtime));
\r
895 void CheckSpeed3(SortedSet iset, SortedSet jset, String message, int iterations) {
\r
899 // make sure code is loaded:
\r
900 x = iset.equals(jset);
\r
901 y = SortedSetRelation.hasRelation(iset, SortedSetRelation.EQUALS, jset);
\r
902 if (x != y) errln("FAIL equality comparison");
\r
905 double start = System.currentTimeMillis();
\r
906 for (int i = 0; i < iterations; ++i) {
\r
907 x |= iset.equals(jset);
\r
909 double middle = System.currentTimeMillis();
\r
910 for (int i = 0; i < iterations; ++i) {
\r
911 y |= SortedSetRelation.hasRelation(iset, SortedSetRelation.EQUALS, jset);
\r
913 double end = System.currentTimeMillis();
\r
915 double jtime = (middle - start)/iterations;
\r
916 double utime = (end - middle)/iterations;
\r
918 java.text.NumberFormat nf = java.text.NumberFormat.getPercentInstance();
\r
919 logln("Test equals: " + message + ": Java: " + jtime
\r
920 + ", Utility: " + utime + ", u:j: " + nf.format(utime/jtime));
\r
923 void pick(int bits, Object[] examples, SortedSet output) {
\r
925 for (int k = 0; k < 32; ++k) {
\r
926 if (((1<<k) & bits) != 0) output.add(examples[k]);
\r
930 public static final String[] RELATION_NAME = {
\r
936 "is-disjoint_with",
\r
940 boolean dumbHasRelation(Collection A, int filter, Collection B) {
\r
941 Collection ab = new TreeSet(A);
\r
943 if (ab.size() > 0 && (filter & SortedSetRelation.A_AND_B) == 0) return false;
\r
945 // A - B size == A.size - A&B.size
\r
946 if (A.size() > ab.size() && (filter & SortedSetRelation.A_NOT_B) == 0) return false;
\r
948 // B - A size == B.size - A&B.size
\r
949 if (B.size() > ab.size() && (filter & SortedSetRelation.B_NOT_A) == 0) return false;
\r
955 void checkSetRelation(SortedSet a, SortedSet b, String message) {
\r
956 for (int i = 0; i < 8; ++i) {
\r
958 boolean hasRelation = SortedSetRelation.hasRelation(a, i, b);
\r
959 boolean dumbHasRelation = dumbHasRelation(a, i, b);
\r
961 logln(message + " " + hasRelation + ":\t" + a + "\t" + RELATION_NAME[i] + "\t" + b);
\r
963 if (hasRelation != dumbHasRelation) {
\r
965 message + " " + dumbHasRelation + ":\t" + a + "\t" + RELATION_NAME[i] + "\t" + b);
\r
972 * Test the [:Latin:] syntax.
\r
974 public void TestScriptSet() {
\r
976 expectContainment("[:Latin:]", "aA", CharsToUnicodeString("\\u0391\\u03B1"));
\r
978 expectContainment("[:Greek:]", CharsToUnicodeString("\\u0391\\u03B1"), "aA");
\r
980 /* Jitterbug 1423 */
\r
981 expectContainment("[[:Common:][:Inherited:]]", CharsToUnicodeString("\\U00003099\\U0001D169\\u0000"), "aA");
\r
986 * Test property-based set patterns: the \p{...}, \P{...}, [:...:] and \N{...} syntax.
\r
988 public void TestPropertySet() {
\r
990 // Pattern, Chars IN, Chars NOT in
\r
1000 "\\P{ GENERAL Category = upper case letter }",
\r
1004 // Combining class: @since ICU 2.2
\r
1005 // Check both symbolic and numeric
\r
1010 "\\p{Canonical Combining Class = 11}",
\r
1014 "[:c c c = iota subscript :]",
\r
1018 // Bidi class: @since ICU 2.2
\r
1019 "\\p{bidiclass=lefttoright}",
\r
1023 // Binary properties: @since ICU 2.2
\r
1024 "\\p{ideographic}",
\r
1029 "q)*(", // )(and * were removed from math in Unicode 4.0.1
\r
1032 // JB#1767 \N{}, \p{ASCII}
\r
1034 "abc\u0000\u007F",
\r
1037 "[\\N{ latin small letter a }[:name= latin small letter z:]]",
\r
1053 "\u03D8\u03D9", // 3.2
\r
1056 "\\u1800\\u3400\\U0002f800",
\r
1057 "\\u0220\\u034f\\u30ff\\u33ff\\ufe73\\U00010000\\U00050000",
\r
1059 // JB#2350: Case_Sensitive
\r
1060 "[:Case Sensitive:]",
\r
1061 "A\u1FFC\\U00010410",
\r
1062 ";\u00B4\\U00010500",
\r
1065 // Regex compatibility test
\r
1066 "[-b]", // leading '-' is literal
\r
1070 "[^-b]", // leading '-' is literal
\r
1074 "[b-]", // trailing '-' is literal
\r
1078 "[^b-]", // trailing '-' is literal
\r
1082 "[a-b-]", // trailing '-' is literal
\r
1086 "[[a-q]&[p-z]-]", // trailing '-' is literal
\r
1090 "[\\s|\\)|:|$|\\>]", // from regex tests
\r
1094 "[\uDC00cd]", // JB#2906: isolated trail at start
\r
1096 "ab\uD800\\U00010000",
\r
1098 "[ab\uD800]", // JB#2906: isolated trail at start
\r
1100 "cd\uDC00\\U00010000",
\r
1102 "[ab\uD800cd]", // JB#2906: isolated lead in middle
\r
1104 "ef\uDC00\\U00010000",
\r
1106 "[ab\uDC00cd]", // JB#2906: isolated trail in middle
\r
1108 "ef\uD800\\U00010000",
\r
1110 "[:^lccc=0:]", // Lead canonical class
\r
1112 "abcd\u00c0\u00c5",
\r
1114 "[:^tccc=0:]", // Trail canonical class
\r
1115 "\u0300\u0301\u00c0\u00c5",
\r
1118 "[[:^lccc=0:][:^tccc=0:]]", // Lead and trail canonical class
\r
1119 "\u0300\u0301\u00c0\u00c5",
\r
1122 "[[:^lccc=0:]-[:^tccc=0:]]", // Stuff that starts with an accent but ends with a base (none right now)
\r
1124 "abcd\u0300\u0301\u00c0\u00c5",
\r
1126 "[[:ccc=0:]-[:lccc=0:]-[:tccc=0:]]", // Weirdos. Complete canonical class is zero, but both lead and trail are not
\r
1127 "\u0F73\u0F75\u0F81",
\r
1128 "abcd\u0300\u0301\u00c0\u00c5",
\r
1131 "A\\uE000\\uF8FF\\uFDC7\\U00010000\\U0010FFFD",
\r
1132 "\\u0888\\uFDD3\\uFFFE\\U00050005",
\r
1136 for (int i=0; i<DATA.length; i+=3) {
\r
1137 expectContainment(DATA[i], DATA[i+1], DATA[i+2]);
\r
1141 public void TestUnicodeSetStrings() {
\r
1142 UnicodeSet uset = new UnicodeSet("[a{bc}{cd}pqr\u0000]");
\r
1143 logln(uset + " ~ " + uset.getRegexEquivalent());
\r
1144 String[][] testStrings = {{"x", "none"},
\r
1152 {"dccbx", "none"},
\r
1154 for (int i = 0; i < testStrings.length; ++i) {
\r
1155 check(uset, testStrings[i][0], testStrings[i][1]);
\r
1160 private void check(UnicodeSet uset, String string, String desiredStatus) {
\r
1161 boolean shouldContainAll = desiredStatus.equals("all");
\r
1162 boolean shouldContainNone = desiredStatus.equals("none");
\r
1163 if (uset.containsAll(string) != shouldContainAll) {
\r
1164 errln("containsAll " + string + " should be " + shouldContainAll);
\r
1166 logln("containsAll " + string + " = " + shouldContainAll);
\r
1168 if (uset.containsNone(string) != shouldContainNone) {
\r
1169 errln("containsNone " + string + " should be " + shouldContainNone);
\r
1171 logln("containsNone " + string + " = " + shouldContainNone);
\r
1176 * Test cloning of UnicodeSet
\r
1178 public void TestClone() {
\r
1179 UnicodeSet s = new UnicodeSet("[abcxyz]");
\r
1180 UnicodeSet t = (UnicodeSet) s.clone();
\r
1181 expectContainment(t, "abc", "def");
\r
1185 * Test the indexOf() and charAt() methods.
\r
// Round-trips every index of the set through charAt() and indexOf(), then
// probes one out-of-range index and one character ('q') that the pattern
// does not include.
1187 public void TestIndexOf() {
\r
1188 UnicodeSet set = new UnicodeSet("[a-cx-y3578]");
\r
// indexOf(charAt(i)) must give back i for every valid index.
1189 for (int i=0; i<set.size(); ++i) {
\r
1190 int c = set.charAt(i);
\r
1191 if (set.indexOf(c) != i) {
\r
1192 errln("FAIL: charAt(" + i + ") = " + c +
\r
1193 " => indexOf() => " + set.indexOf(c));
\r
// set.size() is one past the last valid index.
// NOTE(review): the condition guarding the errln below is not visible in this view.
1196 int c = set.charAt(set.size());
\r
// c is an int, so String.valueOf(c) yields its decimal digits, not a character.
1198 errln("FAIL: charAt(<out of range>) = " +
\r
1199 Utility.escape(String.valueOf(c)));
\r
// 'q' lies outside every range of the pattern above.
// NOTE(review): the condition guarding the errln below is not visible in this view.
1201 int j = set.indexOf('q');
\r
1203 errln("FAIL: indexOf('q') = " + j);
\r
1207 public void TestContainsString() {
\r
1208 UnicodeSet x = new UnicodeSet("[a{bc}]");
\r
1209 if (x.contains("abc")) errln("FAIL");
\r
// Drives the bitmask-based helpers over every bitmask value below `limit`.
1212 public void TestExhaustive() {
\r
1213 // exhaustive tests. Simulate UnicodeSets with integers.
\r
1214 // That gives us very solid tests (except for large memory tests).
\r
// 128 == 2^7, so the masks below only ever use the low seven bits.
1216 char limit = (char)128;
\r
1218 for (char i = 0; i < limit; ++i) {
\r
// i is a char, so this concatenation logs the character itself rather than its number.
1219 logln("Testing " + i + ", " + bitsToSet(i));
\r
1220 _testComplement(i);
\r
1222 // AS LONG AS WE ARE HERE, check roundtrip
\r
1223 checkRoundTrip(bitsToSet(i));
\r
// NOTE(review): the body of this inner loop is not visible in this view.
1225 for (char j = 0; j < limit; ++j) {
\r
1235 * Make sure each script name and abbreviated name can be used
\r
1236 * to construct a UnicodeSet.
\r
// Builds "[:<name>:]" for each script code below UScript.CODE_LIMIT and checks
// that UnicodeSet accepts it.
1238 public void TestScriptNames() {
\r
1239 for (int i=0; i<UScript.CODE_LIMIT; ++i) {
\r
// j == 0 uses UScript.getName(i), j == 1 uses UScript.getShortName(i).
1240 for (int j=0; j<2; ++j) {
\r
1244 (j==0) ? UScript.getName(i) : UScript.getShortName(i);
\r
1245 pat = "[:" + name + ":]";
\r
1246 UnicodeSet set = new UnicodeSet(pat);
\r
1247 logln("Ok: " + pat + " -> " + set.toPattern(false));
\r
1248 } catch (IllegalArgumentException e) {
\r
// An empty pat here is blamed on UScript rather than on UnicodeSet.
// NOTE(review): presumably pat starts out empty and the name lookup threw before
// pat was assigned; the declaration of pat is not visible in this view.
1249 if (pat.length() == 0) {
\r
1250 errln("FAIL (in UScript): No name for script " + i);
\r
1252 errln("FAIL: Couldn't create " + pat);
\r
1260 * Test closure API.
\r
// Table-driven check of UnicodeSet.closeOver(), followed by two checks of the
// pattern-plus-options entry points.
1262 public void TestCloseOver() {
\r
// The selector is kept as a String; the loop below parses DATA[i] back with
// Integer.parseInt. NOTE(review): the table rows that would carry CASE are
// not visible in this view.
1263 String CASE = String.valueOf(UnicodeSet.CASE);
\r
1265 // selector, input, output
\r
1267 "[aq\u00DF{Bc}{bC}{Fi}]",
\r
1268 "[aAqQ\u00DF\u1E9E\uFB01{ss}{bc}{fi}]", // U+1E9E LATIN CAPITAL LETTER SHARP S is new in Unicode 5.1
\r
1271 "[\u01F1]", // 'DZ'
\r
1272 "[\u01F1\u01F2\u01F3]",
\r
1276 "[\u1FB4{\u03AC\u03B9}]",
\r
1283 "[a-z]","[A-Za-z\u017F\u212A]",
\r
1285 "[abc]","[A-Ca-c]",
\r
1287 "[ABC]","[A-Ca-c]",
\r
// s and t are reused for every row; applyPattern replaces their contents each time.
1290 UnicodeSet s = new UnicodeSet();
\r
1291 UnicodeSet t = new UnicodeSet();
\r
// The table is consumed three entries at a time: selector, input, expected output.
1292 for (int i=0; i<DATA.length; i+=3) {
\r
1293 int selector = Integer.parseInt(DATA[i]);
\r
1294 String pat = DATA[i+1];
\r
1295 String exp = DATA[i+2];
\r
1296 s.applyPattern(pat);
\r
1297 s.closeOver(selector);
\r
1298 t.applyPattern(exp);
\r
1299 if (s.equals(t)) {
\r
1300 logln("Ok: " + pat + ".closeOver(" + selector + ") => " + exp);
\r
1302 errln("FAIL: " + pat + ".closeOver(" + selector + ") => " +
\r
1303 s.toPattern(true) + ", expected " + exp);
\r
1307 // Test the pattern API
\r
// With the CASE option, both cases of a, b, c are expected in the set.
1308 s.applyPattern("[abc]", UnicodeSet.CASE);
\r
1309 expectContainment(s, "abcABC", "defDEF");
\r
// Same option on a negated pattern: both cases of a, b, c are expected to be excluded.
1310 s = new UnicodeSet("[^abc]", UnicodeSet.CASE);
\r
1311 expectContainment(s, "defDEF", "abcABC");
\r
// Parses a pattern that contains escaped rule-whitespace characters in two passes:
// pass 1 with the escapes intact, pass 2 with the pattern unescaped beforehand.
1314 public void TestEscapePattern() {
\r
1315 // The following pattern must contain at least one range "c-d"
\r
1316 // for which isRuleWhiteSpace(c) or isRuleWhiteSpace(d) is true.
\r
1318 "[\\uFEFF \\u200E-\\u20FF \\uFFF9-\\uFFFC \\U0001D173-\\U0001D17A \\U000F0000-\\U000FFFFD ]";
\r
// NOTE(review): presumably this literal is the `exp` value that toPattern(true) is
// compared against below; its declaration line is not visible in this view.
1320 "[\\u200E-\\u20FF\\uFEFF\\uFFF9-\\uFFFC\\U0001D173-\\U0001D17A\\U000F0000-\\U000FFFFD]";
\r
1321 // We test this with two passes; in the second pass we
\r
1322 // pre-unescape the pattern. Since U+200E is rule whitespace,
\r
1323 // this fails -- which is what we expect.
\r
1324 for (int pass=1; pass<=2; ++pass) {
\r
1325 String pat = pattern;
\r
1327 pat = Utility.unescape(pat);
\r
1329 // Pattern is only good for pass 1
\r
1330 boolean isPatternValid = (pass==1);
\r
1332 UnicodeSet set = null;
\r
1334 set = new UnicodeSet(pat);
\r
1335 } catch (IllegalArgumentException e) {
\r
// set is still null if the constructor threw, so (set != null) means "pattern accepted".
1338 if ((set != null) != isPatternValid){
\r
1339 errln("FAIL: applyPattern(" +
\r
1340 Utility.escape(pat) + ") => " + set);
\r
1343 if (set == null) {
\r
// NOTE(review): the check below tests U+0644 but the failure message reports U+0664;
// one of the two looks like a typo.
1346 if (set.contains((char)0x0644)){
\r
1347 errln("FAIL: " + Utility.escape(pat) + " contains(U+0664)");
\r
// The escaped pattern generated from the parsed set must match exp exactly.
1350 String newpat = set.toPattern(true);
\r
1351 if (newpat.equals(exp)) {
\r
1352 logln(Utility.escape(pat) + " => " + newpat);
\r
1354 errln("FAIL: " + Utility.escape(pat) + " => " + newpat);
\r
// Describe every range; a negative range start is reported as a failure.
1357 for (int i=0; i<set.getRangeCount(); ++i) {
\r
1358 StringBuffer str = new StringBuffer("Range ");
\r
// 0x30 is '0', so this appends the range index as a digit character.
1359 str.append((char)(0x30 + i))
\r
1361 UTF16.append(str, set.getRangeStart(i));
\r
1362 str.append(" - ");
\r
1363 UTF16.append(str, set.getRangeEnd(i));
\r
1364 String s = Utility.escape(str.toString() + " (" + set.getRangeStart(i) + " - " +
\r
1365 set.getRangeEnd(i) + ")");
\r
1366 if (set.getRangeStart(i) < 0) {
\r
1367 errln("FAIL: " + s);
\r
// Parses patterns containing $variables through a TokenSymbolTable and compares
// each result with the set built from the expected pattern.
1375 public void TestSymbolTable() {
\r
1376 // Multiple test cases can be set up here. Each test case
\r
1377 // is terminated by null:
\r
1378 // var, value, var, value,..., input pat., exp. output pat., null
\r
1380 "us", "a-z", "[0-1$us]", "[0-1a-z]", null,
\r
1381 "us", "[a-z]", "[0-1$us]", "[0-1[a-z]]", null,
\r
1382 "us", "\\[a\\-z\\]", "[0-1$us]", "[-01\\[\\]az]", null
\r
1385 for (int i=0; i<DATA.length; ++i) {
\r
// A fresh table per test case, so variables do not leak between cases.
1386 TokenSymbolTable sym = new TokenSymbolTable();
\r
1388 // Set up variables
\r
// Per the layout above, DATA[i+2] == null means DATA[i] and DATA[i+1] are the two
// patterns rather than another var/value pair.
// NOTE(review): the statement that advances i inside this loop is not visible in this view.
1389 while (DATA[i+2] != null) {
\r
1390 sym.add(DATA[i], DATA[i+1]);
\r
1394 // Input pattern and expected output pattern
\r
1395 String inpat = DATA[i], exppat = DATA[i+1];
\r
1398 ParsePosition pos = new ParsePosition(0);
\r
1399 UnicodeSet us = new UnicodeSet(inpat, pos, sym);
\r
// The parser must have consumed the whole input pattern.
1402 if (pos.getIndex() != inpat.length()) {
\r
1403 errln("Failed to read to end of string \""
\r
1404 + inpat + "\": read to "
\r
1405 + pos.getIndex() + ", length is "
\r
1406 + inpat.length());
\r
1409 UnicodeSet us2 = new UnicodeSet(exppat);
\r
1410 if (!us.equals(us2)) {
\r
1411 errln("Failed, got " + us + ", expected " + us2);
\r
1413 logln("Ok, got " + us);
\r
1416 //cover Unicode(String,ParsePosition,SymbolTable,int)
\r
// Same comparison again through the four-argument constructor with IGNORE_SPACE.
1417 ParsePosition inpos = new ParsePosition(0);
\r
1418 UnicodeSet inSet = new UnicodeSet(inpat, inpos, sym, UnicodeSet.IGNORE_SPACE);
\r
1419 UnicodeSet expSet = new UnicodeSet(exppat);
\r
1420 if (!inSet.equals(expSet)) {
\r
1421 errln("FAIL: Failed, got " + inSet + ", expected " + expSet);
\r
1423 logln("OK: got " + inSet);
\r
1429 * Test that Posix style character classes [:digit:], etc.
\r
1430 * have the Unicode definitions from TR 18.
\r
1432 public void TestPosixClasses() {
\r
1433 expectEqual("POSIX alpha", "[:alpha:]", "\\p{Alphabetic}");
\r
1434 expectEqual("POSIX lower", "[:lower:]", "\\p{lowercase}");
\r
1435 expectEqual("POSIX upper", "[:upper:]", "\\p{Uppercase}");
\r
1436 expectEqual("POSIX punct", "[:punct:]", "\\p{gc=Punctuation}");
\r
1437 expectEqual("POSIX digit", "[:digit:]", "\\p{gc=DecimalNumber}");
\r
1438 expectEqual("POSIX xdigit", "[:xdigit:]", "[\\p{DecimalNumber}\\p{HexDigit}]");
\r
1439 expectEqual("POSIX alnum", "[:alnum:]", "[\\p{Alphabetic}\\p{DecimalNumber}]");
\r
1440 expectEqual("POSIX space", "[:space:]", "\\p{Whitespace}");
\r
1441 expectEqual("POSIX blank", "[:blank:]", "[\\p{Whitespace}-[\\u000a\\u000B\\u000c\\u000d\\u0085\\p{LineSeparator}\\p{ParagraphSeparator}]]");
\r
1442 expectEqual("POSIX cntrl", "[:cntrl:]", "\\p{Control}");
\r
1443 expectEqual("POSIX graph", "[:graph:]", "[^\\p{Whitespace}\\p{Control}\\p{Surrogate}\\p{Unassigned}]");
\r
1444 expectEqual("POSIX print", "[:print:]", "[[:graph:][:blank:]-[\\p{Control}]]");
\r
1447 public void TestHangulSyllable() {
\r
1448 final UnicodeSet lvt = new UnicodeSet("[:Hangul_Syllable_Type=LVT_Syllable:]");
\r
1449 assertNotEquals("LVT count", new UnicodeSet(), lvt);
\r
1450 logln(lvt + ": " + lvt.size());
\r
1451 final UnicodeSet lv = new UnicodeSet("[:Hangul_Syllable_Type=LV_Syllable:]");
\r
1452 assertNotEquals("LV count", new UnicodeSet(), lv);
\r
1453 logln(lv + ": " + lv.size());
\r
1457 * Test that frozen classes disallow changes. For 4217
\r
// Runs checkModification() on the same set twice: once expecting frozen
// behaviour and once expecting thawed behaviour.
1459 public void TestFrozen() {
\r
1460 UnicodeSet test = new UnicodeSet("[[:whitespace:]A]");
\r
// NOTE(review): presumably `test` is frozen on a line that is not visible in this view;
// the first call below expects every modification to be rejected.
1462 checkModification(test, true);
\r
// checkModification() uses cloneAsThawed() when its flag is false, so this call
// exercises a modifiable copy.
1463 checkModification(test, false);
\r
// Applies one mutating UnicodeSet operation per loop index to a copy of `original`
// and checks that an UnsupportedOperationException is reported exactly when
// isFrozen is true, and that the copy changed exactly when it is false.
// NOTE(review): the label, try and switch header lines, and the way the loop
// terminates, are not visible in this view.
1466 public void checkModification(UnicodeSet original, boolean isFrozen) {
\r
1468 for (int i = 0; ;++i) {
\r
// clone() is used for the frozen case, cloneAsThawed() for the modifiable case.
1469 UnicodeSet test = (UnicodeSet) (isFrozen ? original.clone() : original.cloneAsThawed());
\r
// Starts out true and is cleared further down, after the case list.
1470 boolean gotException = true;
\r
1471 boolean checkEquals = true;
\r
1474 case 0: test.add(0); break;
\r
1475 case 1: test.add(0,1); break;
\r
1476 case 2: test.add("a"); break;
\r
1477 case 3: List a = new ArrayList(); a.add("a"); test.addAll(a); break;
\r
1478 case 4: test.addAll("ab"); break;
\r
1479 case 5: test.addAll(new UnicodeSet("[ab]")); break;
\r
1480 case 6: test.applyIntPropertyValue(0,0); break;
\r
1481 case 7: test.applyPattern("[ab]"); break;
\r
1482 case 8: test.applyPattern("[ab]", true); break;
\r
1483 case 9: test.applyPattern("[ab]", 0); break;
\r
1484 case 10: test.applyPropertyAlias("hex","true"); break;
\r
1485 case 11: test.applyPropertyAlias("hex", "true", null); break;
\r
1486 case 12: test.closeOver(UnicodeSet.CASE); break;
\r
// The equality check is switched off for compact() only.
// NOTE(review): presumably because compact() does not alter membership; confirm.
1487 case 13: test.compact(); checkEquals = false; break;
\r
1488 case 14: test.complement(0); break;
\r
1489 case 15: test.complement(0,0); break;
\r
1490 case 16: test.complement("ab"); break;
\r
1491 case 17: test.complementAll("ab"); break;
\r
1492 case 18: test.complementAll(new UnicodeSet("[ab]")); break;
\r
1493 case 19: test.remove(' '); break;
\r
1494 case 20: test.remove(' ','a'); break;
\r
1495 case 21: test.remove(" "); break;
\r
1496 case 22: test.removeAll(" a"); break;
\r
1497 case 23: test.removeAll(new UnicodeSet("[\\ a]")); break;
\r
1498 case 24: test.retain(' '); break;
\r
1499 case 25: test.retain(' ','a'); break;
\r
1500 case 26: test.retain(" "); break;
\r
1501 case 27: test.retainAll(" a"); break;
\r
1502 case 28: test.retainAll(new UnicodeSet("[\\ a]")); break;
\r
1503 case 29: test.set(0,1); break;
\r
1504 case 30: test.set(new UnicodeSet("[ab]")); break;
\r
1506 default: continue main; // so we don't keep having to change the endpoint, and gaps are not skipped.
\r
// Reached only when the selected operation returned without throwing.
1509 gotException = false;
\r
1510 } catch (UnsupportedOperationException e) {
\r
1513 if (isFrozen && !gotException) errln(i + ") attempt to modify frozen object didn't result in an exception");
\r
1514 if (!isFrozen && gotException) errln(i + ") attempt to modify thawed object did result in an exception");
\r
// A thawed copy must now differ from the original; a frozen copy must still equal it.
1515 if (checkEquals) {
\r
1516 if (test.equals(original)) {
\r
1517 if (!isFrozen) errln(i + ") attempt to modify thawed object didn't change the object");
\r
1518 } else { // unequal
\r
1519 if (isFrozen) errln(i + ") attempt to modify frozen object changed the object");
\r
// Fixed UnicodeSet patterns that TestPrettyPrinting() runs through checkPrettySet()
// before it starts on random ranges.
// NOTE(review): only some entries of this array are visible in this view.
1525 String[] prettyData = {
\r
1526 "[\\uD7DE-\\uD90C \\uDCB5-\\uDD9F]", // special case
\r
1529 "[:linebreak=AL:]",
\r
// Checks that PrettyPrinter output parses back to the same set, first for the
// fixed prettyData patterns and then for a set mutated with random ranges.
// NOTE(review): the declaration of i and the opening of the try region are not
// visible in this view.
1532 public void TestPrettyPrinting() {
\r
1534 PrettyPrinter pp = new PrettyPrinter();
\r
1537 for (; i < prettyData.length; ++i) {
\r
1538 UnicodeSet test = new UnicodeSet(prettyData[i]);
\r
1539 checkPrettySet(pp, i, test);
\r
// A fixed seed keeps the random cases identical from run to run.
1541 Random random = new Random(0);
\r
1542 UnicodeSet test = new UnicodeSet();
\r
1544 // To keep runtimes under control, the number of random test cases
\r
1545 // to try depends on the test framework's exhaustive setting.
\r
1546 // params.inclusions = 5: default exhaustive value
\r
1547 // params.inclusions = 10: max exhaustive value.
\r
1548 int iterations = 50;
\r
1549 if (params.inclusion > 5) {
\r
1550 iterations = (params.inclusion-5) * 200;
\r
// This loop has no initialiser: i carries over from the prettyData loop above.
1552 for (; i < iterations; ++i) {
\r
// Range start: absolute value of a Gaussian sample scaled by 0x10000.
1553 double start = random.nextGaussian() * 0x10000;
\r
1554 if (start < 0) start = - start;
\r
1555 if (start > 0x10FFFF) {
\r
// Range length: absolute value of a Gaussian sample scaled by 0x100.
1558 double end = random.nextGaussian() * 0x100;
\r
1559 if (end < 0) end = -end;
\r
1560 end = start + end;
\r
1561 if (end > 0x10FFFF) {
\r
// The same set is toggled cumulatively, so each iteration tests a different set.
1564 test.complement((int)start, (int)end);
\r
1565 checkPrettySet(pp, i, test);
\r
// NOTE(review): every RuntimeException from the try region is reported with this one
// fixed warning, whatever its actual cause.
1567 }catch(RuntimeException ex){
\r
1568 warnln("Could not load Collator");
\r
1572 private void checkPrettySet(PrettyPrinter pp, int i, UnicodeSet test) {
\r
1573 String pretty = pp.toPattern(test);
\r
1574 UnicodeSet retry = new UnicodeSet(pretty);
\r
1575 if (!test.equals(retry)) {
\r
1576 errln(i + ". Failed test: " + test + " != " + pretty);
\r
1578 logln(i + ". Worked for " + truncate(test.toString()) + " => " + truncate(pretty));
\r
1582 private String truncate(String string) {
\r
1583 if (string.length() <= 100) return string;
\r
1584 return string.substring(0,97) + "...";
\r
1587 public class TokenSymbolTable implements SymbolTable {
\r
1588 HashMap contents = new HashMap();
\r
1591 * (Non-SymbolTable API) Add the given variable and value to
\r
1592 * the table. Variable should NOT contain leading '$'.
\r
1594 public void add(String var, String value) {
\r
1595 char[] buffer = new char[value.length()];
\r
1596 value.getChars(0, value.length(), buffer, 0);
\r
1601 * (Non-SymbolTable API) Add the given variable and value to
\r
1602 * the table. Variable should NOT contain leading '$'.
\r
1604 public void add(String var, char[] body) {
\r
1605 logln("TokenSymbolTable: add \"" + var + "\" => \"" +
\r
1606 new String(body) + "\"");
\r
1607 contents.put(var, body);
\r
1611 * @see com.ibm.icu.text.SymbolTable#lookup(java.lang.String)
\r
1613 public char[] lookup(String s) {
\r
1614 logln("TokenSymbolTable: lookup \"" + s + "\" => \"" +
\r
1615 new String((char[]) contents.get(s)) + "\"");
\r
1616 return (char[])contents.get(s);
\r
1620 * @see com.ibm.icu.text.SymbolTable#lookupMatcher(int)
\r
1622 public UnicodeMatcher lookupMatcher(int ch) {
\r
1627 * @see com.ibm.icu.text.SymbolTable#parseReference(java.lang.String,
\r
1628 java.text.ParsePosition, int)
\r
1630 public String parseReference(String text, ParsePosition pos, int
\r
1633 int start = pos.getIndex();
\r
1635 for (i = start; i < limit; i += UTF16.getCharCount(cp)) {
\r
1636 cp = UTF16.charAt(text, i);
\r
1637 if (!com.ibm.icu.lang.UCharacter.isUnicodeIdentifierPart(cp)) {
\r
1641 logln("TokenSymbolTable: parse \"" + text + "\" from " +
\r
1642 start + " to " + i +
\r
1643 " => \"" + text.substring(start,i) + "\"");
\r
1645 return text.substring(start,i);
\r
// Three spellings of the same set (a, b, c and U+10000) must behave alike.
// NOTE(review): the declaration line of DATA is not visible in this view.
1649 public void TestSurrogate() {
\r
1651 // These should all behave identically
\r
// Escaped surrogate pair, interpreted by the UnicodeSet pattern parser.
1652 "[abc\\uD800\\uDC00]",
\r
// Surrogate pair written with Java source escapes, i.e. already in the string.
1653 "[abc\uD800\uDC00]",
\r
// Single escaped supplementary code point.
1654 "[abc\\U00010000]",
\r
1656 for (int i=0; i<DATA.length; ++i) {
\r
1657 logln("Test pattern " + i + " :" + Utility.escape(DATA[i]));
\r
1658 UnicodeSet set = new UnicodeSet(DATA[i]);
\r
// U+10000 must be contained as one code point; its two halves on their own
// (separated by ';' in the string below) must not be.
1659 expectContainment(set,
\r
1660 CharsToUnicodeString("abc\\U00010000"),
\r
1661 "\uD800;\uDC00"); // split apart surrogate-pair
\r
// a, b, c and U+10000 make four elements.
1662 if (set.size() != 4) {
\r
1663 errln(Utility.escape("FAIL: " + DATA[i] + ".size() == " +
\r
1664 set.size() + ", expected 4"));
\r
1669 public void TestContains() {
\r
1670 int limit = 256; // combinations to test
\r
1671 for (int i = 0; i < limit; ++i) {
\r
1672 logln("Trying: " + i);
\r
1673 UnicodeSet x = bitsToSet(i);
\r
1674 for (int j = 0; j < limit; ++j) {
\r
1675 UnicodeSet y = bitsToSet(j);
\r
1676 boolean containsNone = (i & j) == 0;
\r
1677 boolean containsAll = (i & j) == j;
\r
1678 boolean equals = i == j;
\r
1679 if (containsNone != x.containsNone(y)) {
\r
1680 x.containsNone(y); // repeat for debugging
\r
1681 errln("FAILED: " + x + " containsSome " + y);
\r
1683 if (containsAll != x.containsAll(y)) {
\r
1684 x.containsAll(y); // repeat for debugging
\r
1685 errln("FAILED: " + x + " containsAll " + y);
\r
1687 if (equals != x.equals(y)) {
\r
1688 x.equals(y); // repeat for debugging
\r
1689 errln("FAILED: " + x + " equals " + y);
\r
// Complement check for one bitmask: x keeps the original set for the message,
// z is the working copy.
// NOTE(review): the statement that modifies z and the condition guarding the
// errln calls are not visible in this view.
1695 void _testComplement(int a) {
\r
1696 UnicodeSet x = bitsToSet(a);
\r
1697 UnicodeSet z = bitsToSet(a);
\r
1699 int c = setToBits(z);
\r
// NOTE(review): these messages say "add:" although the label passed to
// checkCanonicalRep below says "complement"; likely a copy-paste slip.
1701 errln("FAILED: add: ~" + x + " != " + z);
\r
1702 errln("FAILED: add: ~" + a + " != " + c);
\r
1704 checkCanonicalRep(z, "complement " + a);
\r
// Union check for two bitmasks: the bitmask of the result set must equal a | b.
// NOTE(review): the statement that combines y into z is not visible in this view.
1707 void _testAdd(int a, int b) {
\r
1708 UnicodeSet x = bitsToSet(a);
\r
1709 UnicodeSet y = bitsToSet(b);
\r
// z starts as a second copy of a's set; x stays untouched for the message.
1710 UnicodeSet z = bitsToSet(a);
\r
1712 int c = setToBits(z);
\r
1713 if (c != (a | b)) {
\r
1714 errln(Utility.escape("FAILED: add: " + x + " | " + y + " != " + z));
\r
1715 errln("FAILED: add: " + a + " | " + b + " != " + c);
\r
1717 checkCanonicalRep(z, "add " + a + "," + b);
\r
// Intersection check for two bitmasks: the bitmask of the result set must equal a & b.
// NOTE(review): the statement that combines y into z is not visible in this view.
1720 void _testRetain(int a, int b) {
\r
1721 UnicodeSet x = bitsToSet(a);
\r
1722 UnicodeSet y = bitsToSet(b);
\r
// z starts as a second copy of a's set; x stays untouched for the message.
1723 UnicodeSet z = bitsToSet(a);
\r
1725 int c = setToBits(z);
\r
1726 if (c != (a & b)) {
\r
1727 errln("FAILED: retain: " + x + " & " + y + " != " + z);
\r
1728 errln("FAILED: retain: " + a + " & " + b + " != " + c);
\r
1730 checkCanonicalRep(z, "retain " + a + "," + b);
\r
// Difference check for two bitmasks: the bitmask of the result set must equal a & ~b.
// NOTE(review): the statement that combines y into z is not visible in this view.
1733 void _testRemove(int a, int b) {
\r
1734 UnicodeSet x = bitsToSet(a);
\r
1735 UnicodeSet y = bitsToSet(b);
\r
// z starts as a second copy of a's set; x stays untouched for the message.
1736 UnicodeSet z = bitsToSet(a);
\r
1738 int c = setToBits(z);
\r
1739 if (c != (a &~ b)) {
\r
1740 errln("FAILED: remove: " + x + " &~ " + y + " != " + z);
\r
1741 errln("FAILED: remove: " + a + " &~ " + b + " != " + c);
\r
1743 checkCanonicalRep(z, "remove " + a + "," + b);
\r
1746 void _testXor(int a, int b) {
\r
1747 UnicodeSet x = bitsToSet(a);
\r
1748 UnicodeSet y = bitsToSet(b);
\r
1749 UnicodeSet z = bitsToSet(a);
\r
1750 z.complementAll(y);
\r
1751 int c = setToBits(z);
\r
1752 if (c != (a ^ b)) {
\r
1753 errln("FAILED: complement: " + x + " ^ " + y + " != " + z);
\r
1754 errln("FAILED: complement: " + a + " ^ " + b + " != " + c);
\r
1756 checkCanonicalRep(z, "complement " + a + "," + b);
\r
1760 * Check that ranges are monotonically increasing and non-
\r
// Sanity-checks the range list of `set`; `msg` names the operation that
// produced the set and is echoed in every failure message.
// NOTE(review): the range-count condition and the declaration and update of
// `last` are not visible in this view.
1763 void checkCanonicalRep(UnicodeSet set, String msg) {
\r
1764 int n = set.getRangeCount();
\r
1766 errln("FAIL result of " + msg +
\r
1767 ": range count should be >= 0 but is " +
\r
1768 n + " for " + Utility.escape(set.toString()));
\r
1772 for (int i=0; i<n; ++i) {
\r
1773 int start = set.getRangeStart(i);
\r
1774 int end = set.getRangeEnd(i);
\r
// Every range must be well-formed on its own.
1775 if (start > end) {
\r
1776 errln("FAIL result of " + msg +
\r
// Ranges are reported 1-based.
1777 ": range " + (i+1) +
\r
1778 " start > end: " + start + ", " + end +
\r
1779 " for " + Utility.escape(set.toString()));
\r
// From the second range on, the start must lie strictly above `last`.
1781 if (i > 0 && start <= last) {
\r
1782 errln("FAIL result of " + msg +
\r
1783 ": range " + (i+1) +
\r
1784 " overlaps previous range: " + start + ", " + end +
\r
1785 " for " + Utility.escape(set.toString()));
\r
1792 * Convert a bitmask to a UnicodeSet.
\r
1794 UnicodeSet bitsToSet(int a) {
\r
1795 UnicodeSet result = new UnicodeSet();
\r
1796 for (int i = 0; i < 32; ++i) {
\r
1797 if ((a & (1<<i)) != 0) {
\r
1798 result.add((char)i,(char)i);
\r
1806 * Convert a UnicodeSet to a bitmask. Only the characters
\r
1807 * U+0000 to U+001F are represented in the bitmask.
\r
// Inverse direction of bitsToSet(): probes the set for each character code 0..31.
// NOTE(review): the accumulator declaration, the line that sets a bit and the
// return statement are not visible in this view.
1809 static int setToBits(UnicodeSet x) {
\r
// 32 iterations, one per bit of an int.
1811 for (int i = 0; i < 32; ++i) {
\r
1812 if (x.contains((char)i)) {
\r
1820 * Return the representation of an inversion list based UnicodeSet
\r
1821 * as a pairs list. Ranges are listed in ascending Unicode order.
\r
1822 * For example, the set [a-zA-M3] is represented as "33AMaz".
\r
// Flattens the ranges of `set` into a string holding two chars per range:
// the range start followed by the range end.
1824 static String getPairs(UnicodeSet set) {
\r
1825 StringBuffer pairs = new StringBuffer();
\r
1826 for (int i=0; i<set.getRangeCount(); ++i) {
\r
1827 int start = set.getRangeStart(i);
\r
1828 int end = set.getRangeEnd(i);
\r
// A range ending above U+FFFF cannot be narrowed to char without loss.
// NOTE(review): the first statement of this branch is not visible in this view.
1829 if (end > 0xFFFF) {
\r
// Pushing i to the range count makes this the last iteration.
1831 i = set.getRangeCount(); // Should be unnecessary
\r
// Both ends are narrowed to char here.
1833 pairs.append((char)start).append((char)end);
\r
1835 return pairs.toString();
\r
1839 * Test function. Make sure that the sets have the right relation
\r
1842 void expectRelation(Object relationObj, Object set1Obj, Object set2Obj, String message) {
\r
1843 int relation = ((Integer) relationObj).intValue();
\r
1844 UnicodeSet set1 = (UnicodeSet) set1Obj;
\r
1845 UnicodeSet set2 = (UnicodeSet) set2Obj;
\r
1847 // by-the-by, check the iterator
\r
1848 checkRoundTrip(set1);
\r
1849 checkRoundTrip(set2);
\r
1851 boolean contains = set1.containsAll(set2);
\r
1852 boolean isContained = set2.containsAll(set1);
\r
1853 boolean disjoint = set1.containsNone(set2);
\r
1854 boolean equals = set1.equals(set2);
\r
1856 UnicodeSet intersection = new UnicodeSet(set1).retainAll(set2);
\r
1857 UnicodeSet minus12 = new UnicodeSet(set1).removeAll(set2);
\r
1858 UnicodeSet minus21 = new UnicodeSet(set2).removeAll(set1);
\r
1860 // test basic properties
\r
1862 if (contains != (intersection.size() == set2.size())) {
\r
1863 errln("FAIL contains1" + set1.toPattern(true) + ", " + set2.toPattern(true));
\r
1866 if (contains != (intersection.equals(set2))) {
\r
1867 errln("FAIL contains2" + set1.toPattern(true) + ", " + set2.toPattern(true));
\r
1870 if (isContained != (intersection.size() == set1.size())) {
\r
1871 errln("FAIL isContained1" + set1.toPattern(true) + ", " + set2.toPattern(true));
\r
1874 if (isContained != (intersection.equals(set1))) {
\r
1875 errln("FAIL isContained2" + set1.toPattern(true) + ", " + set2.toPattern(true));
\r
1878 if ((contains && isContained) != equals) {
\r
1879 errln("FAIL equals" + set1.toPattern(true) + ", " + set2.toPattern(true));
\r
1882 if (disjoint != (intersection.size() == 0)) {
\r
1883 errln("FAIL disjoint" + set1.toPattern(true) + ", " + set2.toPattern(true));
\r
1886 // Now see if the expected relation is true
\r
1887 int status = (minus12.size() != 0 ? 4 : 0)
\r
1888 | (intersection.size() != 0 ? 2 : 0)
\r
1889 | (minus21.size() != 0 ? 1 : 0);
\r
1891 if (status != relation) {
\r
1892 errln("FAIL relation incorrect" + message
\r
1893 + "; desired = " + RELATION_NAME[relation]
\r
1894 + "; found = " + RELATION_NAME[status]
\r
1895 + "; set1 = " + set1.toPattern(true)
\r
1896 + "; set2 = " + set2.toPattern(true)
\r
1902 * Basic consistency check for a few items.
\r
1903 * That the iterator works, and that we can create a pattern and
\r
1904 * get the same thing back
\r
1907 void checkRoundTrip(UnicodeSet s) {
\r
1908 String pat = s.toPattern(false);
\r
1909 UnicodeSet t = copyWithIterator(s, false);
\r
1910 checkEqual(s, t, "iterator roundtrip");
\r
1912 t = copyWithIterator(s, true); // try range
\r
1913 checkEqual(s, t, "iterator roundtrip");
\r
1915 t = new UnicodeSet(pat);
\r
1916 checkEqual(s, t, "toPattern(false)");
\r
1918 pat = s.toPattern(true);
\r
1919 t = new UnicodeSet(pat);
\r
1920 checkEqual(s, t, "toPattern(true)");
\r
// Rebuilds a set by walking `s` with a UnicodeSetIterator, either range by
// range (nextRange) or element by element (next).
// NOTE(review): the branch on withRange, the handling of string elements and
// the return statement are not visible in this view.
1923 UnicodeSet copyWithIterator(UnicodeSet s, boolean withRange) {
\r
1924 UnicodeSet t = new UnicodeSet();
\r
1925 UnicodeSetIterator it = new UnicodeSetIterator(s);
\r
// Range walk: each step yields codepoint..codepointEnd.
1927 while (it.nextRange()) {
\r
// IS_STRING marks an element that is a string rather than a code point.
1928 if (it.codepoint == UnicodeSetIterator.IS_STRING) {
\r
1931 t.add(it.codepoint, it.codepointEnd);
\r
// Element walk: each step yields a single code point or a string.
1935 while (it.next()) {
\r
1936 if (it.codepoint == UnicodeSetIterator.IS_STRING) {
\r
1939 t.add(it.codepoint);
\r
// Reports a failure that shows both sets as escaped patterns when s and t differ.
// NOTE(review): the end of the message and the return statements are not
// visible in this view.
1946 boolean checkEqual(UnicodeSet s, UnicodeSet t, String message) {
\r
1947 if (!s.equals(t)) {
\r
1948 errln("FAIL " + message
\r
1949 + "; source = " + s.toPattern(true)
\r
1950 + "; result = " + t.toPattern(true)
\r
1957 void expectEqual(String name, String pat1, String pat2) {
\r
1958 UnicodeSet set1, set2;
\r
1960 set1 = new UnicodeSet(pat1);
\r
1961 set2 = new UnicodeSet(pat2);
\r
1962 } catch (IllegalArgumentException e) {
\r
1963 errln("FAIL: Couldn't create UnicodeSet from pattern for \"" + name + "\": " + e.getMessage());
\r
1966 if(!set1.equals(set2)) {
\r
1967 errln("FAIL: Sets built from patterns differ for \"" + name + "\"");
\r
1972 * Expect the given set to contain the characters in charsIn and
\r
1973 * to not contain those in charsOut.
\r
1975 void expectContainment(String pat, String charsIn, String charsOut) {
\r
1978 set = new UnicodeSet(pat);
\r
1979 } catch (IllegalArgumentException e) {
\r
1980 errln("FAIL: Couldn't create UnicodeSet from pattern \"" +
\r
1981 pat + "\": " + e.getMessage());
\r
1984 expectContainment(set, charsIn, charsOut);
\r
1988 * Expect the given set to contain the characters in charsIn and
\r
1989 * to not contain those in charsOut.
\r
// Checks membership code point by code point. Both strings go through
// Utility.unescape() first; a null string skips that half of the check.
1991 void expectContainment(UnicodeSet set, String charsIn, String charsOut) {
\r
// Collects the code points that violate the expectation.
1992 StringBuffer bad = new StringBuffer();
\r
1993 if (charsIn != null) {
\r
1994 charsIn = Utility.unescape(charsIn);
\r
// Advance by whole code points so supplementary characters stay intact.
1995 for (int i=0; i<charsIn.length(); ) {
\r
1996 int c = UTF16.charAt(charsIn,i);
\r
1997 i += UTF16.getCharCount(c);
\r
1998 if (!set.contains(c)) {
\r
1999 UTF16.append(bad,c);
\r
2002 if (bad.length() > 0) {
\r
2003 errln(Utility.escape("FAIL: set " + set + " does not contain " + bad +
\r
2004 ", expected containment of " + charsIn));
\r
2006 logln(Utility.escape("Ok: set " + set + " contains " + charsIn));
\r
2009 if (charsOut != null) {
\r
2010 charsOut = Utility.unescape(charsOut);
\r
// NOTE(review): confirm that `bad` is cleared before this second pass; no reset
// is visible in this view.
2012 for (int i=0; i<charsOut.length(); ) {
\r
2013 int c = UTF16.charAt(charsOut,i);
\r
2014 i += UTF16.getCharCount(c);
\r
2015 if (set.contains(c)) {
\r
2016 UTF16.append(bad, c);
\r
2019 if (bad.length() > 0) {
\r
2020 errln(Utility.escape("FAIL: set " + set + " contains " + bad +
\r
2021 ", expected non-containment of " + charsOut));
\r
2023 logln(Utility.escape("Ok: set " + set + " does not contain " + charsOut));
\r
2028 void expectPattern(UnicodeSet set,
\r
2030 String expectedPairs) {
\r
2031 set.applyPattern(pattern);
\r
2032 if (!getPairs(set).equals(expectedPairs)) {
\r
2033 errln("FAIL: applyPattern(\"" + pattern +
\r
2034 "\") => pairs \"" +
\r
2035 Utility.escape(getPairs(set)) + "\", expected \"" +
\r
2036 Utility.escape(expectedPairs) + "\"");
\r
2038 logln("Ok: applyPattern(\"" + pattern +
\r
2039 "\") => pairs \"" +
\r
2040 Utility.escape(getPairs(set)) + "\"");
\r
// Checks that set.toPattern(true) equals expPat, then checks containment of
// each entry of expStrings.
// NOTE(review): the expPat parameter line and the statements executed when the
// NOT marker is found are not visible in this view.
2044 void expectToPattern(UnicodeSet set,
\r
2046 String[] expStrings) {
\r
2047 String pat = set.toPattern(true);
\r
2048 if (pat.equals(expPat)) {
\r
2049 logln("Ok: toPattern() => \"" + pat + "\"");
\r
2051 errln("FAIL: toPattern() => \"" + pat + "\", expected \"" + expPat + "\"");
\r
2054 if (expStrings == null) {
\r
// `in` is the containment result currently expected for the entries being checked.
2057 boolean in = true;
\r
2058 for (int i=0; i<expStrings.length; ++i) {
\r
// Identity comparison on purpose: only the NOT constant itself is the marker.
2059 if (expStrings[i] == NOT) { // sic; pointer comparison
\r
2063 boolean contained = set.contains(expStrings[i]);
\r
2064 if (contained == in) {
\r
2065 logln("Ok: " + expPat +
\r
2066 (contained ? " contains {" : " does not contain {") +
\r
2067 Utility.escape(expStrings[i]) + "}");
\r
2069 errln("FAIL: " + expPat +
\r
2070 (contained ? " contains {" : " does not contain {") +
\r
2071 Utility.escape(expStrings[i]) + "}");
\r
2076 void expectPairs(UnicodeSet set, String expectedPairs) {
\r
2077 if (!getPairs(set).equals(expectedPairs)) {
\r
2078 errln("FAIL: Expected pair list \"" +
\r
2079 Utility.escape(expectedPairs) + "\", got \"" +
\r
2080 Utility.escape(getPairs(set)) + "\"");
\r
2083 static final String CharsToUnicodeString(String s) {
\r
2084 return Utility.unescape(s);
\r