2 *******************************************************************************
\r
3 * Copyright (C) 1996-2010, International Business Machines Corporation and *
\r
4 * others. All Rights Reserved. *
\r
5 *******************************************************************************
\r
7 package com.ibm.icu.dev.test.util;
\r
9 import java.io.PrintWriter;
\r
10 import java.io.StringWriter;
\r
11 import java.text.ParsePosition;
\r
12 import java.util.ArrayList;
\r
13 import java.util.Collection;
\r
14 import java.util.Comparator;
\r
15 import java.util.HashMap;
\r
16 import java.util.Iterator;
\r
17 import java.util.List;
\r
18 import java.util.Map;
\r
19 import java.util.TreeMap;
\r
20 import java.util.regex.Pattern;
\r
22 import com.ibm.icu.dev.test.util.CollectionUtilities.InverseMatcher;
\r
23 import com.ibm.icu.dev.test.util.CollectionUtilities.ObjectMatcher;
\r
24 import com.ibm.icu.impl.Utility;
\r
25 import com.ibm.icu.text.SymbolTable;
\r
26 import com.ibm.icu.text.UFormat;
\r
27 import com.ibm.icu.text.UTF16;
\r
28 import com.ibm.icu.text.UnicodeMatcher;
\r
29 import com.ibm.icu.text.UnicodeSet;
\r
30 import com.ibm.icu.text.UnicodeSetIterator;
\r
32 public abstract class UnicodeProperty extends UnicodeLabel {
\r
34 public static final UnicodeSet UNASSIGNED = new UnicodeSet("[:gc=unassigned:]").freeze();
\r
35 public static final UnicodeSet PRIVATE_USE = new UnicodeSet("[:gc=privateuse:]").freeze();
\r
36 public static final UnicodeSet SURROGATE = new UnicodeSet("[:gc=surrogate:]").freeze();
\r
37 public static final UnicodeSet SPECIALS = new UnicodeSet(UNASSIGNED).addAll(PRIVATE_USE).addAll(SURROGATE).freeze();
\r
38 public static final int SAMPLE_UNASSIGNED = UNASSIGNED.charAt(0);
\r
39 public static final int SAMPLE_PRIVATE_USE = 0xE000;
\r
40 public static final int SAMPLE_SURROGATE = 0xD800;
\r
41 public static final UnicodeSet STUFF_TO_TEST = new UnicodeSet(SPECIALS).complement()
\r
42 .add(SAMPLE_UNASSIGNED).add(SAMPLE_PRIVATE_USE).add(SAMPLE_SURROGATE).freeze();
\r
43 public static final UnicodeSet STUFF_TO_TEST_WITH_UNASSIGNED = new UnicodeSet(STUFF_TO_TEST).addAll(UNASSIGNED).freeze();
\r
46 public static boolean DEBUG = false;
\r
48 public static String CHECK_NAME = "FC_NFKC_Closure";
\r
50 public static int CHECK_VALUE = 0x037A;
\r
52 private String name;
\r
54 private String firstNameAlias = null;
\r
58 private Map valueToFirstValueAlias = null;
\r
60 private boolean hasUniformUnassigned = true;
\r
63 * Name: Unicode_1_Name Name: ISO_Comment Name: Name Name: Unicode_1_Name
\r
67 public static final int UNKNOWN = 0, BINARY = 2, EXTENDED_BINARY = 3,
\r
68 ENUMERATED = 4, EXTENDED_ENUMERATED = 5, CATALOG = 6,
\r
69 EXTENDED_CATALOG = 7, MISC = 8, EXTENDED_MISC = 9, STRING = 10,
\r
70 EXTENDED_STRING = 11, NUMERIC = 12, EXTENDED_NUMERIC = 13,
\r
71 START_TYPE = 2, LIMIT_TYPE = 14, EXTENDED_MASK = 1,
\r
72 CORE_MASK = ~EXTENDED_MASK, BINARY_MASK = (1 << BINARY)
\r
73 | (1 << EXTENDED_BINARY), STRING_MASK = (1 << STRING)
\r
74 | (1 << EXTENDED_STRING),
\r
75 STRING_OR_MISC_MASK = (1 << STRING) | (1 << EXTENDED_STRING)
\r
76 | (1 << MISC) | (1 << EXTENDED_MISC),
\r
77 ENUMERATED_OR_CATALOG_MASK = (1 << ENUMERATED)
\r
78 | (1 << EXTENDED_ENUMERATED) | (1 << CATALOG)
\r
79 | (1 << EXTENDED_CATALOG);
\r
81 private static final String[] TYPE_NAMES = { "Unknown", "Unknown",
\r
82 "Binary", "Extended Binary", "Enumerated", "Extended Enumerated",
\r
83 "Catalog", "Extended Catalog", "Miscellaneous",
\r
84 "Extended Miscellaneous", "String", "Extended String", "Numeric",
\r
85 "Extended Numeric", };
\r
87 public static String getTypeName(int propType) {
\r
88 return TYPE_NAMES[propType];
\r
91 public final String getName() {
\r
95 public final int getType() {
\r
99 public String getTypeName() {
\r
100 return TYPE_NAMES[type];
\r
103 public final boolean isType(int mask) {
\r
104 return ((1 << type) & mask) != 0;
\r
107 protected final void setName(String string) {
\r
108 if (string == null)
\r
109 throw new IllegalArgumentException("Name must not be null");
\r
113 protected final void setType(int i) {
\r
117 public String getVersion() {
\r
118 return _getVersion();
\r
121 public String getValue(int codepoint) {
\r
122 if (DEBUG && CHECK_VALUE == codepoint && CHECK_NAME.equals(getName())) {
\r
123 String value = _getValue(codepoint);
\r
124 System.out.println(getName() + "(" + Utility.hex(codepoint) + "):"
\r
125 + (getType() == STRING ? Utility.hex(value) : value));
\r
128 return _getValue(codepoint);
\r
131 // public String getValue(int codepoint, boolean isShort) {
\r
132 // return getValue(codepoint);
\r
135 public List getNameAliases(List result) {
\r
136 if (result == null)
\r
137 result = new ArrayList(1);
\r
138 return _getNameAliases(result);
\r
141 public List getValueAliases(String valueAlias, List result) {
\r
142 if (result == null)
\r
143 result = new ArrayList(1);
\r
144 result = _getValueAliases(valueAlias, result);
\r
145 if (!result.contains(valueAlias)) { // FIX && type < NUMERIC
\r
146 result = _getValueAliases(valueAlias, result); // for debugging
\r
147 throw new IllegalArgumentException("Internal error: " + getName()
\r
148 + " doesn't contain " + valueAlias + ": "
\r
149 + new BagFormatter().join(result));
\r
154 public List getAvailableValues(List result) {
\r
155 if (result == null)
\r
156 result = new ArrayList(1);
\r
157 return _getAvailableValues(result);
\r
160 protected abstract String _getVersion();
\r
162 protected abstract String _getValue(int codepoint);
\r
164 protected abstract List _getNameAliases(List result);
\r
166 protected abstract List _getValueAliases(String valueAlias, List result);
\r
168 protected abstract List _getAvailableValues(List result);
\r
171 public final List getNameAliases() {
\r
172 return getNameAliases(null);
\r
175 public final List getValueAliases(String valueAlias) {
\r
176 return getValueAliases(valueAlias, null);
\r
179 public final List getAvailableValues() {
\r
180 return getAvailableValues(null);
\r
183 public final String getValue(int codepoint, boolean getShortest) {
\r
184 String result = getValue(codepoint);
\r
185 if (type >= MISC || result == null || !getShortest)
\r
187 return getFirstValueAlias(result);
\r
190 public final String getFirstNameAlias() {
\r
191 if (firstNameAlias == null) {
\r
192 firstNameAlias = (String) getNameAliases().get(0);
\r
194 return firstNameAlias;
\r
197 public final String getFirstValueAlias(String value) {
\r
198 if (valueToFirstValueAlias == null)
\r
199 _getFirstValueAliasCache();
\r
200 return valueToFirstValueAlias.get(value).toString();
\r
203 private void _getFirstValueAliasCache() {
\r
205 maxFirstValueAliasWidth = 0;
\r
206 valueToFirstValueAlias = new HashMap(1);
\r
207 Iterator it = getAvailableValues().iterator();
\r
208 while (it.hasNext()) {
\r
209 String value = (String) it.next();
\r
210 String first = (String) getValueAliases(value).get(0);
\r
211 if (first == null) { // internal error
\r
212 throw new IllegalArgumentException(
\r
213 "Value not in value aliases: " + value);
\r
215 if (DEBUG && CHECK_NAME.equals(getName())) {
\r
216 System.out.println("First Alias: " + getName() + ": " + value
\r
218 + new BagFormatter().join(getValueAliases(value)));
\r
220 valueToFirstValueAlias.put(value, first);
\r
221 if (value.length() > maxValueWidth) {
\r
222 maxValueWidth = value.length();
\r
224 if (first.length() > maxFirstValueAliasWidth) {
\r
225 maxFirstValueAliasWidth = first.length();
\r
230 private int maxValueWidth = -1;
\r
232 private int maxFirstValueAliasWidth = -1;
\r
234 public int getMaxWidth(boolean getShortest) {
\r
235 if (maxValueWidth < 0)
\r
236 _getFirstValueAliasCache();
\r
238 return maxFirstValueAliasWidth;
\r
239 return maxValueWidth;
\r
242 public final UnicodeSet getSet(String propertyValue) {
\r
243 return getSet(propertyValue, null);
\r
246 public final UnicodeSet getSet(PatternMatcher matcher) {
\r
247 return getSet(matcher, null);
\r
250 public final UnicodeSet getSet(String propertyValue, UnicodeSet result) {
\r
251 return getSet(new SimpleMatcher(propertyValue,
\r
252 isType(STRING_OR_MISC_MASK) ? null : PROPERTY_COMPARATOR),
\r
256 private UnicodeMap unicodeMap = null;
\r
258 public static final String UNUSED = "??";
\r
260 public final UnicodeSet getSet(PatternMatcher matcher, UnicodeSet result) {
\r
261 if (result == null)
\r
262 result = new UnicodeSet();
\r
263 boolean uniformUnassigned = hasUniformUnassigned();
\r
264 if (isType(STRING_OR_MISC_MASK)) {
\r
265 for (UnicodeSetIterator usi = getStuffToTest(uniformUnassigned); usi.next();) { // int i = 0; i <= 0x10FFFF; ++i
\r
266 int i = usi.codepoint;
\r
267 String value = getValue(i);
\r
268 if (value != null && matcher.matches(value)) {
\r
272 return addUntested(result, uniformUnassigned);
\r
274 List temp = new ArrayList(1); // to avoid reallocating...
\r
275 UnicodeMap um = getUnicodeMap_internal();
\r
276 Iterator it = um.getAvailableValues(null).iterator();
\r
277 main: while (it.hasNext()) {
\r
278 String value = (String) it.next();
\r
280 Iterator it2 = getValueAliases(value, temp).iterator();
\r
281 while (it2.hasNext()) {
\r
282 String value2 = (String) it2.next();
\r
283 // System.out.println("Values:" + value2);
\r
284 if (matcher.matches(value2)
\r
285 || matcher.matches(toSkeleton(value2))) {
\r
286 um.keySet(value, result);
\r
295 * public UnicodeSet getMatchSet(UnicodeSet result) { if (result == null)
\r
296 * result = new UnicodeSet(); addAll(matchIterator, result); return result; }
\r
298 * public void setMatchSet(UnicodeSet set) { matchIterator = new
\r
299 * UnicodeSetIterator(set); }
\r
303 * Utility for debugging
\r
305 public static String getStack() {
\r
306 Exception e = new Exception();
\r
307 StringWriter sw = new StringWriter();
\r
308 PrintWriter pw = new PrintWriter(sw);
\r
309 e.printStackTrace(pw);
\r
311 return "Showing Stack with fake " + sw.getBuffer().toString();
\r
314 // TODO use this instead of plain strings
\r
315 public static class Name implements Comparable {
\r
316 private String skeleton;
\r
318 private String pretty;
\r
320 public final int RAW = 0, TITLE = 1, NORMAL = 2;
\r
322 public Name(String name, int style) {
\r
325 if (style == RAW) {
\r
326 skeleton = pretty = name;
\r
328 pretty = regularize(name, style == TITLE);
\r
329 skeleton = toSkeleton(pretty);
\r
333 public int compareTo(Object o) {
\r
334 return skeleton.compareTo(((Name) o).skeleton);
\r
337 public boolean equals(Object o) {
\r
338 return skeleton.equals(((Name) o).skeleton);
\r
341 public int hashCode() {
\r
342 return skeleton.hashCode();
\r
345 public String toString() {
\r
351 * @return the unicode map
\r
353 public UnicodeMap getUnicodeMap() {
\r
354 return getUnicodeMap(false);
\r
358 * @return the unicode map
\r
360 public UnicodeMap getUnicodeMap(boolean getShortest) {
\r
362 return (UnicodeMap) getUnicodeMap_internal().cloneAsThawed();
\r
363 UnicodeMap result = new UnicodeMap();
\r
364 boolean uniformUnassigned = hasUniformUnassigned();
\r
366 for (UnicodeSetIterator usi = getStuffToTest(uniformUnassigned); usi.next();) { // int i = 0; i <= 0x10FFFF; ++i
\r
367 int i = usi.codepoint;
\r
368 // if (DEBUG && i == 0x41) System.out.println(i + "\t" +
\r
370 String value = getValue(i, true);
\r
371 result.put(i, value);
\r
373 return addUntested(result, uniformUnassigned);
\r
377 * @return the unicode map
\r
379 protected UnicodeMap getUnicodeMap_internal() {
\r
380 if (unicodeMap == null)
\r
381 unicodeMap = _getUnicodeMap();
\r
385 protected UnicodeMap _getUnicodeMap() {
\r
386 UnicodeMap result = new UnicodeMap();
\r
387 HashMap myIntern = new HashMap();
\r
388 boolean uniformUnassigned = hasUniformUnassigned();
\r
390 for (UnicodeSetIterator usi = getStuffToTest(uniformUnassigned); usi.next();) { // int i = 0; i <= 0x10FFFF; ++i
\r
391 int i = usi.codepoint;
\r
392 // if (DEBUG && i == 0x41) System.out.println(i + "\t" +
\r
394 String value = getValue(i);
\r
395 String iValue = (String) myIntern.get(value);
\r
396 if (iValue == null)
\r
397 myIntern.put(value, iValue = value);
\r
398 result.put(i, iValue);
\r
400 addUntested(result, uniformUnassigned);
\r
403 for (UnicodeSetIterator usi = getStuffToTest(uniformUnassigned); usi.next();) { // int i = 0; i <= 0x10FFFF; ++i
\r
404 int i = usi.codepoint;
\r
405 // if (DEBUG && i == 0x41) System.out.println(i + "\t" +
\r
407 String value = getValue(i);
\r
408 String resultValue = (String) result.getValue(i);
\r
409 if (!value.equals(resultValue)) {
\r
410 throw new RuntimeException("Value failure at: "
\r
415 if (DEBUG && CHECK_NAME.equals(getName())) {
\r
416 System.out.println(getName() + ":\t" + getClass().getName() + "\t"
\r
418 System.out.println(getStack());
\r
419 System.out.println(result);
\r
424 private static UnicodeSetIterator getStuffToTest(boolean uniformUnassigned) {
\r
425 return new UnicodeSetIterator(uniformUnassigned ? STUFF_TO_TEST : STUFF_TO_TEST_WITH_UNASSIGNED);
\r
429 * Really ought to create a Collection UniqueList, that forces uniqueness.
\r
432 public static Collection addUnique(Object obj, Collection result) {
\r
433 if (obj != null && !result.contains(obj))
\r
439 * Utility for managing property & non-string value aliases
\r
441 public static final Comparator PROPERTY_COMPARATOR = new Comparator() {
\r
442 public int compare(Object o1, Object o2) {
\r
443 return compareNames((String) o1, (String) o2);
\r
448 * Utility for managing property & non-string value aliases
\r
452 public static boolean equalNames(String a, String b) {
\r
457 return toSkeleton(a).equals(toSkeleton(b));
\r
461 * Utility for managing property & non-string value aliases
\r
464 public static int compareNames(String a, String b) {
\r
471 return toSkeleton(a).compareTo(toSkeleton(b));
\r
475 * Utility for managing property & non-string value aliases
\r
477 // TODO account for special names, tibetan, hangul
\r
478 public static String toSkeleton(String source) {
\r
479 if (source == null)
\r
481 StringBuffer skeletonBuffer = new StringBuffer();
\r
482 boolean gotOne = false;
\r
483 // remove spaces, '_', '-'
\r
484 // we can do this with char, since no surrogates are involved
\r
485 for (int i = 0; i < source.length(); ++i) {
\r
486 char ch = source.charAt(i);
\r
487 if (i > 0 && (ch == '_' || ch == ' ' || ch == '-')) {
\r
490 char ch2 = Character.toLowerCase(ch);
\r
493 skeletonBuffer.append(ch2);
\r
495 skeletonBuffer.append(ch);
\r
500 return source; // avoid string creation
\r
501 return skeletonBuffer.toString();
\r
504 // get the name skeleton
\r
505 public static String toNameSkeleton(String source) {
\r
506 if (source == null)
\r
508 StringBuffer result = new StringBuffer();
\r
509 // remove spaces, medial '-'
\r
510 // we can do this with char, since no surrogates are involved
\r
511 for (int i = 0; i < source.length(); ++i) {
\r
512 char ch = source.charAt(i);
\r
513 if (('0' <= ch && ch <= '9') || ('A' <= ch && ch <= 'Z')
\r
514 || ch == '<' || ch == '>') {
\r
516 } else if (ch == ' ') {
\r
518 } else if (ch == '-') {
\r
519 // only copy non-medials AND trailing O-E
\r
521 || i == source.length() - 1
\r
522 || source.charAt(i - 1) == ' '
\r
523 || source.charAt(i + 1) == ' '
\r
524 || (i == source.length() - 2
\r
525 && source.charAt(i - 1) == 'O' && source
\r
526 .charAt(i + 1) == 'E')) {
\r
527 System.out.println("****** EXCEPTION " + source);
\r
530 // otherwise don't copy
\r
532 throw new IllegalArgumentException("Illegal Name Char: U+"
\r
533 + Utility.hex(ch) + ", " + ch);
\r
536 return result.toString();
\r
540 * These routines use the Java functions, because they only need to act on
\r
541 * ASCII Changes space, - into _, inserts _ between lower and UPPER.
\r
543 public static String regularize(String source, boolean titlecaseStart) {
\r
544 if (source == null)
\r
547 * if (source.equals("noBreak")) { // HACK if (titlecaseStart) return
\r
548 * "NoBreak"; return source; }
\r
550 StringBuffer result = new StringBuffer();
\r
552 boolean haveFirstCased = true;
\r
553 for (int i = 0; i < source.length(); ++i) {
\r
554 char c = source.charAt(i);
\r
555 if (c == ' ' || c == '-' || c == '_') {
\r
557 haveFirstCased = true;
\r
560 haveFirstCased = true;
\r
561 int cat = Character.getType(c);
\r
562 if (lastCat == Character.LOWERCASE_LETTER
\r
563 && cat == Character.UPPERCASE_LETTER) {
\r
564 result.append('_');
\r
567 && (cat == Character.LOWERCASE_LETTER
\r
568 || cat == Character.TITLECASE_LETTER || cat == Character.UPPERCASE_LETTER)) {
\r
569 if (titlecaseStart) {
\r
570 c = Character.toUpperCase(c);
\r
572 haveFirstCased = false;
\r
577 return result.toString();
\r
581 * Utility function for comparing codepoint to string without generating new
\r
586 * @return true if the codepoint equals the string
\r
588 public static final boolean equals(int codepoint, String other) {
\r
589 if (other == null) return false;
\r
590 if (other.length() == 1) {
\r
591 return codepoint == other.charAt(0);
\r
593 if (other.length() == 2) {
\r
594 return other.equals(UTF16.valueOf(codepoint));
\r
600 * Utility function for comparing objects that may be null
\r
603 public static final <T extends Object> boolean equals(T a, T b) {
\r
604 return a == null ? b == null
\r
605 : b == null ? false
\r
610 * Utility that should be on UnicodeSet
\r
615 static public void addAll(UnicodeSetIterator source, UnicodeSet result) {
\r
616 while (source.nextRange()) {
\r
617 if (source.codepoint == UnicodeSetIterator.IS_STRING) {
\r
618 result.add(source.string);
\r
620 result.add(source.codepoint, source.codepointEnd);
\r
626 * Really ought to create a Collection UniqueList, that forces uniqueness.
\r
629 public static Collection addAllUnique(Collection source, Collection result) {
\r
630 for (Iterator it = source.iterator(); it.hasNext();) {
\r
631 addUnique(it.next(), result);
\r
637 * Really ought to create a Collection UniqueList, that forces uniqueness.
\r
640 public static Collection addAllUnique(Object[] source, Collection result) {
\r
641 for (int i = 0; i < source.length; ++i) {
\r
642 addUnique(source[i], result);
\r
647 static public class Factory {
\r
648 static boolean DEBUG = false;
\r
650 Map canonicalNames = new TreeMap();
\r
652 Map skeletonNames = new TreeMap();
\r
654 Map propertyCache = new HashMap(1);
\r
656 public final Factory add(UnicodeProperty sp) {
\r
657 canonicalNames.put(sp.getName(), sp);
\r
658 List c = sp.getNameAliases(new ArrayList(1));
\r
659 Iterator it = c.iterator();
\r
660 while (it.hasNext()) {
\r
661 skeletonNames.put(toSkeleton((String) it.next()), sp);
\r
666 public final UnicodeProperty getProperty(String propertyAlias) {
\r
667 return (UnicodeProperty) skeletonNames
\r
668 .get(toSkeleton(propertyAlias));
\r
671 public final List getAvailableNames() {
\r
672 return getAvailableNames(null);
\r
675 public final List getAvailableNames(List result) {
\r
676 if (result == null)
\r
677 result = new ArrayList(1);
\r
678 Iterator it = canonicalNames.keySet().iterator();
\r
679 while (it.hasNext()) {
\r
680 addUnique(it.next(), result);
\r
685 public final List getAvailableNames(int propertyTypeMask) {
\r
686 return getAvailableNames(propertyTypeMask, null);
\r
689 public final List getAvailableNames(int propertyTypeMask, List result) {
\r
690 if (result == null)
\r
691 result = new ArrayList(1);
\r
692 Iterator it = canonicalNames.keySet().iterator();
\r
693 while (it.hasNext()) {
\r
694 String item = (String) it.next();
\r
695 UnicodeProperty property = getProperty(item);
\r
697 System.out.println("Properties: " + item + ","
\r
698 + property.getType());
\r
699 if (!property.isType(propertyTypeMask)) {
\r
700 // System.out.println("Masking: " + property.getType() + ","
\r
701 // + propertyTypeMask);
\r
704 addUnique(property.getName(), result);
\r
709 InversePatternMatcher inverseMatcher = new InversePatternMatcher();
\r
712 * Format is: propname ('=' | '!=') propvalue ( '|' propValue )*
\r
714 public final UnicodeSet getSet(String propAndValue,
\r
715 PatternMatcher matcher, UnicodeSet result) {
\r
716 int equalPos = propAndValue.indexOf('=');
\r
717 String prop = propAndValue.substring(0, equalPos);
\r
718 String value = propAndValue.substring(equalPos + 1);
\r
719 boolean negative = false;
\r
720 if (prop.endsWith("!")) {
\r
721 prop = prop.substring(0, prop.length() - 1);
\r
724 prop = prop.trim();
\r
725 UnicodeProperty up = getProperty(prop);
\r
726 if (matcher == null) {
\r
727 matcher = new SimpleMatcher(value, up
\r
728 .isType(STRING_OR_MISC_MASK) ? null
\r
729 : PROPERTY_COMPARATOR);
\r
732 inverseMatcher.set(matcher);
\r
733 matcher = inverseMatcher;
\r
735 return up.getSet(matcher.set(value), result);
\r
738 public final UnicodeSet getSet(String propAndValue,
\r
739 PatternMatcher matcher) {
\r
740 return getSet(propAndValue, matcher, null);
\r
743 public final UnicodeSet getSet(String propAndValue) {
\r
744 return getSet(propAndValue, null, null);
\r
747 public final SymbolTable getSymbolTable(String prefix) {
\r
748 return new PropertySymbolTable(prefix);
\r
751 private class MyXSymbolTable extends UnicodeSet.XSymbolTable {
\r
752 public boolean applyPropertyAlias(String propertyName,
\r
753 String propertyValue, UnicodeSet result) {
\r
755 System.out.println(propertyName + "=" + propertyValue);
\r
756 UnicodeProperty prop = getProperty(propertyName);
\r
760 UnicodeSet x = prop.getSet(propertyValue, result);
\r
761 return x.size() != 0;
\r
765 public final UnicodeSet.XSymbolTable getXSymbolTable() {
\r
766 return new MyXSymbolTable();
\r
769 private class PropertySymbolTable implements SymbolTable {
\r
770 static final boolean DEBUG = false;
\r
772 private String prefix;
\r
774 RegexMatcher regexMatcher = new RegexMatcher();
\r
776 PropertySymbolTable(String prefix) {
\r
777 this.prefix = prefix;
\r
780 public char[] lookup(String s) {
\r
782 System.out.println("\t(" + prefix + ")Looking up " + s);
\r
783 // ensure, again, that prefix matches
\r
784 int start = prefix.length();
\r
785 if (!s.regionMatches(true, 0, prefix, 0, start))
\r
788 int pos = s.indexOf(':', start);
\r
789 if (pos < 0) { // should never happen
\r
790 throw new IllegalArgumentException(
\r
791 "Internal Error: missing =: " + s + "\r\n");
\r
793 UnicodeProperty prop = getProperty(s.substring(start, pos));
\r
794 if (prop == null) {
\r
795 throw new IllegalArgumentException("Invalid Property in: "
\r
796 + s + "\r\nUse " + showSet(getAvailableNames()));
\r
798 String value = s.substring(pos + 1);
\r
800 if (value.startsWith("\u00AB")) { // regex!
\r
801 set = prop.getSet(regexMatcher.set(value.substring(1, value
\r
804 set = prop.getSet(value);
\r
806 if (set.size() == 0) {
\r
807 throw new IllegalArgumentException(
\r
808 "Empty Property-Value in: " + s + "\r\nUse "
\r
809 + showSet(prop.getAvailableValues()));
\r
812 System.out.println("\t(" + prefix + ")Returning "
\r
813 + set.toPattern(true));
\r
814 return set.toPattern(true).toCharArray(); // really ugly
\r
817 private String showSet(List list) {
\r
818 StringBuffer result = new StringBuffer("[");
\r
819 boolean first = true;
\r
820 for (Iterator it = list.iterator(); it.hasNext();) {
\r
822 result.append(", ");
\r
825 result.append(it.next().toString());
\r
827 result.append("]");
\r
828 return result.toString();
\r
831 public UnicodeMatcher lookupMatcher(int ch) {
\r
835 public String parseReference(String text, ParsePosition pos,
\r
838 System.out.println("\t(" + prefix + ")Parsing <"
\r
839 + text.substring(pos.getIndex(), limit) + ">");
\r
840 int start = pos.getIndex();
\r
841 // ensure that it starts with 'prefix'
\r
843 .regionMatches(true, start, prefix, 0, prefix.length()))
\r
845 start += prefix.length();
\r
846 // now see if it is of the form identifier:identifier
\r
847 int i = getIdentifier(text, start, limit);
\r
850 String prop = text.substring(start, i);
\r
851 String value = "true";
\r
853 if (text.charAt(i) == ':') {
\r
855 if (text.charAt(i + 1) == '\u00AB') { // regular
\r
857 j = text.indexOf('\u00BB', i + 2) + 1; // include
\r
863 j = getIdentifier(text, i + 1, limit);
\r
865 value = text.substring(i + 1, j);
\r
871 System.out.println("\t(" + prefix + ")Parsed <" + prop
\r
872 + ">=<" + value + ">");
\r
873 return prefix + prop + ":" + value;
\r
876 private int getIdentifier(String text, int start, int limit) {
\r
878 System.out.println("\tGetID <"
\r
879 + text.substring(start, limit) + ">");
\r
882 for (i = start; i < limit; i += UTF16.getCharCount(cp)) {
\r
883 cp = UTF16.charAt(text, i);
\r
884 if (!com.ibm.icu.lang.UCharacter
\r
885 .isUnicodeIdentifierPart(cp)
\r
891 System.out.println("\tGotID <" + text.substring(start, i)
\r
898 public static class FilteredProperty extends UnicodeProperty {
\r
899 private UnicodeProperty property;
\r
901 protected StringFilter filter;
\r
903 protected UnicodeSetIterator matchIterator = new UnicodeSetIterator(
\r
904 new UnicodeSet(0, 0x10FFFF));
\r
906 protected HashMap backmap;
\r
908 boolean allowValueAliasCollisions = false;
\r
910 public FilteredProperty(UnicodeProperty property, StringFilter filter) {
\r
911 this.property = property;
\r
912 this.filter = filter;
\r
915 public StringFilter getFilter() {
\r
919 public UnicodeProperty setFilter(StringFilter filter) {
\r
920 this.filter = filter;
\r
924 List temp = new ArrayList(1);
\r
926 public List _getAvailableValues(List result) {
\r
928 return filter.addUnique(property.getAvailableValues(temp), result);
\r
931 public List _getNameAliases(List result) {
\r
933 return filter.addUnique(property.getNameAliases(temp), result);
\r
936 public String _getValue(int codepoint) {
\r
937 return filter.remap(property.getValue(codepoint));
\r
940 public List _getValueAliases(String valueAlias, List result) {
\r
941 if (backmap == null) {
\r
942 backmap = new HashMap(1);
\r
944 Iterator it = property.getAvailableValues(temp).iterator();
\r
945 while (it.hasNext()) {
\r
946 String item = (String) it.next();
\r
947 String mappedItem = filter.remap(item);
\r
948 if (backmap.get(mappedItem) != null
\r
949 && !allowValueAliasCollisions) {
\r
950 throw new IllegalArgumentException(
\r
951 "Filter makes values collide! " + item + ", "
\r
954 backmap.put(mappedItem, item);
\r
957 valueAlias = (String) backmap.get(valueAlias);
\r
959 return filter.addUnique(property.getValueAliases(valueAlias, temp),
\r
963 public String _getVersion() {
\r
964 return property.getVersion();
\r
967 public boolean isAllowValueAliasCollisions() {
\r
968 return allowValueAliasCollisions;
\r
971 public FilteredProperty setAllowValueAliasCollisions(boolean b) {
\r
972 allowValueAliasCollisions = b;
\r
978 public static abstract class StringFilter implements Cloneable {
\r
979 public abstract String remap(String original);
\r
981 public final List addUnique(Collection source, List result) {
\r
982 if (result == null)
\r
983 result = new ArrayList(1);
\r
984 Iterator it = source.iterator();
\r
985 while (it.hasNext()) {
\r
986 UnicodeProperty.addUnique(remap((String) it.next()), result);
\r
991 * public Object clone() { try { return super.clone(); } catch
\r
992 * (CloneNotSupportedException e) { throw new
\r
993 * IllegalStateException("Should never happen."); } }
\r
997 public static class MapFilter extends StringFilter {
\r
998 private Map valueMap;
\r
1000 public MapFilter(Map valueMap) {
\r
1001 this.valueMap = valueMap;
\r
1004 public String remap(String original) {
\r
1005 Object changed = valueMap.get(original);
\r
1006 return changed == null ? original : (String) changed;
\r
1009 public Map getMap() {
\r
1014 public interface PatternMatcher extends ObjectMatcher {
\r
1015 public PatternMatcher set(String pattern);
\r
1018 public static class InversePatternMatcher extends InverseMatcher implements
\r
1020 PatternMatcher other;
\r
1022 public PatternMatcher set(PatternMatcher toInverse) {
\r
1023 other = toInverse;
\r
1027 public boolean matches(Object value) {
\r
1028 return !other.matches(value);
\r
1031 public PatternMatcher set(String pattern) {
\r
1032 other.set(pattern);
\r
1037 public static class SimpleMatcher implements PatternMatcher {
\r
1038 Comparator comparator;
\r
1042 public SimpleMatcher(String pattern, Comparator comparator) {
\r
1043 this.comparator = comparator;
\r
1044 this.pattern = pattern;
\r
1047 public boolean matches(Object value) {
\r
1048 if (comparator == null)
\r
1049 return pattern.equals(value);
\r
1050 return comparator.compare(pattern, value) == 0;
\r
1053 public PatternMatcher set(String pattern) {
\r
1054 this.pattern = pattern;
\r
1059 public static class RegexMatcher implements UnicodeProperty.PatternMatcher {
\r
1060 private java.util.regex.Matcher matcher;
\r
1062 public UnicodeProperty.PatternMatcher set(String pattern) {
\r
1063 matcher = Pattern.compile(pattern).matcher("");
\r
1067 public boolean matches(Object value) {
\r
1068 matcher.reset(value.toString());
\r
1069 return matcher.find();
\r
1073 public static abstract class BaseProperty extends UnicodeProperty {
\r
1074 private static final String[] NO_VALUES = {"No", "N", "F", "False"};
\r
1076 private static final String[] YES_VALUES = {"Yes", "Y", "T", "True"};
\r
1081 private static final String[][] YES_NO_ALIASES = new String[][] {YES_VALUES, NO_VALUES};
\r
1083 protected List propertyAliases = new ArrayList(1);
\r
1085 protected Map toValueAliases;
\r
1087 protected String version;
\r
1089 public BaseProperty setMain(String alias, String shortAlias,
\r
1090 int propertyType, String version) {
\r
1092 setType(propertyType);
\r
1093 propertyAliases.add(shortAlias);
\r
1094 propertyAliases.add(alias);
\r
1095 if (propertyType == BINARY) {
\r
1096 addValueAliases(YES_NO_ALIASES, false);
\r
1098 this.version = version;
\r
1102 public String _getVersion() {
\r
1106 public List _getNameAliases(List result) {
\r
1107 addAllUnique(propertyAliases, result);
\r
1111 public BaseProperty addValueAliases(String[][] valueAndAlternates,
\r
1112 boolean errorIfCant) {
\r
1113 if (toValueAliases == null)
\r
1114 _fixValueAliases();
\r
1115 for (int i = 0; i < valueAndAlternates.length; ++i) {
\r
1116 for (int j = 1; j < valueAndAlternates[0].length; ++j) {
\r
1117 addValueAlias(valueAndAlternates[i][0],
\r
1118 valueAndAlternates[i][j], errorIfCant);
\r
1124 public void addValueAlias(String value, String valueAlias,
\r
1125 boolean errorIfCant) {
\r
1126 List result = (List) toValueAliases.get(value);
\r
1127 if (result == null && !errorIfCant)
\r
1129 addUnique(value, result);
\r
1130 addUnique(valueAlias, result);
\r
1133 protected List _getValueAliases(String valueAlias, List result) {
\r
1134 if (toValueAliases == null)
\r
1135 _fixValueAliases();
\r
1136 List a = (List) toValueAliases.get(valueAlias);
\r
1138 addAllUnique(a, result);
\r
1142 protected void _fixValueAliases() {
\r
1143 if (toValueAliases == null)
\r
1144 toValueAliases = new HashMap(1);
\r
1145 for (Iterator it = getAvailableValues().iterator(); it.hasNext();) {
\r
1146 Object value = it.next();
\r
1147 _ensureValueInAliases(value);
\r
1151 protected void _ensureValueInAliases(Object value) {
\r
1152 List result = (List) toValueAliases.get(value);
\r
1153 if (result == null)
\r
1154 toValueAliases.put(value, result = new ArrayList(1));
\r
1155 addUnique(value, result);
\r
1158 public BaseProperty swapFirst2ValueAliases() {
\r
1159 for (Iterator it = toValueAliases.keySet().iterator(); it.hasNext();) {
\r
1160 List list = (List) toValueAliases.get(it.next());
\r
1161 if (list.size() < 2)
\r
1163 Object first = list.get(0);
\r
1164 list.set(0, list.get(1));
\r
1165 list.set(1, first);
\r
1174 public UnicodeProperty addName(String string) {
\r
1175 throw new UnsupportedOperationException();
\r
/**
 * Abstract {@code BaseProperty} subclass whose methods maintain a {@code values}
 * list alongside the alias table ({@code toValueAliases}).
 * NOTE(review): the declaration of {@code values} and several statements
 * (returns, closing braces) are missing from this listing; comments below
 * describe only the lines that are visible.
 */
1180 public static abstract class SimpleProperty extends BaseProperty {
\r
/**
 * Adds {@code alias} to {@code propertyAliases}.
 *
 * @param alias name to add
 * @return not visible in this listing -- presumably this property; TODO confirm
 */
1183 public UnicodeProperty addName(String alias) {
\r
1184 propertyAliases.add(alias);
\r
/**
 * Registers a single value by passing it to {@code _addToValues} with a
 * null alias.
 *
 * @param valueAlias the value to register
 */
1188 public SimpleProperty setValues(String valueAlias) {
\r
1189 _addToValues(valueAlias, null);
\r
/**
 * Registers {@code valueAlias} through {@code _addToValues} with a null alias.
 * NOTE(review): the handling of {@code aliases} is not visible in this
 * listing; confirm against the full file.
 *
 * @param valueAlias the value to register
 * @param aliases    additional aliases (use not shown here)
 */
1193 public SimpleProperty addAliases(String valueAlias, String... aliases) {
\r
1194 _addToValues(valueAlias, null);
\r
/**
 * Iterates over {@code valueAliases} by index, comparing each entry against
 * {@code UNUSED} and reading the entry at the same index of
 * {@code alternateValueAliases} when that array is non-null.
 * NOTE(review): the statements that act on these tests are missing from the
 * listing -- presumably UNUSED entries are skipped and the remaining ones go
 * to _addToValues; confirm.
 *
 * @param valueAliases          primary value names
 * @param alternateValueAliases alternate names indexed in parallel; may be null
 */
1198 public SimpleProperty setValues(String[] valueAliases,
\r
1199 String[] alternateValueAliases) {
\r
1200 for (int i = 0; i < valueAliases.length; ++i) {
\r
1201 if (valueAliases[i].equals(UNUSED))
\r
1205 alternateValueAliases != null ? alternateValueAliases[i]
\r
/**
 * Replaces {@code values} with a copy of {@code valueAliases} and passes
 * every element (cast to String) to {@code _addToValues} with a null alias.
 *
 * @param valueAliases list of value names; elements are cast to String
 */
1211 public SimpleProperty setValues(List valueAliases) {
\r
1212 this.values = new ArrayList(valueAliases);
\r
1213 for (Iterator it = this.values.iterator(); it.hasNext();) {
\r
1214 _addToValues((String) it.next(), null);
\r
/**
 * Appends the contents of {@code values} to {@code result}.
 * NOTE(review): the statement run when {@code values} is null is missing from
 * the listing -- presumably it populates the list first (see _fillValues).
 *
 * @param result list that receives the values
 */
1219 public List _getAvailableValues(List result) {
\r
1220 if (values == null)
\r
1222 result.addAll(values);
\r
/**
 * Fetches the available values from {@code getUnicodeMap_internal()} and
 * passes each one (cast to String) to {@code _addToValues} with a null alias.
 */
1226 protected void _fillValues() {
\r
1227 List newvalues = (List) getUnicodeMap_internal()
\r
1228 .getAvailableValues(new ArrayList());
\r
1229 for (Iterator it = newvalues.iterator(); it.hasNext();) {
\r
1230 _addToValues((String) it.next(), null);
\r
/**
 * Shared registration step used by the setters above.
 * Creates {@code values} and the alias table if they are still null, passes
 * {@code item} to {@code addUnique} for {@code values}, ensures {@code item}
 * has an alias list, then calls {@code addValueAlias(item, alias, true)}.
 *
 * @param item  value to register
 * @param alias alias for the value; every caller visible in this listing
 *              passes null
 */
1234 private void _addToValues(String item, String alias) {
\r
1235 if (values == null)
\r
1236 values = new ArrayList(1);
\r
1237 if (toValueAliases == null)
\r
1238 _fixValueAliases();
\r
1239 addUnique(item, values);
\r
1240 _ensureValueInAliases(item);
\r
1241 addValueAlias(item, alias, true);
\r
1243 /* public String _getVersion() {
\r
// BaseProperty subclass whose values are read from the UnicodeMap held in
// the `unicodeMap` field.
// NOTE(review): the lines starting with "* Example of usage:" down to the
// ".setMain(...)" line are the interior of a usage-example doc comment; its
// opening and closing delimiters are missing from this listing.
1249 public static class UnicodeMapProperty extends BaseProperty {
\r
1251 * Example of usage:
\r
1252 * new UnicodeProperty.UnicodeMapProperty() {
\r
1254 unicodeMap = new UnicodeMap();
\r
1255 unicodeMap.setErrorOnReset(true);
\r
1256 unicodeMap.put(0xD, "CR");
\r
1257 unicodeMap.put(0xA, "LF");
\r
1258 UnicodeProperty cat = getProperty("General_Category");
\r
1259 UnicodeSet temp = cat.getSet("Line_Separator")
\r
1260 .addAll(cat.getSet("Paragraph_Separator"))
\r
1261 .addAll(cat.getSet("Control"))
\r
1262 .addAll(cat.getSet("Format"))
\r
1263 .remove(0xD).remove(0xA).remove(0x200C).remove(0x200D);
\r
1264 unicodeMap.putAll(temp, "Control");
\r
1265 UnicodeSet graphemeExtend = getProperty("Grapheme_Extend").getSet("true");
\r
1266 unicodeMap.putAll(graphemeExtend,"Extend");
\r
1267 UnicodeProperty hangul = getProperty("Hangul_Syllable_Type");
\r
1268 unicodeMap.putAll(hangul.getSet("L"),"L");
\r
1269 unicodeMap.putAll(hangul.getSet("V"),"V");
\r
1270 unicodeMap.putAll(hangul.getSet("T"),"T");
\r
1271 unicodeMap.putAll(hangul.getSet("LV"),"LV");
\r
1272 unicodeMap.putAll(hangul.getSet("LVT"),"LVT");
\r
1273 unicodeMap.setMissing("Other");
\r
1275 }.setMain("Grapheme_Cluster_Break", "GCB", UnicodeProperty.ENUMERATED, version)
\r
// Backing map read by _getValue and _getAvailableValues below.
1277 protected UnicodeMap unicodeMap;
\r
// NOTE(review): the body of set(...) is missing from this listing --
// presumably it stores `map` in `unicodeMap` and returns this; confirm.
1279 public UnicodeMapProperty set(UnicodeMap map) {
\r
// Returns the map's value for the code point, cast to String.
1284 protected String _getValue(int codepoint) {
\r
1285 return (String) unicodeMap.getValue(codepoint);
\r
1288 /* protected List _getValueAliases(String valueAlias, List result) {
\r
1289 if (!unicodeMap.getAvailableValues().contains(valueAlias)) return result;
\r
1290 result.add(valueAlias);
\r
1291 return result; // no other aliases
\r
1293 */protected List _getAvailableValues(List result) {
\r
// Delegates to the backing map, passing `result` through, and casts the
// returned collection to List.
1294 return (List) unicodeMap.getAvailableValues(result);
\r
/**
 * Tests {@code propertyValue} against this property's values and aliases.
 * Each entry of {@code getAvailableValues()} and each of that entry's
 * aliases is compared with {@code propertyValue} using
 * {@code UnicodeProperty.compareNames(...) == 0}.
 * NOTE(review): every return statement is missing from this listing --
 * presumably string/misc properties and any match yield true, otherwise
 * false; confirm against the full file.
 *
 * @param propertyValue candidate value name
 */
1298 public boolean isValidValue(String propertyValue) {
\r
// Properties whose type matches STRING_OR_MISC_MASK take a separate branch
// (body not shown).
1299 if (isType(STRING_OR_MISC_MASK)) {
\r
1302 Collection<String> values = (Collection<String>) getAvailableValues();
\r
1303 for (String valueAlias : values) {
\r
// First compare against the value itself ...
1304 if (UnicodeProperty.compareNames(valueAlias, propertyValue) == 0) {
\r
// ... then against each alias registered for it.
1307 for (String valueAlias2 : (Collection<String>) getValueAliases(valueAlias)) {
\r
1308 if (UnicodeProperty.compareNames(valueAlias2, propertyValue) == 0) {
\r
/**
 * Collects the aliases of all available values into one list and then removes
 * the available values themselves from it.
 * NOTE(review): the body of the STRING_OR_MISC_MASK branch and the final
 * return are missing from this listing -- presumably {@code result} is
 * returned in both cases; confirm.
 */
1316 public List<String> getValueAliases() {
\r
1317 List<String> result = new ArrayList();
\r
1318 if (isType(STRING_OR_MISC_MASK)) {
\r
1321 Collection<String> values = (Collection<String>) getAvailableValues();
\r
1322 for (String valueAlias : values) {
\r
1323 UnicodeProperty.addAllUnique(getValueAliases(valueAlias), result);
\r
// Drop every entry that is itself an available value.
1325 result.removeAll(values);
\r
/**
 * Expands {@code result} from the sample code points to their whole classes.
 * If the set contains {@code SAMPLE_PRIVATE_USE} or {@code SAMPLE_SURROGATE},
 * all of {@code PRIVATE_USE} or {@code SURROGATE} respectively is added; the
 * same is done for {@code SAMPLE_UNASSIGNED} / {@code UNASSIGNED}, but only
 * when {@code uniformUnassigned} is true. These samples are the three
 * representatives that {@code STUFF_TO_TEST} adds back after removing
 * {@code SPECIALS}.
 *
 * @param result            set to expand; modified in place
 * @param uniformUnassigned whether the unassigned sample stands for all
 *                          unassigned code points
 * @return not visible in this listing -- presumably {@code result}; TODO confirm
 */
1330 public static UnicodeSet addUntested(UnicodeSet result, boolean uniformUnassigned) {
\r
1331 if (uniformUnassigned && result.contains(UnicodeProperty.SAMPLE_UNASSIGNED)) {
\r
1332 result.addAll(UnicodeProperty.UNASSIGNED);
\r
1334 if (result.contains(UnicodeProperty.SAMPLE_PRIVATE_USE)) {
\r
1335 result.addAll(UnicodeProperty.PRIVATE_USE);
\r
1337 if (result.contains(UnicodeProperty.SAMPLE_SURROGATE)) {
\r
1338 result.addAll(UnicodeProperty.SURROGATE);
\r
/**
 * Map counterpart of the set-based {@code addUntested}: copies the value
 * stored for each sample code point onto its whole class.
 * When {@code result} has a non-null value for {@code SAMPLE_PRIVATE_USE} or
 * {@code SAMPLE_SURROGATE}, that value is put on all of {@code PRIVATE_USE}
 * or {@code SURROGATE}; the same happens for {@code SAMPLE_UNASSIGNED} /
 * {@code UNASSIGNED} only when {@code uniformUnassigned} is true.
 * NOTE(review): the declaration of {@code temp} and the return statement are
 * missing from this listing.
 *
 * @param result            map to expand; modified in place
 * @param uniformUnassigned whether the unassigned sample stands for all
 *                          unassigned code points
 */
1343 public static UnicodeMap addUntested(UnicodeMap result, boolean uniformUnassigned) {
\r
1345 if (uniformUnassigned && null != (temp = result.get(UnicodeProperty.SAMPLE_UNASSIGNED))) {
\r
1346 result.putAll(UnicodeProperty.UNASSIGNED, temp);
\r
1348 if (null != (temp = result.get(UnicodeProperty.SAMPLE_PRIVATE_USE))) {
\r
1349 result.putAll(UnicodeProperty.PRIVATE_USE, temp);
\r
1351 if (null != (temp = result.get(UnicodeProperty.SAMPLE_SURROGATE))) {
\r
1352 result.putAll(UnicodeProperty.SURROGATE, temp);
\r
/**
 * Reports whether {@code cp} carries this property's default value.
 * For properties whose type matches {@code STRING_OR_MISC_MASK} the decision
 * is delegated to {@code equals(cp, value)}. Otherwise the value of
 * {@code cp} is compared, null-safely, with the value of
 * {@code SAMPLE_UNASSIGNED}, which serves as the default.
 *
 * @param cp code point to test
 * @return true if the value of {@code cp} equals the default as described
 */
1357 public boolean isDefault(int cp) {
\r
1358 String value = getValue(cp);
\r
1359 if (isType(STRING_OR_MISC_MASK)) {
\r
1360 return equals(cp, value);
\r
1362 String defaultValue = getValue(SAMPLE_UNASSIGNED);
\r
// Two nulls count as equal; otherwise compare with String.equals.
1363 return defaultValue == null ? value == null : defaultValue.equals(value);
\r
/**
 * Returns the current value of the {@code hasUniformUnassigned} flag.
 *
 * @return the stored flag
 */
1366 public boolean hasUniformUnassigned() {
\r
1367 return hasUniformUnassigned;
\r
1369 protected UnicodeProperty setUniformUnassigned(boolean hasUniformUnassigned) {
\r
1370 this.hasUniformUnassigned = hasUniformUnassigned;
\r