2 //#if defined(FOUNDATION10) || defined(J2SE13)
\r
5 *******************************************************************************
\r
6 * Copyright (C) 1996-2009, International Business Machines Corporation and *
\r
7 * others. All Rights Reserved. *
\r
8 *******************************************************************************
\r
10 package com.ibm.icu.dev.test.util;
\r
12 import java.io.PrintWriter;
\r
13 import java.io.StringWriter;
\r
14 import java.text.ParsePosition;
\r
15 import java.util.ArrayList;
\r
16 import java.util.Collection;
\r
17 import java.util.Comparator;
\r
18 import java.util.HashMap;
\r
19 import java.util.Iterator;
\r
20 import java.util.List;
\r
21 import java.util.Map;
\r
22 import java.util.TreeMap;
\r
23 import java.util.regex.Pattern;
\r
25 import com.ibm.icu.dev.test.util.CollectionUtilities.InverseMatcher;
\r
26 import com.ibm.icu.dev.test.util.CollectionUtilities.ObjectMatcher;
\r
27 import com.ibm.icu.impl.Utility;
\r
28 import com.ibm.icu.text.SymbolTable;
\r
29 import com.ibm.icu.text.UTF16;
\r
30 import com.ibm.icu.text.UnicodeMatcher;
\r
31 import com.ibm.icu.text.UnicodeSet;
\r
32 import com.ibm.icu.text.UnicodeSetIterator;
\r
34 public abstract class UnicodeProperty extends UnicodeLabel {
\r
36 public static boolean DEBUG = false;
\r
38 public static String CHECK_NAME = "FC_NFKC_Closure";
\r
40 public static int CHECK_VALUE = 0x037A;
\r
42 private String name;
\r
44 private String firstNameAlias = null;
\r
48 private Map valueToFirstValueAlias = null;
\r
51 * Name: Unicode_1_Name Name: ISO_Comment Name: Name Name: Unicode_1_Name
\r
55 public static final int UNKNOWN = 0, BINARY = 2, EXTENDED_BINARY = 3,
\r
56 ENUMERATED = 4, EXTENDED_ENUMERATED = 5, CATALOG = 6,
\r
57 EXTENDED_CATALOG = 7, MISC = 8, EXTENDED_MISC = 9, STRING = 10,
\r
58 EXTENDED_STRING = 11, NUMERIC = 12, EXTENDED_NUMERIC = 13,
\r
59 START_TYPE = 2, LIMIT_TYPE = 14, EXTENDED_MASK = 1,
\r
60 CORE_MASK = ~EXTENDED_MASK, BINARY_MASK = (1 << BINARY)
\r
61 | (1 << EXTENDED_BINARY), STRING_MASK = (1 << STRING)
\r
62 | (1 << EXTENDED_STRING),
\r
63 STRING_OR_MISC_MASK = (1 << STRING) | (1 << EXTENDED_STRING)
\r
64 | (1 << MISC) | (1 << EXTENDED_MISC),
\r
65 ENUMERATED_OR_CATALOG_MASK = (1 << ENUMERATED)
\r
66 | (1 << EXTENDED_ENUMERATED) | (1 << CATALOG)
\r
67 | (1 << EXTENDED_CATALOG);
\r
69 private static final String[] TYPE_NAMES = { "Unknown", "Unknown",
\r
70 "Binary", "Extended Binary", "Enumerated", "Extended Enumerated",
\r
71 "Catalog", "Extended Catalog", "Miscellaneous",
\r
72 "Extended Miscellaneous", "String", "Extended String", "Numeric",
\r
73 "Extended Numeric", };
\r
75 public static String getTypeName(int propType) {
\r
76 return TYPE_NAMES[propType];
\r
79 public final String getName() {
\r
83 public final int getType() {
\r
87 public final boolean isType(int mask) {
\r
88 return ((1 << type) & mask) != 0;
\r
91 protected final void setName(String string) {
\r
93 throw new IllegalArgumentException("Name must not be null");
\r
97 protected final void setType(int i) {
\r
101 public String getVersion() {
\r
102 return _getVersion();
\r
105 public String getValue(int codepoint) {
\r
106 if (DEBUG && CHECK_VALUE == codepoint && CHECK_NAME.equals(getName())) {
\r
107 String value = _getValue(codepoint);
\r
108 System.out.println(getName() + "(" + Utility.hex(codepoint) + "):"
\r
109 + (getType() == STRING ? Utility.hex(value) : value));
\r
112 return _getValue(codepoint);
\r
115 // public String getValue(int codepoint, boolean isShort) {
\r
116 // return getValue(codepoint);
\r
119 public List getNameAliases(List result) {
\r
120 if (result == null)
\r
121 result = new ArrayList(1);
\r
122 return _getNameAliases(result);
\r
125 public List getValueAliases(String valueAlias, List result) {
\r
126 if (result == null)
\r
127 result = new ArrayList(1);
\r
128 result = _getValueAliases(valueAlias, result);
\r
129 if (!result.contains(valueAlias)) { // FIX && type < NUMERIC
\r
130 result = _getValueAliases(valueAlias, result); // for debugging
\r
131 throw new IllegalArgumentException("Internal error: " + getName()
\r
132 + " doesn't contain " + valueAlias + ": "
\r
133 + new BagFormatter().join(result));
\r
138 public List getAvailableValues(List result) {
\r
139 if (result == null)
\r
140 result = new ArrayList(1);
\r
141 return _getAvailableValues(result);
\r
144 protected abstract String _getVersion();
\r
146 protected abstract String _getValue(int codepoint);
\r
148 protected abstract List _getNameAliases(List result);
\r
150 protected abstract List _getValueAliases(String valueAlias, List result);
\r
152 protected abstract List _getAvailableValues(List result);
\r
155 public final List getNameAliases() {
\r
156 return getNameAliases(null);
\r
159 public final List getValueAliases(String valueAlias) {
\r
160 return getValueAliases(valueAlias, null);
\r
163 public final List getAvailableValues() {
\r
164 return getAvailableValues(null);
\r
167 public final String getValue(int codepoint, boolean getShortest) {
\r
168 String result = getValue(codepoint);
\r
169 if (type >= MISC || result == null || !getShortest)
\r
171 return getFirstValueAlias(result);
\r
174 public final String getFirstNameAlias() {
\r
175 if (firstNameAlias == null) {
\r
176 firstNameAlias = (String) getNameAliases().get(0);
\r
178 return firstNameAlias;
\r
181 public final String getFirstValueAlias(String value) {
\r
182 if (valueToFirstValueAlias == null)
\r
183 _getFirstValueAliasCache();
\r
184 return (String) valueToFirstValueAlias.get(value);
\r
187 private void _getFirstValueAliasCache() {
\r
189 maxFirstValueAliasWidth = 0;
\r
190 valueToFirstValueAlias = new HashMap(1);
\r
191 Iterator it = getAvailableValues().iterator();
\r
192 while (it.hasNext()) {
\r
193 String value = (String) it.next();
\r
194 String first = (String) getValueAliases(value).get(0);
\r
195 if (first == null) { // internal error
\r
196 throw new IllegalArgumentException(
\r
197 "Value not in value aliases: " + value);
\r
199 if (DEBUG && CHECK_NAME.equals(getName())) {
\r
200 System.out.println("First Alias: " + getName() + ": " + value
\r
202 + new BagFormatter().join(getValueAliases(value)));
\r
204 valueToFirstValueAlias.put(value, first);
\r
205 if (value.length() > maxValueWidth) {
\r
206 maxValueWidth = value.length();
\r
208 if (first.length() > maxFirstValueAliasWidth) {
\r
209 maxFirstValueAliasWidth = first.length();
\r
214 private int maxValueWidth = -1;
\r
216 private int maxFirstValueAliasWidth = -1;
\r
218 public int getMaxWidth(boolean getShortest) {
\r
219 if (maxValueWidth < 0)
\r
220 _getFirstValueAliasCache();
\r
222 return maxFirstValueAliasWidth;
\r
223 return maxValueWidth;
\r
226 public final UnicodeSet getSet(String propertyValue) {
\r
227 return getSet(propertyValue, null);
\r
230 public final UnicodeSet getSet(PatternMatcher matcher) {
\r
231 return getSet(matcher, null);
\r
234 public final UnicodeSet getSet(String propertyValue, UnicodeSet result) {
\r
235 return getSet(new SimpleMatcher(propertyValue,
\r
236 isType(STRING_OR_MISC_MASK) ? null : PROPERTY_COMPARATOR),
\r
240 private UnicodeMap unicodeMap = null;
\r
242 public static final String UNUSED = "??";
\r
244 public final UnicodeSet getSet(PatternMatcher matcher, UnicodeSet result) {
\r
245 if (result == null)
\r
246 result = new UnicodeSet();
\r
247 if (isType(STRING_OR_MISC_MASK)) {
\r
248 for (int i = 0; i <= 0x10FFFF; ++i) {
\r
249 String value = getValue(i);
\r
250 if (value != null && matcher.matches(value)) {
\r
256 List temp = new ArrayList(1); // to avoid reallocating...
\r
257 UnicodeMap um = getUnicodeMap_internal();
\r
258 Iterator it = um.getAvailableValues(null).iterator();
\r
259 main: while (it.hasNext()) {
\r
260 String value = (String) it.next();
\r
262 Iterator it2 = getValueAliases(value, temp).iterator();
\r
263 while (it2.hasNext()) {
\r
264 String value2 = (String) it2.next();
\r
265 // System.out.println("Values:" + value2);
\r
266 if (matcher.matches(value2)
\r
267 || matcher.matches(toSkeleton(value2))) {
\r
268 um.getSet(value, result);
\r
277 * public UnicodeSet getMatchSet(UnicodeSet result) { if (result == null)
\r
278 * result = new UnicodeSet(); addAll(matchIterator, result); return result; }
\r
280 * public void setMatchSet(UnicodeSet set) { matchIterator = new
\r
281 * UnicodeSetIterator(set); }
\r
285 * Utility for debugging
\r
287 public static String getStack() {
\r
288 Exception e = new Exception();
\r
289 StringWriter sw = new StringWriter();
\r
290 PrintWriter pw = new PrintWriter(sw);
\r
291 e.printStackTrace(pw);
\r
293 return "Showing Stack with fake " + sw.getBuffer().toString();
\r
296 // TODO use this instead of plain strings
\r
297 public static class Name implements Comparable {
\r
298 private String skeleton;
\r
300 private String pretty;
\r
302 public final int RAW = 0, TITLE = 1, NORMAL = 2;
\r
304 public Name(String name, int style) {
\r
307 if (style == RAW) {
\r
308 skeleton = pretty = name;
\r
310 pretty = regularize(name, style == TITLE);
\r
311 skeleton = toSkeleton(pretty);
\r
315 public int compareTo(Object o) {
\r
316 return skeleton.compareTo(((Name) o).skeleton);
\r
319 public boolean equals(Object o) {
\r
320 return skeleton.equals(((Name) o).skeleton);
\r
323 public int hashCode() {
\r
324 return skeleton.hashCode();
\r
327 public String toString() {
\r
333 * @return the unicode map
\r
335 public UnicodeMap getUnicodeMap() {
\r
336 return getUnicodeMap(false);
\r
340 * @return the unicode map
\r
342 public UnicodeMap getUnicodeMap(boolean getShortest) {
\r
344 return (UnicodeMap) getUnicodeMap_internal().cloneAsThawed();
\r
345 UnicodeMap result = new UnicodeMap();
\r
346 for (int i = 0; i <= 0x10FFFF; ++i) {
\r
347 // if (DEBUG && i == 0x41) System.out.println(i + "\t" +
\r
349 String value = getValue(i, true);
\r
350 result.put(i, value);
\r
356 * @return the unicode map
\r
358 protected UnicodeMap getUnicodeMap_internal() {
\r
359 if (unicodeMap == null)
\r
360 unicodeMap = _getUnicodeMap();
\r
364 protected UnicodeMap _getUnicodeMap() {
\r
365 UnicodeMap result = new UnicodeMap();
\r
366 HashMap myIntern = new HashMap();
\r
367 for (int i = 0; i <= 0x10FFFF; ++i) {
\r
368 // if (DEBUG && i == 0x41) System.out.println(i + "\t" +
\r
370 String value = getValue(i);
\r
371 String iValue = (String) myIntern.get(value);
\r
372 if (iValue == null)
\r
373 myIntern.put(value, iValue = value);
\r
374 result.put(i, iValue);
\r
377 for (int i = 0; i <= 0x10FFFF; ++i) {
\r
378 // if (DEBUG && i == 0x41) System.out.println(i + "\t" +
\r
380 String value = getValue(i);
\r
381 String resultValue = (String) result.getValue(i);
\r
382 if (!value.equals(resultValue)) {
\r
383 throw new RuntimeException("Value failure at: "
\r
388 if (DEBUG && CHECK_NAME.equals(getName())) {
\r
389 System.out.println(getName() + ":\t" + getClass().getName() + "\t"
\r
391 System.out.println(getStack());
\r
392 System.out.println(result);
\r
398 * Really ought to create a Collection UniqueList, that forces uniqueness.
\r
401 public static Collection addUnique(Object obj, Collection result) {
\r
402 if (obj != null && !result.contains(obj))
\r
408 * Utility for managing property & non-string value aliases
\r
410 public static final Comparator PROPERTY_COMPARATOR = new Comparator() {
\r
411 public int compare(Object o1, Object o2) {
\r
412 return compareNames((String) o1, (String) o2);
\r
417 * Utility for managing property & non-string value aliases
\r
421 public static boolean equalNames(String a, String b) {
\r
426 return toSkeleton(a).equals(toSkeleton(b));
\r
430 * Utility for managing property & non-string value aliases
\r
433 public static int compareNames(String a, String b) {
\r
440 return toSkeleton(a).compareTo(toSkeleton(b));
\r
444 * Utility for managing property & non-string value aliases
\r
446 // TODO account for special names, tibetan, hangul
\r
447 public static String toSkeleton(String source) {
\r
448 if (source == null)
\r
450 StringBuffer skeletonBuffer = new StringBuffer();
\r
451 boolean gotOne = false;
\r
452 // remove spaces, '_', '-'
\r
453 // we can do this with char, since no surrogates are involved
\r
454 for (int i = 0; i < source.length(); ++i) {
\r
455 char ch = source.charAt(i);
\r
456 if (i > 0 && (ch == '_' || ch == ' ' || ch == '-')) {
\r
459 char ch2 = Character.toLowerCase(ch);
\r
462 skeletonBuffer.append(ch2);
\r
464 skeletonBuffer.append(ch);
\r
469 return source; // avoid string creation
\r
470 return skeletonBuffer.toString();
\r
473 // get the name skeleton
\r
474 public static String toNameSkeleton(String source) {
\r
475 if (source == null)
\r
477 StringBuffer result = new StringBuffer();
\r
478 // remove spaces, medial '-'
\r
479 // we can do this with char, since no surrogates are involved
\r
480 for (int i = 0; i < source.length(); ++i) {
\r
481 char ch = source.charAt(i);
\r
482 if (('0' <= ch && ch <= '9') || ('A' <= ch && ch <= 'Z')
\r
483 || ch == '<' || ch == '>') {
\r
485 } else if (ch == ' ') {
\r
487 } else if (ch == '-') {
\r
488 // only copy non-medials AND trailing O-E
\r
490 || i == source.length() - 1
\r
491 || source.charAt(i - 1) == ' '
\r
492 || source.charAt(i + 1) == ' '
\r
493 || (i == source.length() - 2
\r
494 && source.charAt(i - 1) == 'O' && source
\r
495 .charAt(i + 1) == 'E')) {
\r
496 System.out.println("****** EXCEPTION " + source);
\r
499 // otherwise don't copy
\r
501 throw new IllegalArgumentException("Illegal Name Char: U+"
\r
502 + Utility.hex(ch) + ", " + ch);
\r
505 return result.toString();
\r
509 * These routines use the Java functions, because they only need to act on
\r
510 * ASCII Changes space, - into _, inserts _ between lower and UPPER.
\r
512 public static String regularize(String source, boolean titlecaseStart) {
\r
513 if (source == null)
\r
516 * if (source.equals("noBreak")) { // HACK if (titlecaseStart) return
\r
517 * "NoBreak"; return source; }
\r
519 StringBuffer result = new StringBuffer();
\r
521 boolean haveFirstCased = true;
\r
522 for (int i = 0; i < source.length(); ++i) {
\r
523 char c = source.charAt(i);
\r
524 if (c == ' ' || c == '-' || c == '_') {
\r
526 haveFirstCased = true;
\r
529 haveFirstCased = true;
\r
530 int cat = Character.getType(c);
\r
531 if (lastCat == Character.LOWERCASE_LETTER
\r
532 && cat == Character.UPPERCASE_LETTER) {
\r
533 result.append('_');
\r
536 && (cat == Character.LOWERCASE_LETTER
\r
537 || cat == Character.TITLECASE_LETTER || cat == Character.UPPERCASE_LETTER)) {
\r
538 if (titlecaseStart) {
\r
539 c = Character.toUpperCase(c);
\r
541 haveFirstCased = false;
\r
546 return result.toString();
\r
550 * Utility function for comparing codepoint to string without generating new
\r
555 * @return true if the codepoint equals the string
\r
557 public static final boolean equals(int codepoint, String other) {
\r
558 if (other.length() == 1) {
\r
559 return codepoint == other.charAt(0);
\r
561 if (other.length() == 2) {
\r
562 return other.equals(UTF16.valueOf(codepoint));
\r
568 * Utility that should be on UnicodeSet
\r
573 static public void addAll(UnicodeSetIterator source, UnicodeSet result) {
\r
574 while (source.nextRange()) {
\r
575 if (source.codepoint == UnicodeSetIterator.IS_STRING) {
\r
576 result.add(source.string);
\r
578 result.add(source.codepoint, source.codepointEnd);
\r
584 * Really ought to create a Collection UniqueList, that forces uniqueness.
\r
587 public static Collection addAllUnique(Collection source, Collection result) {
\r
588 for (Iterator it = source.iterator(); it.hasNext();) {
\r
589 addUnique(it.next(), result);
\r
595 * Really ought to create a Collection UniqueList, that forces uniqueness.
\r
598 public static Collection addAllUnique(Object[] source, Collection result) {
\r
599 for (int i = 0; i < source.length; ++i) {
\r
600 addUnique(source[i], result);
\r
605 static public class Factory {
\r
606 static boolean DEBUG = false;
\r
608 Map canonicalNames = new TreeMap();
\r
610 Map skeletonNames = new TreeMap();
\r
612 Map propertyCache = new HashMap(1);
\r
614 public final Factory add(UnicodeProperty sp) {
\r
615 canonicalNames.put(sp.getName(), sp);
\r
616 List c = sp.getNameAliases(new ArrayList(1));
\r
617 Iterator it = c.iterator();
\r
618 while (it.hasNext()) {
\r
619 skeletonNames.put(toSkeleton((String) it.next()), sp);
\r
624 public final UnicodeProperty getProperty(String propertyAlias) {
\r
625 return (UnicodeProperty) skeletonNames
\r
626 .get(toSkeleton(propertyAlias));
\r
629 public final List getAvailableNames() {
\r
630 return getAvailableNames(null);
\r
633 public final List getAvailableNames(List result) {
\r
634 if (result == null)
\r
635 result = new ArrayList(1);
\r
636 Iterator it = canonicalNames.keySet().iterator();
\r
637 while (it.hasNext()) {
\r
638 addUnique(it.next(), result);
\r
643 public final List getAvailableNames(int propertyTypeMask) {
\r
644 return getAvailableNames(propertyTypeMask, null);
\r
647 public final List getAvailableNames(int propertyTypeMask, List result) {
\r
648 if (result == null)
\r
649 result = new ArrayList(1);
\r
650 Iterator it = canonicalNames.keySet().iterator();
\r
651 while (it.hasNext()) {
\r
652 String item = (String) it.next();
\r
653 UnicodeProperty property = getProperty(item);
\r
655 System.out.println("Properties: " + item + ","
\r
656 + property.getType());
\r
657 if (!property.isType(propertyTypeMask)) {
\r
658 // System.out.println("Masking: " + property.getType() + ","
\r
659 // + propertyTypeMask);
\r
662 addUnique(property.getName(), result);
\r
667 InversePatternMatcher inverseMatcher = new InversePatternMatcher();
\r
670 * Format is: propname ('=' | '!=') propvalue ( '|' propValue )*
\r
672 public final UnicodeSet getSet(String propAndValue,
\r
673 PatternMatcher matcher, UnicodeSet result) {
\r
674 int equalPos = propAndValue.indexOf('=');
\r
675 String prop = propAndValue.substring(0, equalPos);
\r
676 String value = propAndValue.substring(equalPos + 1);
\r
677 boolean negative = false;
\r
678 if (prop.endsWith("!")) {
\r
679 prop = prop.substring(0, prop.length() - 1);
\r
682 prop = prop.trim();
\r
683 UnicodeProperty up = getProperty(prop);
\r
684 if (matcher == null) {
\r
685 matcher = new SimpleMatcher(value, up
\r
686 .isType(STRING_OR_MISC_MASK) ? null
\r
687 : PROPERTY_COMPARATOR);
\r
690 inverseMatcher.set(matcher);
\r
691 matcher = inverseMatcher;
\r
693 return up.getSet(matcher.set(value), result);
\r
696 public final UnicodeSet getSet(String propAndValue,
\r
697 PatternMatcher matcher) {
\r
698 return getSet(propAndValue, matcher, null);
\r
701 public final UnicodeSet getSet(String propAndValue) {
\r
702 return getSet(propAndValue, null, null);
\r
705 public final SymbolTable getSymbolTable(String prefix) {
\r
706 return new PropertySymbolTable(prefix);
\r
709 private class MyXSymbolTable extends UnicodeSet.XSymbolTable {
\r
710 public boolean applyPropertyAlias(String propertyName,
\r
711 String propertyValue, UnicodeSet result) {
\r
713 System.out.println(propertyName + "=" + propertyValue);
\r
714 UnicodeProperty prop = getProperty(propertyName);
\r
718 UnicodeSet x = prop.getSet(propertyValue, result);
\r
719 return x.size() != 0;
\r
723 public final UnicodeSet.XSymbolTable getXSymbolTable() {
\r
724 return new MyXSymbolTable();
\r
727 private class PropertySymbolTable implements SymbolTable {
\r
728 static final boolean DEBUG = false;
\r
730 private String prefix;
\r
732 RegexMatcher regexMatcher = new RegexMatcher();
\r
734 PropertySymbolTable(String prefix) {
\r
735 this.prefix = prefix;
\r
738 public char[] lookup(String s) {
\r
740 System.out.println("\t(" + prefix + ")Looking up " + s);
\r
741 // ensure, again, that prefix matches
\r
742 int start = prefix.length();
\r
743 if (!s.regionMatches(true, 0, prefix, 0, start))
\r
746 int pos = s.indexOf(':', start);
\r
747 if (pos < 0) { // should never happen
\r
748 throw new IllegalArgumentException(
\r
749 "Internal Error: missing =: " + s + "\r\n");
\r
751 UnicodeProperty prop = getProperty(s.substring(start, pos));
\r
752 if (prop == null) {
\r
753 throw new IllegalArgumentException("Invalid Property in: "
\r
754 + s + "\r\nUse " + showSet(getAvailableNames()));
\r
756 String value = s.substring(pos + 1);
\r
758 if (value.startsWith("\u00AB")) { // regex!
\r
759 set = prop.getSet(regexMatcher.set(value.substring(1, value
\r
762 set = prop.getSet(value);
\r
764 if (set.size() == 0) {
\r
765 throw new IllegalArgumentException(
\r
766 "Empty Property-Value in: " + s + "\r\nUse "
\r
767 + showSet(prop.getAvailableValues()));
\r
770 System.out.println("\t(" + prefix + ")Returning "
\r
771 + set.toPattern(true));
\r
772 return set.toPattern(true).toCharArray(); // really ugly
\r
775 private String showSet(List list) {
\r
776 StringBuffer result = new StringBuffer("[");
\r
777 boolean first = true;
\r
778 for (Iterator it = list.iterator(); it.hasNext();) {
\r
780 result.append(", ");
\r
783 result.append(it.next().toString());
\r
785 result.append("]");
\r
786 return result.toString();
\r
789 public UnicodeMatcher lookupMatcher(int ch) {
\r
793 public String parseReference(String text, ParsePosition pos,
\r
796 System.out.println("\t(" + prefix + ")Parsing <"
\r
797 + text.substring(pos.getIndex(), limit) + ">");
\r
798 int start = pos.getIndex();
\r
799 // ensure that it starts with 'prefix'
\r
801 .regionMatches(true, start, prefix, 0, prefix.length()))
\r
803 start += prefix.length();
\r
804 // now see if it is of the form identifier:identifier
\r
805 int i = getIdentifier(text, start, limit);
\r
808 String prop = text.substring(start, i);
\r
809 String value = "true";
\r
811 if (text.charAt(i) == ':') {
\r
813 if (text.charAt(i + 1) == '\u00AB') { // regular
\r
815 j = text.indexOf('\u00BB', i + 2) + 1; // include
\r
821 j = getIdentifier(text, i + 1, limit);
\r
823 value = text.substring(i + 1, j);
\r
829 System.out.println("\t(" + prefix + ")Parsed <" + prop
\r
830 + ">=<" + value + ">");
\r
831 return prefix + prop + ":" + value;
\r
834 private int getIdentifier(String text, int start, int limit) {
\r
836 System.out.println("\tGetID <"
\r
837 + text.substring(start, limit) + ">");
\r
840 for (i = start; i < limit; i += UTF16.getCharCount(cp)) {
\r
841 cp = UTF16.charAt(text, i);
\r
842 if (!com.ibm.icu.lang.UCharacter
\r
843 .isUnicodeIdentifierPart(cp)
\r
849 System.out.println("\tGotID <" + text.substring(start, i)
\r
856 public static class FilteredProperty extends UnicodeProperty {
\r
857 private UnicodeProperty property;
\r
859 protected StringFilter filter;
\r
861 protected UnicodeSetIterator matchIterator = new UnicodeSetIterator(
\r
862 new UnicodeSet(0, 0x10FFFF));
\r
864 protected HashMap backmap;
\r
866 boolean allowValueAliasCollisions = false;
\r
868 public FilteredProperty(UnicodeProperty property, StringFilter filter) {
\r
869 this.property = property;
\r
870 this.filter = filter;
\r
873 public StringFilter getFilter() {
\r
877 public UnicodeProperty setFilter(StringFilter filter) {
\r
878 this.filter = filter;
\r
882 List temp = new ArrayList(1);
\r
884 public List _getAvailableValues(List result) {
\r
886 return filter.addUnique(property.getAvailableValues(temp), result);
\r
889 public List _getNameAliases(List result) {
\r
891 return filter.addUnique(property.getNameAliases(temp), result);
\r
894 public String _getValue(int codepoint) {
\r
895 return filter.remap(property.getValue(codepoint));
\r
898 public List _getValueAliases(String valueAlias, List result) {
\r
899 if (backmap == null) {
\r
900 backmap = new HashMap(1);
\r
902 Iterator it = property.getAvailableValues(temp).iterator();
\r
903 while (it.hasNext()) {
\r
904 String item = (String) it.next();
\r
905 String mappedItem = filter.remap(item);
\r
906 if (backmap.get(mappedItem) != null
\r
907 && !allowValueAliasCollisions) {
\r
908 throw new IllegalArgumentException(
\r
909 "Filter makes values collide! " + item + ", "
\r
912 backmap.put(mappedItem, item);
\r
915 valueAlias = (String) backmap.get(valueAlias);
\r
917 return filter.addUnique(property.getValueAliases(valueAlias, temp),
\r
921 public String _getVersion() {
\r
922 return property.getVersion();
\r
925 public boolean isAllowValueAliasCollisions() {
\r
926 return allowValueAliasCollisions;
\r
929 public FilteredProperty setAllowValueAliasCollisions(boolean b) {
\r
930 allowValueAliasCollisions = b;
\r
936 public static abstract class StringFilter implements Cloneable {
\r
937 public abstract String remap(String original);
\r
939 public final List addUnique(Collection source, List result) {
\r
940 if (result == null)
\r
941 result = new ArrayList(1);
\r
942 Iterator it = source.iterator();
\r
943 while (it.hasNext()) {
\r
944 UnicodeProperty.addUnique(remap((String) it.next()), result);
\r
949 * public Object clone() { try { return super.clone(); } catch
\r
950 * (CloneNotSupportedException e) { throw new
\r
951 * IllegalStateException("Should never happen."); } }
\r
955 public static class MapFilter extends StringFilter {
\r
956 private Map valueMap;
\r
958 public MapFilter(Map valueMap) {
\r
959 this.valueMap = valueMap;
\r
962 public String remap(String original) {
\r
963 Object changed = valueMap.get(original);
\r
964 return changed == null ? original : (String) changed;
\r
967 public Map getMap() {
\r
972 public interface PatternMatcher extends ObjectMatcher {
\r
973 public PatternMatcher set(String pattern);
\r
976 public static class InversePatternMatcher extends InverseMatcher implements
\r
978 PatternMatcher other;
\r
980 public PatternMatcher set(PatternMatcher toInverse) {
\r
985 public boolean matches(Object value) {
\r
986 return !other.matches(value);
\r
989 public PatternMatcher set(String pattern) {
\r
990 other.set(pattern);
\r
995 public static class SimpleMatcher implements PatternMatcher {
\r
996 Comparator comparator;
\r
1000 public SimpleMatcher(String pattern, Comparator comparator) {
\r
1001 this.comparator = comparator;
\r
1002 this.pattern = pattern;
\r
1005 public boolean matches(Object value) {
\r
1006 if (comparator == null)
\r
1007 return pattern.equals(value);
\r
1008 return comparator.compare(pattern, value) == 0;
\r
1011 public PatternMatcher set(String pattern) {
\r
1012 this.pattern = pattern;
\r
1017 public static class RegexMatcher implements UnicodeProperty.PatternMatcher {
\r
1018 private java.util.regex.Matcher matcher;
\r
1020 public UnicodeProperty.PatternMatcher set(String pattern) {
\r
1021 matcher = Pattern.compile(pattern).matcher("");
\r
1025 public boolean matches(Object value) {
\r
1026 matcher.reset(value.toString());
\r
1027 return matcher.matches();
\r
1031 public static abstract class BaseProperty extends UnicodeProperty {
\r
1032 protected List propertyAliases = new ArrayList(1);
\r
1034 protected Map toValueAliases;
\r
1036 protected String version;
\r
1038 public BaseProperty setMain(String alias, String shortAlias,
\r
1039 int propertyType, String version) {
\r
1041 setType(propertyType);
\r
1042 propertyAliases.add(shortAlias);
\r
1043 propertyAliases.add(alias);
\r
1044 this.version = version;
\r
1048 public String _getVersion() {
\r
1052 public List _getNameAliases(List result) {
\r
1053 addAllUnique(propertyAliases, result);
\r
1057 public BaseProperty addValueAliases(String[][] valueAndAlternates,
\r
1058 boolean errorIfCant) {
\r
1059 if (toValueAliases == null)
\r
1060 _fixValueAliases();
\r
1061 for (int i = 0; i < valueAndAlternates.length; ++i) {
\r
1062 for (int j = 1; j < valueAndAlternates[0].length; ++j) {
\r
1063 addValueAlias(valueAndAlternates[i][0],
\r
1064 valueAndAlternates[i][j], errorIfCant);
\r
1070 public void addValueAlias(String value, String valueAlias,
\r
1071 boolean errorIfCant) {
\r
1072 List result = (List) toValueAliases.get(value);
\r
1073 if (result == null && !errorIfCant)
\r
1075 addUnique(value, result);
\r
1076 addUnique(valueAlias, result);
\r
1079 protected List _getValueAliases(String valueAlias, List result) {
\r
1080 if (toValueAliases == null)
\r
1081 _fixValueAliases();
\r
1082 List a = (List) toValueAliases.get(valueAlias);
\r
1084 addAllUnique(a, result);
\r
1088 protected void _fixValueAliases() {
\r
1089 if (toValueAliases == null)
\r
1090 toValueAliases = new HashMap(1);
\r
1091 for (Iterator it = getAvailableValues().iterator(); it.hasNext();) {
\r
1092 Object value = it.next();
\r
1093 _ensureValueInAliases(value);
\r
1097 protected void _ensureValueInAliases(Object value) {
\r
1098 List result = (List) toValueAliases.get(value);
\r
1099 if (result == null)
\r
1100 toValueAliases.put(value, result = new ArrayList(1));
\r
1101 addUnique(value, result);
\r
1104 public BaseProperty swapFirst2ValueAliases() {
\r
1105 for (Iterator it = toValueAliases.keySet().iterator(); it.hasNext();) {
\r
1106 List list = (List) toValueAliases.get(it.next());
\r
1107 if (list.size() < 2)
\r
1109 Object first = list.get(0);
\r
1110 list.set(0, list.get(1));
\r
1111 list.set(1, first);
\r
1120 public UnicodeProperty addName(String string) {
\r
1121 throw new UnsupportedOperationException();
\r
1126 public static abstract class SimpleProperty extends BaseProperty {
\r
1129 public UnicodeProperty addName(String alias) {
\r
1130 propertyAliases.add(alias);
\r
1134 public SimpleProperty setValues(String valueAlias) {
\r
1135 _addToValues(valueAlias, null);
\r
1139 public SimpleProperty setValues(String[] valueAliases,
\r
1140 String[] alternateValueAliases) {
\r
1141 for (int i = 0; i < valueAliases.length; ++i) {
\r
1142 if (valueAliases[i].equals(UNUSED))
\r
1146 alternateValueAliases != null ? alternateValueAliases[i]
\r
1152 public SimpleProperty setValues(List valueAliases) {
\r
1153 this.values = new ArrayList(valueAliases);
\r
1154 for (Iterator it = this.values.iterator(); it.hasNext();) {
\r
1155 _addToValues((String) it.next(), null);
\r
1160 public List _getAvailableValues(List result) {
\r
1161 if (values == null)
\r
1163 result.addAll(values);
\r
1167 protected void _fillValues() {
\r
1168 List newvalues = (List) getUnicodeMap_internal()
\r
1169 .getAvailableValues(new ArrayList());
\r
1170 for (Iterator it = newvalues.iterator(); it.hasNext();) {
\r
1171 _addToValues((String) it.next(), null);
\r
1175 private void _addToValues(String item, String alias) {
\r
1176 if (values == null)
\r
1177 values = new ArrayList(1);
\r
1178 if (toValueAliases == null)
\r
1179 _fixValueAliases();
\r
1180 addUnique(item, values);
\r
1181 _ensureValueInAliases(item);
\r
1182 addValueAlias(item, alias, true);
\r
1184 /* public String _getVersion() {
\r
1190 public static class UnicodeMapProperty extends BaseProperty {
\r
1192 * Example of usage:
\r
1193 * new UnicodeProperty.UnicodeMapProperty() {
\r
1195 unicodeMap = new UnicodeMap();
\r
1196 unicodeMap.setErrorOnReset(true);
\r
1197 unicodeMap.put(0xD, "CR");
\r
1198 unicodeMap.put(0xA, "LF");
\r
1199 UnicodeProperty cat = getProperty("General_Category");
\r
1200 UnicodeSet temp = cat.getSet("Line_Separator")
\r
1201 .addAll(cat.getSet("Paragraph_Separator"))
\r
1202 .addAll(cat.getSet("Control"))
\r
1203 .addAll(cat.getSet("Format"))
\r
1204 .remove(0xD).remove(0xA).remove(0x200C).remove(0x200D);
\r
1205 unicodeMap.putAll(temp, "Control");
\r
1206 UnicodeSet graphemeExtend = getProperty("Grapheme_Extend").getSet("true");
\r
1207 unicodeMap.putAll(graphemeExtend,"Extend");
\r
1208 UnicodeProperty hangul = getProperty("Hangul_Syllable_Type");
\r
1209 unicodeMap.putAll(hangul.getSet("L"),"L");
\r
1210 unicodeMap.putAll(hangul.getSet("V"),"V");
\r
1211 unicodeMap.putAll(hangul.getSet("T"),"T");
\r
1212 unicodeMap.putAll(hangul.getSet("LV"),"LV");
\r
1213 unicodeMap.putAll(hangul.getSet("LVT"),"LVT");
\r
1214 unicodeMap.setMissing("Other");
\r
1216 }.setMain("Grapheme_Cluster_Break", "GCB", UnicodeProperty.ENUMERATED, version)
\r
1218 protected UnicodeMap unicodeMap;
\r
1220 public UnicodeMapProperty set(UnicodeMap map) {
\r
1225 protected String _getValue(int codepoint) {
\r
1226 return (String) unicodeMap.getValue(codepoint);
\r
1229 /* protected List _getValueAliases(String valueAlias, List result) {
\r
1230 if (!unicodeMap.getAvailableValues().contains(valueAlias)) return result;
\r
1231 result.add(valueAlias);
\r
1232 return result; // no other aliases
\r
1234 */protected List _getAvailableValues(List result) {
\r
1235 return (List) unicodeMap.getAvailableValues(result);
\r