2 *******************************************************************************
3 * Copyright (C) 1996-2013, International Business Machines Corporation and *
4 * others. All Rights Reserved. *
5 *******************************************************************************
7 package com.ibm.icu.dev.util;
9 import java.io.PrintWriter;
10 import java.io.StringWriter;
11 import java.text.ParsePosition;
12 import java.util.ArrayList;
13 import java.util.Arrays;
14 import java.util.Collection;
15 import java.util.Comparator;
16 import java.util.HashMap;
17 import java.util.Iterator;
18 import java.util.LinkedHashSet;
19 import java.util.List;
21 import java.util.TreeMap;
22 import java.util.regex.Pattern;
24 import com.ibm.icu.dev.util.CollectionUtilities.InverseMatcher;
25 import com.ibm.icu.dev.util.CollectionUtilities.ObjectMatcher;
26 import com.ibm.icu.impl.Utility;
27 import com.ibm.icu.text.SymbolTable;
28 import com.ibm.icu.text.UFormat;
29 import com.ibm.icu.text.UTF16;
30 import com.ibm.icu.text.UnicodeMatcher;
31 import com.ibm.icu.text.UnicodeSet;
32 import com.ibm.icu.text.UnicodeSetIterator;
34 public abstract class UnicodeProperty extends UnicodeLabel {
36 public static final UnicodeSet NONCHARACTERS = new UnicodeSet("[:noncharactercodepoint:]").freeze();
37 public static final UnicodeSet PRIVATE_USE = new UnicodeSet("[:gc=privateuse:]").freeze();
38 public static final UnicodeSet SURROGATE = new UnicodeSet("[:gc=surrogate:]").freeze();
40 public static final UnicodeSet HIGH_SURROGATES = new UnicodeSet("[\\uD800-\\uDB7F]").freeze();
41 public static final int SAMPLE_HIGH_SURROGATE = HIGH_SURROGATES.charAt(0);
42 public static final UnicodeSet HIGH_PRIVATE_USE_SURROGATES = new UnicodeSet("[\\uDB80-\\uDBFF]").freeze();
43 public static final int SAMPLE_HIGH_PRIVATE_USE_SURROGATE = HIGH_PRIVATE_USE_SURROGATES.charAt(0);
44 public static final UnicodeSet LOW_SURROGATES = new UnicodeSet("[\\uDC00-\\uDFFF]").freeze();
45 public static final int SAMPLE_LOW_SURROGATE = LOW_SURROGATES.charAt(0);
47 public static final UnicodeSet PRIVATE_USE_AREA = new UnicodeSet("[\\uE000-\\uF8FF]").freeze();
48 public static final int SAMPLE_PRIVATE_USE_AREA = PRIVATE_USE_AREA.charAt(0);
49 public static final UnicodeSet PRIVATE_USE_AREA_A = new UnicodeSet("[\\U000F0000-\\U000FFFFD]").freeze();
50 public static final int SAMPLE_PRIVATE_USE_AREA_A = PRIVATE_USE_AREA_A.charAt(0);
51 public static final UnicodeSet PRIVATE_USE_AREA_B = new UnicodeSet("[\\U00100000-\\U0010FFFD]").freeze();
52 public static final int SAMPLE_PRIVATE_USE_AREA_B = PRIVATE_USE_AREA_B.charAt(0);
54 // The following are special. They are used for performance, but must be changed if the version of Unicode for the UnicodeProperty changes.
55 private static UnicodeSet UNASSIGNED;
56 private static int SAMPLE_UNASSIGNED;
57 private static UnicodeSet SPECIALS;
58 private static UnicodeSet STUFF_TO_TEST;
59 private static UnicodeSet STUFF_TO_TEST_WITH_UNASSIGNED;
61 public static synchronized UnicodeSet getUNASSIGNED() {
62 if (UNASSIGNED == null) {
63 UNASSIGNED = new UnicodeSet("[:gc=unassigned:]").freeze();
68 public static synchronized UnicodeSet contractUNASSIGNED(UnicodeSet toBeUnassigned) {
69 UnicodeSet temp = UNASSIGNED;
70 ResetCacheProperties();
71 UNASSIGNED = temp == null ? toBeUnassigned.freeze() : new UnicodeSet(temp).retainAll(toBeUnassigned).freeze();
75 public static synchronized int getSAMPLE_UNASSIGNED() {
76 if (SAMPLE_UNASSIGNED == 0) {
77 SAMPLE_UNASSIGNED = getUNASSIGNED().charAt(0);
79 return SAMPLE_UNASSIGNED;
82 public static synchronized UnicodeSet getSPECIALS() {
83 if (SPECIALS == null) {
84 SPECIALS = new UnicodeSet(getUNASSIGNED()).addAll(PRIVATE_USE).addAll(SURROGATE).freeze();
89 public static synchronized UnicodeSet getSTUFF_TO_TEST() {
90 if (STUFF_TO_TEST == null) {
91 STUFF_TO_TEST = new UnicodeSet(getSPECIALS()).complement()
92 .addAll(NONCHARACTERS)
93 .add(getSAMPLE_UNASSIGNED())
94 .add(SAMPLE_HIGH_SURROGATE)
95 .add(SAMPLE_HIGH_PRIVATE_USE_SURROGATE)
96 .add(SAMPLE_LOW_SURROGATE)
97 .add(SAMPLE_PRIVATE_USE_AREA)
98 .add(SAMPLE_PRIVATE_USE_AREA_A)
99 .add(SAMPLE_PRIVATE_USE_AREA_B)
102 return STUFF_TO_TEST;
105 public static synchronized UnicodeSet getSTUFF_TO_TEST_WITH_UNASSIGNED() {
106 if (STUFF_TO_TEST_WITH_UNASSIGNED == null) {
107 STUFF_TO_TEST_WITH_UNASSIGNED = new UnicodeSet(getSTUFF_TO_TEST()).addAll(getUNASSIGNED()).freeze();
109 return STUFF_TO_TEST_WITH_UNASSIGNED;
113 * Reset the cache properties. Must be done if the version of Unicode is different than the ICU one, AND any UnicodeProperty has already been instantiated.
114 * TODO make this a bit more robust.
117 public static synchronized void ResetCacheProperties() {
119 SAMPLE_UNASSIGNED = 0;
121 STUFF_TO_TEST = null;
122 STUFF_TO_TEST_WITH_UNASSIGNED = null;
125 public static boolean DEBUG = false;
127 public static String CHECK_NAME = "FC_NFKC_Closure";
129 public static int CHECK_VALUE = 0x037A;
133 private String firstNameAlias = null;
137 private Map valueToFirstValueAlias = null;
139 private boolean hasUniformUnassigned = true;
142 * Name: Unicode_1_Name Name: ISO_Comment Name: Name Name: Unicode_1_Name
146 public static final int UNKNOWN = 0, BINARY = 2, EXTENDED_BINARY = 3,
147 ENUMERATED = 4, EXTENDED_ENUMERATED = 5, CATALOG = 6,
148 EXTENDED_CATALOG = 7, MISC = 8, EXTENDED_MISC = 9, STRING = 10,
149 EXTENDED_STRING = 11, NUMERIC = 12, EXTENDED_NUMERIC = 13,
150 START_TYPE = 2, LIMIT_TYPE = 14, EXTENDED_MASK = 1,
151 CORE_MASK = ~EXTENDED_MASK, BINARY_MASK = (1 << BINARY)
152 | (1 << EXTENDED_BINARY), STRING_MASK = (1 << STRING)
153 | (1 << EXTENDED_STRING),
154 STRING_OR_MISC_MASK = (1 << STRING) | (1 << EXTENDED_STRING)
155 | (1 << MISC) | (1 << EXTENDED_MISC),
156 ENUMERATED_OR_CATALOG_MASK = (1 << ENUMERATED)
157 | (1 << EXTENDED_ENUMERATED) | (1 << CATALOG)
158 | (1 << EXTENDED_CATALOG);
160 private static final String[] TYPE_NAMES = { "Unknown", "Unknown",
161 "Binary", "Extended Binary", "Enumerated", "Extended Enumerated",
162 "Catalog", "Extended Catalog", "Miscellaneous",
163 "Extended Miscellaneous", "String", "Extended String", "Numeric",
164 "Extended Numeric", };
166 public static String getTypeName(int propType) {
167 return TYPE_NAMES[propType];
170 public final String getName() {
174 public final int getType() {
178 public String getTypeName() {
179 return TYPE_NAMES[type];
182 public final boolean isType(int mask) {
183 return ((1 << type) & mask) != 0;
186 protected final void setName(String string) {
188 throw new IllegalArgumentException("Name must not be null");
192 protected final void setType(int i) {
196 public String getVersion() {
197 return _getVersion();
200 public String getValue(int codepoint) {
201 if (DEBUG && CHECK_VALUE == codepoint && CHECK_NAME.equals(getName())) {
202 String value = _getValue(codepoint);
203 System.out.println(getName() + "(" + Utility.hex(codepoint) + "):"
204 + (getType() == STRING ? Utility.hex(value) : value));
207 return _getValue(codepoint);
210 // public String getValue(int codepoint, boolean isShort) {
211 // return getValue(codepoint);
214 public List<String> getNameAliases(List<String> result) {
216 result = new ArrayList(1);
217 return _getNameAliases(result);
220 public List<String> getValueAliases(String valueAlias, List<String> result) {
222 result = new ArrayList(1);
223 result = _getValueAliases(valueAlias, result);
224 if (!result.contains(valueAlias)) { // FIX && type < NUMERIC
225 result = _getValueAliases(valueAlias, result); // for debugging
226 throw new IllegalArgumentException("Internal error: " + getName()
227 + " doesn't contain " + valueAlias + ": "
228 + new BagFormatter().join(result));
233 public List<String> getAvailableValues(List<String> result) {
235 result = new ArrayList(1);
236 return _getAvailableValues(result);
239 protected abstract String _getVersion();
241 protected abstract String _getValue(int codepoint);
243 protected abstract List<String> _getNameAliases(List<String> result);
245 protected abstract List<String> _getValueAliases(String valueAlias, List<String> result);
247 protected abstract List<String> _getAvailableValues(List<String> result);
250 public final List<String> getNameAliases() {
251 return getNameAliases(null);
254 public final List<String> getValueAliases(String valueAlias) {
255 return getValueAliases(valueAlias, null);
258 public final List<String> getAvailableValues() {
259 return getAvailableValues(null);
262 public final String getValue(int codepoint, boolean getShortest) {
263 String result = getValue(codepoint);
264 if (type >= MISC || result == null || !getShortest)
266 return getFirstValueAlias(result);
269 public final String getFirstNameAlias() {
270 if (firstNameAlias == null) {
271 firstNameAlias = (String) getNameAliases().get(0);
273 return firstNameAlias;
276 public final String getFirstValueAlias(String value) {
277 if (valueToFirstValueAlias == null)
278 _getFirstValueAliasCache();
279 return valueToFirstValueAlias.get(value).toString();
282 private void _getFirstValueAliasCache() {
284 maxFirstValueAliasWidth = 0;
285 valueToFirstValueAlias = new HashMap(1);
286 Iterator it = getAvailableValues().iterator();
287 while (it.hasNext()) {
288 String value = (String) it.next();
289 String first = (String) getValueAliases(value).get(0);
290 if (first == null) { // internal error
291 throw new IllegalArgumentException(
292 "Value not in value aliases: " + value);
294 if (DEBUG && CHECK_NAME.equals(getName())) {
295 System.out.println("First Alias: " + getName() + ": " + value
297 + new BagFormatter().join(getValueAliases(value)));
299 valueToFirstValueAlias.put(value, first);
300 if (value.length() > maxValueWidth) {
301 maxValueWidth = value.length();
303 if (first.length() > maxFirstValueAliasWidth) {
304 maxFirstValueAliasWidth = first.length();
309 private int maxValueWidth = -1;
311 private int maxFirstValueAliasWidth = -1;
313 public int getMaxWidth(boolean getShortest) {
314 if (maxValueWidth < 0)
315 _getFirstValueAliasCache();
317 return maxFirstValueAliasWidth;
318 return maxValueWidth;
321 public final UnicodeSet getSet(String propertyValue) {
322 return getSet(propertyValue, null);
325 public final UnicodeSet getSet(PatternMatcher matcher) {
326 return getSet(matcher, null);
329 /** Adds the property value set to the result. Clear the result first if you don't want to keep the original contents.
331 public final UnicodeSet getSet(String propertyValue, UnicodeSet result) {
332 return getSet(new SimpleMatcher(propertyValue,
333 isType(STRING_OR_MISC_MASK) ? null : PROPERTY_COMPARATOR),
337 private UnicodeMap unicodeMap = null;
339 public static final String UNUSED = "??";
341 public UnicodeSet getSet(PatternMatcher matcher, UnicodeSet result) {
343 result = new UnicodeSet();
344 boolean uniformUnassigned = hasUniformUnassigned();
345 if (isType(STRING_OR_MISC_MASK)) {
346 for (UnicodeSetIterator usi = getStuffToTest(uniformUnassigned); usi.next();) { // int i = 0; i <= 0x10FFFF; ++i
347 int i = usi.codepoint;
348 String value = getValue(i);
349 if (value != null && matcher.matches(value)) {
353 return addUntested(result, uniformUnassigned);
355 List temp = new ArrayList(1); // to avoid reallocating...
356 UnicodeMap um = getUnicodeMap_internal();
357 Iterator it = um.getAvailableValues(null).iterator();
358 main: while (it.hasNext()) {
359 String value = (String) it.next();
361 Iterator it2 = getValueAliases(value, temp).iterator();
362 while (it2.hasNext()) {
363 String value2 = (String) it2.next();
364 // System.out.println("Values:" + value2);
365 if (matcher.matches(value2)
366 || matcher.matches(toSkeleton(value2))) {
367 um.keySet(value, result);
376 * public UnicodeSet getMatchSet(UnicodeSet result) { if (result == null)
377 * result = new UnicodeSet(); addAll(matchIterator, result); return result; }
379 * public void setMatchSet(UnicodeSet set) { matchIterator = new
380 * UnicodeSetIterator(set); }
384 * Utility for debugging
386 public static String getStack() {
387 Exception e = new Exception();
388 StringWriter sw = new StringWriter();
389 PrintWriter pw = new PrintWriter(sw);
390 e.printStackTrace(pw);
392 return "Showing Stack with fake " + sw.getBuffer().toString();
395 // TODO use this instead of plain strings
396 public static class Name implements Comparable {
397 private String skeleton;
399 private String pretty;
401 public final int RAW = 0, TITLE = 1, NORMAL = 2;
403 public Name(String name, int style) {
407 skeleton = pretty = name;
409 pretty = regularize(name, style == TITLE);
410 skeleton = toSkeleton(pretty);
414 public int compareTo(Object o) {
415 return skeleton.compareTo(((Name) o).skeleton);
418 public boolean equals(Object o) {
419 return skeleton.equals(((Name) o).skeleton);
422 public int hashCode() {
423 return skeleton.hashCode();
426 public String toString() {
432 * @return the unicode map
434 public UnicodeMap getUnicodeMap() {
435 return getUnicodeMap(false);
439 * @return the unicode map
441 public UnicodeMap getUnicodeMap(boolean getShortest) {
443 return (UnicodeMap) getUnicodeMap_internal().cloneAsThawed();
444 UnicodeMap result = new UnicodeMap();
445 boolean uniformUnassigned = hasUniformUnassigned();
447 for (UnicodeSetIterator usi = getStuffToTest(uniformUnassigned); usi.next();) { // int i = 0; i <= 0x10FFFF; ++i
448 int i = usi.codepoint;
449 // if (DEBUG && i == 0x41) System.out.println(i + "\t" +
451 String value = getValue(i, true);
452 result.put(i, value);
454 return addUntested(result, uniformUnassigned);
458 * @return the unicode map
460 public UnicodeMap getUnicodeMap_internal() {
461 if (unicodeMap == null)
462 unicodeMap = _getUnicodeMap();
466 protected UnicodeMap _getUnicodeMap() {
467 UnicodeMap result = new UnicodeMap();
468 HashMap myIntern = new HashMap();
469 boolean uniformUnassigned = hasUniformUnassigned();
471 for (UnicodeSetIterator usi = getStuffToTest(uniformUnassigned); usi.next();) { // int i = 0; i <= 0x10FFFF; ++i
472 int i = usi.codepoint;
473 // if (DEBUG && i == 0x41) System.out.println(i + "\t" +
475 String value = getValue(i);
476 String iValue = (String) myIntern.get(value);
478 myIntern.put(value, iValue = value);
479 result.put(i, iValue);
481 addUntested(result, uniformUnassigned);
484 for (UnicodeSetIterator usi = getStuffToTest(uniformUnassigned); usi.next();) { // int i = 0; i <= 0x10FFFF; ++i
485 int i = usi.codepoint;
486 // if (DEBUG && i == 0x41) System.out.println(i + "\t" +
488 String value = getValue(i);
489 String resultValue = (String) result.getValue(i);
490 if (!value.equals(resultValue)) {
491 throw new RuntimeException("Value failure at: "
496 if (DEBUG && CHECK_NAME.equals(getName())) {
497 System.out.println(getName() + ":\t" + getClass().getName() + "\t"
499 System.out.println(getStack());
500 System.out.println(result);
505 private static UnicodeSetIterator getStuffToTest(boolean uniformUnassigned) {
506 return new UnicodeSetIterator(uniformUnassigned ? getSTUFF_TO_TEST() : getSTUFF_TO_TEST_WITH_UNASSIGNED());
510 * Really ought to create a Collection UniqueList, that forces uniqueness.
513 public static Collection addUnique(Object obj, Collection result) {
514 if (obj != null && !result.contains(obj))
520 * Utility for managing property & non-string value aliases
522 public static final Comparator PROPERTY_COMPARATOR = new Comparator() {
523 public int compare(Object o1, Object o2) {
524 return compareNames((String) o1, (String) o2);
529 * Utility for managing property & non-string value aliases
533 public static boolean equalNames(String a, String b) {
538 return toSkeleton(a).equals(toSkeleton(b));
542 * Utility for managing property & non-string value aliases
545 public static int compareNames(String a, String b) {
552 return toSkeleton(a).compareTo(toSkeleton(b));
556 * Utility for managing property & non-string value aliases
558 // TODO account for special names, tibetan, hangul
559 public static String toSkeleton(String source) {
562 StringBuffer skeletonBuffer = new StringBuffer();
563 boolean gotOne = false;
564 // remove spaces, '_', '-'
565 // we can do this with char, since no surrogates are involved
566 for (int i = 0; i < source.length(); ++i) {
567 char ch = source.charAt(i);
568 if (i > 0 && (ch == '_' || ch == ' ' || ch == '-')) {
571 char ch2 = Character.toLowerCase(ch);
574 skeletonBuffer.append(ch2);
576 skeletonBuffer.append(ch);
581 return source; // avoid string creation
582 return skeletonBuffer.toString();
585 // get the name skeleton
586 public static String toNameSkeleton(String source) {
589 StringBuffer result = new StringBuffer();
590 // remove spaces, medial '-'
591 // we can do this with char, since no surrogates are involved
592 for (int i = 0; i < source.length(); ++i) {
593 char ch = source.charAt(i);
594 if (('0' <= ch && ch <= '9') || ('A' <= ch && ch <= 'Z')
595 || ch == '<' || ch == '>') {
597 } else if (ch == ' ') {
599 } else if (ch == '-') {
600 // only copy non-medials AND trailing O-E
602 || i == source.length() - 1
603 || source.charAt(i - 1) == ' '
604 || source.charAt(i + 1) == ' '
605 || (i == source.length() - 2
606 && source.charAt(i - 1) == 'O' && source
607 .charAt(i + 1) == 'E')) {
608 System.out.println("****** EXCEPTION " + source);
611 // otherwise don't copy
613 throw new IllegalArgumentException("Illegal Name Char: U+"
614 + Utility.hex(ch) + ", " + ch);
617 return result.toString();
621 * These routines use the Java functions, because they only need to act on
622 * ASCII Changes space, - into _, inserts _ between lower and UPPER.
624 public static String regularize(String source, boolean titlecaseStart) {
628 * if (source.equals("noBreak")) { // HACK if (titlecaseStart) return
629 * "NoBreak"; return source; }
631 StringBuffer result = new StringBuffer();
633 boolean haveFirstCased = true;
634 for (int i = 0; i < source.length(); ++i) {
635 char c = source.charAt(i);
636 if (c == ' ' || c == '-' || c == '_') {
638 haveFirstCased = true;
641 haveFirstCased = true;
642 int cat = Character.getType(c);
643 if (lastCat == Character.LOWERCASE_LETTER
644 && cat == Character.UPPERCASE_LETTER) {
648 && (cat == Character.LOWERCASE_LETTER
649 || cat == Character.TITLECASE_LETTER || cat == Character.UPPERCASE_LETTER)) {
650 if (titlecaseStart) {
651 c = Character.toUpperCase(c);
653 haveFirstCased = false;
658 return result.toString();
662 * Utility function for comparing codepoint to string without generating new
667 * @return true if the codepoint equals the string
669 public static final boolean equals(int codepoint, String other) {
670 if (other == null) return false;
671 if (other.length() == 1) {
672 return codepoint == other.charAt(0);
674 if (other.length() == 2) {
675 return other.equals(UTF16.valueOf(codepoint));
681 * Utility function for comparing objects that may be null
684 public static final <T extends Object> boolean equals(T a, T b) {
685 return a == null ? b == null
691 * Utility that should be on UnicodeSet
696 static public void addAll(UnicodeSetIterator source, UnicodeSet result) {
697 while (source.nextRange()) {
698 if (source.codepoint == UnicodeSetIterator.IS_STRING) {
699 result.add(source.string);
701 result.add(source.codepoint, source.codepointEnd);
707 * Really ought to create a Collection UniqueList, that forces uniqueness.
710 public static Collection addAllUnique(Collection source, Collection result) {
711 for (Iterator it = source.iterator(); it.hasNext();) {
712 addUnique(it.next(), result);
718 * Really ought to create a Collection UniqueList, that forces uniqueness.
721 public static Collection addAllUnique(Object[] source, Collection result) {
722 for (int i = 0; i < source.length; ++i) {
723 addUnique(source[i], result);
728 static public class Factory {
729 static boolean DEBUG = false;
731 Map<String, UnicodeProperty> canonicalNames = new TreeMap<String, UnicodeProperty>();
733 Map skeletonNames = new TreeMap();
735 Map propertyCache = new HashMap(1);
737 public final Factory add(UnicodeProperty sp) {
738 String name2 = sp.getName();
739 if (name2.length() == 0) {
740 throw new IllegalArgumentException();
742 canonicalNames.put(name2, sp);
743 skeletonNames.put(toSkeleton(name2), sp);
744 List c = sp.getNameAliases(new ArrayList(1));
745 Iterator it = c.iterator();
746 while (it.hasNext()) {
747 skeletonNames.put(toSkeleton((String) it.next()), sp);
752 public final UnicodeProperty getProperty(String propertyAlias) {
753 return (UnicodeProperty) skeletonNames
754 .get(toSkeleton(propertyAlias));
757 public final List<String> getAvailableNames() {
758 return getAvailableNames(null);
761 public final List<String> getAvailableNames(List<String> result) {
763 result = new ArrayList(1);
764 Iterator it = canonicalNames.keySet().iterator();
765 while (it.hasNext()) {
766 addUnique(it.next(), result);
771 public final List getAvailableNames(int propertyTypeMask) {
772 return getAvailableNames(propertyTypeMask, null);
775 public final List getAvailableNames(int propertyTypeMask, List result) {
777 result = new ArrayList(1);
778 Iterator it = canonicalNames.keySet().iterator();
779 while (it.hasNext()) {
780 String item = (String) it.next();
781 UnicodeProperty property = getProperty(item);
783 System.out.println("Properties: " + item + ","
784 + property.getType());
785 if (!property.isType(propertyTypeMask)) {
786 // System.out.println("Masking: " + property.getType() + ","
787 // + propertyTypeMask);
790 addUnique(property.getName(), result);
795 InversePatternMatcher inverseMatcher = new InversePatternMatcher();
798 * Format is: propname ('=' | '!=') propvalue ( '|' propValue )*
800 public final UnicodeSet getSet(String propAndValue,
801 PatternMatcher matcher, UnicodeSet result) {
802 int equalPos = propAndValue.indexOf('=');
803 String prop = propAndValue.substring(0, equalPos);
804 String value = propAndValue.substring(equalPos + 1);
805 boolean negative = false;
806 if (prop.endsWith("!")) {
807 prop = prop.substring(0, prop.length() - 1);
811 UnicodeProperty up = getProperty(prop);
812 if (matcher == null) {
813 matcher = new SimpleMatcher(value, up
814 .isType(STRING_OR_MISC_MASK) ? null
815 : PROPERTY_COMPARATOR);
818 inverseMatcher.set(matcher);
819 matcher = inverseMatcher;
821 return up.getSet(matcher.set(value), result);
824 public final UnicodeSet getSet(String propAndValue,
825 PatternMatcher matcher) {
826 return getSet(propAndValue, matcher, null);
829 public final UnicodeSet getSet(String propAndValue) {
830 return getSet(propAndValue, null, null);
833 public final SymbolTable getSymbolTable(String prefix) {
834 return new PropertySymbolTable(prefix);
837 private class MyXSymbolTable extends UnicodeSet.XSymbolTable {
838 public boolean applyPropertyAlias(String propertyName,
839 String propertyValue, UnicodeSet result) {
841 System.out.println(propertyName + "=" + propertyValue);
842 UnicodeProperty prop = getProperty(propertyName);
846 UnicodeSet x = prop.getSet(propertyValue, result);
847 return x.size() != 0;
851 public final UnicodeSet.XSymbolTable getXSymbolTable() {
852 return new MyXSymbolTable();
855 private class PropertySymbolTable implements SymbolTable {
856 static final boolean DEBUG = false;
858 private String prefix;
860 RegexMatcher regexMatcher = new RegexMatcher();
862 PropertySymbolTable(String prefix) {
863 this.prefix = prefix;
866 public char[] lookup(String s) {
868 System.out.println("\t(" + prefix + ")Looking up " + s);
869 // ensure, again, that prefix matches
870 int start = prefix.length();
871 if (!s.regionMatches(true, 0, prefix, 0, start))
874 int pos = s.indexOf(':', start);
875 if (pos < 0) { // should never happen
876 throw new IllegalArgumentException(
877 "Internal Error: missing =: " + s + "\r\n");
879 UnicodeProperty prop = getProperty(s.substring(start, pos));
881 throw new IllegalArgumentException("Invalid Property in: "
882 + s + "\r\nUse " + showSet(getAvailableNames()));
884 String value = s.substring(pos + 1);
886 if (value.startsWith("\u00AB")) { // regex!
887 set = prop.getSet(regexMatcher.set(value.substring(1, value
890 set = prop.getSet(value);
892 if (set.size() == 0) {
893 throw new IllegalArgumentException(
894 "Empty Property-Value in: " + s + "\r\nUse "
895 + showSet(prop.getAvailableValues()));
898 System.out.println("\t(" + prefix + ")Returning "
899 + set.toPattern(true));
900 return set.toPattern(true).toCharArray(); // really ugly
903 private String showSet(List list) {
904 StringBuffer result = new StringBuffer("[");
905 boolean first = true;
906 for (Iterator it = list.iterator(); it.hasNext();) {
911 result.append(it.next().toString());
914 return result.toString();
917 public UnicodeMatcher lookupMatcher(int ch) {
921 public String parseReference(String text, ParsePosition pos,
924 System.out.println("\t(" + prefix + ")Parsing <"
925 + text.substring(pos.getIndex(), limit) + ">");
926 int start = pos.getIndex();
927 // ensure that it starts with 'prefix'
929 .regionMatches(true, start, prefix, 0, prefix.length()))
931 start += prefix.length();
932 // now see if it is of the form identifier:identifier
933 int i = getIdentifier(text, start, limit);
936 String prop = text.substring(start, i);
937 String value = "true";
939 if (text.charAt(i) == ':') {
941 if (text.charAt(i + 1) == '\u00AB') { // regular
943 j = text.indexOf('\u00BB', i + 2) + 1; // include
949 j = getIdentifier(text, i + 1, limit);
951 value = text.substring(i + 1, j);
957 System.out.println("\t(" + prefix + ")Parsed <" + prop
958 + ">=<" + value + ">");
959 return prefix + prop + ":" + value;
962 private int getIdentifier(String text, int start, int limit) {
964 System.out.println("\tGetID <"
965 + text.substring(start, limit) + ">");
968 for (i = start; i < limit; i += UTF16.getCharCount(cp)) {
969 cp = UTF16.charAt(text, i);
970 if (!com.ibm.icu.lang.UCharacter
971 .isUnicodeIdentifierPart(cp)
977 System.out.println("\tGotID <" + text.substring(start, i)
984 public static class FilteredProperty extends UnicodeProperty {
985 private UnicodeProperty property;
987 protected StringFilter filter;
989 protected UnicodeSetIterator matchIterator = new UnicodeSetIterator(
990 new UnicodeSet(0, 0x10FFFF));
992 protected HashMap backmap;
994 boolean allowValueAliasCollisions = false;
996 public FilteredProperty(UnicodeProperty property, StringFilter filter) {
997 this.property = property;
998 this.filter = filter;
1001 public StringFilter getFilter() {
1005 public UnicodeProperty setFilter(StringFilter filter) {
1006 this.filter = filter;
1010 List temp = new ArrayList(1);
1012 public List _getAvailableValues(List result) {
1014 return filter.addUnique(property.getAvailableValues(temp), result);
1017 public List _getNameAliases(List result) {
1019 return filter.addUnique(property.getNameAliases(temp), result);
1022 public String _getValue(int codepoint) {
1023 return filter.remap(property.getValue(codepoint));
1026 public List _getValueAliases(String valueAlias, List result) {
1027 if (backmap == null) {
1028 backmap = new HashMap(1);
1030 Iterator it = property.getAvailableValues(temp).iterator();
1031 while (it.hasNext()) {
1032 String item = (String) it.next();
1033 String mappedItem = filter.remap(item);
1034 if (backmap.get(mappedItem) != null
1035 && !allowValueAliasCollisions) {
1036 throw new IllegalArgumentException(
1037 "Filter makes values collide! " + item + ", "
1040 backmap.put(mappedItem, item);
1043 valueAlias = (String) backmap.get(valueAlias);
1045 return filter.addUnique(property.getValueAliases(valueAlias, temp),
1049 public String _getVersion() {
1050 return property.getVersion();
1053 public boolean isAllowValueAliasCollisions() {
1054 return allowValueAliasCollisions;
1057 public FilteredProperty setAllowValueAliasCollisions(boolean b) {
1058 allowValueAliasCollisions = b;
1064 public static abstract class StringFilter implements Cloneable {
1065 public abstract String remap(String original);
1067 public final List addUnique(Collection source, List result) {
1069 result = new ArrayList(1);
1070 Iterator it = source.iterator();
1071 while (it.hasNext()) {
1072 UnicodeProperty.addUnique(remap((String) it.next()), result);
1077 * public Object clone() { try { return super.clone(); } catch
1078 * (CloneNotSupportedException e) { throw new
1079 * IllegalStateException("Should never happen."); } }
1083 public static class MapFilter extends StringFilter {
1084 private Map valueMap;
1086 public MapFilter(Map valueMap) {
1087 this.valueMap = valueMap;
1090 public String remap(String original) {
1091 Object changed = valueMap.get(original);
1092 return changed == null ? original : (String) changed;
1095 public Map getMap() {
1100 public interface PatternMatcher extends ObjectMatcher {
1101 public PatternMatcher set(String pattern);
1104 public static class InversePatternMatcher extends InverseMatcher implements
1106 PatternMatcher other;
1108 public PatternMatcher set(PatternMatcher toInverse) {
1113 public boolean matches(Object value) {
1114 return !other.matches(value);
1117 public PatternMatcher set(String pattern) {
1123 public static class SimpleMatcher implements PatternMatcher {
1124 Comparator comparator;
1128 public SimpleMatcher(String pattern, Comparator comparator) {
1129 this.comparator = comparator;
1130 this.pattern = pattern;
1133 public boolean matches(Object value) {
1134 if (comparator == null)
1135 return pattern.equals(value);
1136 return comparator.compare(pattern, value) == 0;
1139 public PatternMatcher set(String pattern) {
1140 this.pattern = pattern;
1145 public static class RegexMatcher implements UnicodeProperty.PatternMatcher {
1146 private java.util.regex.Matcher matcher;
1148 public UnicodeProperty.PatternMatcher set(String pattern) {
1149 matcher = Pattern.compile(pattern).matcher("");
1153 public boolean matches(Object value) {
1154 matcher.reset(value.toString());
1155 return matcher.find();
1159 public enum AliasAddAction {IGNORE_IF_MISSING, REQUIRE_MAIN_ALIAS, ADD_MAIN_ALIAS}
1161 public static abstract class BaseProperty extends UnicodeProperty {
1162 private static final String[] NO_VALUES = {"No", "N", "F", "False"};
1164 private static final String[] YES_VALUES = {"Yes", "Y", "T", "True"};
1169 private static final String[][] YES_NO_ALIASES = new String[][] {YES_VALUES, NO_VALUES};
1171 protected List propertyAliases = new ArrayList(1);
1173 protected Map toValueAliases;
1175 protected String version;
1177 public BaseProperty setMain(String alias, String shortAlias,
1178 int propertyType, String version) {
1180 setType(propertyType);
1181 propertyAliases.add(shortAlias);
1182 propertyAliases.add(alias);
1183 if (propertyType == BINARY) {
1184 addValueAliases(YES_NO_ALIASES, AliasAddAction.ADD_MAIN_ALIAS);
1186 this.version = version;
1190 public String _getVersion() {
1194 public List _getNameAliases(List result) {
1195 addAllUnique(propertyAliases, result);
1199 public BaseProperty addValueAliases(String[][] valueAndAlternates,
1200 AliasAddAction aliasAddAction) {
1201 if (toValueAliases == null)
1203 for (int i = 0; i < valueAndAlternates.length; ++i) {
1204 for (int j = 1; j < valueAndAlternates[0].length; ++j) {
1205 addValueAlias(valueAndAlternates[i][0],
1206 valueAndAlternates[i][j], aliasAddAction);
1212 public void addValueAlias(String value, String valueAlias,
1213 AliasAddAction aliasAddAction) {
1214 List result = (List) toValueAliases.get(value);
1215 if (result == null) {
1216 switch(aliasAddAction) {
1217 case IGNORE_IF_MISSING: return;
1218 case REQUIRE_MAIN_ALIAS: throw new IllegalArgumentException("Can't add alias for mising value: " + value);
1219 case ADD_MAIN_ALIAS:
1220 toValueAliases.put(value, result = new ArrayList(0));
1224 addUnique(value, result);
1225 addUnique(valueAlias, result);
1228 protected List _getValueAliases(String valueAlias, List result) {
1229 if (toValueAliases == null)
1231 List a = (List) toValueAliases.get(valueAlias);
1233 addAllUnique(a, result);
1237 protected void _fixValueAliases() {
1238 if (toValueAliases == null)
1239 toValueAliases = new HashMap(1);
1240 for (Iterator it = getAvailableValues().iterator(); it.hasNext();) {
1241 Object value = it.next();
1242 _ensureValueInAliases(value);
1246 protected void _ensureValueInAliases(Object value) {
1247 List result = (List) toValueAliases.get(value);
1249 toValueAliases.put(value, result = new ArrayList(1));
1250 addUnique(value, result);
1253 public BaseProperty swapFirst2ValueAliases() {
1254 for (Iterator it = toValueAliases.keySet().iterator(); it.hasNext();) {
1255 List list = (List) toValueAliases.get(it.next());
1256 if (list.size() < 2)
1258 Object first = list.get(0);
1259 list.set(0, list.get(1));
1269 public UnicodeProperty addName(String string) {
1270 throw new UnsupportedOperationException();
1275 public static abstract class SimpleProperty extends BaseProperty {
1276 LinkedHashSet values;
1278 public UnicodeProperty addName(String alias) {
1279 propertyAliases.add(alias);
1283 public SimpleProperty setValues(String valueAlias) {
1284 _addToValues(valueAlias, null);
1288 public SimpleProperty addAliases(String valueAlias, String... aliases) {
1289 _addToValues(valueAlias, null);
1293 public SimpleProperty setValues(String[] valueAliases,
1294 String[] alternateValueAliases) {
1295 for (int i = 0; i < valueAliases.length; ++i) {
1296 if (valueAliases[i].equals(UNUSED))
1300 alternateValueAliases != null ? alternateValueAliases[i]
1306 public SimpleProperty setValues(List valueAliases) {
1307 this.values = new LinkedHashSet(valueAliases);
1308 for (Iterator it = this.values.iterator(); it.hasNext();) {
1309 _addToValues((String) it.next(), null);
1314 public List _getAvailableValues(List result) {
1317 result.addAll(values);
1321 protected void _fillValues() {
1322 List newvalues = (List) getUnicodeMap_internal()
1323 .getAvailableValues(new ArrayList());
1324 for (Iterator it = newvalues.iterator(); it.hasNext();) {
1325 _addToValues((String) it.next(), null);
1329 private void _addToValues(String item, String alias) {
1331 values = new LinkedHashSet();
1332 if (toValueAliases == null)
1334 addUnique(item, values);
1335 _ensureValueInAliases(item);
1336 addValueAlias(item, alias, AliasAddAction.REQUIRE_MAIN_ALIAS);
1338 /* public String _getVersion() {
1344 public static class UnicodeMapProperty extends BaseProperty {
1347 * new UnicodeProperty.UnicodeMapProperty() {
1349 unicodeMap = new UnicodeMap();
1350 unicodeMap.setErrorOnReset(true);
1351 unicodeMap.put(0xD, "CR");
1352 unicodeMap.put(0xA, "LF");
1353 UnicodeProperty cat = getProperty("General_Category");
1354 UnicodeSet temp = cat.getSet("Line_Separator")
1355 .addAll(cat.getSet("Paragraph_Separator"))
1356 .addAll(cat.getSet("Control"))
1357 .addAll(cat.getSet("Format"))
1358 .remove(0xD).remove(0xA).remove(0x200C).remove(0x200D);
1359 unicodeMap.putAll(temp, "Control");
1360 UnicodeSet graphemeExtend = getProperty("Grapheme_Extend").getSet("true");
1361 unicodeMap.putAll(graphemeExtend,"Extend");
1362 UnicodeProperty hangul = getProperty("Hangul_Syllable_Type");
1363 unicodeMap.putAll(hangul.getSet("L"),"L");
1364 unicodeMap.putAll(hangul.getSet("V"),"V");
1365 unicodeMap.putAll(hangul.getSet("T"),"T");
1366 unicodeMap.putAll(hangul.getSet("LV"),"LV");
1367 unicodeMap.putAll(hangul.getSet("LVT"),"LVT");
1368 unicodeMap.setMissing("Other");
1370 }.setMain("Grapheme_Cluster_Break", "GCB", UnicodeProperty.ENUMERATED, version)
1372 protected UnicodeMap unicodeMap;
1374 protected UnicodeMap _getUnicodeMap() {
1378 public UnicodeMapProperty set(UnicodeMap map) {
1379 unicodeMap = map.freeze();
1383 protected String _getValue(int codepoint) {
1384 return (String) unicodeMap.getValue(codepoint);
1387 /* protected List _getValueAliases(String valueAlias, List result) {
1388 if (!unicodeMap.getAvailableValues().contains(valueAlias)) return result;
1389 result.add(valueAlias);
1390 return result; // no other aliases
1392 */protected List _getAvailableValues(List result) {
1393 unicodeMap.getAvailableValues(result);
1394 if (toValueAliases != null) {
1395 for (Object s : toValueAliases.keySet()) {
1396 if (!result.contains(s)) {
1405 public boolean isValidValue(String propertyValue) {
1406 if (isType(STRING_OR_MISC_MASK)) {
1409 Collection<String> values = (Collection<String>) getAvailableValues();
1410 for (String valueAlias : values) {
1411 if (UnicodeProperty.compareNames(valueAlias, propertyValue) == 0) {
1414 for (String valueAlias2 : (Collection<String>) getValueAliases(valueAlias)) {
1415 if (UnicodeProperty.compareNames(valueAlias2, propertyValue) == 0) {
1423 public List<String> getValueAliases() {
1424 List<String> result = new ArrayList();
1425 if (isType(STRING_OR_MISC_MASK)) {
1428 Collection<String> values = (Collection<String>) getAvailableValues();
1429 for (String valueAlias : values) {
1430 UnicodeProperty.addAllUnique(getValueAliases(valueAlias), result);
1432 result.removeAll(values);
1437 public static UnicodeSet addUntested(UnicodeSet result, boolean uniformUnassigned) {
1438 if (uniformUnassigned && result.contains(UnicodeProperty.getSAMPLE_UNASSIGNED())) {
1439 result.addAll(UnicodeProperty.getUNASSIGNED());
1442 if (result.contains(UnicodeProperty.SAMPLE_HIGH_SURROGATE)) {
1443 result.addAll(UnicodeProperty.HIGH_SURROGATES);
1445 if (result.contains(UnicodeProperty.SAMPLE_HIGH_PRIVATE_USE_SURROGATE)) {
1446 result.addAll(UnicodeProperty.HIGH_PRIVATE_USE_SURROGATES);
1448 if (result.contains(UnicodeProperty.SAMPLE_LOW_SURROGATE)) {
1449 result.addAll(UnicodeProperty.LOW_SURROGATES);
1452 if (result.contains(UnicodeProperty.SAMPLE_PRIVATE_USE_AREA)) {
1453 result.addAll(UnicodeProperty.PRIVATE_USE_AREA);
1455 if (result.contains(UnicodeProperty.SAMPLE_PRIVATE_USE_AREA_A)) {
1456 result.addAll(UnicodeProperty.PRIVATE_USE_AREA_A);
1458 if (result.contains(UnicodeProperty.SAMPLE_PRIVATE_USE_AREA_B)) {
1459 result.addAll(UnicodeProperty.PRIVATE_USE_AREA_B);
1465 public static UnicodeMap addUntested(UnicodeMap result, boolean uniformUnassigned) {
1467 if (uniformUnassigned && null != (temp = result.get(UnicodeProperty.getSAMPLE_UNASSIGNED()))) {
1468 result.putAll(UnicodeProperty.getUNASSIGNED(), temp);
1471 if (null != (temp = result.get(UnicodeProperty.SAMPLE_HIGH_SURROGATE))) {
1472 result.putAll(UnicodeProperty.HIGH_SURROGATES, temp);
1474 if (null != (temp = result.get(UnicodeProperty.SAMPLE_HIGH_PRIVATE_USE_SURROGATE))) {
1475 result.putAll(UnicodeProperty.HIGH_PRIVATE_USE_SURROGATES, temp);
1477 if (null != (temp = result.get(UnicodeProperty.SAMPLE_LOW_SURROGATE))) {
1478 result.putAll(UnicodeProperty.LOW_SURROGATES, temp);
1481 if (null != (temp = result.get(UnicodeProperty.SAMPLE_PRIVATE_USE_AREA))) {
1482 result.putAll(UnicodeProperty.PRIVATE_USE_AREA, temp);
1484 if (null != (temp = result.get(UnicodeProperty.SAMPLE_PRIVATE_USE_AREA_A))) {
1485 result.putAll(UnicodeProperty.PRIVATE_USE_AREA_A, temp);
1487 if (null != (temp = result.get(UnicodeProperty.SAMPLE_PRIVATE_USE_AREA_B))) {
1488 result.putAll(UnicodeProperty.PRIVATE_USE_AREA_B, temp);
1493 public boolean isDefault(int cp) {
1494 String value = getValue(cp);
1495 if (isType(STRING_OR_MISC_MASK)) {
1496 return equals(cp, value);
1498 String defaultValue = getValue(getSAMPLE_UNASSIGNED());
1499 return defaultValue == null ? value == null : defaultValue.equals(value);
1502 public boolean hasUniformUnassigned() {
1503 return hasUniformUnassigned;
1505 protected UnicodeProperty setUniformUnassigned(boolean hasUniformUnassigned) {
1506 this.hasUniformUnassigned = hasUniformUnassigned;
1510 public static class UnicodeSetProperty extends BaseProperty {
1511 protected UnicodeSet unicodeSet;
1512 private static final String[] YESNO_ARRAY = new String[]{"Yes", "No"};
1513 private static final List YESNO = Arrays.asList(YESNO_ARRAY);
1515 public UnicodeSetProperty set(UnicodeSet set) {
1516 unicodeSet = set.freeze();
1520 public UnicodeSetProperty set(String string) {
1521 // TODO Auto-generated method stub
1522 return set(new UnicodeSet(string).freeze());
1525 protected String _getValue(int codepoint) {
1526 return YESNO_ARRAY[unicodeSet.contains(codepoint) ? 0 : 1];
1529 protected List _getAvailableValues(List result) {
1534 // private static class StringTransformProperty extends SimpleProperty {
1535 // Transform<String,String> transform;
1537 // public StringTransformProperty(Transform<String,String> transform, boolean hasUniformUnassigned) {
1538 // this.transform = transform;
1539 // setUniformUnassigned(hasUniformUnassigned);
1541 // protected String _getValue(int codepoint) {
1542 // return transform.transform(UTF16.valueOf(codepoint));
1546 // private static class CodepointTransformProperty extends SimpleProperty {
1547 // Transform<Integer,String> transform;
1549 // public CodepointTransformProperty(Transform<Integer,String> transform, boolean hasUniformUnassigned) {
1550 // this.transform = transform;
1551 // setUniformUnassigned(hasUniformUnassigned);
1553 // protected String _getValue(int codepoint) {
1554 // return transform.transform(codepoint);