2 *******************************************************************************
3 * Copyright (C) 1996-2012, Google, International Business Machines Corporation and *
4 * others. All Rights Reserved. *
5 *******************************************************************************
7 package com.ibm.icu.dev.util;
9 import java.util.Comparator;
10 import java.util.HashMap;
11 import java.util.List;
13 import com.ibm.icu.dev.util.UnicodeProperty.PatternMatcher;
14 import com.ibm.icu.impl.UnicodeRegex;
15 import com.ibm.icu.text.UTF16;
16 import com.ibm.icu.text.UnicodeSet;
19 * Allows for overriding the parsing of UnicodeSet property patterns.
21 * WARNING: If this UnicodePropertySymbolTable is used with {@code UnicodeSet.setDefaultXSymbolTable}, and the
22 * Unassigned characters (gc=Cn) are different than in ICU, you MUST call
23 * {@code UnicodeProperty.ResetCacheProperties} afterwards. If you then call {@code UnicodeSet.setDefaultXSymbolTable}
24 * with null to clear the value, you MUST also call {@code UnicodeProperty.ResetCacheProperties}.
28 public class UnicodePropertySymbolTable extends UnicodeSet.XSymbolTable {
// Regex helper bound to this symbol table in the constructor; used to transform /regex/ property values.
29 UnicodeRegex unicodeRegex;
// Source of UnicodeProperty objects, looked up by property name.
30 final UnicodeProperty.Factory factory;
/**
 * Creates a symbol table that resolves properties through the given factory.
 *
 * @param factory used to look up properties by name
 */
32 public UnicodePropertySymbolTable(UnicodeProperty.Factory factory) {
// Bind a fresh UnicodeRegex to this table so regexes it transforms use this table as their symbol table.
// NOTE(review): `this` is passed out before `factory` is assigned on the next line.
33 unicodeRegex = new UnicodeRegex().setSymbolTable(this);
34 this.factory = factory;
38 // public boolean applyPropertyAlias0(String propertyName,
39 // String propertyValue, UnicodeSet result) {
40 // if (!propertyName.contains("*")) {
41 // return applyPropertyAlias(propertyName, propertyValue, result);
43 // String[] propertyNames = propertyName.split("[*]");
44 // for (int i = propertyNames.length - 1; i >= 0; ++i) {
45 // String pname = propertyNames[i];
/**
 * Applies a property alias to {@code result}, accepting several shorthand spellings of the name.
 * <p>
 * Visible handling: an embedded U+2260 or ':' in {@code propertyName} splits it into a name and a
 * value; a trailing '!' is stripped from the name; when no value remains after trimming, the name
 * is tried as a "gc" value, as an "sc" value, as a property with value "Yes", and as a property
 * with value "".
 * <p>
 * NOTE(review): the statements that assign {@code invert}, the guards between the fallback
 * attempts, and the return statement are not visible in this excerpt; confirm against the full file.
 *
 * @param propertyName property name, optionally carrying an operator and a value
 * @param propertyValue property value; may be empty
 * @param result set handed on to {@code applyPropertyAlias0}
 */
51 public boolean applyPropertyAlias(String propertyName,
52 String propertyValue, UnicodeSet result) {
53 boolean status = false;
54 boolean invert = false;
// U+2260 is the NOT EQUAL TO sign; it and ':' both act as name/value separators here.
55 int posNotEqual = propertyName.indexOf('\u2260');
56 int posColon = propertyName.indexOf(':');
57 if (posNotEqual >= 0 || posColon >= 0) {
// Treat an absent operator as sitting at end-of-string so the minimum below picks the one that occurs first.
58 if (posNotEqual < 0) posNotEqual = propertyName.length();
59 if (posColon < 0) posColon = propertyName.length();
60 int opPos = posNotEqual < posColon ? posNotEqual : posColon;
// Text after the operator becomes the value; if a value was also passed in, it is appended after "=".
61 propertyValue = propertyValue.length() == 0 ? propertyName.substring(opPos+1)
62 : propertyName.substring(opPos+1) + "=" + propertyValue;
63 propertyName = propertyName.substring(0,opPos);
// True only when the not-equal sign is the earliest operator in the name.
64 if (posNotEqual < posColon) {
// A trailing '!' is removed from the name.
68 if (propertyName.endsWith("!")) {
69 propertyName = propertyName.substring(0, propertyName.length() - 1);
72 propertyValue = propertyValue.trim();
// Explicit value: resolve the name/value pair directly.
73 if (propertyValue.length() != 0) {
74 status = applyPropertyAlias0(propertyName, propertyValue, result);
// No value: try the bare name as a value of the "gc" property.
77 status = applyPropertyAlias0("gc", propertyName, result);
// Any exception from this attempt is discarded.
78 } catch (Exception e) {};
// Next, try the bare name as a value of the "sc" property (presumably Script; confirm).
81 status = applyPropertyAlias0("sc", propertyName, result);
78 } catch (Exception e) {};
// Next, treat the name as a property whose value is "Yes".
85 status = applyPropertyAlias0(propertyName, "Yes", result);
86 } catch (Exception e) {};
// Last attempt: the name as a property with an empty value; no catch is visible around this call.
88 status = applyPropertyAlias0(propertyName, "", result);
// Inversion is considered only after a successful match.
93 if (status && invert) {
// Maps the skeleton form of a General_Category group name (short code or long alias) to the
// two-letter subcategory codes it stands for. Each alias shares the array instance of its short code.
// NOTE(review): `new HashMap()` is a raw type; `new HashMap<String,String[]>()` would avoid the unchecked warning.
99 static final HashMap<String,String[]> GC_REMAP = new HashMap();
// C / Other
101 GC_REMAP.put("c", "Cc Cf Cn Co Cs".split(" "));
102 GC_REMAP.put("other", GC_REMAP.get("c"));
// L / Letter
104 GC_REMAP.put("l", "Ll Lm Lo Lt Lu".split(" "));
105 GC_REMAP.put("letter", GC_REMAP.get("l"));
// LC / Cased_Letter
107 GC_REMAP.put("lc", "Ll Lt Lu".split(" "));
108 GC_REMAP.put("casedletter", GC_REMAP.get("lc"));
// M / Mark
110 GC_REMAP.put("m", "Mc Me Mn".split(" "));
111 GC_REMAP.put("mark", GC_REMAP.get("m"));
// N / Number
113 GC_REMAP.put("n", "Nd Nl No".split(" "));
114 GC_REMAP.put("number", GC_REMAP.get("n"));
// P / Punctuation (two long aliases)
116 GC_REMAP.put("p", "Pc Pd Pe Pf Pi Po Ps".split(" "));
117 GC_REMAP.put("punctuation", GC_REMAP.get("p"));
118 GC_REMAP.put("punct", GC_REMAP.get("p"));
// S / Symbol
120 GC_REMAP.put("s", "Sc Sk Sm So".split(" "));
121 GC_REMAP.put("symbol", GC_REMAP.get("s"));
// Z / Separator
123 GC_REMAP.put("z", "Zl Zp Zs".split(" "));
124 GC_REMAP.put("separator", GC_REMAP.get("z"));
/**
 * Resolves one property name/value pair against the factory.
 * <p>
 * Value syntaxes visible here: a General_Category group name found in {@code GC_REMAP};
 * {@code /regex/} (matched against property values); {@code @otherProperty@} (code points where
 * both properties have equal values, with {@code @cp@} handled specially); otherwise a literal
 * value that must pass {@code isValid}.
 * <p>
 * NOTE(review): several branch conditions, the declaration of {@code set}, and the return
 * statement are not visible in this excerpt; comments below are hedged where they depend on them.
 *
 * @param propertyName name passed to {@code factory.getProperty}
 * @param propertyValue value, regex, or property reference as described above
 * @param result set that the General_Category branch passes to {@code prop.getSet(part, result)}
 * @throws IllegalArgumentException if a literal value is not valid for the property, or for an
 *         illegal property (the condition for the latter is not visible here)
 */
127 public boolean applyPropertyAlias0(String propertyName,
128 String propertyValue, UnicodeSet result) {
130 UnicodeProperty prop = factory.getProperty(propertyName);
131 String canonicalName = prop.getName();
132 boolean isAge = UnicodeProperty.equalNames("Age", canonicalName);
134 // Hack for special GC values
135 if (canonicalName.equals("General_Category")) {
// Group names such as "L" or "Letter" expand to their subcategories; lookup is by skeleton form.
136 String[] parts = GC_REMAP.get(UnicodeProperty.toSkeleton(propertyValue));
138 for (String part : parts) {
139 prop.getSet(part, result);
145 PatternMatcher patternMatcher = null;
// A value wrapped in slashes is a regex; the slashes are stripped and the body is transformed by unicodeRegex.
146 if (propertyValue.length() > 1 && propertyValue.startsWith("/") && propertyValue.endsWith("/")) {
147 String fixedRegex = unicodeRegex.transform(propertyValue.substring(1, propertyValue.length() - 1));
148 patternMatcher = new UnicodeProperty.RegexMatcher().set(fixedRegex);
150 UnicodeProperty otherProperty = null;
151 boolean testCp = false;
// A value wrapped in '@' names another property to compare against.
152 if (propertyValue.length() > 1 && propertyValue.startsWith("@") && propertyValue.endsWith("@")) {
153 String otherPropName = propertyValue.substring(1, propertyValue.length() - 1).trim();
// "cp" is special-cased (presumably sets testCp; the assignment is not visible here).
154 if ("cp".equalsIgnoreCase(otherPropName)) {
157 otherProperty = factory.getProperty(otherPropName);
163 set = new UnicodeSet();
// Scan every code point, testing its property value against the code point itself.
164 for (int i = 0; i <= 0x10FFFF; ++i) {
165 if (UnicodeProperty.equals(i, prop.getValue(i))) {
169 } else if (otherProperty != null) {
170 set = new UnicodeSet();
// Scan every code point, testing whether the two properties have equal values.
171 for (int i = 0; i <= 0x10FFFF; ++i) {
172 String v1 = prop.getValue(i);
173 String v2 = otherProperty.getValue(i);
174 if (UnicodeProperty.equals(v1, v2)) {
// Literal value (no regex): validate before use.
178 } else if (patternMatcher == null) {
179 if (!isValid(prop, propertyValue)) {
180 throw new IllegalArgumentException("The value '" + propertyValue + "' is illegal. Values for " + propertyName
182 + prop.getAvailableValues() + " or in " + prop.getValueAliases());
// Ordered comparison instead of exact match; the guarding condition is not visible (presumably isAge; confirm).
185 set = prop.getSet(new ComparisonMatcher(propertyValue, Relation.geq));
// Exact-value lookup.
187 set = prop.getSet(propertyValue);
// Regex case with ordering: for each value the regex matches, also add every value that compares <= it.
// NOTE(review): the condition selecting this branch is not visible (presumably isAge; confirm).
190 set = new UnicodeSet();
191 List<String> values = prop.getAvailableValues();
192 for (String value : values) {
193 if (patternMatcher.matches(value)) {
194 for (String other : values) {
195 if (other.compareTo(value) <= 0) {
196 set.addAll(prop.getSet(other));
// Plain regex case: let the property collect all values accepted by the matcher.
202 set = prop.getSet(patternMatcher);
// NOTE(review): the condition or handler that leads to this throw is not visible in this excerpt.
207 throw new IllegalArgumentException("Illegal property: " + propertyName);
/**
 * Checks whether {@code propertyValue} is acceptable for {@code prop}.
 * Currently a straight delegation to {@code prop.isValidValue}; the commented-out lines are the
 * start of an unfinished General_Category special case.
 *
 * @param prop property to validate against
 * @param propertyValue candidate value
 * @return the result of {@code prop.isValidValue(propertyValue)}
 */
212 private boolean isValid(UnicodeProperty prop, String propertyValue) {
213 // if (prop.getName().equals("General_Category")) {
214 // if (propertyValue)
216 return prop.isValidValue(propertyValue);
/** Ordering relations that a {@code ComparisonMatcher} can test between its pattern and a value. */
219 public enum Relation {less, leq, equal, geq, greater}
221 public static class ComparisonMatcher implements PatternMatcher {
// Shared string comparator used by matches().
// NOTE(review): raw Comparator type; ICU documents the StringComparator arguments as
// (codePointCompare, ignoreCase, foldCaseOption); confirm against the ICU4J API docs.
223 static Comparator comparator = new UTF16.StringComparator(true, false,0);
/**
 * Creates a matcher that compares candidate values with {@code pattern} using the given relation.
 *
 * @param pattern reference string for the comparison
 * @param comparator relation to test; despite the parameter name it is a {@code Relation}, stored in {@code relation}
 */
227 public ComparisonMatcher(String pattern, Relation comparator) {
228 this.relation = comparator;
229 this.pattern = pattern;
/**
 * Tests {@code value} against the stored pattern.
 * The pattern is the LEFT operand of the comparison, so for example the {@code geq} case is true
 * when the pattern compares greater than or equal to {@code value.toString()}.
 *
 * @param value candidate; compared via its {@code toString()}
 * @return whether the comparison result satisfies the selected case
 */
232 public boolean matches(Object value) {
233 int comp = comparator.compare(pattern, value.toString());
// NOTE(review): the switch header is not visible here (presumably switches on `relation`; confirm).
235 case less: return comp < 0;
236 case leq: return comp <= 0;
// `default` sits mid-switch and handles the remaining constant (equal).
237 default: return comp == 0;
238 case geq: return comp >= 0;
239 case greater: return comp > 0;
243 public PatternMatcher set(String pattern) {
244 this.pattern = pattern;