3 *******************************************************************************
4 * Copyright (C) 2002-2012, International Business Machines Corporation and *
5 * others. All Rights Reserved. *
6 *******************************************************************************
8 package com.ibm.icu.dev.util;
10 import java.util.Arrays;
11 import java.util.List;
12 import java.util.Locale;
16 import com.ibm.icu.lang.UCharacter;
17 import com.ibm.icu.lang.UProperty;
18 import com.ibm.icu.text.Normalizer;
19 import com.ibm.icu.text.UTF16;
20 import com.ibm.icu.text.UnicodeSet;
21 import com.ibm.icu.text.UnicodeSetIterator;
25 * Provides a general interface for Unicode Properties, and
26 * extracting sets based on those values.
29 public abstract class UnicodePropertySource implements Cloneable {
31 protected String propertyAlias;
32 protected int m_nameChoice = UProperty.NameChoice.LONG;
33 protected StringFilter filter = new StringFilter();
34 protected UnicodeSetIterator matchIterator = new UnicodeSetIterator(new UnicodeSet(0,0x10FFFF));
36 abstract public String getPropertyValue(int codepoint);
37 abstract public Set getAvailablePropertyAliases(Set result);
38 abstract public Set getAvailablePropertyValueAliases(Set result);
40 abstract public String getPropertyAlias(int nameChoice);
41 abstract public String getPropertyValueAlias(String valueAlias, int nameChoice);
44 * Subclasses should override
46 public Object clone() {
48 UnicodePropertySource result = (UnicodePropertySource)super.clone();
49 result.filter = (StringFilter)filter.clone();
51 } catch (CloneNotSupportedException e) {
52 throw new IllegalStateException("Should never happen.");
56 public UnicodePropertySource setPropertyAlias(String propertyAlias) {
57 this.propertyAlias = propertyAlias;
61 public String getPropertyAlias() {
65 public static final boolean equals(int codepoint, String other) {
66 if (other.length() == 1) {
67 return codepoint == other.charAt(0);
69 return other.equals(UTF16.valueOf(codepoint));
72 public UnicodeSet getPropertySet(boolean charEqualsValue, UnicodeSet result){
73 if (result == null) result = new UnicodeSet();
74 matchIterator.reset();
75 while (matchIterator.next()) {
76 String value = filter.remap(getPropertyValue(matchIterator.codepoint));
77 if (equals(matchIterator.codepoint, value) == charEqualsValue) {
78 result.add(matchIterator.codepoint);
84 public UnicodeSet getPropertySet(String propertyValue, UnicodeSet result){
85 if (result == null) result = new UnicodeSet();
86 matchIterator.reset();
87 while (matchIterator.next()) {
88 String value = filter.remap(getPropertyValue(matchIterator.codepoint));
89 if (propertyValue.equals(value)) {
90 result.add(matchIterator.codepoint);
96 public UnicodeSet getPropertySet(Matcher matcher, UnicodeSet result) {
97 if (result == null) result = new UnicodeSet();
98 matchIterator.reset();
99 while (matchIterator.next()) {
100 String value = filter.remap(getPropertyValue(matchIterator.codepoint));
103 if (matcher.matches(value)) {
104 result.add(matchIterator.codepoint);
110 public interface Matcher {
111 public boolean matches(String value);
114 public int getNameChoice() {
118 public UnicodePropertySource setNameChoice(int choice) {
119 m_nameChoice = choice;
123 public static class StringFilter implements Cloneable {
124 public String remap(String original) {
127 public Object clone() {
129 return super.clone();
130 } catch (CloneNotSupportedException e) {
131 throw new IllegalStateException("Should never happen.");
136 public static class MapFilter extends StringFilter {
138 public String remap(String original) {
139 Object changed = valueMap.get(original);
140 return changed == null ? original : (String) changed;
142 public Map getMap() {
146 public MapFilter setMap(Map map) {
152 static public class ICU extends UnicodePropertySource {
153 protected int propEnum = Integer.MIN_VALUE;
155 matchIterator = new UnicodeSetIterator(
156 new UnicodeSet("[^[:Cn:]-[:Default_Ignorable_Code_Point:]]"));
159 public UnicodePropertySource setPropertyAlias(String propertyAlias) {
160 super.setPropertyAlias(propertyAlias);
161 int extraPosition = Extras.indexOf(propertyAlias);
162 if (extraPosition >= 0) {
163 propEnum = EXTRA_START + extraPosition;
165 propEnum = UCharacter.getPropertyEnum(propertyAlias);
170 public String getPropertyValue(int codePoint) {
171 if (propEnum < UProperty.INT_LIMIT) {
172 int enumValue = UCharacter.getIntPropertyValue(codePoint, propEnum);
173 return UCharacter.getPropertyValueName(propEnum,enumValue, (int)m_nameChoice);
174 } else if (propEnum < UProperty.DOUBLE_LIMIT) {
175 return Double.toString(UCharacter.getUnicodeNumericValue(codePoint));
176 // TODO: Fix HACK -- API deficient
177 } else switch(propEnum) {
178 case UProperty.AGE: return UCharacter.getAge(codePoint).toString();
179 case UProperty.BIDI_MIRRORING_GLYPH: return UTF16.valueOf(UCharacter.getMirror(codePoint));
180 case UProperty.CASE_FOLDING: return UCharacter.foldCase(UTF16.valueOf(codePoint),true);
181 case UProperty.ISO_COMMENT: return UCharacter.getISOComment(codePoint);
182 case UProperty.LOWERCASE_MAPPING: return UCharacter.toLowerCase(Locale.ENGLISH,UTF16.valueOf(codePoint));
183 case UProperty.NAME: return UCharacter.getName(codePoint);
184 case UProperty.SIMPLE_CASE_FOLDING: return UTF16.valueOf(UCharacter.foldCase(codePoint,true));
185 case UProperty.SIMPLE_LOWERCASE_MAPPING: return UTF16.valueOf(UCharacter.toLowerCase(codePoint));
186 case UProperty.SIMPLE_TITLECASE_MAPPING: return UTF16.valueOf(UCharacter.toTitleCase(codePoint));
187 case UProperty.SIMPLE_UPPERCASE_MAPPING: return UTF16.valueOf(UCharacter.toUpperCase(codePoint));
188 case UProperty.TITLECASE_MAPPING: return UCharacter.toTitleCase(Locale.ENGLISH,UTF16.valueOf(codePoint),null);
189 case UProperty.UNICODE_1_NAME: return UCharacter.getName1_0(codePoint);
190 case UProperty.UPPERCASE_MAPPING: return UCharacter.toUpperCase(Locale.ENGLISH,UTF16.valueOf(codePoint));
191 case NFC: return Normalizer.normalize(codePoint, Normalizer.NFC);
192 case NFD: return Normalizer.normalize(codePoint, Normalizer.NFD);
193 case NFKC: return Normalizer.normalize(codePoint, Normalizer.NFKC);
194 case NFKD: return Normalizer.normalize(codePoint, Normalizer.NFKD);
199 static final List Extras = Arrays.asList(new String[] {
200 "NFC", "NFD", "NFKC", "NKFD"
209 EXTRA_LIMIT = NFKD+1;
211 static final int[][] ranges = {
212 {UProperty.BINARY_START, UProperty.BINARY_LIMIT},
213 {UProperty.INT_START, UProperty.INT_LIMIT},
214 {UProperty.DOUBLE_START, UProperty.DOUBLE_LIMIT},
215 {UProperty.STRING_START, UProperty.STRING_LIMIT},
218 public Set getAvailablePropertyAliases(Set result) {
219 for (int i = 0; i < ranges.length; ++i) {
220 for (int j = ranges[i][0]; j < ranges[i][1]; ++j) {
221 result.add(UCharacter.getPropertyName(j, m_nameChoice));
224 result.addAll(Extras);
228 public Set getAvailablePropertyValueAliases(Set result) {
229 if (propEnum < UProperty.INT_LIMIT) {
230 int start = UCharacter.getIntPropertyMinValue(propEnum);
231 int end = UCharacter.getIntPropertyMaxValue(propEnum);
232 for (int i = start; i <= end; ++i) {
233 result.add(getFixedValueAlias(null, i,m_nameChoice));
236 result.add(getFixedValueAlias(null, -1,m_nameChoice));
242 * @param valueAlias null if unused.
243 * @param valueEnum -1 if unused
// Resolves the alias to report for a value of the property selected by propEnum.
// Properties at or above STRING_START, and otherwise those at or above
// DOUBLE_START, are handled by the first two branches before any enum lookup.
247 private String getFixedValueAlias(String valueAlias, int valueEnum, int nameChoice) {
248 if (propEnum >= UProperty.STRING_START) {
250 } else if (propEnum >= UProperty.DOUBLE_START) {
// A supplied alias other than the "<integer>" placeholder overrides the
// valueEnum argument: it is converted to its enum value here.
253 if (valueAlias != null && !valueAlias.equals("<integer>")) {
254 valueEnum = UCharacter.getPropertyValueEnum(propEnum,valueAlias);
// First attempt: look the name up with the requested name choice.
256 String result = fixedGetPropertyValueName(propEnum, valueEnum, nameChoice);
257 if (result != null) return result;
258 // try other namechoice
// Fallback: LONG is retried as SHORT; any other choice is retried as LONG.
259 result = fixedGetPropertyValueName(propEnum,valueEnum,
260 nameChoice == UProperty.NameChoice.LONG ? UProperty.NameChoice.SHORT : UProperty.NameChoice.LONG);
261 if (result != null) return result;
265 private static String fixedGetPropertyValueName(int propEnum, int valueEnum, int nameChoice) {
267 return UCharacter.getPropertyValueName(propEnum,valueEnum,nameChoice);
268 } catch (Exception e) {
273 public String getPropertyAlias(int nameChoice) {
274 if (propEnum < EXTRA_START) {
275 return UCharacter.getPropertyName(propEnum, nameChoice);
277 return (String)Extras.get(propEnum-EXTRA_START);
280 public String getPropertyValueAlias(String valueAlias, int nameChoice) {
281 return getFixedValueAlias(valueAlias, -1, nameChoice);
284 // TODO file bug on getPropertyValueName for Canonical_Combining_Class
286 public StringFilter getFilter() {
291 public UnicodePropertySource setFilter(StringFilter filter) {
292 this.filter = filter;
298 static public void addAll(UnicodeSetIterator source, UnicodeSet result) {
299 while (source.nextRange()) {
300 if (source.codepoint == UnicodeSetIterator.IS_STRING) {
301 result.add(source.string);
303 result.add(source.codepoint, source.codepointEnd);
308 public UnicodeSet getMatchSet(UnicodeSet result) {
309 if (result == null) result = new UnicodeSet();
310 addAll(matchIterator, result);
317 public void setMatchSet(UnicodeSet set) {
318 matchIterator = new UnicodeSetIterator(set);