3 *******************************************************************************
\r
4 * Copyright (C) 2002-2010, International Business Machines Corporation and *
\r
5 * others. All Rights Reserved. *
\r
6 *******************************************************************************
\r
8 package com.ibm.icu.dev.test.util;
\r
10 import java.util.Arrays;
\r
11 import java.util.List;
\r
12 import java.util.Locale;
\r
13 import java.util.Map;
\r
14 import java.util.Set;
\r
16 import com.ibm.icu.lang.UCharacter;
\r
17 import com.ibm.icu.lang.UProperty;
\r
18 import com.ibm.icu.text.Normalizer;
\r
19 import com.ibm.icu.text.UTF16;
\r
20 import com.ibm.icu.text.UnicodeSet;
\r
21 import com.ibm.icu.text.UnicodeSetIterator;
\r
/**
 * Provides a general interface for Unicode properties, and for
 * extracting sets of code points based on the values of those properties.
 */
\r
29 public abstract class UnicodePropertySource implements Cloneable {
\r
31 protected String propertyAlias;
\r
32 protected int m_nameChoice = UProperty.NameChoice.LONG;
\r
33 protected StringFilter filter = new StringFilter();
\r
34 protected UnicodeSetIterator matchIterator = new UnicodeSetIterator(new UnicodeSet(0,0x10FFFF));
\r
36 abstract public String getPropertyValue(int codepoint);
\r
37 abstract public Set getAvailablePropertyAliases(Set result);
\r
38 abstract public Set getAvailablePropertyValueAliases(Set result);
\r
40 abstract public String getPropertyAlias(int nameChoice);
\r
41 abstract public String getPropertyValueAlias(String valueAlias, int nameChoice);
\r
44 * Subclasses should override
\r
46 public Object clone() {
\r
48 UnicodePropertySource result = (UnicodePropertySource)super.clone();
\r
49 result.filter = (StringFilter)filter.clone();
\r
51 } catch (CloneNotSupportedException e) {
\r
52 throw new IllegalStateException("Should never happen.");
\r
56 public UnicodePropertySource setPropertyAlias(String propertyAlias) {
\r
57 this.propertyAlias = propertyAlias;
\r
61 public String getPropertyAlias() {
\r
62 return propertyAlias;
\r
65 public static final boolean equals(int codepoint, String other) {
\r
66 if (other.length() == 1) {
\r
67 return codepoint == other.charAt(0);
\r
69 return other.equals(UTF16.valueOf(codepoint));
\r
72 public UnicodeSet getPropertySet(boolean charEqualsValue, UnicodeSet result){
\r
73 if (result == null) result = new UnicodeSet();
\r
74 matchIterator.reset();
\r
75 while (matchIterator.next()) {
\r
76 String value = filter.remap(getPropertyValue(matchIterator.codepoint));
\r
77 if (equals(matchIterator.codepoint, value) == charEqualsValue) {
\r
78 result.add(matchIterator.codepoint);
\r
84 public UnicodeSet getPropertySet(String propertyValue, UnicodeSet result){
\r
85 if (result == null) result = new UnicodeSet();
\r
86 matchIterator.reset();
\r
87 while (matchIterator.next()) {
\r
88 String value = filter.remap(getPropertyValue(matchIterator.codepoint));
\r
89 if (propertyValue.equals(value)) {
\r
90 result.add(matchIterator.codepoint);
\r
96 public UnicodeSet getPropertySet(Matcher matcher, UnicodeSet result) {
\r
97 if (result == null) result = new UnicodeSet();
\r
98 matchIterator.reset();
\r
99 while (matchIterator.next()) {
\r
100 String value = filter.remap(getPropertyValue(matchIterator.codepoint));
\r
103 if (matcher.matches(value)) {
\r
104 result.add(matchIterator.codepoint);
\r
/**
 * Predicate over property value strings; getPropertySet(Matcher, UnicodeSet)
 * uses it to decide which code points to collect.
 */
public interface Matcher {
    /**
     * @param value the property value to test
     * @return true if the value is accepted
     */
    boolean matches(String value);
}
\r
114 public int getNameChoice() {
\r
115 return m_nameChoice;
\r
118 public UnicodePropertySource setNameChoice(int choice) {
\r
119 m_nameChoice = choice;
\r
123 public static class StringFilter implements Cloneable {
\r
124 public String remap(String original) {
\r
127 public Object clone() {
\r
129 return super.clone();
\r
130 } catch (CloneNotSupportedException e) {
\r
131 throw new IllegalStateException("Should never happen.");
\r
136 public static class MapFilter extends StringFilter {
\r
138 public String remap(String original) {
\r
139 Object changed = valueMap.get(original);
\r
140 return changed == null ? original : (String) changed;
\r
142 public Map getMap() {
\r
146 public MapFilter setMap(Map map) {
\r
152 static public class ICU extends UnicodePropertySource {
\r
153 protected int propEnum = Integer.MIN_VALUE;
\r
155 matchIterator = new UnicodeSetIterator(
\r
156 new UnicodeSet("[^[:Cn:]-[:Default_Ignorable_Code_Point:]]"));
\r
159 public UnicodePropertySource setPropertyAlias(String propertyAlias) {
\r
160 super.setPropertyAlias(propertyAlias);
\r
161 int extraPosition = Extras.indexOf(propertyAlias);
\r
162 if (extraPosition >= 0) {
\r
163 propEnum = EXTRA_START + extraPosition;
\r
165 propEnum = UCharacter.getPropertyEnum(propertyAlias);
\r
170 public String getPropertyValue(int codePoint) {
\r
171 if (propEnum < UProperty.INT_LIMIT) {
\r
172 int enumValue = UCharacter.getIntPropertyValue(codePoint, propEnum);
\r
173 return UCharacter.getPropertyValueName(propEnum,enumValue, (int)m_nameChoice);
\r
174 } else if (propEnum < UProperty.DOUBLE_LIMIT) {
\r
175 return Double.toString(UCharacter.getUnicodeNumericValue(codePoint));
\r
176 // TODO: Fix HACK -- API deficient
\r
177 } else switch(propEnum) {
\r
178 case UProperty.AGE: return UCharacter.getAge(codePoint).toString();
\r
179 case UProperty.BIDI_MIRRORING_GLYPH: return UTF16.valueOf(UCharacter.getMirror(codePoint));
\r
180 case UProperty.CASE_FOLDING: return UCharacter.foldCase(UTF16.valueOf(codePoint),true);
\r
181 case UProperty.ISO_COMMENT: return UCharacter.getISOComment(codePoint);
\r
182 case UProperty.LOWERCASE_MAPPING: return UCharacter.toLowerCase(Locale.ENGLISH,UTF16.valueOf(codePoint));
\r
183 case UProperty.NAME: return UCharacter.getName(codePoint);
\r
184 case UProperty.SIMPLE_CASE_FOLDING: return UTF16.valueOf(UCharacter.foldCase(codePoint,true));
\r
185 case UProperty.SIMPLE_LOWERCASE_MAPPING: return UTF16.valueOf(UCharacter.toLowerCase(codePoint));
\r
186 case UProperty.SIMPLE_TITLECASE_MAPPING: return UTF16.valueOf(UCharacter.toTitleCase(codePoint));
\r
187 case UProperty.SIMPLE_UPPERCASE_MAPPING: return UTF16.valueOf(UCharacter.toUpperCase(codePoint));
\r
188 case UProperty.TITLECASE_MAPPING: return UCharacter.toTitleCase(Locale.ENGLISH,UTF16.valueOf(codePoint),null);
\r
189 case UProperty.UNICODE_1_NAME: return UCharacter.getName1_0(codePoint);
\r
190 case UProperty.UPPERCASE_MAPPING: return UCharacter.toUpperCase(Locale.ENGLISH,UTF16.valueOf(codePoint));
\r
191 case NFC: return Normalizer.normalize(codePoint, Normalizer.NFC);
\r
192 case NFD: return Normalizer.normalize(codePoint, Normalizer.NFD);
\r
193 case NFKC: return Normalizer.normalize(codePoint, Normalizer.NFKC);
\r
194 case NFKD: return Normalizer.normalize(codePoint, Normalizer.NFKD);
\r
199 static final List Extras = Arrays.asList(new String[] {
\r
200 "NFC", "NFD", "NFKC", "NKFD"
\r
209 EXTRA_LIMIT = NFKD+1;
\r
211 static final int[][] ranges = {
\r
212 {UProperty.BINARY_START, UProperty.BINARY_LIMIT},
\r
213 {UProperty.INT_START, UProperty.INT_LIMIT},
\r
214 {UProperty.DOUBLE_START, UProperty.DOUBLE_LIMIT},
\r
215 {UProperty.STRING_START, UProperty.STRING_LIMIT},
\r
218 public Set getAvailablePropertyAliases(Set result) {
\r
219 for (int i = 0; i < ranges.length; ++i) {
\r
220 for (int j = ranges[i][0]; j < ranges[i][1]; ++j) {
\r
221 result.add(UCharacter.getPropertyName(j, m_nameChoice));
\r
224 result.addAll(Extras);
\r
228 public Set getAvailablePropertyValueAliases(Set result) {
\r
229 if (propEnum < UProperty.INT_LIMIT) {
\r
230 int start = UCharacter.getIntPropertyMinValue(propEnum);
\r
231 int end = UCharacter.getIntPropertyMaxValue(propEnum);
\r
232 for (int i = start; i <= end; ++i) {
\r
233 result.add(getFixedValueAlias(null, i,m_nameChoice));
\r
236 result.add(getFixedValueAlias(null, -1,m_nameChoice));
\r
242 * @param valueAlias null if unused.
\r
243 * @param valueEnum -1 if unused
\r
244 * @param nameChoice
\r
245 * @return the alias
\r
247 private String getFixedValueAlias(String valueAlias, int valueEnum, int nameChoice) {
\r
248 if (propEnum >= UProperty.STRING_START) {
\r
250 } else if (propEnum >= UProperty.DOUBLE_START) {
\r
253 if (valueAlias != null && !valueAlias.equals("<integer>")) {
\r
254 valueEnum = UCharacter.getPropertyValueEnum(propEnum,valueAlias);
\r
256 String result = fixedGetPropertyValueName(propEnum, valueEnum, nameChoice);
\r
257 if (result != null) return result;
\r
258 // try other namechoice
\r
259 result = fixedGetPropertyValueName(propEnum,valueEnum,
\r
260 nameChoice == UProperty.NameChoice.LONG ? UProperty.NameChoice.SHORT : UProperty.NameChoice.LONG);
\r
261 if (result != null) return result;
\r
262 return "<integer>";
\r
265 private static String fixedGetPropertyValueName(int propEnum, int valueEnum, int nameChoice) {
\r
267 return UCharacter.getPropertyValueName(propEnum,valueEnum,nameChoice);
\r
268 } catch (Exception e) {
\r
273 public String getPropertyAlias(int nameChoice) {
\r
274 if (propEnum < EXTRA_START) {
\r
275 return UCharacter.getPropertyName(propEnum, nameChoice);
\r
277 return (String)Extras.get(propEnum-EXTRA_START);
\r
280 public String getPropertyValueAlias(String valueAlias, int nameChoice) {
\r
281 return getFixedValueAlias(valueAlias, -1, nameChoice);
\r
284 // TODO file bug on getPropertyValueName for Canonical_Combining_Class
\r
286 public StringFilter getFilter() {
\r
291 public UnicodePropertySource setFilter(StringFilter filter) {
\r
292 this.filter = filter;
\r
298 static public void addAll(UnicodeSetIterator source, UnicodeSet result) {
\r
299 while (source.nextRange()) {
\r
300 if (source.codepoint == UnicodeSetIterator.IS_STRING) {
\r
301 result.add(source.string);
\r
303 result.add(source.codepoint, source.codepointEnd);
\r
308 public UnicodeSet getMatchSet(UnicodeSet result) {
\r
309 if (result == null) result = new UnicodeSet();
\r
310 addAll(matchIterator, result);
\r
317 public void setMatchSet(UnicodeSet set) {
\r
318 matchIterator = new UnicodeSetIterator(set);
\r