2 *******************************************************************************
\r
3 * Copyright (C) 2002-2010, International Business Machines Corporation and *
\r
4 * others. All Rights Reserved. *
\r
5 *******************************************************************************
\r
7 package com.ibm.icu.dev.test.util;
\r
9 import java.util.ArrayList;
\r
10 import java.util.Arrays;
\r
11 import java.util.Collection;
\r
12 import java.util.HashMap;
\r
13 import java.util.HashSet;
\r
14 import java.util.Iterator;
\r
15 import java.util.List;
\r
16 import java.util.Locale;
\r
17 import java.util.Map;
\r
18 import java.util.Set;
\r
19 import java.util.TreeSet;
\r
21 import com.ibm.icu.lang.UCharacter;
\r
22 import com.ibm.icu.lang.UProperty;
\r
23 import com.ibm.icu.text.Normalizer;
\r
24 import com.ibm.icu.text.UTF16;
\r
25 import com.ibm.icu.util.VersionInfo;
\r
29 * Provides a general interface for Unicode Properties, and
\r
30 * extracting sets based on those values.
\r
34 public class ICUPropertyFactory extends UnicodeProperty.Factory {
\r
36 static class ICUProperty extends UnicodeProperty {
\r
37 protected int propEnum = Integer.MIN_VALUE;
\r
39 protected ICUProperty(String propName, int propEnum) {
\r
41 this.propEnum = propEnum;
\r
42 setType(internalGetPropertyType(propEnum));
\r
43 if (propEnum == UProperty.DEFAULT_IGNORABLE_CODE_POINT || propEnum == UProperty.BIDI_CLASS) {
\r
44 setUniformUnassigned(false);
\r
48 boolean shownException = false;
\r
50 public String _getValue(int codePoint) {
\r
53 return getAge(codePoint);
\r
54 case UProperty.BIDI_MIRRORING_GLYPH:
\r
55 return UTF16.valueOf(UCharacter.getMirror(codePoint));
\r
56 case UProperty.CASE_FOLDING:
\r
57 return UCharacter.foldCase(UTF16.valueOf(codePoint), true);
\r
58 case UProperty.ISO_COMMENT:
\r
59 return UCharacter.getISOComment(codePoint);
\r
60 case UProperty.LOWERCASE_MAPPING:
\r
61 return UCharacter.toLowerCase(Locale.ENGLISH, UTF16.valueOf(codePoint));
\r
62 case UProperty.NAME:
\r
63 return UCharacter.getName(codePoint);
\r
64 case UProperty.SIMPLE_CASE_FOLDING:
\r
65 return UTF16.valueOf(UCharacter.foldCase(codePoint, true));
\r
66 case UProperty.SIMPLE_LOWERCASE_MAPPING:
\r
67 return UTF16.valueOf(UCharacter.toLowerCase(codePoint));
\r
68 case UProperty.SIMPLE_TITLECASE_MAPPING:
\r
69 return UTF16.valueOf(UCharacter.toTitleCase(codePoint));
\r
70 case UProperty.SIMPLE_UPPERCASE_MAPPING:
\r
71 return UTF16.valueOf(UCharacter.toUpperCase(codePoint));
\r
72 case UProperty.TITLECASE_MAPPING:
\r
73 return UCharacter.toTitleCase(Locale.ENGLISH, UTF16.valueOf(codePoint), null);
\r
74 case UProperty.UNICODE_1_NAME:
\r
75 return UCharacter.getName1_0(codePoint);
\r
76 case UProperty.UPPERCASE_MAPPING:
\r
77 return UCharacter.toUpperCase(Locale.ENGLISH, UTF16.valueOf(codePoint));
\r
78 // case NFC: return Normalizer.normalize(codePoint, Normalizer.NFC);
\r
79 // case NFD: return Normalizer.normalize(codePoint, Normalizer.NFD);
\r
80 // case NFKC: return Normalizer.normalize(codePoint, Normalizer.NFKC);
\r
81 // case NFKD: return Normalizer.normalize(codePoint, Normalizer.NFKD);
\r
83 return String.valueOf(Normalizer.normalize(codePoint, Normalizer.NFC).equals(UTF16.valueOf(codePoint)));
\r
85 return String.valueOf(Normalizer.normalize(codePoint, Normalizer.NFD).equals(UTF16.valueOf(codePoint)));
\r
88 .valueOf(Normalizer.normalize(codePoint, Normalizer.NFKC).equals(UTF16.valueOf(codePoint)));
\r
91 .valueOf(Normalizer.normalize(codePoint, Normalizer.NFKD).equals(UTF16.valueOf(codePoint)));
\r
93 return String.valueOf(UCharacter.toLowerCase(Locale.ENGLISH, UTF16.valueOf(codePoint)).equals(
\r
94 UTF16.valueOf(codePoint)));
\r
96 return String.valueOf(UCharacter.toUpperCase(Locale.ENGLISH, UTF16.valueOf(codePoint)).equals(
\r
97 UTF16.valueOf(codePoint)));
\r
99 return String.valueOf(UCharacter.toTitleCase(Locale.ENGLISH, UTF16.valueOf(codePoint), null).equals(
\r
100 UTF16.valueOf(codePoint)));
\r
102 return String.valueOf(UCharacter.foldCase(UTF16.valueOf(codePoint), true).equals(
\r
103 UTF16.valueOf(codePoint)));
\r
105 return String.valueOf(UCharacter.toLowerCase(Locale.ENGLISH, UTF16.valueOf(codePoint)).equals(
\r
106 UTF16.valueOf(codePoint)));
\r
108 if (propEnum < UProperty.INT_LIMIT) {
\r
109 int enumValue = -1;
\r
110 String value = null;
\r
112 enumValue = UCharacter.getIntPropertyValue(codePoint, propEnum);
\r
113 if (enumValue >= 0)
\r
114 value = fixedGetPropertyValueName(propEnum, enumValue, UProperty.NameChoice.LONG);
\r
115 } catch (IllegalArgumentException e) {
\r
116 if (!shownException) {
\r
117 System.out.println("Fail: " + getName() + ", " + Integer.toHexString(codePoint));
\r
118 shownException = true;
\r
121 return value != null ? value : String.valueOf(enumValue);
\r
122 } else if (propEnum < UProperty.DOUBLE_LIMIT) {
\r
123 double num = UCharacter.getUnicodeNumericValue(codePoint);
\r
124 if (num == UCharacter.NO_NUMERIC_VALUE)
\r
126 return Double.toString(num);
\r
127 // TODO: Fix HACK -- API deficient
\r
132 private String getAge(int codePoint) {
\r
133 String temp = UCharacter.getAge(codePoint).toString();
\r
134 if (temp.equals("0.0.0.0"))
\r
135 return "unassigned";
\r
136 if (temp.endsWith(".0.0"))
\r
137 return temp.substring(0, temp.length() - 4);
\r
142 * @param valueAlias null if unused.
\r
143 * @param valueEnum -1 if unused
\r
144 * @param nameChoice
\r
147 private String getFixedValueAlias(String valueAlias, int valueEnum, int nameChoice) {
\r
148 if (propEnum >= UProperty.STRING_START) {
\r
149 if (nameChoice > UProperty.NameChoice.LONG)
\r
150 throw new IllegalArgumentException();
\r
151 if (nameChoice != UProperty.NameChoice.LONG)
\r
154 } else if (propEnum >= UProperty.DOUBLE_START) {
\r
155 if (nameChoice > UProperty.NameChoice.LONG)
\r
156 throw new IllegalArgumentException();
\r
157 if (nameChoice != UProperty.NameChoice.LONG)
\r
161 if (valueAlias != null && !valueAlias.equals("<integer>")) {
\r
162 valueEnum = fixedGetPropertyValueEnum(propEnum, valueAlias);
\r
164 // because these are defined badly, there may be no normal (long) name.
\r
166 String result = fixedGetPropertyValueName(propEnum, valueEnum, nameChoice);
\r
167 if (result != null)
\r
169 // HACK try other namechoice
\r
170 if (nameChoice == UProperty.NameChoice.LONG) {
\r
171 result = fixedGetPropertyValueName(propEnum, valueEnum, UProperty.NameChoice.SHORT);
\r
172 if (result != null)
\r
174 if (isCombiningClassProperty())
\r
176 return "<integer>";
\r
181 public boolean isCombiningClassProperty() {
\r
182 return (propEnum == UProperty.CANONICAL_COMBINING_CLASS
\r
183 || propEnum == UProperty.LEAD_CANONICAL_COMBINING_CLASS
\r
184 || propEnum == UProperty.TRAIL_CANONICAL_COMBINING_CLASS);
\r
187 private static int fixedGetPropertyValueEnum(int propEnum, String valueAlias) {
\r
189 if (propEnum < BINARY_LIMIT) {
\r
190 propEnum = UProperty.ALPHABETIC;
\r
192 return UCharacter.getPropertyValueEnum(propEnum, valueAlias);
\r
193 } catch (Exception e) {
\r
194 return Integer.parseInt(valueAlias);
\r
198 static Map fixSkeleton = new HashMap();
\r
200 private static String fixedGetPropertyValueName(int propEnum, int valueEnum, int nameChoice) {
\r
201 String value = UCharacter.getPropertyValueName(propEnum, valueEnum, nameChoice);
\r
202 String newValue = (String) fixSkeleton.get(value);
\r
203 if (newValue == null) {
\r
205 if (propEnum == UProperty.JOINING_GROUP) {
\r
206 newValue = newValue == null ? null : newValue.toLowerCase(Locale.ENGLISH);
\r
208 newValue = regularize(newValue, true);
\r
209 fixSkeleton.put(value, newValue);
\r
214 public List _getNameAliases(List result) {
\r
215 if (result == null)
\r
216 result = new ArrayList();
\r
217 // String alias = String_Extras.get(propEnum);
\r
218 // if (alias == null)
\r
219 String alias = Binary_Extras.get(propEnum);
\r
220 if (alias != null) {
\r
221 addUnique(alias, result);
\r
223 addUnique(getFixedPropertyName(propEnum, UProperty.NameChoice.SHORT), result);
\r
224 addUnique(getFixedPropertyName(propEnum, UProperty.NameChoice.LONG), result);
\r
229 public String getFixedPropertyName(int propName, int nameChoice) {
\r
231 return UCharacter.getPropertyName(propEnum, nameChoice);
\r
232 } catch (IllegalArgumentException e) {
\r
237 private static Map cccHack = new HashMap();
\r
238 private static Set cccExtras = new HashSet();
\r
240 for (int i = 0; i <= 255; ++i) {
\r
241 String alias = UCharacter.getPropertyValueName(UProperty.CANONICAL_COMBINING_CLASS, i,
\r
242 UProperty.NameChoice.LONG);
\r
243 String numStr = String.valueOf(i);
\r
244 if (alias != null) {
\r
245 cccHack.put(alias, numStr);
\r
247 cccHack.put(numStr, numStr);
\r
248 cccExtras.add(numStr);
\r
253 public List _getAvailableValues(List result) {
\r
254 if (result == null)
\r
255 result = new ArrayList();
\r
256 if (propEnum == UProperty.AGE) {
\r
257 addAllUnique(getAges(), result);
\r
261 if (propEnum < UProperty.INT_LIMIT) {
\r
262 if (Binary_Extras.isInRange(propEnum)) {
\r
263 propEnum = UProperty.BINARY_START; // HACK
\r
265 int start = UCharacter.getIntPropertyMinValue(propEnum);
\r
266 int end = UCharacter.getIntPropertyMaxValue(propEnum);
\r
267 for (int i = start; i <= end; ++i) {
\r
268 String alias = getFixedValueAlias(null, i, UProperty.NameChoice.LONG);
\r
269 String alias2 = getFixedValueAlias(null, i, UProperty.NameChoice.SHORT);
\r
270 if (alias == null) {
\r
272 if (alias == null && isCombiningClassProperty()) {
\r
273 alias = String.valueOf(i);
\r
276 // System.out.println(propertyAlias + "\t" + i + ":\t" + alias);
\r
277 addUnique(alias, result);
\r
279 } else if (propEnum >= UProperty.DOUBLE_START && propEnum < UProperty.DOUBLE_LIMIT) {
\r
280 UnicodeMap map = getUnicodeMap();
\r
281 Collection values = map.values();
\r
282 addAllUnique(values, result);
\r
284 String alias = getFixedValueAlias(null, -1, UProperty.NameChoice.LONG);
\r
285 addUnique(alias, result);
\r
290 static String[] AGES = null;
\r
292 private String[] getAges() {
\r
293 if (AGES == null) {
\r
294 Set ages = new TreeSet();
\r
295 for (int i = 0; i < 0x10FFFF; ++i) {
\r
296 ages.add(getAge(i));
\r
298 AGES = (String[]) ages.toArray(new String[ages.size()]);
\r
303 public List _getValueAliases(String valueAlias, List result) {
\r
304 if (result == null)
\r
305 result = new ArrayList();
\r
306 if (propEnum == UProperty.AGE) {
\r
307 addUnique(valueAlias, result);
\r
310 if (isCombiningClassProperty()) {
\r
311 addUnique(cccHack.get(valueAlias), result); // add number
\r
313 int type = getType();
\r
314 if (type == UnicodeProperty.NUMERIC || type == EXTENDED_NUMERIC) {
\r
315 addUnique(valueAlias, result);
\r
316 if (valueAlias.endsWith(".0")) {
\r
317 addUnique(valueAlias.substring(0, valueAlias.length() - 2), result);
\r
320 for (int nameChoice = UProperty.NameChoice.SHORT;; ++nameChoice) {
\r
322 addUnique(getFixedValueAlias(valueAlias, -1, nameChoice), result);
\r
323 } catch (Exception e) {
\r
332 * @see com.ibm.icu.dev.test.util.UnicodePropertySource#getPropertyType()
\r
334 private int internalGetPropertyType(int prop) {
\r
336 case UProperty.AGE:
\r
337 case UProperty.BLOCK:
\r
338 case UProperty.SCRIPT:
\r
339 return UnicodeProperty.CATALOG;
\r
340 case UProperty.ISO_COMMENT:
\r
341 case UProperty.NAME:
\r
342 case UProperty.UNICODE_1_NAME:
\r
343 return UnicodeProperty.MISC;
\r
344 case UProperty.BIDI_MIRRORING_GLYPH:
\r
345 case UProperty.CASE_FOLDING:
\r
346 case UProperty.LOWERCASE_MAPPING:
\r
347 case UProperty.SIMPLE_CASE_FOLDING:
\r
348 case UProperty.SIMPLE_LOWERCASE_MAPPING:
\r
349 case UProperty.SIMPLE_TITLECASE_MAPPING:
\r
350 case UProperty.SIMPLE_UPPERCASE_MAPPING:
\r
351 case UProperty.TITLECASE_MAPPING:
\r
352 case UProperty.UPPERCASE_MAPPING:
\r
353 return UnicodeProperty.EXTENDED_STRING;
\r
355 if (prop < UProperty.BINARY_START)
\r
356 return UnicodeProperty.UNKNOWN;
\r
357 if (prop < UProperty.BINARY_LIMIT)
\r
358 return UnicodeProperty.BINARY;
\r
359 if (prop < UProperty.INT_START)
\r
360 return UnicodeProperty.EXTENDED_BINARY;
\r
361 if (prop < UProperty.INT_LIMIT)
\r
362 return UnicodeProperty.ENUMERATED;
\r
363 if (prop < UProperty.DOUBLE_START)
\r
364 return UnicodeProperty.EXTENDED_ENUMERATED;
\r
365 if (prop < UProperty.DOUBLE_LIMIT)
\r
366 return UnicodeProperty.NUMERIC;
\r
367 if (prop < UProperty.STRING_START)
\r
368 return UnicodeProperty.EXTENDED_NUMERIC;
\r
369 if (prop < UProperty.STRING_LIMIT)
\r
370 return UnicodeProperty.STRING;
\r
371 return UnicodeProperty.EXTENDED_STRING;
\r
377 * @see com.ibm.icu.dev.test.util.UnicodeProperty#getVersion()
\r
379 public String _getVersion() {
\r
380 return VersionInfo.ICU_VERSION.toString();
\r
385 matchIterator = new UnicodeSetIterator(
\r
386 new UnicodeSet("[^[:Cn:]-[:Default_Ignorable_Code_Point:]]"));
\r
392 * Other Missing Functions:
\r
397 Composition_Exclusion
\r
398 Decomposition_Mapping
\r
405 Special_Case_Condition
\r
406 Unicode_Radical_Stroke
\r
409 static final Names Binary_Extras = new Names(UProperty.BINARY_LIMIT,
\r
411 "isNFC", "isNFD", "isNFKC", "isNFKD",
\r
412 "isLowercase", "isUppercase", "isTitlecase", "isCasefolded", "isCased",
\r
415 // static final Names String_Extras = new Names(UProperty.STRING_LIMIT,
\r
417 // "toNFC", "toNFD", "toNFKC", "toNKFD",
\r
421 isNFC = UProperty.BINARY_LIMIT,
\r
422 isNFD = UProperty.BINARY_LIMIT+1,
\r
423 isNFKC = UProperty.BINARY_LIMIT+2,
\r
424 isNFKD = UProperty.BINARY_LIMIT+3,
\r
425 isLowercase = UProperty.BINARY_LIMIT+4,
\r
426 isUppercase = UProperty.BINARY_LIMIT+5,
\r
427 isTitlecase = UProperty.BINARY_LIMIT+6,
\r
428 isCasefolded = UProperty.BINARY_LIMIT+7,
\r
429 isCased = UProperty.BINARY_LIMIT+8,
\r
430 BINARY_LIMIT = UProperty.BINARY_LIMIT+9
\r
432 // NFC = UProperty.STRING_LIMIT,
\r
433 // NFD = UProperty.STRING_LIMIT+1,
\r
434 // NFKC = UProperty.STRING_LIMIT+2,
\r
435 // NFKD = UProperty.STRING_LIMIT+3
\r
438 private ICUPropertyFactory() {
\r
439 Collection c = getInternalAvailablePropertyAliases(new ArrayList());
\r
440 Iterator it = c.iterator();
\r
441 while (it.hasNext()) {
\r
442 add(getInternalProperty((String) it.next()));
\r
446 private static ICUPropertyFactory singleton = null;
\r
448 public static synchronized ICUPropertyFactory make() {
\r
449 if (singleton != null)
\r
451 singleton = new ICUPropertyFactory();
\r
455 public List getInternalAvailablePropertyAliases(List result) {
\r
457 {UProperty.BINARY_START, UProperty.BINARY_LIMIT},
\r
458 {UProperty.INT_START, UProperty.INT_LIMIT},
\r
459 {UProperty.DOUBLE_START, UProperty.DOUBLE_LIMIT},
\r
460 {UProperty.STRING_START, UProperty.STRING_LIMIT},
\r
462 for (int i = 0; i < ranges.length; ++i) {
\r
463 for (int j = ranges[i][0]; j < ranges[i][1]; ++j) {
\r
464 String alias = UCharacter.getPropertyName(j, UProperty.NameChoice.LONG);
\r
465 UnicodeProperty.addUnique(alias, result);
\r
466 if (!result.contains(alias))
\r
470 // result.addAll(String_Extras.getNames());
\r
471 result.addAll(Binary_Extras.getNames());
\r
475 public UnicodeProperty getInternalProperty(String propertyAlias) {
\r
478 int possibleItem = Binary_Extras.get(propertyAlias);
\r
479 if (possibleItem >= 0) {
\r
480 propEnum = possibleItem;
\r
483 // possibleItem = String_Extras.get(propertyAlias);
\r
484 // if (possibleItem >= 0) {
\r
485 // propEnum = possibleItem;
\r
488 propEnum = UCharacter.getPropertyEnum(propertyAlias);
\r
490 return new ICUProperty(propertyAlias, propEnum);
\r
496 * @see com.ibm.icu.dev.test.util.UnicodePropertySource#getProperty(java.lang.String)
\r
498 // TODO file bug on getPropertyValueName for Canonical_Combining_Class
\r
499 public static class Names {
\r
500 private String[] names;
\r
503 public Names(int base, String[] names) {
\r
505 this.names = names;
\r
508 public int get(String name) {
\r
509 for (int i = 0; i < names.length; ++i) {
\r
510 if (name.equalsIgnoreCase(names[i]))
\r
516 public String get(int number) {
\r
518 if (number < 0 || names.length <= number)
\r
520 return names[number];
\r
523 public boolean isInRange(int number) {
\r
525 return (0 <= number && number < names.length);
\r
528 public List getNames() {
\r
529 return Arrays.asList(names);
\r