2 *******************************************************************************
3 * Copyright (C) 2002-2012, International Business Machines Corporation and *
4 * others. All Rights Reserved. *
5 *******************************************************************************
7 package com.ibm.icu.dev.util;
9 import java.util.ArrayList;
10 import java.util.Arrays;
11 import java.util.BitSet;
12 import java.util.Collection;
13 import java.util.HashMap;
14 import java.util.HashSet;
15 import java.util.Iterator;
16 import java.util.List;
17 import java.util.Locale;
20 import java.util.TreeMap;
21 import java.util.TreeSet;
23 import com.ibm.icu.lang.UCharacter;
24 import com.ibm.icu.lang.UProperty;
25 import com.ibm.icu.lang.UScript;
26 import com.ibm.icu.text.Normalizer;
27 import com.ibm.icu.text.UTF16;
28 import com.ibm.icu.util.VersionInfo;
32 * Provides a general interface for Unicode Properties, and
33 * extracting sets based on those values.
37 public class ICUPropertyFactory extends UnicodeProperty.Factory {
39 static class ICUProperty extends UnicodeProperty {
// Property selector for this instance. It stays Integer.MIN_VALUE until the
// constructor assigns the real value.
40 protected int propEnum = Integer.MIN_VALUE;
// Builds the wrapper for one property: stores propEnum, derives the
// UnicodeProperty type from internalGetPropertyType(propEnum), and sets the
// uniform-unassigned flag.
// NOTE(review): this listing omits some lines (inner lines 43 and 48 here).
// propName is not used on any visible line, and the two setUniformUnassigned
// calls are presumably the arms of an if/else -- confirm against the full file.
42 protected ICUProperty(String propName, int propEnum) {
44 this.propEnum = propEnum;
45 setType(internalGetPropertyType(propEnum));
// These three properties are the ones flagged with setUniformUnassigned(false).
46 if (propEnum == UProperty.DEFAULT_IGNORABLE_CODE_POINT || propEnum == UProperty.BIDI_CLASS || propEnum == UProperty.GENERAL_CATEGORY) {
47 setUniformUnassigned(false);
49 setUniformUnassigned(true);
// Set to true after the first "Fail: ..." diagnostic is printed by _getValue,
// so that message is printed at most once per instance.
53 boolean shownException = false;
// Returns the value of this property for codePoint as a String.
// The visible code handles specific properties first (each case returns
// directly) and then falls back on the numeric range that propEnum is in.
// NOTE(review): this listing omits several lines, including the switch header
// and some case labels (e.g. the labels for the returns at inner lines 58, 68
// and 88-110). Comments below describe only what the visible lines do.
55 public String _getValue(int codePoint) {
58 return getAge(codePoint);
// String-valued properties: computed directly with UCharacter / UTF16 helpers.
// Full case mappings use Locale.ENGLISH.
59 case UProperty.BIDI_MIRRORING_GLYPH:
60 return UTF16.valueOf(UCharacter.getMirror(codePoint));
61 case UProperty.CASE_FOLDING:
62 return UCharacter.foldCase(UTF16.valueOf(codePoint), true);
63 case UProperty.ISO_COMMENT:
64 return UCharacter.getISOComment(codePoint);
65 case UProperty.LOWERCASE_MAPPING:
66 return UCharacter.toLowerCase(Locale.ENGLISH, UTF16.valueOf(codePoint));
68 return UCharacter.getName(codePoint);
69 case UProperty.SIMPLE_CASE_FOLDING:
70 return UTF16.valueOf(UCharacter.foldCase(codePoint, true));
71 case UProperty.SIMPLE_LOWERCASE_MAPPING:
72 return UTF16.valueOf(UCharacter.toLowerCase(codePoint));
73 case UProperty.SIMPLE_TITLECASE_MAPPING:
74 return UTF16.valueOf(UCharacter.toTitleCase(codePoint));
75 case UProperty.SIMPLE_UPPERCASE_MAPPING:
76 return UTF16.valueOf(UCharacter.toUpperCase(codePoint));
77 case UProperty.TITLECASE_MAPPING:
78 return UCharacter.toTitleCase(Locale.ENGLISH, UTF16.valueOf(codePoint), null);
79 case UProperty.UNICODE_1_NAME:
80 return UCharacter.getName1_0(codePoint);
81 case UProperty.UPPERCASE_MAPPING:
82 return UCharacter.toUpperCase(Locale.ENGLISH, UTF16.valueOf(codePoint));
83 // case NFC: return Normalizer.normalize(codePoint, Normalizer.NFC);
84 // case NFD: return Normalizer.normalize(codePoint, Normalizer.NFD);
85 // case NFKC: return Normalizer.normalize(codePoint, Normalizer.NFKC);
86 // case NFKD: return Normalizer.normalize(codePoint, Normalizer.NFKD);
// Boolean tests rendered as "true"/"false": each compares the code point's
// own string with the result of a normalization or case operation, i.e.
// "is the code point unchanged by this operation?".
88 return String.valueOf(Normalizer.normalize(codePoint, Normalizer.NFC).equals(UTF16.valueOf(codePoint)));
90 return String.valueOf(Normalizer.normalize(codePoint, Normalizer.NFD).equals(UTF16.valueOf(codePoint)));
93 .valueOf(Normalizer.normalize(codePoint, Normalizer.NFKC).equals(UTF16.valueOf(codePoint)));
96 .valueOf(Normalizer.normalize(codePoint, Normalizer.NFKD).equals(UTF16.valueOf(codePoint)));
98 return String.valueOf(UCharacter.toLowerCase(Locale.ENGLISH, UTF16.valueOf(codePoint)).equals(
99 UTF16.valueOf(codePoint)));
101 return String.valueOf(UCharacter.toUpperCase(Locale.ENGLISH, UTF16.valueOf(codePoint)).equals(
102 UTF16.valueOf(codePoint)));
104 return String.valueOf(UCharacter.toTitleCase(Locale.ENGLISH, UTF16.valueOf(codePoint), null).equals(
105 UTF16.valueOf(codePoint)));
107 return String.valueOf(UCharacter.foldCase(UTF16.valueOf(codePoint), true).equals(
108 UTF16.valueOf(codePoint)));
// NOTE(review): the expression below is textually identical to the one at
// inner line 98 (unchanged by toLowerCase). If the two belong to different
// case labels (not visible here), one of them probably computes the wrong
// test -- verify against the full file.
110 return String.valueOf(UCharacter.toLowerCase(Locale.ENGLISH, UTF16.valueOf(codePoint)).equals(
111 UTF16.valueOf(codePoint)));
112 case UProperty.SCRIPT_EXTENSIONS:
113 return getStringScriptExtensions(codePoint);
// Generic fallback for ids below INT_LIMIT: read the integer value and
// translate it to its long value name.
115 if (propEnum < UProperty.INT_LIMIT) {
119 enumValue = UCharacter.getIntPropertyValue(codePoint, propEnum);
121 value = fixedGetPropertyValueName(propEnum, enumValue, UProperty.NameChoice.LONG);
// A failed lookup is reported once on stdout and otherwise tolerated.
122 } catch (IllegalArgumentException e) {
123 if (!shownException) {
124 System.out.println("Fail: " + getName() + ", " + Integer.toHexString(codePoint));
125 shownException = true;
// When no value name is available, the raw integer is returned as a string.
128 return value != null ? value : String.valueOf(enumValue);
// Generic fallback for ids below DOUBLE_LIMIT: the numeric value as text.
// What is returned for NO_NUMERIC_VALUE is on a line not shown here.
129 } else if (propEnum < UProperty.DOUBLE_LIMIT) {
130 double num = UCharacter.getUnicodeNumericValue(codePoint);
131 if (num == UCharacter.NO_NUMERIC_VALUE)
133 return Double.toString(num);
134 // TODO: Fix HACK -- API deficient
// Returns the Age of codePoint as a short version string.
// The four-field text from UCharacter.getAge(codePoint) has a trailing ".0.0"
// removed, so for example "3.2.0.0" becomes "3.2".
// "0.0.0.0" is special-cased, but the value returned for it is on a line not
// visible in this listing.
139 private String getAge(int codePoint) {
140 String temp = UCharacter.getAge(codePoint).toString();
141 if (temp.equals("0.0.0.0"))
143 if (temp.endsWith(".0.0"))
// Drop the last four characters (".0.0").
144 return temp.substring(0, temp.length() - 4);
149 * @param valueAlias null if unused.
150 * @param valueEnum -1 if unused
// Resolves a value of this property to its name for the requested nameChoice.
// The value is identified either by valueAlias or by valueEnum.
// NOTE(review): several lines of this method are missing from the listing
// (inner lines 159-160, 165-167, 170, 172, 174-175, 179-180, 182-186), so the
// results of the early branches and the final return are not visible.
154 private String getFixedValueAlias(String valueAlias, int valueEnum, int nameChoice) {
// String-range properties: only name choices up to LONG are accepted.
155 if (propEnum >= UProperty.STRING_START) {
156 if (nameChoice > UProperty.NameChoice.LONG)
157 throw new IllegalArgumentException();
158 if (nameChoice != UProperty.NameChoice.LONG)
// Double-range properties: same name-choice restriction as above.
161 } else if (propEnum >= UProperty.DOUBLE_START) {
162 if (nameChoice > UProperty.NameChoice.LONG)
163 throw new IllegalArgumentException();
164 if (nameChoice != UProperty.NameChoice.LONG)
// A real alias (not the "<integer>" placeholder) is converted to its enum
// value, replacing the valueEnum argument.
168 if (valueAlias != null && !valueAlias.equals("<integer>")) {
169 valueEnum = fixedGetPropertyValueEnum(propEnum, valueAlias);
171 // because these are defined badly, there may be no normal (long) name.
173 String result = fixedGetPropertyValueName(propEnum, valueEnum, nameChoice);
176 // HACK try other namechoice
// When the LONG name was requested, the SHORT name is looked up instead.
// The condition guarding this retry is on a line not shown; presumably it
// applies only when the first lookup produced nothing -- confirm.
177 if (nameChoice == UProperty.NameChoice.LONG) {
178 result = fixedGetPropertyValueName(propEnum, valueEnum, UProperty.NameChoice.SHORT);
181 if (isCombiningClassProperty())
// Returns true when this property is one of the three combining-class
// properties: canonical, lead canonical, or trail canonical combining class.
188 public boolean isCombiningClassProperty() {
189 return (propEnum == UProperty.CANONICAL_COMBINING_CLASS
190 || propEnum == UProperty.LEAD_CANONICAL_COMBINING_CLASS
191 || propEnum == UProperty.TRAIL_CANONICAL_COMBINING_CLASS);
// Converts a value alias of property propEnum to its integer enum value.
// If the ICU lookup throws, the alias is parsed as a decimal integer instead.
// Integer.parseInt throws NumberFormatException when the alias is not a number.
// NOTE(review): the catch clause is catch (Exception), which also hides
// unrelated failures (for example a null valueAlias) behind the parseInt
// fallback; consider narrowing it.
194 private static int fixedGetPropertyValueEnum(int propEnum, String valueAlias) {
// Every id below BINARY_LIMIT is looked up as if it were ALPHABETIC.
// Presumably this is because all binary properties share the same value
// aliases, and the unqualified BINARY_LIMIT is this file's extended limit
// that also covers the extra binary ids -- confirm.
196 if (propEnum < BINARY_LIMIT) {
197 propEnum = UProperty.ALPHABETIC;
199 return UCharacter.getPropertyValueEnum(propEnum, valueAlias);
200 } catch (Exception e) {
201 return Integer.parseInt(valueAlias);
// Cache from the raw ICU value name to its regularized form; filled lazily
// by fixedGetPropertyValueName.
// NOTE(review): plain static HashMap with no synchronization visible in this
// method -- presumably only used single-threaded; confirm.
205 static Map fixSkeleton = new HashMap();
// Returns the name of valueEnum for propEnum in the requested nameChoice,
// passed through regularize(..., true) and cached in fixSkeleton.
// The cache key is the raw name only, not the property.
// NOTE(review): inner lines 211, 214 and 217-219 are missing from this
// listing, including the initial assignment to newValue and the return.
207 private static String fixedGetPropertyValueName(int propEnum, int valueEnum, int nameChoice) {
208 String value = UCharacter.getPropertyValueName(propEnum, valueEnum, nameChoice);
209 String newValue = (String) fixSkeleton.get(value);
// Cache miss: compute the fixed-up name and remember it.
210 if (newValue == null) {
// Joining_Group names are lower-cased (null-safe) before regularization.
212 if (propEnum == UProperty.JOINING_GROUP) {
213 newValue = newValue == null ? null : newValue.toLowerCase(Locale.ENGLISH);
215 newValue = regularize(newValue, true);
216 fixSkeleton.put(value, newValue);
// Collects the names of this property into result.
// Visible sources are the Binary_Extras name for propEnum, then the ICU short
// and long property names. Each is added with addUnique.
// NOTE(review): inner lines 222, 227, 229 and 232-234 are missing from this
// listing. The condition guarding the fresh ArrayList, the branch structure
// around the alias, and the return are therefore not visible.
221 public List _getNameAliases(List result) {
223 result = new ArrayList();
224 // String alias = String_Extras.get(propEnum);
225 // if (alias == null)
226 String alias = Binary_Extras.get(propEnum);
228 addUnique(alias, result);
230 addUnique(getFixedPropertyName(propEnum, UProperty.NameChoice.SHORT), result);
231 addUnique(getFixedPropertyName(propEnum, UProperty.NameChoice.LONG), result);
// Returns the ICU name of this property in the requested nameChoice.
// An IllegalArgumentException from ICU is caught; what is returned in that
// case is on a line not shown in this listing.
// NOTE(review): the propName parameter is never used on the visible lines.
// The lookup uses the field propEnum instead. The visible caller passes
// propEnum, so results match today, but the parameter is misleading.
236 public String getFixedPropertyName(int propName, int nameChoice) {
238 return UCharacter.getPropertyName(propEnum, nameChoice);
239 } catch (IllegalArgumentException e) {
// Lookup tables for combining-class values.
// cccHack maps a value alias, or the decimal string itself, to the decimal
// string of the class number. cccExtras holds decimal strings.
244 private static Map cccHack = new HashMap();
245 private static Set cccExtras = new HashSet();
// Populates both tables for every combining class 0..255.
// NOTE(review): inner lines 246, 251, 253 and 256-258 are missing from this
// listing. The conditions selecting between the put/add calls below
// (presumably whether ICU has a long alias for the number) are not visible.
247 for (int i = 0; i <= 255; ++i) {
248 String alias = UCharacter.getPropertyValueName(UProperty.CANONICAL_COMBINING_CLASS, i,
249 UProperty.NameChoice.LONG);
250 String numStr = String.valueOf(i);
252 cccHack.put(alias, numStr);
254 cccHack.put(numStr, numStr);
255 cccExtras.add(numStr);
// Collects the possible values of this property into result.
// Age uses the computed age list. Ids below INT_LIMIT enumerate the range
// from getIntPropertyMinValue to getIntPropertyMaxValue. Double-range
// properties use the values of the property's UnicodeMap. Anything else adds
// a single alias obtained with valueEnum -1.
// NOTE(review): inner lines 261, 265-267, 271, 277-278, 281-282, 285, 290 and
// 293-295 are missing from this listing (null check, returns, braces).
260 public List _getAvailableValues(List result) {
262 result = new ArrayList();
263 if (propEnum == UProperty.AGE) {
264 addAllUnique(getAges(), result);
268 if (propEnum < UProperty.INT_LIMIT) {
// NOTE(review): unless a local named propEnum is declared on a line not shown,
// the assignment below overwrites the instance field. After the first call
// this object would then behave as the BINARY_START property -- verify. A
// local copy would avoid the side effect.
269 if (Binary_Extras.isInRange(propEnum)) {
270 propEnum = UProperty.BINARY_START; // HACK
272 int start = UCharacter.getIntPropertyMinValue(propEnum);
273 int end = UCharacter.getIntPropertyMaxValue(propEnum);
274 for (int i = start; i <= end; ++i) {
275 String alias = getFixedValueAlias(null, i, UProperty.NameChoice.LONG);
// The short alias is computed here; how it is used is on lines not shown.
276 String alias2 = getFixedValueAlias(null, i, UProperty.NameChoice.SHORT);
// Combining classes without a name fall back to the number itself.
279 if (alias == null && isCombiningClassProperty()) {
280 alias = String.valueOf(i);
283 // System.out.println(propertyAlias + "\t" + i + ":\t" + alias);
284 addUnique(alias, result);
286 } else if (propEnum >= UProperty.DOUBLE_START && propEnum < UProperty.DOUBLE_LIMIT) {
287 UnicodeMap map = getUnicodeMap();
288 Collection values = map.values();
289 addAllUnique(values, result);
291 String alias = getFixedValueAlias(null, -1, UProperty.NameChoice.LONG);
292 addUnique(alias, result);
// Cached array of age strings, assigned by getAges().
297 static String[] AGES = null;
// Builds the sorted (TreeSet order) array of age strings and stores it in AGES.
// NOTE(review): inner lines 300, 303-304 and 306-308 are missing from this
// listing. The loop body (presumably adding getAge(i) to the set), any
// "already computed" guard, and the return are not visible.
299 private String[] getAges() {
301 Set ages = new TreeSet();
// NOTE(review): the bound is exclusive, so U+10FFFF itself is never visited.
// "i <= 0x10FFFF" would cover the whole code point range. This is unlikely to
// change the resulting set, but it is an off-by-one.
302 for (int i = 0; i < 0x10FFFF; ++i) {
305 AGES = (String[]) ages.toArray(new String[ages.size()]);
// Collects the aliases of the value named valueAlias into result.
// NOTE(review): inner lines 311, 315-316, 319, 325-326, 328 and 331-337 are
// missing from this listing (null check, returns, try header, catch body).
310 public List _getValueAliases(String valueAlias, List result) {
312 result = new ArrayList();
// Age values have no other aliases: the alias itself is added.
313 if (propEnum == UProperty.AGE) {
314 addUnique(valueAlias, result);
317 if (isCombiningClassProperty()) {
318 addUnique(cccHack.get(valueAlias), result); // add number
// Numeric types: add the alias itself and, if it ends in ".0", the same text
// without that suffix.
320 int type = getType();
321 if (type == UnicodeProperty.NUMERIC || type == EXTENDED_NUMERIC) {
322 addUnique(valueAlias, result);
323 if (valueAlias.endsWith(".0")) {
324 addUnique(valueAlias.substring(0, valueAlias.length() - 2), result);
// Try successive name choices starting at SHORT. The loop header has no
// termination test, so the loop presumably ends through the catch clause
// below once a name choice is rejected -- confirm against the full file.
327 for (int nameChoice = UProperty.NameChoice.SHORT;; ++nameChoice) {
329 addUnique(getFixedValueAlias(valueAlias, -1, nameChoice), result);
330 } catch (Exception e) {
339 * @see com.ibm.icu.dev.test.util.UnicodePropertySource#getPropertyType()
// Maps a property id to one of the UnicodeProperty type constants.
// Explicitly listed properties are classified first. All others are
// classified by the numeric range their id falls in.
// NOTE(review): inner lines 342-343, 348 and 362 are missing from this
// listing (switch header and at least one case label).
341 private int internalGetPropertyType(int prop) {
344 case UProperty.BLOCK:
345 case UProperty.SCRIPT:
346 return UnicodeProperty.CATALOG;
347 case UProperty.ISO_COMMENT:
349 case UProperty.UNICODE_1_NAME:
350 case UProperty.SCRIPT_EXTENSIONS:
351 return UnicodeProperty.MISC;
352 case UProperty.BIDI_MIRRORING_GLYPH:
353 case UProperty.CASE_FOLDING:
354 case UProperty.LOWERCASE_MAPPING:
355 case UProperty.SIMPLE_CASE_FOLDING:
356 case UProperty.SIMPLE_LOWERCASE_MAPPING:
357 case UProperty.SIMPLE_TITLECASE_MAPPING:
358 case UProperty.SIMPLE_UPPERCASE_MAPPING:
359 case UProperty.TITLECASE_MAPPING:
360 case UProperty.UPPERCASE_MAPPING:
361 return UnicodeProperty.EXTENDED_STRING;
// Range classification. The checks run in ascending order, so each test only
// sees ids that were not below the previous bound. Ids that fall between one
// range's LIMIT and the next range's START get the EXTENDED_* type.
363 if (prop < UProperty.BINARY_START)
364 return UnicodeProperty.UNKNOWN;
365 if (prop < UProperty.BINARY_LIMIT)
366 return UnicodeProperty.BINARY;
367 if (prop < UProperty.INT_START)
368 return UnicodeProperty.EXTENDED_BINARY;
369 if (prop < UProperty.INT_LIMIT)
370 return UnicodeProperty.ENUMERATED;
371 if (prop < UProperty.DOUBLE_START)
372 return UnicodeProperty.EXTENDED_ENUMERATED;
373 if (prop < UProperty.DOUBLE_LIMIT)
374 return UnicodeProperty.NUMERIC;
375 if (prop < UProperty.STRING_START)
376 return UnicodeProperty.EXTENDED_NUMERIC;
377 if (prop < UProperty.STRING_LIMIT)
378 return UnicodeProperty.STRING;
// Everything at or above STRING_LIMIT.
379 return UnicodeProperty.EXTENDED_STRING;
385 * @see com.ibm.icu.dev.test.util.UnicodeProperty#getVersion()
// Returns the version string of the ICU library (VersionInfo.ICU_VERSION),
// which is what this property source reports as its version.
387 public String _getVersion() {
388 return VersionInfo.ICU_VERSION.toString();
// Assigns matchIterator an iterator over the set given by the pattern below
// (a complemented set built from Cn minus Default_Ignorable_Code_Point).
// NOTE(review): the lines around this statement (inner lines 389-392 and
// 395-399) are missing from this listing, so it is not visible whether this is
// an initializer block, part of a method, or commented-out code -- confirm.
393 matchIterator = new UnicodeSetIterator(
394 new UnicodeSet("[^[:Cn:]-[:Default_Ignorable_Code_Point:]]"));
400 * Other Missing Functions:
405 Composition_Exclusion
406 Decomposition_Mapping
413 Special_Case_Condition
414 Unicode_Radical_Stroke
// Names of the extra binary properties this factory adds on top of ICU's own,
// registered with base id UProperty.BINARY_LIMIT.
// The order of the names matches the offsets of the isNFC .. isCased int
// constants declared in this class.
// NOTE(review): inner lines 418 and 421-422 are missing from this listing
// (array opener/closer).
417 static final Names Binary_Extras = new Names(UProperty.BINARY_LIMIT,
419 "isNFC", "isNFD", "isNFKC", "isNFKD",
420 "isLowercase", "isUppercase", "isTitlecase", "isCasefolded", "isCased",
423 // static final Names String_Extras = new Names(UProperty.STRING_LIMIT,
425 // "toNFC", "toNFD", "toNFKC", "toNKFD",
// Ids of the extra binary properties, allocated consecutively starting at
// UProperty.BINARY_LIMIT (the first id past ICU's own binary properties).
// NOTE(review): the declaration header for this list (inner line 428,
// presumably the modifiers and int type) is missing from this listing.
429 isNFC = UProperty.BINARY_LIMIT,
430 isNFD = UProperty.BINARY_LIMIT+1,
431 isNFKC = UProperty.BINARY_LIMIT+2,
432 isNFKD = UProperty.BINARY_LIMIT+3,
433 isLowercase = UProperty.BINARY_LIMIT+4,
434 isUppercase = UProperty.BINARY_LIMIT+5,
435 isTitlecase = UProperty.BINARY_LIMIT+6,
436 isCasefolded = UProperty.BINARY_LIMIT+7,
437 isCased = UProperty.BINARY_LIMIT+8,
// One past the last extra id: the binary limit once the extras are included.
438 BINARY_LIMIT = UProperty.BINARY_LIMIT+9
440 // NFC = UProperty.STRING_LIMIT,
441 // NFD = UProperty.STRING_LIMIT+1,
442 // NFKC = UProperty.STRING_LIMIT+2,
443 // NFKD = UProperty.STRING_LIMIT+3
// Registers one property object for every alias reported by
// getInternalAvailablePropertyAliases. Each alias is turned into a property
// with getInternalProperty and passed to add().
// Protected: the visible way to obtain an instance is the static make().
446 protected ICUPropertyFactory() {
447 Collection c = getInternalAvailablePropertyAliases(new ArrayList());
448 Iterator it = c.iterator();
449 while (it.hasNext()) {
450 add(getInternalProperty((String) it.next()));
// Scratch BitSet shared by all calls to getStringScriptExtensions. That
// method is static synchronized, so calls do not use it concurrently.
454 static BitSet BITSET = new BitSet();
// Returns the Script_Extensions of codePoint as text: either a single script
// name, or the long names of all scripts in the set joined by single spaces
// and ordered by their short names.
// NOTE(review): inner lines 457, 459, 464 and 466 are missing from this
// listing. The condition selecting the single-script return is not visible;
// presumably it tests whether the int result is a script code rather than a
// count -- confirm against the UScript.getScriptExtensions documentation.
455 public static synchronized String getStringScriptExtensions(int codePoint) {
456 int result = UScript.getScriptExtensions(codePoint, BITSET);
458 return UScript.getName(result);
// TreeMap keyed by short name gives the short-name ordering of the long names.
460 TreeMap<String,String> sorted = new TreeMap<String,String>();
461 for (int scriptCode = BITSET.nextSetBit(0); scriptCode >= 0; scriptCode = BITSET.nextSetBit(scriptCode+1)) {
462 // sort by short form
463 sorted.put(UScript.getShortName(scriptCode), UScript.getName(scriptCode));
465 return CollectionUtilities.join(sorted.values(), " ");
// The single shared factory instance; null until make() first creates it.
468 private static ICUPropertyFactory singleton = null;
// Returns the shared factory, creating it on first use.
// The method is synchronized, so the null check and the construction cannot
// interleave between threads.
// NOTE(review): inner lines 472 and 474-475 (the two returns) are missing
// from this listing.
470 public static synchronized ICUPropertyFactory make() {
471 if (singleton != null)
473 singleton = new ICUPropertyFactory();
// Adds to result the long name of every ICU property in the binary, int,
// double, string and "other" id ranges, followed by the names of the extra
// binary properties from Binary_Extras.
// NOTE(review): inner lines 478, 484-485, 491-493 and 496-497 are missing from
// this listing (the ranges array declaration, the statement guarded by the
// contains() check, and the return).
477 public List getInternalAvailablePropertyAliases(List result) {
// Each row is a half-open id range {start, limit}.
479 {UProperty.BINARY_START, UProperty.BINARY_LIMIT},
480 {UProperty.INT_START, UProperty.INT_LIMIT},
481 {UProperty.DOUBLE_START, UProperty.DOUBLE_LIMIT},
482 {UProperty.STRING_START, UProperty.STRING_LIMIT},
483 {UProperty.OTHER_PROPERTY_START, UProperty.OTHER_PROPERTY_LIMIT},
486 for (int i = 0; i < ranges.length; ++i) {
487 for (int j = ranges[i][0]; j < ranges[i][1]; ++j) {
488 String alias = UCharacter.getPropertyName(j, UProperty.NameChoice.LONG);
489 UnicodeProperty.addUnique(alias, result);
// NOTE(review): this check runs right after addUnique() for the same alias.
// If addUnique always leaves the alias in the list, the check and whatever it
// guards are redundant -- confirm what the guarded line does.
490 if (!result.contains(alias))
494 // result.addAll(String_Extras.getNames());
// The extras are appended with addAll, not addUnique.
495 result.addAll(Binary_Extras.getNames());
// Creates the ICUProperty for propertyAlias.
// The alias is first looked up among the extra binary names. A non-negative
// result is used as the property id. The ICU lookup
// UCharacter.getPropertyEnum(propertyAlias) is also visible below.
// NOTE(review): inner lines 500-501, 505-506, 510-511 and 513 are missing from
// this listing, so the declaration of propEnum and the branch structure that
// decides whether the ICU lookup runs are not visible.
499 public UnicodeProperty getInternalProperty(String propertyAlias) {
502 int possibleItem = Binary_Extras.get(propertyAlias);
503 if (possibleItem >= 0) {
504 propEnum = possibleItem;
507 // possibleItem = String_Extras.get(propertyAlias);
508 // if (possibleItem >= 0) {
509 // propEnum = possibleItem;
512 propEnum = UCharacter.getPropertyEnum(propertyAlias);
514 return new ICUProperty(propertyAlias, propEnum);
520 * @see com.ibm.icu.dev.test.util.UnicodePropertySource#getProperty(java.lang.String)
522 // TODO file bug on getPropertyValueName for Canonical_Combining_Class
// Small lookup table tying an array of names to integer ids.
// The constructor takes a base id together with the names.
// NOTE(review): many lines of this class are missing from the listing (inner
// lines 525-526, 528-531, 535-539, 541, 543, 545-546, 548, 550-551). The
// field holding the base, the constructor body, the return values of
// get(String), and any base adjustment in get(int) / isInRange are not
// visible. Presumably ids are offset by the base -- confirm.
523 public static class Names {
524 private String[] names;
527 public Names(int base, String[] names) {
// Looks a name up ignoring case and returns its id. The values returned for a
// hit and for a miss are on lines not shown here.
532 public int get(String name) {
533 for (int i = 0; i < names.length; ++i) {
534 if (name.equalsIgnoreCase(names[i]))
// Returns the name at index number. An out-of-range index is detected first;
// the result for that case is on a line not shown here.
540 public String get(int number) {
542 if (number < 0 || names.length <= number)
544 return names[number];
// Tells whether number is a valid index into the names array.
547 public boolean isInRange(int number) {
549 return (0 <= number && number < names.length);
// Returns the names as a fixed-size List view backed by the internal array
// (Arrays.asList), so writes through the list would change this table.
552 public List getNames() {
553 return Arrays.asList(names);