]> gitweb.fperrin.net Git - Dictionary.git/blob - jars/icu4j-52_1/main/tests/translit/src/com/ibm/icu/dev/util/ICUPropertyFactory.java
Clean up imports.
[Dictionary.git] / jars / icu4j-52_1 / main / tests / translit / src / com / ibm / icu / dev / util / ICUPropertyFactory.java
1 /*
2  *******************************************************************************
3  * Copyright (C) 2002-2012, International Business Machines Corporation and    *
4  * others. All Rights Reserved.                                                *
5  *******************************************************************************
6  */
7 package com.ibm.icu.dev.util;
8
9 import java.util.ArrayList;
10 import java.util.Arrays;
11 import java.util.BitSet;
12 import java.util.Collection;
13 import java.util.HashMap;
14 import java.util.HashSet;
15 import java.util.Iterator;
16 import java.util.List;
17 import java.util.Locale;
18 import java.util.Map;
19 import java.util.Set;
20 import java.util.TreeMap;
21 import java.util.TreeSet;
22
23 import com.ibm.icu.lang.UCharacter;
24 import com.ibm.icu.lang.UProperty;
25 import com.ibm.icu.lang.UScript;
26 import com.ibm.icu.text.Normalizer;
27 import com.ibm.icu.text.UTF16;
28 import com.ibm.icu.util.VersionInfo;
29
30
31 /**
32  * Provides a general interface for Unicode Properties, and
33  * extracting sets based on those values.
34  * @author Davis
35  */
36
37 public class ICUPropertyFactory extends UnicodeProperty.Factory {
38
39     static class ICUProperty extends UnicodeProperty {
40         protected int propEnum = Integer.MIN_VALUE;
41
42         protected ICUProperty(String propName, int propEnum) {
43             setName(propName);
44             this.propEnum = propEnum;
45             setType(internalGetPropertyType(propEnum));
46             if (propEnum == UProperty.DEFAULT_IGNORABLE_CODE_POINT || propEnum == UProperty.BIDI_CLASS || propEnum == UProperty.GENERAL_CATEGORY) {
47                 setUniformUnassigned(false);
48             } else {
49                 setUniformUnassigned(true);
50             }
51         }
52
53         boolean shownException = false;
54
55         public String _getValue(int codePoint) {
56             switch (propEnum) {
57             case UProperty.AGE:
58                 return getAge(codePoint);
59             case UProperty.BIDI_MIRRORING_GLYPH:
60                 return UTF16.valueOf(UCharacter.getMirror(codePoint));
61             case UProperty.CASE_FOLDING:
62                 return UCharacter.foldCase(UTF16.valueOf(codePoint), true);
63             case UProperty.ISO_COMMENT:
64                 return UCharacter.getISOComment(codePoint);
65             case UProperty.LOWERCASE_MAPPING:
66                 return UCharacter.toLowerCase(Locale.ENGLISH, UTF16.valueOf(codePoint));
67             case UProperty.NAME:
68                 return UCharacter.getName(codePoint);
69             case UProperty.SIMPLE_CASE_FOLDING:
70                 return UTF16.valueOf(UCharacter.foldCase(codePoint, true));
71             case UProperty.SIMPLE_LOWERCASE_MAPPING:
72                 return UTF16.valueOf(UCharacter.toLowerCase(codePoint));
73             case UProperty.SIMPLE_TITLECASE_MAPPING:
74                 return UTF16.valueOf(UCharacter.toTitleCase(codePoint));
75             case UProperty.SIMPLE_UPPERCASE_MAPPING:
76                 return UTF16.valueOf(UCharacter.toUpperCase(codePoint));
77             case UProperty.TITLECASE_MAPPING:
78                 return UCharacter.toTitleCase(Locale.ENGLISH, UTF16.valueOf(codePoint), null);
79             case UProperty.UNICODE_1_NAME:
80                 return UCharacter.getName1_0(codePoint);
81             case UProperty.UPPERCASE_MAPPING:
82                 return UCharacter.toUpperCase(Locale.ENGLISH, UTF16.valueOf(codePoint));
83             // case NFC: return Normalizer.normalize(codePoint, Normalizer.NFC);
84             // case NFD: return Normalizer.normalize(codePoint, Normalizer.NFD);
85             // case NFKC: return Normalizer.normalize(codePoint, Normalizer.NFKC);
86             // case NFKD: return Normalizer.normalize(codePoint, Normalizer.NFKD);
87             case isNFC:
88                 return String.valueOf(Normalizer.normalize(codePoint, Normalizer.NFC).equals(UTF16.valueOf(codePoint)));
89             case isNFD:
90                 return String.valueOf(Normalizer.normalize(codePoint, Normalizer.NFD).equals(UTF16.valueOf(codePoint)));
91             case isNFKC:
92                 return String
93                         .valueOf(Normalizer.normalize(codePoint, Normalizer.NFKC).equals(UTF16.valueOf(codePoint)));
94             case isNFKD:
95                 return String
96                         .valueOf(Normalizer.normalize(codePoint, Normalizer.NFKD).equals(UTF16.valueOf(codePoint)));
97             case isLowercase:
98                 return String.valueOf(UCharacter.toLowerCase(Locale.ENGLISH, UTF16.valueOf(codePoint)).equals(
99                         UTF16.valueOf(codePoint)));
100             case isUppercase:
101                 return String.valueOf(UCharacter.toUpperCase(Locale.ENGLISH, UTF16.valueOf(codePoint)).equals(
102                         UTF16.valueOf(codePoint)));
103             case isTitlecase:
104                 return String.valueOf(UCharacter.toTitleCase(Locale.ENGLISH, UTF16.valueOf(codePoint), null).equals(
105                         UTF16.valueOf(codePoint)));
106             case isCasefolded:
107                 return String.valueOf(UCharacter.foldCase(UTF16.valueOf(codePoint), true).equals(
108                         UTF16.valueOf(codePoint)));
109             case isCased:
110                 return String.valueOf(UCharacter.toLowerCase(Locale.ENGLISH, UTF16.valueOf(codePoint)).equals(
111                         UTF16.valueOf(codePoint)));
112             case UProperty.SCRIPT_EXTENSIONS: 
113                 return getStringScriptExtensions(codePoint);
114             }
115             if (propEnum < UProperty.INT_LIMIT) {
116                 int enumValue = -1;
117                 String value = null;
118                 try {
119                     enumValue = UCharacter.getIntPropertyValue(codePoint, propEnum);
120                     if (enumValue >= 0)
121                         value = fixedGetPropertyValueName(propEnum, enumValue, UProperty.NameChoice.LONG);
122                 } catch (IllegalArgumentException e) {
123                     if (!shownException) {
124                         System.out.println("Fail: " + getName() + ", " + Integer.toHexString(codePoint));
125                         shownException = true;
126                     }
127                 }
128                 return value != null ? value : String.valueOf(enumValue);
129             } else if (propEnum < UProperty.DOUBLE_LIMIT) {
130                 double num = UCharacter.getUnicodeNumericValue(codePoint);
131                 if (num == UCharacter.NO_NUMERIC_VALUE)
132                     return null;
133                 return Double.toString(num);
134                 // TODO: Fix HACK -- API deficient
135             }
136             return null;
137         }
138
139         private String getAge(int codePoint) {
140             String temp = UCharacter.getAge(codePoint).toString();
141             if (temp.equals("0.0.0.0"))
142                 return "unassigned";
143             if (temp.endsWith(".0.0"))
144                 return temp.substring(0, temp.length() - 4);
145             return temp;
146         }
147
148         /**
149          * @param valueAlias null if unused.
150          * @param valueEnum -1 if unused
151          * @param nameChoice
152          * @return
153          */
154         private String getFixedValueAlias(String valueAlias, int valueEnum, int nameChoice) {
155             if (propEnum >= UProperty.STRING_START) {
156                 if (nameChoice > UProperty.NameChoice.LONG)
157                     throw new IllegalArgumentException();
158                 if (nameChoice != UProperty.NameChoice.LONG)
159                     return null;
160                 return "<string>";
161             } else if (propEnum >= UProperty.DOUBLE_START) {
162                 if (nameChoice > UProperty.NameChoice.LONG)
163                     throw new IllegalArgumentException();
164                 if (nameChoice != UProperty.NameChoice.LONG)
165                     return null;
166                 return "<number>";
167             }
168             if (valueAlias != null && !valueAlias.equals("<integer>")) {
169                 valueEnum = fixedGetPropertyValueEnum(propEnum, valueAlias);
170             }
171             // because these are defined badly, there may be no normal (long) name.
172             // if there is
173             String result = fixedGetPropertyValueName(propEnum, valueEnum, nameChoice);
174             if (result != null)
175                 return result;
176             // HACK try other namechoice
177             if (nameChoice == UProperty.NameChoice.LONG) {
178                 result = fixedGetPropertyValueName(propEnum, valueEnum, UProperty.NameChoice.SHORT);
179                 if (result != null)
180                     return result;
181                 if (isCombiningClassProperty())
182                     return null;
183                 return "<integer>";
184             }
185             return null;
186         }
187
188         public boolean isCombiningClassProperty() {
189             return (propEnum == UProperty.CANONICAL_COMBINING_CLASS
190                     || propEnum == UProperty.LEAD_CANONICAL_COMBINING_CLASS
191                     || propEnum == UProperty.TRAIL_CANONICAL_COMBINING_CLASS);
192         }
193
194         private static int fixedGetPropertyValueEnum(int propEnum, String valueAlias) {
195             try {
196                 if (propEnum < BINARY_LIMIT) {
197                     propEnum = UProperty.ALPHABETIC;
198                 }
199                 return UCharacter.getPropertyValueEnum(propEnum, valueAlias);
200             } catch (Exception e) {
201                 return Integer.parseInt(valueAlias);
202             }
203         }
204
205         static Map fixSkeleton = new HashMap();
206
207         private static String fixedGetPropertyValueName(int propEnum, int valueEnum, int nameChoice) {
208             String value = UCharacter.getPropertyValueName(propEnum, valueEnum, nameChoice);
209             String newValue = (String) fixSkeleton.get(value);
210             if (newValue == null) {
211                 newValue = value;
212                 if (propEnum == UProperty.JOINING_GROUP) {
213                     newValue = newValue == null ? null : newValue.toLowerCase(Locale.ENGLISH);
214                 }
215                 newValue = regularize(newValue, true);
216                 fixSkeleton.put(value, newValue);
217             }
218             return newValue;
219         }
220
221         public List _getNameAliases(List result) {
222             if (result == null)
223                 result = new ArrayList();
224             // String alias = String_Extras.get(propEnum);
225             // if (alias == null)
226             String alias = Binary_Extras.get(propEnum);
227             if (alias != null) {
228                 addUnique(alias, result);
229             } else {
230                 addUnique(getFixedPropertyName(propEnum, UProperty.NameChoice.SHORT), result);
231                 addUnique(getFixedPropertyName(propEnum, UProperty.NameChoice.LONG), result);
232             }
233             return result;
234         }
235
236         public String getFixedPropertyName(int propName, int nameChoice) {
237             try {
238                 return UCharacter.getPropertyName(propEnum, nameChoice);
239             } catch (IllegalArgumentException e) {
240                 return null;
241             }
242         }
243
244         private static Map cccHack = new HashMap();
245         private static Set cccExtras = new HashSet();
246         static {
247             for (int i = 0; i <= 255; ++i) {
248                 String alias = UCharacter.getPropertyValueName(UProperty.CANONICAL_COMBINING_CLASS, i,
249                         UProperty.NameChoice.LONG);
250                 String numStr = String.valueOf(i);
251                 if (alias != null) {
252                     cccHack.put(alias, numStr);
253                 } else {
254                     cccHack.put(numStr, numStr);
255                     cccExtras.add(numStr);
256                 }
257             }
258         }
259
260         public List _getAvailableValues(List result) {
261             if (result == null)
262                 result = new ArrayList();
263             if (propEnum == UProperty.AGE) {
264                 addAllUnique(getAges(), result);
265                 return result;
266
267             }
268             if (propEnum < UProperty.INT_LIMIT) {
269                 if (Binary_Extras.isInRange(propEnum)) {
270                     propEnum = UProperty.BINARY_START; // HACK
271                 }
272                 int start = UCharacter.getIntPropertyMinValue(propEnum);
273                 int end = UCharacter.getIntPropertyMaxValue(propEnum);
274                 for (int i = start; i <= end; ++i) {
275                     String alias = getFixedValueAlias(null, i, UProperty.NameChoice.LONG);
276                     String alias2 = getFixedValueAlias(null, i, UProperty.NameChoice.SHORT);
277                     if (alias == null) {
278                         alias = alias2;
279                         if (alias == null && isCombiningClassProperty()) {
280                             alias = String.valueOf(i);
281                         }
282                     }
283                     // System.out.println(propertyAlias + "\t" + i + ":\t" + alias);
284                     addUnique(alias, result);
285                 }
286             } else if (propEnum >= UProperty.DOUBLE_START && propEnum < UProperty.DOUBLE_LIMIT) {
287                 UnicodeMap map = getUnicodeMap();
288                 Collection values = map.values();
289                 addAllUnique(values, result);
290             } else {
291                 String alias = getFixedValueAlias(null, -1, UProperty.NameChoice.LONG);
292                 addUnique(alias, result);
293             }
294             return result;
295         }
296
297         static String[] AGES = null;
298
299         private String[] getAges() {
300             if (AGES == null) {
301                 Set ages = new TreeSet();
302                 for (int i = 0; i < 0x10FFFF; ++i) {
303                     ages.add(getAge(i));
304                 }
305                 AGES = (String[]) ages.toArray(new String[ages.size()]);
306             }
307             return AGES;
308         }
309
310         public List _getValueAliases(String valueAlias, List result) {
311             if (result == null)
312                 result = new ArrayList();
313             if (propEnum == UProperty.AGE) {
314                 addUnique(valueAlias, result);
315                 return result;
316             }
317             if (isCombiningClassProperty()) {
318                 addUnique(cccHack.get(valueAlias), result); // add number
319             }
320             int type = getType();
321             if (type == UnicodeProperty.NUMERIC || type == EXTENDED_NUMERIC) {
322                 addUnique(valueAlias, result);
323                 if (valueAlias.endsWith(".0")) {
324                     addUnique(valueAlias.substring(0, valueAlias.length() - 2), result);
325                 }
326             } else {
327                 for (int nameChoice = UProperty.NameChoice.SHORT;; ++nameChoice) {
328                     try {
329                         addUnique(getFixedValueAlias(valueAlias, -1, nameChoice), result);
330                     } catch (Exception e) {
331                         break;
332                     }
333                 }
334             }
335             return result;
336         }
337
338         /* (non-Javadoc)
339          * @see com.ibm.icu.dev.test.util.UnicodePropertySource#getPropertyType()
340          */
341         private int internalGetPropertyType(int prop) {
342             switch (prop) {
343             case UProperty.AGE:
344             case UProperty.BLOCK:
345             case UProperty.SCRIPT:
346                 return UnicodeProperty.CATALOG;
347             case UProperty.ISO_COMMENT:
348             case UProperty.NAME:
349             case UProperty.UNICODE_1_NAME:
350             case UProperty.SCRIPT_EXTENSIONS:
351                 return UnicodeProperty.MISC;
352             case UProperty.BIDI_MIRRORING_GLYPH:
353             case UProperty.CASE_FOLDING:
354             case UProperty.LOWERCASE_MAPPING:
355             case UProperty.SIMPLE_CASE_FOLDING:
356             case UProperty.SIMPLE_LOWERCASE_MAPPING:
357             case UProperty.SIMPLE_TITLECASE_MAPPING:
358             case UProperty.SIMPLE_UPPERCASE_MAPPING:
359             case UProperty.TITLECASE_MAPPING:
360             case UProperty.UPPERCASE_MAPPING:
361                 return UnicodeProperty.EXTENDED_STRING;
362             }
363             if (prop < UProperty.BINARY_START)
364                 return UnicodeProperty.UNKNOWN;
365             if (prop < UProperty.BINARY_LIMIT)
366                 return UnicodeProperty.BINARY;
367             if (prop < UProperty.INT_START)
368                 return UnicodeProperty.EXTENDED_BINARY;
369             if (prop < UProperty.INT_LIMIT)
370                 return UnicodeProperty.ENUMERATED;
371             if (prop < UProperty.DOUBLE_START)
372                 return UnicodeProperty.EXTENDED_ENUMERATED;
373             if (prop < UProperty.DOUBLE_LIMIT)
374                 return UnicodeProperty.NUMERIC;
375             if (prop < UProperty.STRING_START)
376                 return UnicodeProperty.EXTENDED_NUMERIC;
377             if (prop < UProperty.STRING_LIMIT)
378                 return UnicodeProperty.STRING;
379             return UnicodeProperty.EXTENDED_STRING;
380         }
381
382         /*
383          * (non-Javadoc)
384          * 
385          * @see com.ibm.icu.dev.test.util.UnicodeProperty#getVersion()
386          */
387         public String _getVersion() {
388             return VersionInfo.ICU_VERSION.toString();
389         }
390     }
391
392   /*{
393             matchIterator = new UnicodeSetIterator(
394                 new UnicodeSet("[^[:Cn:]-[:Default_Ignorable_Code_Point:]]"));
395         }*/
396
397
398
399     /*
400      * Other Missing Functions:
401             Expands_On_NFC
402             Expands_On_NFD
403             Expands_On_NFKC
404             Expands_On_NFKD
405             Composition_Exclusion
406             Decomposition_Mapping
407             FC_NFKC_Closure
408             ISO_Comment
409             NFC_Quick_Check
410             NFD_Quick_Check
411             NFKC_Quick_Check
412             NFKD_Quick_Check
413             Special_Case_Condition
414             Unicode_Radical_Stroke
415      */
416
    /**
     * Names of the extra binary-style properties this factory adds. They are numbered
     * consecutively starting at {@code UProperty.BINARY_LIMIT}, in the order listed.
     */
    static final Names Binary_Extras = new Names(UProperty.BINARY_LIMIT,
          new String[] {
          "isNFC", "isNFD", "isNFKC", "isNFKD",
          "isLowercase", "isUppercase", "isTitlecase", "isCasefolded", "isCased",
    });
422
423 //    static final Names String_Extras = new Names(UProperty.STRING_LIMIT,
424 //          new String[] {
425 //          "toNFC", "toNFD", "toNFKC", "toNKFD",
426 //    });
427
    /*
     * Selectors for the extra properties, offset from UProperty.BINARY_LIMIT in the same
     * order as the names in Binary_Extras. BINARY_LIMIT here is one past the last extra
     * selector (UProperty.BINARY_LIMIT + 9).
     */
    static final int
        isNFC = UProperty.BINARY_LIMIT,
        isNFD = UProperty.BINARY_LIMIT+1,
        isNFKC = UProperty.BINARY_LIMIT+2,
        isNFKD = UProperty.BINARY_LIMIT+3,
        isLowercase = UProperty.BINARY_LIMIT+4,
        isUppercase = UProperty.BINARY_LIMIT+5,
        isTitlecase = UProperty.BINARY_LIMIT+6,
        isCasefolded = UProperty.BINARY_LIMIT+7,
        isCased = UProperty.BINARY_LIMIT+8,
        BINARY_LIMIT = UProperty.BINARY_LIMIT+9

//        NFC  = UProperty.STRING_LIMIT,
//        NFD  = UProperty.STRING_LIMIT+1,
//        NFKC = UProperty.STRING_LIMIT+2,
//        NFKD = UProperty.STRING_LIMIT+3
        ;
445
446     protected ICUPropertyFactory() {
447         Collection c = getInternalAvailablePropertyAliases(new ArrayList());
448         Iterator it = c.iterator();
449         while (it.hasNext()) {
450             add(getInternalProperty((String) it.next()));
451         }
452     }
453
454     static BitSet BITSET = new BitSet();
455     public static synchronized String getStringScriptExtensions(int codePoint) {
456         int result = UScript.getScriptExtensions(codePoint, BITSET);
457         if (result >= 0) {
458             return UScript.getName(result);
459         }
460         TreeMap<String,String> sorted = new TreeMap<String,String>();
461         for (int scriptCode = BITSET.nextSetBit(0); scriptCode >= 0; scriptCode = BITSET.nextSetBit(scriptCode+1)) {
462             // sort by short form
463             sorted.put(UScript.getShortName(scriptCode), UScript.getName(scriptCode));
464         }
465         return CollectionUtilities.join(sorted.values(), " ");
466     }
467
468     private static ICUPropertyFactory singleton = null;
469
470     public static synchronized ICUPropertyFactory make() {
471         if (singleton != null)
472             return singleton;
473         singleton = new ICUPropertyFactory();
474         return singleton;
475     }
476
477     public List getInternalAvailablePropertyAliases(List result) {
478         int[][] ranges = {
479                 {UProperty.BINARY_START,    UProperty.BINARY_LIMIT},
480                 {UProperty.INT_START,       UProperty.INT_LIMIT},
481                 {UProperty.DOUBLE_START,    UProperty.DOUBLE_LIMIT},
482                 {UProperty.STRING_START,    UProperty.STRING_LIMIT},
483                 {UProperty.OTHER_PROPERTY_START, UProperty.OTHER_PROPERTY_LIMIT},
484
485         };
486         for (int i = 0; i < ranges.length; ++i) {
487             for (int j = ranges[i][0]; j < ranges[i][1]; ++j) {
488                 String alias = UCharacter.getPropertyName(j, UProperty.NameChoice.LONG);
489                 UnicodeProperty.addUnique(alias, result);
490                 if (!result.contains(alias))
491                     result.add(alias);
492             }
493         }
494         // result.addAll(String_Extras.getNames());
495         result.addAll(Binary_Extras.getNames());
496         return result;
497     }
498
499     public UnicodeProperty getInternalProperty(String propertyAlias) {
500         int propEnum;
501         main: {
502             int possibleItem = Binary_Extras.get(propertyAlias);
503             if (possibleItem >= 0) {
504                 propEnum = possibleItem;
505                 break main;
506             }
507             // possibleItem = String_Extras.get(propertyAlias);
508             // if (possibleItem >= 0) {
509             // propEnum = possibleItem;
510             // break main;
511             // }
512             propEnum = UCharacter.getPropertyEnum(propertyAlias);
513         }
514         return new ICUProperty(propertyAlias, propEnum);
515     }
516
517     /*
518      * (non-Javadoc)
519      * 
520      * @see com.ibm.icu.dev.test.util.UnicodePropertySource#getProperty(java.lang.String)
521      */
522     // TODO file bug on getPropertyValueName for Canonical_Combining_Class
523     public static class Names {
524         private String[] names;
525         private int base;
526
527         public Names(int base, String[] names) {
528             this.base = base;
529             this.names = names;
530         }
531
532         public int get(String name) {
533             for (int i = 0; i < names.length; ++i) {
534                 if (name.equalsIgnoreCase(names[i]))
535                     return base + i;
536             }
537             return -1;
538         }
539
540         public String get(int number) {
541             number -= base;
542             if (number < 0 || names.length <= number)
543                 return null;
544             return names[number];
545         }
546
547         public boolean isInRange(int number) {
548             number -= base;
549             return (0 <= number && number < names.length);
550         }
551
552         public List getNames() {
553             return Arrays.asList(names);
554         }
555     }
556 }