]> gitweb.fperrin.net Git - Dictionary.git/blob - jars/icu4j-4_4_2-src/main/tests/translit/src/com/ibm/icu/dev/test/util/ICUPropertyFactory.java
go
[Dictionary.git] / jars / icu4j-4_4_2-src / main / tests / translit / src / com / ibm / icu / dev / test / util / ICUPropertyFactory.java
1 /*\r
2  *******************************************************************************\r
3  * Copyright (C) 2002-2010, International Business Machines Corporation and    *\r
4  * others. All Rights Reserved.                                                *\r
5  *******************************************************************************\r
6  */\r
7 package com.ibm.icu.dev.test.util;\r
8 \r
9 import java.util.ArrayList;\r
10 import java.util.Arrays;\r
11 import java.util.Collection;\r
12 import java.util.HashMap;\r
13 import java.util.HashSet;\r
14 import java.util.Iterator;\r
15 import java.util.List;\r
16 import java.util.Locale;\r
17 import java.util.Map;\r
18 import java.util.Set;\r
19 import java.util.TreeSet;\r
20 \r
21 import com.ibm.icu.lang.UCharacter;\r
22 import com.ibm.icu.lang.UProperty;\r
23 import com.ibm.icu.text.Normalizer;\r
24 import com.ibm.icu.text.UTF16;\r
25 import com.ibm.icu.util.VersionInfo;\r
26 \r
27 \r
28 /**\r
29  * Provides a general interface for Unicode Properties, and\r
30  * extracting sets based on those values.\r
31  * @author Davis\r
32  */\r
33 \r
34 public class ICUPropertyFactory extends UnicodeProperty.Factory {\r
35 \r
36     static class ICUProperty extends UnicodeProperty {\r
37         protected int propEnum = Integer.MIN_VALUE;\r
38 \r
39         protected ICUProperty(String propName, int propEnum) {\r
40             setName(propName);\r
41             this.propEnum = propEnum;\r
42             setType(internalGetPropertyType(propEnum));\r
43             if (propEnum == UProperty.DEFAULT_IGNORABLE_CODE_POINT || propEnum == UProperty.BIDI_CLASS) {\r
44                 setUniformUnassigned(false);\r
45             }\r
46         }\r
47 \r
48         boolean shownException = false;\r
49 \r
50         public String _getValue(int codePoint) {\r
51             switch (propEnum) {\r
52             case UProperty.AGE:\r
53                 return getAge(codePoint);\r
54             case UProperty.BIDI_MIRRORING_GLYPH:\r
55                 return UTF16.valueOf(UCharacter.getMirror(codePoint));\r
56             case UProperty.CASE_FOLDING:\r
57                 return UCharacter.foldCase(UTF16.valueOf(codePoint), true);\r
58             case UProperty.ISO_COMMENT:\r
59                 return UCharacter.getISOComment(codePoint);\r
60             case UProperty.LOWERCASE_MAPPING:\r
61                 return UCharacter.toLowerCase(Locale.ENGLISH, UTF16.valueOf(codePoint));\r
62             case UProperty.NAME:\r
63                 return UCharacter.getName(codePoint);\r
64             case UProperty.SIMPLE_CASE_FOLDING:\r
65                 return UTF16.valueOf(UCharacter.foldCase(codePoint, true));\r
66             case UProperty.SIMPLE_LOWERCASE_MAPPING:\r
67                 return UTF16.valueOf(UCharacter.toLowerCase(codePoint));\r
68             case UProperty.SIMPLE_TITLECASE_MAPPING:\r
69                 return UTF16.valueOf(UCharacter.toTitleCase(codePoint));\r
70             case UProperty.SIMPLE_UPPERCASE_MAPPING:\r
71                 return UTF16.valueOf(UCharacter.toUpperCase(codePoint));\r
72             case UProperty.TITLECASE_MAPPING:\r
73                 return UCharacter.toTitleCase(Locale.ENGLISH, UTF16.valueOf(codePoint), null);\r
74             case UProperty.UNICODE_1_NAME:\r
75                 return UCharacter.getName1_0(codePoint);\r
76             case UProperty.UPPERCASE_MAPPING:\r
77                 return UCharacter.toUpperCase(Locale.ENGLISH, UTF16.valueOf(codePoint));\r
78             // case NFC: return Normalizer.normalize(codePoint, Normalizer.NFC);\r
79             // case NFD: return Normalizer.normalize(codePoint, Normalizer.NFD);\r
80             // case NFKC: return Normalizer.normalize(codePoint, Normalizer.NFKC);\r
81             // case NFKD: return Normalizer.normalize(codePoint, Normalizer.NFKD);\r
82             case isNFC:\r
83                 return String.valueOf(Normalizer.normalize(codePoint, Normalizer.NFC).equals(UTF16.valueOf(codePoint)));\r
84             case isNFD:\r
85                 return String.valueOf(Normalizer.normalize(codePoint, Normalizer.NFD).equals(UTF16.valueOf(codePoint)));\r
86             case isNFKC:\r
87                 return String\r
88                         .valueOf(Normalizer.normalize(codePoint, Normalizer.NFKC).equals(UTF16.valueOf(codePoint)));\r
89             case isNFKD:\r
90                 return String\r
91                         .valueOf(Normalizer.normalize(codePoint, Normalizer.NFKD).equals(UTF16.valueOf(codePoint)));\r
92             case isLowercase:\r
93                 return String.valueOf(UCharacter.toLowerCase(Locale.ENGLISH, UTF16.valueOf(codePoint)).equals(\r
94                         UTF16.valueOf(codePoint)));\r
95             case isUppercase:\r
96                 return String.valueOf(UCharacter.toUpperCase(Locale.ENGLISH, UTF16.valueOf(codePoint)).equals(\r
97                         UTF16.valueOf(codePoint)));\r
98             case isTitlecase:\r
99                 return String.valueOf(UCharacter.toTitleCase(Locale.ENGLISH, UTF16.valueOf(codePoint), null).equals(\r
100                         UTF16.valueOf(codePoint)));\r
101             case isCasefolded:\r
102                 return String.valueOf(UCharacter.foldCase(UTF16.valueOf(codePoint), true).equals(\r
103                         UTF16.valueOf(codePoint)));\r
104             case isCased:\r
105                 return String.valueOf(UCharacter.toLowerCase(Locale.ENGLISH, UTF16.valueOf(codePoint)).equals(\r
106                         UTF16.valueOf(codePoint)));\r
107             }\r
108             if (propEnum < UProperty.INT_LIMIT) {\r
109                 int enumValue = -1;\r
110                 String value = null;\r
111                 try {\r
112                     enumValue = UCharacter.getIntPropertyValue(codePoint, propEnum);\r
113                     if (enumValue >= 0)\r
114                         value = fixedGetPropertyValueName(propEnum, enumValue, UProperty.NameChoice.LONG);\r
115                 } catch (IllegalArgumentException e) {\r
116                     if (!shownException) {\r
117                         System.out.println("Fail: " + getName() + ", " + Integer.toHexString(codePoint));\r
118                         shownException = true;\r
119                     }\r
120                 }\r
121                 return value != null ? value : String.valueOf(enumValue);\r
122             } else if (propEnum < UProperty.DOUBLE_LIMIT) {\r
123                 double num = UCharacter.getUnicodeNumericValue(codePoint);\r
124                 if (num == UCharacter.NO_NUMERIC_VALUE)\r
125                     return null;\r
126                 return Double.toString(num);\r
127                 // TODO: Fix HACK -- API deficient\r
128             }\r
129             return null;\r
130         }\r
131 \r
132         private String getAge(int codePoint) {\r
133             String temp = UCharacter.getAge(codePoint).toString();\r
134             if (temp.equals("0.0.0.0"))\r
135                 return "unassigned";\r
136             if (temp.endsWith(".0.0"))\r
137                 return temp.substring(0, temp.length() - 4);\r
138             return temp;\r
139         }\r
140 \r
141         /**\r
142          * @param valueAlias null if unused.\r
143          * @param valueEnum -1 if unused\r
144          * @param nameChoice\r
145          * @return\r
146          */\r
147         private String getFixedValueAlias(String valueAlias, int valueEnum, int nameChoice) {\r
148             if (propEnum >= UProperty.STRING_START) {\r
149                 if (nameChoice > UProperty.NameChoice.LONG)\r
150                     throw new IllegalArgumentException();\r
151                 if (nameChoice != UProperty.NameChoice.LONG)\r
152                     return null;\r
153                 return "<string>";\r
154             } else if (propEnum >= UProperty.DOUBLE_START) {\r
155                 if (nameChoice > UProperty.NameChoice.LONG)\r
156                     throw new IllegalArgumentException();\r
157                 if (nameChoice != UProperty.NameChoice.LONG)\r
158                     return null;\r
159                 return "<number>";\r
160             }\r
161             if (valueAlias != null && !valueAlias.equals("<integer>")) {\r
162                 valueEnum = fixedGetPropertyValueEnum(propEnum, valueAlias);\r
163             }\r
164             // because these are defined badly, there may be no normal (long) name.\r
165             // if there is\r
166             String result = fixedGetPropertyValueName(propEnum, valueEnum, nameChoice);\r
167             if (result != null)\r
168                 return result;\r
169             // HACK try other namechoice\r
170             if (nameChoice == UProperty.NameChoice.LONG) {\r
171                 result = fixedGetPropertyValueName(propEnum, valueEnum, UProperty.NameChoice.SHORT);\r
172                 if (result != null)\r
173                     return result;\r
174                 if (isCombiningClassProperty())\r
175                     return null;\r
176                 return "<integer>";\r
177             }\r
178             return null;\r
179         }\r
180 \r
181         public boolean isCombiningClassProperty() {\r
182             return (propEnum == UProperty.CANONICAL_COMBINING_CLASS\r
183                     || propEnum == UProperty.LEAD_CANONICAL_COMBINING_CLASS\r
184                     || propEnum == UProperty.TRAIL_CANONICAL_COMBINING_CLASS);\r
185         }\r
186 \r
187         private static int fixedGetPropertyValueEnum(int propEnum, String valueAlias) {\r
188             try {\r
189                 if (propEnum < BINARY_LIMIT) {\r
190                     propEnum = UProperty.ALPHABETIC;\r
191                 }\r
192                 return UCharacter.getPropertyValueEnum(propEnum, valueAlias);\r
193             } catch (Exception e) {\r
194                 return Integer.parseInt(valueAlias);\r
195             }\r
196         }\r
197 \r
198         static Map fixSkeleton = new HashMap();\r
199 \r
200         private static String fixedGetPropertyValueName(int propEnum, int valueEnum, int nameChoice) {\r
201             String value = UCharacter.getPropertyValueName(propEnum, valueEnum, nameChoice);\r
202             String newValue = (String) fixSkeleton.get(value);\r
203             if (newValue == null) {\r
204                 newValue = value;\r
205                 if (propEnum == UProperty.JOINING_GROUP) {\r
206                     newValue = newValue == null ? null : newValue.toLowerCase(Locale.ENGLISH);\r
207                 }\r
208                 newValue = regularize(newValue, true);\r
209                 fixSkeleton.put(value, newValue);\r
210             }\r
211             return newValue;\r
212         }\r
213 \r
214         public List _getNameAliases(List result) {\r
215             if (result == null)\r
216                 result = new ArrayList();\r
217             // String alias = String_Extras.get(propEnum);\r
218             // if (alias == null)\r
219             String alias = Binary_Extras.get(propEnum);\r
220             if (alias != null) {\r
221                 addUnique(alias, result);\r
222             } else {\r
223                 addUnique(getFixedPropertyName(propEnum, UProperty.NameChoice.SHORT), result);\r
224                 addUnique(getFixedPropertyName(propEnum, UProperty.NameChoice.LONG), result);\r
225             }\r
226             return result;\r
227         }\r
228 \r
229         public String getFixedPropertyName(int propName, int nameChoice) {\r
230             try {\r
231                 return UCharacter.getPropertyName(propEnum, nameChoice);\r
232             } catch (IllegalArgumentException e) {\r
233                 return null;\r
234             }\r
235         }\r
236 \r
237         private static Map cccHack = new HashMap();\r
238         private static Set cccExtras = new HashSet();\r
239         static {\r
240             for (int i = 0; i <= 255; ++i) {\r
241                 String alias = UCharacter.getPropertyValueName(UProperty.CANONICAL_COMBINING_CLASS, i,\r
242                         UProperty.NameChoice.LONG);\r
243                 String numStr = String.valueOf(i);\r
244                 if (alias != null) {\r
245                     cccHack.put(alias, numStr);\r
246                 } else {\r
247                     cccHack.put(numStr, numStr);\r
248                     cccExtras.add(numStr);\r
249                 }\r
250             }\r
251         }\r
252 \r
253         public List _getAvailableValues(List result) {\r
254             if (result == null)\r
255                 result = new ArrayList();\r
256             if (propEnum == UProperty.AGE) {\r
257                 addAllUnique(getAges(), result);\r
258                 return result;\r
259 \r
260             }\r
261             if (propEnum < UProperty.INT_LIMIT) {\r
262                 if (Binary_Extras.isInRange(propEnum)) {\r
263                     propEnum = UProperty.BINARY_START; // HACK\r
264                 }\r
265                 int start = UCharacter.getIntPropertyMinValue(propEnum);\r
266                 int end = UCharacter.getIntPropertyMaxValue(propEnum);\r
267                 for (int i = start; i <= end; ++i) {\r
268                     String alias = getFixedValueAlias(null, i, UProperty.NameChoice.LONG);\r
269                     String alias2 = getFixedValueAlias(null, i, UProperty.NameChoice.SHORT);\r
270                     if (alias == null) {\r
271                         alias = alias2;\r
272                         if (alias == null && isCombiningClassProperty()) {\r
273                             alias = String.valueOf(i);\r
274                         }\r
275                     }\r
276                     // System.out.println(propertyAlias + "\t" + i + ":\t" + alias);\r
277                     addUnique(alias, result);\r
278                 }\r
279             } else if (propEnum >= UProperty.DOUBLE_START && propEnum < UProperty.DOUBLE_LIMIT) {\r
280                 UnicodeMap map = getUnicodeMap();\r
281                 Collection values = map.values();\r
282                 addAllUnique(values, result);\r
283             } else {\r
284                 String alias = getFixedValueAlias(null, -1, UProperty.NameChoice.LONG);\r
285                 addUnique(alias, result);\r
286             }\r
287             return result;\r
288         }\r
289 \r
290         static String[] AGES = null;\r
291 \r
292         private String[] getAges() {\r
293             if (AGES == null) {\r
294                 Set ages = new TreeSet();\r
295                 for (int i = 0; i < 0x10FFFF; ++i) {\r
296                     ages.add(getAge(i));\r
297                 }\r
298                 AGES = (String[]) ages.toArray(new String[ages.size()]);\r
299             }\r
300             return AGES;\r
301         }\r
302 \r
303         public List _getValueAliases(String valueAlias, List result) {\r
304             if (result == null)\r
305                 result = new ArrayList();\r
306             if (propEnum == UProperty.AGE) {\r
307                 addUnique(valueAlias, result);\r
308                 return result;\r
309             }\r
310             if (isCombiningClassProperty()) {\r
311                 addUnique(cccHack.get(valueAlias), result); // add number\r
312             }\r
313             int type = getType();\r
314             if (type == UnicodeProperty.NUMERIC || type == EXTENDED_NUMERIC) {\r
315                 addUnique(valueAlias, result);\r
316                 if (valueAlias.endsWith(".0")) {\r
317                     addUnique(valueAlias.substring(0, valueAlias.length() - 2), result);\r
318                 }\r
319             } else {\r
320                 for (int nameChoice = UProperty.NameChoice.SHORT;; ++nameChoice) {\r
321                     try {\r
322                         addUnique(getFixedValueAlias(valueAlias, -1, nameChoice), result);\r
323                     } catch (Exception e) {\r
324                         break;\r
325                     }\r
326                 }\r
327             }\r
328             return result;\r
329         }\r
330 \r
331         /* (non-Javadoc)\r
332          * @see com.ibm.icu.dev.test.util.UnicodePropertySource#getPropertyType()\r
333          */\r
334         private int internalGetPropertyType(int prop) {\r
335             switch (prop) {\r
336             case UProperty.AGE:\r
337             case UProperty.BLOCK:\r
338             case UProperty.SCRIPT:\r
339                 return UnicodeProperty.CATALOG;\r
340             case UProperty.ISO_COMMENT:\r
341             case UProperty.NAME:\r
342             case UProperty.UNICODE_1_NAME:\r
343                 return UnicodeProperty.MISC;\r
344             case UProperty.BIDI_MIRRORING_GLYPH:\r
345             case UProperty.CASE_FOLDING:\r
346             case UProperty.LOWERCASE_MAPPING:\r
347             case UProperty.SIMPLE_CASE_FOLDING:\r
348             case UProperty.SIMPLE_LOWERCASE_MAPPING:\r
349             case UProperty.SIMPLE_TITLECASE_MAPPING:\r
350             case UProperty.SIMPLE_UPPERCASE_MAPPING:\r
351             case UProperty.TITLECASE_MAPPING:\r
352             case UProperty.UPPERCASE_MAPPING:\r
353                 return UnicodeProperty.EXTENDED_STRING;\r
354             }\r
355             if (prop < UProperty.BINARY_START)\r
356                 return UnicodeProperty.UNKNOWN;\r
357             if (prop < UProperty.BINARY_LIMIT)\r
358                 return UnicodeProperty.BINARY;\r
359             if (prop < UProperty.INT_START)\r
360                 return UnicodeProperty.EXTENDED_BINARY;\r
361             if (prop < UProperty.INT_LIMIT)\r
362                 return UnicodeProperty.ENUMERATED;\r
363             if (prop < UProperty.DOUBLE_START)\r
364                 return UnicodeProperty.EXTENDED_ENUMERATED;\r
365             if (prop < UProperty.DOUBLE_LIMIT)\r
366                 return UnicodeProperty.NUMERIC;\r
367             if (prop < UProperty.STRING_START)\r
368                 return UnicodeProperty.EXTENDED_NUMERIC;\r
369             if (prop < UProperty.STRING_LIMIT)\r
370                 return UnicodeProperty.STRING;\r
371             return UnicodeProperty.EXTENDED_STRING;\r
372         }\r
373 \r
374         /*\r
375          * (non-Javadoc)\r
376          * \r
377          * @see com.ibm.icu.dev.test.util.UnicodeProperty#getVersion()\r
378          */\r
379         public String _getVersion() {\r
380             return VersionInfo.ICU_VERSION.toString();\r
381         }\r
382     }\r
383 \r
384   /*{\r
385             matchIterator = new UnicodeSetIterator(\r
386                 new UnicodeSet("[^[:Cn:]-[:Default_Ignorable_Code_Point:]]"));\r
387         }*/\r
388 \r
389 \r
390 \r
391     /*\r
392      * Other Missing Functions:\r
393             Expands_On_NFC\r
394             Expands_On_NFD\r
395             Expands_On_NFKC\r
396             Expands_On_NFKD\r
397             Composition_Exclusion\r
398             Decomposition_Mapping\r
399             FC_NFKC_Closure\r
400             ISO_Comment\r
401             NFC_Quick_Check\r
402             NFD_Quick_Check\r
403             NFKC_Quick_Check\r
404             NFKD_Quick_Check\r
405             Special_Case_Condition\r
406             Unicode_Radical_Stroke\r
407      */\r
408 \r
409     static final Names Binary_Extras = new Names(UProperty.BINARY_LIMIT,\r
410           new String[] {\r
411           "isNFC", "isNFD", "isNFKC", "isNFKD",\r
412           "isLowercase", "isUppercase", "isTitlecase", "isCasefolded", "isCased",\r
413     });\r
414 \r
415 //    static final Names String_Extras = new Names(UProperty.STRING_LIMIT,\r
416 //          new String[] {\r
417 //          "toNFC", "toNFD", "toNFKC", "toNKFD",\r
418 //    });\r
419 \r
420     static final int\r
421         isNFC = UProperty.BINARY_LIMIT,\r
422         isNFD = UProperty.BINARY_LIMIT+1,\r
423         isNFKC = UProperty.BINARY_LIMIT+2,\r
424         isNFKD = UProperty.BINARY_LIMIT+3,\r
425         isLowercase = UProperty.BINARY_LIMIT+4,\r
426         isUppercase = UProperty.BINARY_LIMIT+5,\r
427         isTitlecase = UProperty.BINARY_LIMIT+6,\r
428         isCasefolded = UProperty.BINARY_LIMIT+7,\r
429         isCased = UProperty.BINARY_LIMIT+8,\r
430         BINARY_LIMIT = UProperty.BINARY_LIMIT+9\r
431 \r
432 //        NFC  = UProperty.STRING_LIMIT,\r
433 //        NFD  = UProperty.STRING_LIMIT+1,\r
434 //        NFKC = UProperty.STRING_LIMIT+2,\r
435 //        NFKD = UProperty.STRING_LIMIT+3\r
436         ;\r
437 \r
438     private ICUPropertyFactory() {\r
439         Collection c = getInternalAvailablePropertyAliases(new ArrayList());\r
440         Iterator it = c.iterator();\r
441         while (it.hasNext()) {\r
442             add(getInternalProperty((String) it.next()));\r
443         }\r
444     }\r
445 \r
446     private static ICUPropertyFactory singleton = null;\r
447 \r
448     public static synchronized ICUPropertyFactory make() {\r
449         if (singleton != null)\r
450             return singleton;\r
451         singleton = new ICUPropertyFactory();\r
452         return singleton;\r
453     }\r
454 \r
455     public List getInternalAvailablePropertyAliases(List result) {\r
456         int[][] ranges = {\r
457                 {UProperty.BINARY_START,    UProperty.BINARY_LIMIT},\r
458                 {UProperty.INT_START,       UProperty.INT_LIMIT},\r
459                 {UProperty.DOUBLE_START,    UProperty.DOUBLE_LIMIT},\r
460                 {UProperty.STRING_START,    UProperty.STRING_LIMIT},\r
461         };\r
462         for (int i = 0; i < ranges.length; ++i) {\r
463             for (int j = ranges[i][0]; j < ranges[i][1]; ++j) {\r
464                 String alias = UCharacter.getPropertyName(j, UProperty.NameChoice.LONG);\r
465                 UnicodeProperty.addUnique(alias, result);\r
466                 if (!result.contains(alias))\r
467                     result.add(alias);\r
468             }\r
469         }\r
470         // result.addAll(String_Extras.getNames());\r
471         result.addAll(Binary_Extras.getNames());\r
472         return result;\r
473     }\r
474 \r
475     public UnicodeProperty getInternalProperty(String propertyAlias) {\r
476         int propEnum;\r
477         main: {\r
478             int possibleItem = Binary_Extras.get(propertyAlias);\r
479             if (possibleItem >= 0) {\r
480                 propEnum = possibleItem;\r
481                 break main;\r
482             }\r
483             // possibleItem = String_Extras.get(propertyAlias);\r
484             // if (possibleItem >= 0) {\r
485             // propEnum = possibleItem;\r
486             // break main;\r
487             // }\r
488             propEnum = UCharacter.getPropertyEnum(propertyAlias);\r
489         }\r
490         return new ICUProperty(propertyAlias, propEnum);\r
491     }\r
492 \r
493     /*\r
494      * (non-Javadoc)\r
495      * \r
496      * @see com.ibm.icu.dev.test.util.UnicodePropertySource#getProperty(java.lang.String)\r
497      */\r
498     // TODO file bug on getPropertyValueName for Canonical_Combining_Class\r
499     public static class Names {\r
500         private String[] names;\r
501         private int base;\r
502 \r
503         public Names(int base, String[] names) {\r
504             this.base = base;\r
505             this.names = names;\r
506         }\r
507 \r
508         public int get(String name) {\r
509             for (int i = 0; i < names.length; ++i) {\r
510                 if (name.equalsIgnoreCase(names[i]))\r
511                     return base + i;\r
512             }\r
513             return -1;\r
514         }\r
515 \r
516         public String get(int number) {\r
517             number -= base;\r
518             if (number < 0 || names.length <= number)\r
519                 return null;\r
520             return names[number];\r
521         }\r
522 \r
523         public boolean isInRange(int number) {\r
524             number -= base;\r
525             return (0 <= number && number < names.length);\r
526         }\r
527 \r
528         public List getNames() {\r
529             return Arrays.asList(names);\r
530         }\r
531     }\r
532 }\r