]> gitweb.fperrin.net Git - Dictionary.git/blob - jars/icu4j-4_2_1-src/src/com/ibm/icu/dev/test/lang/UCharacterTest.java
icu4jsrc
[Dictionary.git] / jars / icu4j-4_2_1-src / src / com / ibm / icu / dev / test / lang / UCharacterTest.java
1 /**\r
2 *******************************************************************************\r
3 * Copyright (C) 1996-2009, International Business Machines Corporation and    *\r
4 * others. All Rights Reserved.                                                *\r
5 *******************************************************************************\r
6 */\r
7 \r
8 package com.ibm.icu.dev.test.lang;\r
9 \r
10 import com.ibm.icu.impl.UBiDiProps;\r
11 import com.ibm.icu.impl.UCaseProps;\r
12 \r
13 import com.ibm.icu.dev.test.TestFmwk;\r
14 import com.ibm.icu.dev.test.TestUtil;\r
15 import com.ibm.icu.lang.UCharacter;\r
16 import com.ibm.icu.lang.UCharacterCategory;\r
17 import com.ibm.icu.lang.UCharacterDirection;\r
18 import com.ibm.icu.lang.UProperty;\r
19 import com.ibm.icu.lang.UScript;\r
20 import com.ibm.icu.text.UTF16;\r
21 import com.ibm.icu.text.UnicodeSet;\r
22 import com.ibm.icu.text.UnicodeSetIterator;\r
23 import com.ibm.icu.util.RangeValueIterator;\r
24 import com.ibm.icu.util.ValueIterator;\r
25 import com.ibm.icu.util.VersionInfo;\r
26 import com.ibm.icu.impl.UCharacterName;\r
27 import com.ibm.icu.impl.Utility;\r
28 import com.ibm.icu.impl.USerializedSet;\r
29 import com.ibm.icu.impl.NormalizerImpl;\r
30 import com.ibm.icu.impl.UCharacterProperty;\r
31 import java.io.BufferedReader;\r
32 import java.util.Arrays;\r
33 \r
34 /**\r
35 * Testing class for UCharacter\r
36 * Mostly following the test cases for ICU\r
37 * @author Syn Wee Quek\r
38 * @since nov 04 2000\r
39 */\r
40 public final class UCharacterTest extends TestFmwk\r
41 {\r
42     // private variables =============================================\r
43 \r
    /**
    * Unicode version that the ICU4J data is expected to implement.
    * TestVersion() compares this value against
    * UCharacter.getUnicodeVersion().
    */
    private final VersionInfo VERSION_ = VersionInfo.getInstance("5.1.0.0");
48 \r
49     // constructor ===================================================\r
50 \r
    /**
    * Default constructor; performs no initialization of its own.
    */
    public UCharacterTest()
    {
    }
57 \r
58     // public methods ================================================\r
59 \r
60     public static void main(String[] arg)\r
61     {\r
62         try\r
63         {\r
64             UCharacterTest test = new UCharacterTest();\r
65             test.run(arg);\r
66         }\r
67         catch (Exception e)\r
68         {\r
69         e.printStackTrace();\r
70         }\r
71     }\r
72 \r
73     /**\r
74     * Testing the letter and number determination in UCharacter\r
75     */\r
76     public void TestLetterNumber()\r
77     {\r
78         for (int i = 0x0041; i < 0x005B; i ++)\r
79         if (!UCharacter.isLetter(i))\r
80             errln("FAIL \\u" + hex(i) + " expected to be a letter");\r
81 \r
82         for (int i = 0x0660; i < 0x066A; i ++)\r
83         if (UCharacter.isLetter(i))\r
84             errln("FAIL \\u" + hex(i) + " expected not to be a letter");\r
85 \r
86         for (int i = 0x0660; i < 0x066A; i ++)\r
87         if (!UCharacter.isDigit(i))\r
88             errln("FAIL \\u" + hex(i) + " expected to be a digit");\r
89 \r
90         for (int i = 0x0041; i < 0x005B; i ++)\r
91             if (!UCharacter.isLetterOrDigit(i))\r
92                 errln("FAIL \\u" + hex(i) + " expected not to be a digit");\r
93 \r
94         for (int i = 0x0660; i < 0x066A; i ++)\r
95             if (!UCharacter.isLetterOrDigit(i))\r
96                 errln("FAIL \\u" + hex(i) +\r
97                     "expected to be either a letter or a digit");\r
98 \r
99         /*\r
100          * The following checks work only starting from Unicode 4.0.\r
101          * Check the version number here.\r
102          */\r
103         VersionInfo version =    UCharacter.getUnicodeVersion();\r
104         if(version.getMajor()<4 || version.equals(VersionInfo.getInstance(4, 0, 1))) {\r
105             return;\r
106         }\r
107 \r
108 \r
109 \r
110         /*\r
111          * Sanity check:\r
112          * Verify that exactly the digit characters have decimal digit values.\r
113          * This assumption is used in the implementation of u_digit()\r
114          * (which checks nt=de)\r
115          * compared with the parallel java.lang.Character.digit()\r
116          * (which checks Nd).\r
117          *\r
118          * This was not true in Unicode 3.2 and earlier.\r
119          * Unicode 4.0 fixed discrepancies.\r
120          * Unicode 4.0.1 re-introduced problems in this area due to an\r
121          * unintentionally incomplete last-minute change.\r
122          */\r
123         String digitsPattern = "[:Nd:]";\r
124         String decimalValuesPattern = "[:Numeric_Type=Decimal:]";\r
125 \r
126         UnicodeSet digits, decimalValues;\r
127 \r
128         digits= new UnicodeSet(digitsPattern);\r
129         decimalValues=new UnicodeSet(decimalValuesPattern);\r
130 \r
131 \r
132         compareUSets(digits, decimalValues, "[:Nd:]", "[:Numeric_Type=Decimal:]", true);\r
133 \r
134 \r
135     }\r
136 \r
137     /**\r
138     * Tests for space determination in UCharacter\r
139     */\r
140     public void TestSpaces()\r
141     {\r
142         int spaces[] = {0x0020, 0x00a0, 0x2000, 0x2001, 0x2005};\r
143         int nonspaces[] = {0x0061, 0x0062, 0x0063, 0x0064, 0x0074};\r
144         int whitespaces[] = {0x2008, 0x2009, 0x200a, 0x001c, 0x000c /* ,0x200b */}; // 0x200b was "Zs" in Unicode 4.0, but it is "Cf" in Unicode 4.1\r
145         int nonwhitespaces[] = {0x0061, 0x0062, 0x003c, 0x0028, 0x003f, 0x00a0, 0x2007, 0x202f, 0xfefe, 0x200b};\r
146 \r
147         int size = spaces.length;\r
148         for (int i = 0; i < size; i ++)\r
149         {\r
150             if (!UCharacter.isSpaceChar(spaces[i]))\r
151             {\r
152                 errln("FAIL \\u" + hex(spaces[i]) +\r
153                     " expected to be a space character");\r
154                 break;\r
155             }\r
156 \r
157             if (UCharacter.isSpaceChar(nonspaces[i]))\r
158             {\r
159                 errln("FAIL \\u" + hex(nonspaces[i]) +\r
160                 " expected not to be space character");\r
161                 break;\r
162             }\r
163 \r
164             if (!UCharacter.isWhitespace(whitespaces[i]))\r
165             {\r
166                 errln("FAIL \\u" + hex(whitespaces[i]) +\r
167                         " expected to be a white space character");\r
168                 break;\r
169             }\r
170             if (UCharacter.isWhitespace(nonwhitespaces[i]))\r
171             {\r
172                 errln("FAIL \\u" + hex(nonwhitespaces[i]) +\r
173                             " expected not to be a space character");\r
174                 break;\r
175             }\r
176             logln("Ok    \\u" + hex(spaces[i]) + " and \\u" +\r
177                   hex(nonspaces[i]) + " and \\u" + hex(whitespaces[i]) +\r
178                   " and \\u" + hex(nonwhitespaces[i]));\r
179         }\r
180 \r
181         int rulewhitespace[] = {0x9, 0xd, 0x20, 0x85,\r
182                                 0x200e, 0x200f, 0x2028, 0x2029};\r
183         int nonrulewhitespace[] = {0x8, 0xe, 0x21, 0x86, 0xa0, 0xa1,\r
184                                    0x1680, 0x1681, 0x180e, 0x180f,\r
185                                    0x1FFF, 0x2000, 0x200a, 0x200b,\r
186                                    0x2010, 0x202f, 0x2030, 0x205f,\r
187                                    0x2060, 0x3000, 0x3001};\r
188         for (int i = 0; i < rulewhitespace.length; i ++) {\r
189             if (!UCharacterProperty.isRuleWhiteSpace(rulewhitespace[i])) {\r
190                 errln("\\u" + Utility.hex(rulewhitespace[i], 4)\r
191                       + " expected to be a rule white space");\r
192             }\r
193         }\r
194         for (int i = 0; i < nonrulewhitespace.length; i ++) {\r
195             if (UCharacterProperty.isRuleWhiteSpace(nonrulewhitespace[i])) {\r
196                 errln("\\u" + Utility.hex(nonrulewhitespace[i], 4)\r
197                       + " expected to be a non rule white space");\r
198             }\r
199         }\r
200     }\r
201 \r
202     /**\r
203     * Tests for defined and undefined characters\r
204     */\r
205     public void TestDefined()\r
206     {\r
207         int undefined[] = {0xfff1, 0xfff7, 0xfa6b};\r
208         int defined[] = {0x523E, 0x004f88, 0x00fffd};\r
209 \r
210         int size = undefined.length;\r
211         for (int i = 0; i < size; i ++)\r
212         {\r
213             if (UCharacter.isDefined(undefined[i]))\r
214             {\r
215                 errln("FAIL \\u" + hex(undefined[i]) +\r
216                             " expected not to be defined");\r
217                 break;\r
218             }\r
219             if (!UCharacter.isDefined(defined[i]))\r
220             {\r
221                 errln("FAIL \\u" + hex(defined[i]) + " expected defined");\r
222                 break;\r
223             }\r
224         }\r
225     }\r
226 \r
227     /**\r
228     * Tests for base characters and their cellwidth\r
229     */\r
230     public void TestBase()\r
231     {\r
232         int base[] = {0x0061, 0x000031, 0x0003d2};\r
233         int nonbase[] = {0x002B, 0x000020, 0x00203B};\r
234         int size = base.length;\r
235         for (int i = 0; i < size; i ++)\r
236         {\r
237             if (UCharacter.isBaseForm(nonbase[i]))\r
238             {\r
239                 errln("FAIL \\u" + hex(nonbase[i]) +\r
240                             " expected not to be a base character");\r
241                 break;\r
242             }\r
243             if (!UCharacter.isBaseForm(base[i]))\r
244             {\r
245                 errln("FAIL \\u" + hex(base[i]) +\r
246                       " expected to be a base character");\r
247                 break;\r
248             }\r
249         }\r
250     }\r
251 \r
252     /**\r
253     * Tests for digit characters\r
254     */\r
255     public void TestDigits()\r
256     {\r
257         int digits[] = {0x0030, 0x000662, 0x000F23, 0x000ED5, 0x002160};\r
258 \r
259         //special characters not in the properties table\r
260         int digits2[] = {0x3007, 0x004e00, 0x004e8c, 0x004e09, 0x0056d8,\r
261                          0x004e94, 0x00516d, 0x4e03, 0x00516b, 0x004e5d};\r
262         int nondigits[] = {0x0010, 0x000041, 0x000122, 0x0068FE};\r
263 \r
264         int digitvalues[] = {0, 2, 3, 5, 1};\r
265         int digitvalues2[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9};\r
266 \r
267         int size  = digits.length;\r
268         for (int i = 0; i < size; i ++) {\r
269             if (UCharacter.isDigit(digits[i]) &&\r
270                 UCharacter.digit(digits[i]) != digitvalues[i])\r
271             {\r
272                 errln("FAIL \\u" + hex(digits[i]) +\r
273                         " expected digit with value " + digitvalues[i]);\r
274                 break;\r
275             }\r
276         }\r
277         size = nondigits.length;\r
278         for (int i = 0; i < size; i ++)\r
279             if (UCharacter.isDigit(nondigits[i]))\r
280             {\r
281                 errln("FAIL \\u" + hex(nondigits[i]) + " expected nondigit");\r
282                 break;\r
283             }\r
284 \r
285         size = digits2.length;\r
286         for (int i = 0; i < 10; i ++) {\r
287             if (UCharacter.isDigit(digits2[i]) &&\r
288                 UCharacter.digit(digits2[i]) != digitvalues2[i])\r
289             {\r
290                 errln("FAIL \\u" + hex(digits2[i]) +\r
291                     " expected digit with value " + digitvalues2[i]);\r
292                 break;\r
293             }\r
294         }\r
295     }\r
296 \r
297     /**\r
298     *  Tests for numeric characters\r
299     */\r
300     public void TestNumeric()\r
301     {\r
302         if (UCharacter.getNumericValue(0x00BC) != -2) {\r
303             errln("Numeric value of 0x00BC expected to be -2");\r
304         }\r
305 \r
306         for (int i = '0'; i < '9'; i ++) {\r
307             int n1 = UCharacter.getNumericValue(i);\r
308             double n2 = UCharacter.getUnicodeNumericValue(i);\r
309             if (n1 != n2 ||  n1 != (i - '0')) {\r
310                 errln("Numeric value of " + (char)i + " expected to be " +\r
311                       (i - '0'));\r
312             }\r
313         }\r
314         for (int i = 'A'; i < 'F'; i ++) {\r
315             int n1 = UCharacter.getNumericValue(i);\r
316             double n2 = UCharacter.getUnicodeNumericValue(i);\r
317             if (n2 != UCharacter.NO_NUMERIC_VALUE ||  n1 != (i - 'A' + 10)) {\r
318                 errln("Numeric value of " + (char)i + " expected to be " +\r
319                       (i - 'A' + 10));\r
320             }\r
321         }\r
322         for (int i = 0xFF21; i < 0xFF26; i ++) {\r
323             // testing full wideth latin characters A-F\r
324             int n1 = UCharacter.getNumericValue(i);\r
325             double n2 = UCharacter.getUnicodeNumericValue(i);\r
326             if (n2 != UCharacter.NO_NUMERIC_VALUE ||  n1 != (i - 0xFF21 + 10)) {\r
327                 errln("Numeric value of " + (char)i + " expected to be " +\r
328                       (i - 0xFF21 + 10));\r
329             }\r
330         }\r
331         // testing han numbers\r
332         int han[] = {0x96f6, 0, 0x58f9, 1, 0x8cb3, 2, 0x53c3, 3,\r
333                      0x8086, 4, 0x4f0d, 5, 0x9678, 6, 0x67d2, 7,\r
334                      0x634c, 8, 0x7396, 9, 0x5341, 10, 0x62fe, 10,\r
335                      0x767e, 100, 0x4f70, 100, 0x5343, 1000, 0x4edf, 1000,\r
336                      0x824c, 10000, 0x5104, 100000000};\r
337         for (int i = 0; i < han.length; i += 2) {\r
338             if (UCharacter.getHanNumericValue(han[i]) != han[i + 1]) {\r
339                 errln("Numeric value of \\u" +\r
340                       Integer.toHexString(han[i]) + " expected to be " +\r
341                       han[i + 1]);\r
342             }\r
343         }\r
344     }\r
345 \r
346     /**\r
347     * Tests for version\r
348     */\r
349     public void TestVersion()\r
350     {\r
351         if (!UCharacter.getUnicodeVersion().equals(VERSION_))\r
352             errln("FAIL expected: " + VERSION_ + "got: " + UCharacter.getUnicodeVersion());\r
353     }\r
354 \r
355     /**\r
356     * Tests for control characters\r
357     */\r
358     public void TestISOControl()\r
359     {\r
360         int control[] = {0x001b, 0x000097, 0x000082};\r
361         int noncontrol[] = {0x61, 0x000031, 0x0000e2};\r
362 \r
363         int size = control.length;\r
364         for (int i = 0; i < size; i ++)\r
365         {\r
366             if (!UCharacter.isISOControl(control[i]))\r
367             {\r
368                 errln("FAIL 0x" + Integer.toHexString(control[i]) +\r
369                         " expected to be a control character");\r
370                 break;\r
371             }\r
372             if (UCharacter.isISOControl(noncontrol[i]))\r
373             {\r
374                 errln("FAIL 0x" + Integer.toHexString(noncontrol[i]) +\r
375                         " expected to be not a control character");\r
376                 break;\r
377             }\r
378 \r
379             logln("Ok    0x" + Integer.toHexString(control[i]) + " and 0x" +\r
380                     Integer.toHexString(noncontrol[i]));\r
381         }\r
382     }\r
383 \r
384     /**\r
385      * Test Supplementary\r
386      */\r
387     public void TestSupplementary()\r
388     {\r
389         for (int i = 0; i < 0x10000; i ++) {\r
390             if (UCharacter.isSupplementary(i)) {\r
391                 errln("Codepoint \\u" + Integer.toHexString(i) +\r
392                       " is not supplementary");\r
393             }\r
394         }\r
395         for (int i = 0x10000; i < 0x10FFFF; i ++) {\r
396             if (!UCharacter.isSupplementary(i)) {\r
397                 errln("Codepoint \\u" + Integer.toHexString(i) +\r
398                       " is supplementary");\r
399             }\r
400         }\r
401     }\r
402 \r
403     /**\r
404      * Test mirroring\r
405      */\r
406     public void TestMirror()\r
407     {\r
408         if (!(UCharacter.isMirrored(0x28) && UCharacter.isMirrored(0xbb) &&\r
409               UCharacter.isMirrored(0x2045) && UCharacter.isMirrored(0x232a)\r
410               && !UCharacter.isMirrored(0x27) &&\r
411               !UCharacter.isMirrored(0x61) && !UCharacter.isMirrored(0x284)\r
412               && !UCharacter.isMirrored(0x3400))) {\r
413             errln("isMirrored() does not work correctly");\r
414         }\r
415 \r
416         if (!(UCharacter.getMirror(0x3c) == 0x3e &&\r
417               UCharacter.getMirror(0x5d) == 0x5b &&\r
418               UCharacter.getMirror(0x208d) == 0x208e &&\r
419               UCharacter.getMirror(0x3017) == 0x3016 &&\r
420 \r
421               UCharacter.getMirror(0xbb) == 0xab &&\r
422               UCharacter.getMirror(0x2215) == 0x29F5 &&\r
423               UCharacter.getMirror(0x29F5) == 0x2215 && /* large delta between the code points */\r
424 \r
425               UCharacter.getMirror(0x2e) == 0x2e &&\r
426               UCharacter.getMirror(0x6f3) == 0x6f3 &&\r
427               UCharacter.getMirror(0x301c) == 0x301c &&\r
428               UCharacter.getMirror(0xa4ab) == 0xa4ab &&\r
429 \r
430               /* see Unicode Corrigendum #6 at http://www.unicode.org/versions/corrigendum6.html */\r
431               UCharacter.getMirror(0x2018) == 0x2018 &&\r
432               UCharacter.getMirror(0x201b) == 0x201b &&\r
433               UCharacter.getMirror(0x301d) == 0x301d)) {\r
434             errln("getMirror() does not work correctly");\r
435         }\r
436 \r
437         /* verify that Bidi_Mirroring_Glyph roundtrips */\r
438         UnicodeSet set=new UnicodeSet("[:Bidi_Mirrored:]");\r
439         UnicodeSetIterator iter=new UnicodeSetIterator(set);\r
440         int start, end, c2, c3;\r
441         while(iter.nextRange() && (start=iter.codepoint)>=0) {\r
442             end=iter.codepointEnd;\r
443             do {\r
444                 c2=UCharacter.getMirror(start);\r
445                 c3=UCharacter.getMirror(c2);\r
446                 if(c3!=start) {\r
447                     errln("getMirror() does not roundtrip: U+"+hex(start)+"->U+"+hex(c2)+"->U+"+hex(c3));\r
448                 }\r
449             } while(++start<=end);\r
450         }\r
451 \r
452         // verify that Unicode Corrigendum #6 reverts mirrored status of the following\r
453         if (UCharacter.isMirrored(0x2018) ||\r
454             UCharacter.isMirrored(0x201d) ||\r
455             UCharacter.isMirrored(0x201f) ||\r
456             UCharacter.isMirrored(0x301e)) {\r
457             errln("Unicode Corrigendum #6 conflict, one or more of 2018/201d/201f/301e has mirrored property");\r
458         }\r
459     }\r
460 \r
461     /**\r
462     * Tests for printable characters\r
463     */\r
464     public void TestPrint()\r
465     {\r
466         int printable[] = {0x0042, 0x00005f, 0x002014};\r
467         int nonprintable[] = {0x200c, 0x00009f, 0x00001b};\r
468 \r
469         int size = printable.length;\r
470         for (int i = 0; i < size; i ++)\r
471         {\r
472             if (!UCharacter.isPrintable(printable[i]))\r
473             {\r
474                 errln("FAIL \\u" + hex(printable[i]) +\r
475                     " expected to be a printable character");\r
476                 break;\r
477             }\r
478             if (UCharacter.isPrintable(nonprintable[i]))\r
479             {\r
480                 errln("FAIL \\u" + hex(nonprintable[i]) +\r
481                         " expected not to be a printable character");\r
482                 break;\r
483             }\r
484             logln("Ok    \\u" + hex(printable[i]) + " and \\u" +\r
485                     hex(nonprintable[i]));\r
486         }\r
487 \r
488         // test all ISO 8 controls\r
489         for (int ch = 0; ch <= 0x9f; ++ ch) {\r
490             if (ch == 0x20) {\r
491                 // skip ASCII graphic characters and continue with DEL\r
492                 ch = 0x7f;\r
493             }\r
494             if (UCharacter.isPrintable(ch)) {\r
495                 errln("Fail \\u" + hex(ch) +\r
496                     " is a ISO 8 control character hence not printable\n");\r
497             }\r
498         }\r
499 \r
500         /* test all Latin-1 graphic characters */\r
501         for (int ch = 0x20; ch <= 0xff; ++ ch) {\r
502             if (ch == 0x7f) {\r
503                 ch = 0xa0;\r
504             }\r
505             if (!UCharacter.isPrintable(ch)\r
506                 && ch != 0x00AD/* Unicode 4.0 changed the defintion of soft hyphen to be a Cf*/) {\r
507                 errln("Fail \\u" + hex(ch) +\r
508                       " is a Latin-1 graphic character\n");\r
509             }\r
510         }\r
511     }\r
512 \r
513     /**\r
514     * Testing for identifier characters\r
515     */\r
516     public void TestIdentifier()\r
517     {\r
518         int unicodeidstart[] = {0x0250, 0x0000e2, 0x000061};\r
519         int nonunicodeidstart[] = {0x2000, 0x00000a, 0x002019};\r
520         int unicodeidpart[] = {0x005f, 0x000032, 0x000045};\r
521         int nonunicodeidpart[] = {0x2030, 0x0000a3, 0x000020};\r
522         int idignore[] = {0x0006, 0x0010, 0x206b};\r
523         int nonidignore[] = {0x0075, 0x0000a3, 0x000061};\r
524 \r
525         int size = unicodeidstart.length;\r
526         for (int i = 0; i < size; i ++)\r
527         {\r
528             if (!UCharacter.isUnicodeIdentifierStart(unicodeidstart[i]))\r
529             {\r
530                 errln("FAIL \\u" + hex(unicodeidstart[i]) +\r
531                     " expected to be a unicode identifier start character");\r
532                 break;\r
533             }\r
534             if (UCharacter.isUnicodeIdentifierStart(nonunicodeidstart[i]))\r
535             {\r
536                 errln("FAIL \\u" + hex(nonunicodeidstart[i]) +\r
537                         " expected not to be a unicode identifier start " +\r
538                         "character");\r
539                 break;\r
540             }\r
541             if (!UCharacter.isUnicodeIdentifierPart(unicodeidpart[i]))\r
542             {\r
543                 errln("FAIL \\u" + hex(unicodeidpart[i]) +\r
544                     " expected to be a unicode identifier part character");\r
545                 break;\r
546             }\r
547             if (UCharacter.isUnicodeIdentifierPart(nonunicodeidpart[i]))\r
548             {\r
549                 errln("FAIL \\u" + hex(nonunicodeidpart[i]) +\r
550                         " expected not to be a unicode identifier part " +\r
551                         "character");\r
552                 break;\r
553             }\r
554             if (!UCharacter.isIdentifierIgnorable(idignore[i]))\r
555             {\r
556                 errln("FAIL \\u" + hex(idignore[i]) +\r
557                         " expected to be a ignorable unicode character");\r
558                 break;\r
559             }\r
560             if (UCharacter.isIdentifierIgnorable(nonidignore[i]))\r
561             {\r
562                 errln("FAIL \\u" + hex(nonidignore[i]) +\r
563                     " expected not to be a ignorable unicode character");\r
564                 break;\r
565             }\r
566             logln("Ok    \\u" + hex(unicodeidstart[i]) + " and \\u" +\r
567                     hex(nonunicodeidstart[i]) + " and \\u" +\r
568                     hex(unicodeidpart[i]) + " and \\u" +\r
569                     hex(nonunicodeidpart[i]) + " and \\u" +\r
570                     hex(idignore[i]) + " and \\u" + hex(nonidignore[i]));\r
571         }\r
572     }\r
573 \r
574     /**\r
575     * Tests for the character types, direction.<br>\r
576     * This method reads in UnicodeData.txt file for testing purposes. A\r
577     * default path is provided relative to the src path, however the user\r
578     * could set a system property to change the directory path.<br>\r
579     * e.g. java -DUnicodeData="data_directory_path"\r
580     * com.ibm.icu.dev.test.lang.UCharacterTest\r
581     */\r
582     public void TestUnicodeData()\r
583     {\r
584         // this is the 2 char category types used in the UnicodeData file\r
585         final String TYPE =\r
586             "LuLlLtLmLoMnMeMcNdNlNoZsZlZpCcCfCoCsPdPsPePcPoSmScSkSoPiPf";\r
587 \r
588         // directory types used in the UnicodeData file\r
589         // padded by spaces to make each type size 4\r
590         final String DIR =\r
591             "L   R   EN  ES  ET  AN  CS  B   S   WS  ON  LRE LRO AL  RLE RLO PDF NSM BN  ";\r
592 \r
593         final int LASTUNICODECHAR = 0xFFFD;\r
594         int ch = 0,\r
595             index = 0,\r
596             type = 0,\r
597             dir = 0;\r
598 \r
599         try\r
600         {\r
601             BufferedReader input = TestUtil.getDataReader(\r
602                                                 "unicode/UnicodeData.txt");\r
603             int numErrors = 0;\r
604 \r
605             while (ch != LASTUNICODECHAR)\r
606             {\r
607                 String s = input.readLine();\r
608                 if(s.length()<4 || s.startsWith("#")) {\r
609                     continue;\r
610                 }\r
611                 // geting the unicode character, its type and its direction\r
612                 ch = Integer.parseInt(s.substring(0, 4), 16);\r
613                 index = s.indexOf(';', 5);\r
614                 String t = s.substring(index + 1, index + 3);\r
615                 index += 4;\r
616                 int oldindex = index;\r
617                 index = s.indexOf(';', index);\r
618                 int cc = Integer.parseInt(s.substring(oldindex, index));\r
619                 oldindex = index + 1;\r
620                 index = s.indexOf(';', oldindex);\r
621                 String d = s.substring(oldindex, index);\r
622 \r
623                 for (int i = 0; i < 6; i ++) {\r
624                     index = s.indexOf(';', index + 1);\r
625                     // skipping to the 11th field\r
626                 }\r
627                 // iso comment\r
628                 oldindex = index + 1;\r
629                 index = s.indexOf(';', oldindex);\r
630                 String isocomment = s.substring(oldindex, index);\r
631                 // uppercase\r
632                 oldindex = index + 1;\r
633                 index = s.indexOf(';', oldindex);\r
634                 String upper = s.substring(oldindex, index);\r
635                 // lowercase\r
636                 oldindex = index + 1;\r
637                 index = s.indexOf(';', oldindex);\r
638                 String lower = s.substring(oldindex, index);\r
639                 // titlecase last element\r
640                 oldindex = index + 1;\r
641                 String title = s.substring(oldindex);\r
642 \r
643                 // testing the category\r
644                 // we override the general category of some control\r
645                 // characters\r
646                 type = TYPE.indexOf(t);\r
647                 if (type < 0)\r
648                     type = 0;\r
649                 else\r
650                     type = (type >> 1) + 1;\r
651                 if (UCharacter.getType(ch) != type)\r
652                 {\r
653                     errln("FAIL \\u" + hex(ch) + " expected type " +\r
654                             type);\r
655                     break;\r
656                 }\r
657 \r
658                 if (UCharacter.getIntPropertyValue(ch,\r
659                            UProperty.GENERAL_CATEGORY_MASK) != (1 << type)) {\r
660                     errln("error: getIntPropertyValue(\\u" +\r
661                           Integer.toHexString(ch) +\r
662                           ", UProperty.GENERAL_CATEGORY_MASK) != " +\r
663                           "getMask(getType(ch))");\r
664                 }\r
665 \r
666                 // testing combining class\r
667                 if (UCharacter.getCombiningClass(ch) != cc)\r
668                 {\r
669                     errln("FAIL \\u" + hex(ch) + " expected combining " +\r
670                             "class " + cc);\r
671                     break;\r
672                 }\r
673 \r
674                 // testing the direction\r
675                 if (d.length() == 1)\r
676                     d = d + "   ";\r
677 \r
678                 dir = DIR.indexOf(d) >> 2;\r
679                 if (UCharacter.getDirection(ch) != dir)\r
680                 {\r
681                     errln("FAIL \\u" + hex(ch) +\r
682                         " expected direction " + dir + " but got " +\r
683               UCharacter.getDirection(ch));\r
684                     break;\r
685                 }\r
686 \r
687                 byte bdir = (byte)dir;\r
688                 if (UCharacter.getDirectionality(ch) != bdir)\r
689                 {\r
690                     errln("FAIL \\u" + hex(ch) +\r
691                         " expected directionality " + bdir + " but got " +\r
692               UCharacter.getDirectionality(ch));\r
693                     break;\r
694                 }\r
695 \r
696                 // testing iso comment\r
697                 try{\r
698                     String comment = UCharacter.getISOComment(ch);\r
699                     if (comment == null) {\r
700                         comment = "";\r
701                     }\r
702                     if (!comment.equals(isocomment)) {\r
703                         errln("FAIL \\u" + hex(ch) +\r
704                             " expected iso comment " + isocomment);\r
705                         break;\r
706                     }\r
707                 }catch(Exception e){\r
708                     if(e.getMessage().indexOf("unames.icu") >= 0){\r
709                         numErrors++;\r
710                     }else{\r
711                         throw e;\r
712                     }\r
713                 }\r
714 \r
715                 int tempchar = ch;\r
716                 if (upper.length() > 0) {\r
717                     tempchar = Integer.parseInt(upper, 16);\r
718                 }\r
719                 if (UCharacter.toUpperCase(ch) != tempchar) {\r
720                     errln("FAIL \\u" + Utility.hex(ch, 4)\r
721                             + " expected uppercase \\u"\r
722                             + Utility.hex(tempchar, 4));\r
723                     break;\r
724                 }\r
725                 tempchar = ch;\r
726                 if (lower.length() > 0) {\r
727                     tempchar = Integer.parseInt(lower, 16);\r
728                 }\r
729                 if (UCharacter.toLowerCase(ch) != tempchar) {\r
730                     errln("FAIL \\u" + Utility.hex(ch, 4)\r
731                             + " expected lowercase \\u"\r
732                             + Utility.hex(tempchar, 4));\r
733                     break;\r
734                 }\r
735                 tempchar = ch;\r
736                 if (title.length() > 0) {\r
737                     tempchar = Integer.parseInt(title, 16);\r
738                 }\r
739                 if (UCharacter.toTitleCase(ch) != tempchar) {\r
740                     errln("FAIL \\u" + Utility.hex(ch, 4)\r
741                             + " expected titlecase \\u"\r
742                             + Utility.hex(tempchar, 4));\r
743                     break;\r
744                 }\r
745             }\r
746             input.close();\r
747             if(numErrors > 0){\r
748                 warnln("Could not find unames.icu");\r
749             }\r
750         }\r
751         catch (Exception e)\r
752         {\r
753             e.printStackTrace();\r
754         }\r
755 \r
756 \r
757         if (UCharacter.UnicodeBlock.of(0x0041)\r
758                                         != UCharacter.UnicodeBlock.BASIC_LATIN\r
759             || UCharacter.getIntPropertyValue(0x41, UProperty.BLOCK)\r
760                               != UCharacter.UnicodeBlock.BASIC_LATIN.getID()) {\r
761             errln("UCharacter.UnicodeBlock.of(\\u0041) property failed! "\r
762                     + "Expected : "\r
763                     + UCharacter.UnicodeBlock.BASIC_LATIN.getID() + " got "\r
764                     + UCharacter.UnicodeBlock.of(0x0041));\r
765         }\r
766 \r
767         // sanity check on repeated properties\r
768         for (ch = 0xfffe; ch <= 0x10ffff;) {\r
769             type = UCharacter.getType(ch);\r
770             if (UCharacter.getIntPropertyValue(ch,\r
771                                                UProperty.GENERAL_CATEGORY_MASK)\r
772                 != (1 << type)) {\r
773                 errln("error: UCharacter.getIntPropertyValue(\\u"\r
774                       + Integer.toHexString(ch)\r
775                       + ", UProperty.GENERAL_CATEGORY_MASK) != "\r
776                       + "getMask(getType())");\r
777             }\r
778             if (type != UCharacterCategory.UNASSIGNED) {\r
779                 errln("error: UCharacter.getType(\\u" + Utility.hex(ch, 4)\r
780                         + " != UCharacterCategory.UNASSIGNED (returns "\r
781                         + UCharacterCategory.toString(UCharacter.getType(ch))\r
782                         + ")");\r
783             }\r
784             if ((ch & 0xffff) == 0xfffe) {\r
785                 ++ ch;\r
786             }\r
787             else {\r
788                 ch += 0xffff;\r
789             }\r
790         }\r
791 \r
792         // test that PUA is not "unassigned"\r
793         for(ch = 0xe000; ch <= 0x10fffd;) {\r
794             type = UCharacter.getType(ch);\r
795             if (UCharacter.getIntPropertyValue(ch,\r
796                                                UProperty.GENERAL_CATEGORY_MASK)\r
797                 != (1 << type)) {\r
798                 errln("error: UCharacter.getIntPropertyValue(\\u"\r
799                       + Integer.toHexString(ch)\r
800                       + ", UProperty.GENERAL_CATEGORY_MASK) != "\r
801                       + "getMask(getType())");\r
802             }\r
803 \r
804             if (type == UCharacterCategory.UNASSIGNED) {\r
805                 errln("error: UCharacter.getType(\\u"\r
806                         + Utility.hex(ch, 4)\r
807                         + ") == UCharacterCategory.UNASSIGNED");\r
808             }\r
809             else if (type != UCharacterCategory.PRIVATE_USE) {\r
810                 logln("PUA override: UCharacter.getType(\\u"\r
811                       + Utility.hex(ch, 4) + ")=" + type);\r
812             }\r
813             if (ch == 0xf8ff) {\r
814                 ch = 0xf0000;\r
815             }\r
816             else if (ch == 0xffffd) {\r
817                 ch = 0x100000;\r
818             }\r
819             else {\r
820                 ++ ch;\r
821             }\r
822         }\r
823     }\r
824 \r
825 \r
826     /**\r
827     * Test for the character names\r
828     */\r
829     public void TestNames()\r
830     {\r
831         try{\r
832             int length = UCharacterName.getInstance().getMaxCharNameLength();\r
833             if (length < 83) { // Unicode 3.2 max char name length\r
834                errln("getMaxCharNameLength()=" + length + " is too short");\r
835             }\r
836             // ### TODO same tests for max ISO comment length as for max name length\r
837 \r
838             int c[] = {0x0061,                //LATIN SMALL LETTER A\r
839                        0x000284,              //LATIN SMALL LETTER DOTLESS J WITH STROKE AND HOOK\r
840                        0x003401,              //CJK UNIFIED IDEOGRAPH-3401\r
841                        0x007fed,              //CJK UNIFIED IDEOGRAPH-7FED\r
842                        0x00ac00,              //HANGUL SYLLABLE GA\r
843                        0x00d7a3,              //HANGUL SYLLABLE HIH\r
844                        0x00d800, 0x00dc00,    //LINEAR B SYLLABLE B008 A\r
845                        0xff08,                //FULLWIDTH LEFT PARENTHESIS\r
846                        0x00ffe5,              //FULLWIDTH YEN SIGN\r
847                        0x00ffff,              //null\r
848                        0x0023456              //CJK UNIFIED IDEOGRAPH-23456\r
849                        };\r
850             String name[] = {\r
851                              "LATIN SMALL LETTER A",\r
852                              "LATIN SMALL LETTER DOTLESS J WITH STROKE AND HOOK",\r
853                              "CJK UNIFIED IDEOGRAPH-3401",\r
854                              "CJK UNIFIED IDEOGRAPH-7FED",\r
855                              "HANGUL SYLLABLE GA",\r
856                              "HANGUL SYLLABLE HIH",\r
857                              "",\r
858                              "",\r
859                              "FULLWIDTH LEFT PARENTHESIS",\r
860                              "FULLWIDTH YEN SIGN",\r
861                              "",\r
862                              "CJK UNIFIED IDEOGRAPH-23456"\r
863                              };\r
864             String oldname[] = {"", "LATIN SMALL LETTER DOTLESS J BAR HOOK", "",\r
865                             "",\r
866                             "", "", "", "", "FULLWIDTH OPENING PARENTHESIS", "",\r
867                             "", ""};\r
868             String extendedname[] = {"LATIN SMALL LETTER A",\r
869                                  "LATIN SMALL LETTER DOTLESS J WITH STROKE AND HOOK",\r
870                                  "CJK UNIFIED IDEOGRAPH-3401",\r
871                                  "CJK UNIFIED IDEOGRAPH-7FED",\r
872                                  "HANGUL SYLLABLE GA",\r
873                                  "HANGUL SYLLABLE HIH",\r
874                                  "<lead surrogate-D800>",\r
875                                  "<trail surrogate-DC00>",\r
876                                  "FULLWIDTH LEFT PARENTHESIS",\r
877                                  "FULLWIDTH YEN SIGN",\r
878                                  "<noncharacter-FFFF>",\r
879                                  "CJK UNIFIED IDEOGRAPH-23456"};\r
880 \r
881             int size = c.length;\r
882             String str;\r
883             int uc;\r
884 \r
885             for (int i = 0; i < size; i ++)\r
886             {\r
887                 // modern Unicode character name\r
888                 str = UCharacter.getName(c[i]);\r
889                 if ((str == null && name[i].length() > 0) ||\r
890                     (str != null && !str.equals(name[i])))\r
891                 {\r
892                     errln("FAIL \\u" + hex(c[i]) + " expected name " +\r
893                             name[i]);\r
894                     break;\r
895                 }\r
896 \r
897                 // 1.0 Unicode character name\r
898                 str = UCharacter.getName1_0(c[i]);\r
899                 if ((str == null && oldname[i].length() > 0) ||\r
900                     (str != null && !str.equals(oldname[i])))\r
901                 {\r
902                     errln("FAIL \\u" + hex(c[i]) + " expected 1.0 name " +\r
903                             oldname[i]);\r
904                     break;\r
905                 }\r
906 \r
907                 // extended character name\r
908                 str = UCharacter.getExtendedName(c[i]);\r
909                 if (str == null || !str.equals(extendedname[i]))\r
910                 {\r
911                     errln("FAIL \\u" + hex(c[i]) + " expected extended name " +\r
912                             extendedname[i]);\r
913                     break;\r
914                 }\r
915 \r
916                 // retrieving unicode character from modern name\r
917                 uc = UCharacter.getCharFromName(name[i]);\r
918                 if (uc != c[i] && name[i].length() != 0)\r
919                 {\r
920                     errln("FAIL " + name[i] + " expected character \\u" +\r
921                           hex(c[i]));\r
922                     break;\r
923                 }\r
924 \r
925                 //retrieving unicode character from 1.0 name\r
926                 uc = UCharacter.getCharFromName1_0(oldname[i]);\r
927                 if (uc != c[i] && oldname[i].length() != 0)\r
928                 {\r
929                     errln("FAIL " + oldname[i] + " expected 1.0 character \\u" +\r
930                           hex(c[i]));\r
931                     break;\r
932                 }\r
933 \r
934                 //retrieving unicode character from 1.0 name\r
935                 uc = UCharacter.getCharFromExtendedName(extendedname[i]);\r
936                 if (uc != c[i] && i != 0 && (i == 1 || i == 6))\r
937                 {\r
938                     errln("FAIL " + extendedname[i] +\r
939                           " expected extended character \\u" + hex(c[i]));\r
940                     break;\r
941                 }\r
942             }\r
943 \r
944             // test getName works with mixed-case names (new in 2.0)\r
945             if (0x61 != UCharacter.getCharFromName("LATin smALl letTER A")) {\r
946                 errln("FAIL: 'LATin smALl letTER A' should result in character "\r
947                       + "U+0061");\r
948             }\r
949 \r
950             if (getInclusion() >= 5) {\r
951                 // extra testing different from icu\r
952                 for (int i = UCharacter.MIN_VALUE; i < UCharacter.MAX_VALUE; i ++)\r
953                 {\r
954                     str = UCharacter.getName(i);\r
955                     if (str != null && UCharacter.getCharFromName(str) != i)\r
956                     {\r
957                         errln("FAIL \\u" + hex(i) + " " + str  +\r
958                                             " retrieval of name and vice versa" );\r
959                         break;\r
960                     }\r
961                 }\r
962             }\r
963 \r
964             // Test getCharNameCharacters\r
965             if (getInclusion() >= 10) {\r
966                 boolean map[] = new boolean[256];\r
967 \r
968                 UnicodeSet set = new UnicodeSet(1, 0); // empty set\r
969                 UnicodeSet dumb = new UnicodeSet(1, 0); // empty set\r
970 \r
971                 // uprv_getCharNameCharacters() will likely return more lowercase\r
972                 // letters than actual character names contain because\r
973                 // it includes all the characters in lowercased names of\r
974                 // general categories, for the full possible set of extended names.\r
975                 UCharacterName.getInstance().getCharNameCharacters(set);\r
976 \r
977                 // build set the dumb (but sure-fire) way\r
978                 Arrays.fill(map, false);\r
979 \r
980                 int maxLength = 0;\r
981                 for (int cp = 0; cp < 0x110000; ++ cp) {\r
982                     String n = UCharacter.getExtendedName(cp);\r
983                     int len = n.length();\r
984                     if (len > maxLength) {\r
985                         maxLength = len;\r
986                     }\r
987 \r
988                     for (int i = 0; i < len; ++ i) {\r
989                         char ch = n.charAt(i);\r
990                         if (!map[ch & 0xff]) {\r
991                             dumb.add(ch);\r
992                             map[ch & 0xff] = true;\r
993                         }\r
994                     }\r
995                 }\r
996 \r
997                 length = UCharacterName.getInstance().getMaxCharNameLength();\r
998                 if (length != maxLength) {\r
999                     errln("getMaxCharNameLength()=" + length\r
1000                           + " differs from the maximum length " + maxLength\r
1001                           + " of all extended names");\r
1002                 }\r
1003 \r
1004                 // compare the sets.  Where is my uset_equals?!!\r
1005                 boolean ok = true;\r
1006                 for (int i = 0; i < 256; ++ i) {\r
1007                     if (set.contains(i) != dumb.contains(i)) {\r
1008                         if (0x61 <= i && i <= 0x7a // a-z\r
1009                             && set.contains(i) && !dumb.contains(i)) {\r
1010                             // ignore lowercase a-z that are in set but not in dumb\r
1011                             ok = true;\r
1012                         }\r
1013                         else {\r
1014                             ok = false;\r
1015                             break;\r
1016                         }\r
1017                     }\r
1018                 }\r
1019 \r
1020                 String pattern1 = set.toPattern(true);\r
1021                 String pattern2 = dumb.toPattern(true);\r
1022 \r
1023                 if (!ok) {\r
1024                     errln("FAIL: getCharNameCharacters() returned " + pattern1\r
1025                           + " expected " + pattern2\r
1026                           + " (too many lowercase a-z are ok)");\r
1027                 } else {\r
1028                     logln("Ok: getCharNameCharacters() returned " + pattern1);\r
1029                 }\r
1030             }\r
1031             // improve code coverage\r
1032            String expected = "LATIN SMALL LETTER A|LATIN SMALL LETTER DOTLESS J WITH STROKE AND HOOK|"+\r
1033                              "CJK UNIFIED IDEOGRAPH-3401|CJK UNIFIED IDEOGRAPH-7FED|HANGUL SYLLABLE GA|"+\r
1034                              "HANGUL SYLLABLE HIH|LINEAR B SYLLABLE B008 A|FULLWIDTH LEFT PARENTHESIS|"+\r
1035                              "FULLWIDTH YEN SIGN|"+\r
1036                              "null|"+ // getName returns null because 0xFFFF does not have a name, but has an extended name!\r
1037                              "CJK UNIFIED IDEOGRAPH-23456";\r
1038            String separator= "|";\r
1039            String source = Utility.valueOf(c);\r
1040            String result = UCharacter.getName(source, separator);\r
1041            if(!result.equals(expected)){\r
1042                errln("UCharacter.getName did not return the expected result.\n\t Expected: "+ expected+"\n\t Got: "+ result);\r
1043            }\r
1044 \r
1045         }catch(IllegalArgumentException e){\r
1046             if(e.getMessage().indexOf("unames.icu") >= 0){\r
1047                 warnln("Could not find unames.icu");\r
1048             }else{\r
1049                 throw e;\r
1050             }\r
1051         }\r
1052 \r
1053     }\r
1054 \r
1055 \r
1056     /**\r
1057     * Testing name iteration\r
1058     */\r
1059     public void TestNameIteration()throws Exception\r
1060     {\r
1061         try {\r
1062             ValueIterator iterator = UCharacter.getExtendedNameIterator();\r
1063             ValueIterator.Element element = new ValueIterator.Element();\r
1064             ValueIterator.Element old     = new ValueIterator.Element();\r
1065             // testing subrange\r
1066             iterator.setRange(-10, -5);\r
1067             if (iterator.next(element)) {\r
1068                 errln("Fail, expected iterator to return false when range is set outside the meaningful range");\r
1069             }\r
1070             iterator.setRange(0x110000, 0x111111);\r
1071             if (iterator.next(element)) {\r
1072                 errln("Fail, expected iterator to return false when range is set outside the meaningful range");\r
1073             }\r
1074             try {\r
1075                 iterator.setRange(50, 10);\r
1076                 errln("Fail, expected exception when encountered invalid range");\r
1077             } catch (Exception e) {\r
1078             }\r
1079 \r
1080             iterator.setRange(-10, 10);\r
1081             if (!iterator.next(element) || element.integer != 0) {\r
1082                 errln("Fail, expected iterator to return 0 when range start limit is set outside the meaningful range");\r
1083             }\r
1084 \r
1085             iterator.setRange(0x10FFFE, 0x200000);\r
1086             int last = 0;\r
1087             while (iterator.next(element)) {\r
1088                 last = element.integer;\r
1089             }\r
1090             if (last != 0x10FFFF) {\r
1091                 errln("Fail, expected iterator to return 0x10FFFF when range end limit is set outside the meaningful range");\r
1092             }\r
1093 \r
1094             iterator = UCharacter.getNameIterator();\r
1095             iterator.setRange(0xF, 0x45);\r
1096             while (iterator.next(element)) {\r
1097                 if (element.integer <= old.integer) {\r
1098                     errln("FAIL next returned a less codepoint \\u" +\r
1099                         Integer.toHexString(element.integer) + " than \\u" +\r
1100                         Integer.toHexString(old.integer));\r
1101                     break;\r
1102                 }\r
1103                 if (!UCharacter.getName(element.integer).equals(element.value))\r
1104                 {\r
1105                     errln("FAIL next codepoint \\u" +\r
1106                         Integer.toHexString(element.integer) +\r
1107                         " does not have the expected name " +\r
1108                         UCharacter.getName(element.integer) +\r
1109                         " instead have the name " + (String)element.value);\r
1110                     break;\r
1111                 }\r
1112                 old.integer = element.integer;\r
1113             }\r
1114 \r
1115             iterator.reset();\r
1116             iterator.next(element);\r
1117             if (element.integer != 0x20) {\r
1118                 errln("FAIL reset in iterator");\r
1119             }\r
1120 \r
1121             iterator.setRange(0, 0x110000);\r
1122             old.integer = 0;\r
1123             while (iterator.next(element)) {\r
1124                 if (element.integer != 0 && element.integer <= old.integer) {\r
1125                     errln("FAIL next returned a less codepoint \\u" +\r
1126                         Integer.toHexString(element.integer) + " than \\u" +\r
1127                         Integer.toHexString(old.integer));\r
1128                     break;\r
1129                 }\r
1130                 if (!UCharacter.getName(element.integer).equals(element.value))\r
1131                 {\r
1132                     errln("FAIL next codepoint \\u" +\r
1133                             Integer.toHexString(element.integer) +\r
1134                             " does not have the expected name " +\r
1135                             UCharacter.getName(element.integer) +\r
1136                             " instead have the name " + (String)element.value);\r
1137                     break;\r
1138                 }\r
1139                 for (int i = old.integer + 1; i < element.integer; i ++) {\r
1140                     if (UCharacter.getName(i) != null) {\r
1141                         errln("FAIL between codepoints are not null \\u" +\r
1142                                 Integer.toHexString(old.integer) + " and " +\r
1143                                 Integer.toHexString(element.integer) + " has " +\r
1144                                 Integer.toHexString(i) + " with a name " +\r
1145                                 UCharacter.getName(i));\r
1146                         break;\r
1147                     }\r
1148                 }\r
1149                 old.integer = element.integer;\r
1150             }\r
1151 \r
1152             iterator = UCharacter.getExtendedNameIterator();\r
1153             old.integer = 0;\r
1154             while (iterator.next(element)) {\r
1155                 if (element.integer != 0 && element.integer != old.integer) {\r
1156                     errln("FAIL next returned a codepoint \\u" +\r
1157                             Integer.toHexString(element.integer) +\r
1158                             " different from \\u" +\r
1159                             Integer.toHexString(old.integer));\r
1160                     break;\r
1161                 }\r
1162                 if (!UCharacter.getExtendedName(element.integer).equals(\r
1163                                                               element.value)) {\r
1164                     errln("FAIL next codepoint \\u" +\r
1165                         Integer.toHexString(element.integer) +\r
1166                         " name should be "\r
1167                         + UCharacter.getExtendedName(element.integer) +\r
1168                         " instead of " + (String)element.value);\r
1169                     break;\r
1170                 }\r
1171                 old.integer++;\r
1172             }\r
1173             iterator = UCharacter.getName1_0Iterator();\r
1174             old.integer = 0;\r
1175             while (iterator.next(element)) {\r
1176                 logln(Integer.toHexString(element.integer) + " " +\r
1177                                                         (String)element.value);\r
1178                 if (element.integer != 0 && element.integer <= old.integer) {\r
1179                     errln("FAIL next returned a less codepoint \\u" +\r
1180                         Integer.toHexString(element.integer) + " than \\u" +\r
1181                         Integer.toHexString(old.integer));\r
1182                     break;\r
1183                 }\r
1184                 if (!element.value.equals(UCharacter.getName1_0(\r
1185                                                             element.integer))) {\r
1186                     errln("FAIL next codepoint \\u" +\r
1187                             Integer.toHexString(element.integer) +\r
1188                             " name cannot be null");\r
1189                     break;\r
1190                 }\r
1191                 for (int i = old.integer + 1; i < element.integer; i ++) {\r
1192                     if (UCharacter.getName1_0(i) != null) {\r
1193                         errln("FAIL between codepoints are not null \\u" +\r
1194                             Integer.toHexString(old.integer) + " and " +\r
1195                             Integer.toHexString(element.integer) + " has " +\r
1196                             Integer.toHexString(i) + " with a name " +\r
1197                             UCharacter.getName1_0(i));\r
1198                         break;\r
1199                     }\r
1200                 }\r
1201                 old.integer = element.integer;\r
1202             }\r
1203         } catch(Exception e){\r
1204             // !!! wouldn't preflighting be simpler?  This looks like\r
1205             // it is effectively be doing that.  It seems that for every\r
1206             // true error the code will call errln, which will throw the error, which\r
1207             // this will catch, which this will then rethrow the error.  Just seems\r
1208             // cumbersome.\r
1209             if(e.getMessage().indexOf("unames.icu") >= 0){\r
1210                 warnln("Could not find unames.icu");\r
1211             } else {\r
1212                 errln(e.getMessage());\r
1213             }\r
1214         }\r
1215     }\r
1216 \r
1217     /**\r
1218     * Testing the for illegal characters\r
1219     */\r
1220     public void TestIsLegal()\r
1221     {\r
1222         int illegal[] = {0xFFFE, 0x00FFFF, 0x005FFFE, 0x005FFFF, 0x0010FFFE,\r
1223                          0x0010FFFF, 0x110000, 0x00FDD0, 0x00FDDF, 0x00FDE0,\r
1224                          0x00FDEF, 0xD800, 0xDC00, -1};\r
1225         int legal[] = {0x61, 0x00FFFD, 0x0010000, 0x005FFFD, 0x0060000,\r
1226                        0x0010FFFD, 0xFDCF, 0x00FDF0};\r
1227         for (int count = 0; count < illegal.length; count ++) {\r
1228             if (UCharacter.isLegal(illegal[count])) {\r
1229                 errln("FAIL \\u" + hex(illegal[count]) +\r
1230                         " is not a legal character");\r
1231             }\r
1232         }\r
1233 \r
1234         for (int count = 0; count < legal.length; count ++) {\r
1235             if (!UCharacter.isLegal(legal[count])) {\r
1236                 errln("FAIL \\u" + hex(legal[count]) +\r
1237                                                    " is a legal character");\r
1238             }\r
1239         }\r
1240 \r
1241         String illegalStr = "This is an illegal string ";\r
1242         String legalStr = "This is a legal string ";\r
1243 \r
1244         for (int count = 0; count < illegal.length; count ++) {\r
1245             StringBuffer str = new StringBuffer(illegalStr);\r
1246             if (illegal[count] < 0x10000) {\r
1247                 str.append((char)illegal[count]);\r
1248             }\r
1249             else {\r
1250                 char lead = UTF16.getLeadSurrogate(illegal[count]);\r
1251                 char trail = UTF16.getTrailSurrogate(illegal[count]);\r
1252                 str.append(lead);\r
1253                 str.append(trail);\r
1254             }\r
1255             if (UCharacter.isLegal(str.toString())) {\r
1256                 errln("FAIL " + hex(str.toString()) +\r
1257                       " is not a legal string");\r
1258             }\r
1259         }\r
1260 \r
1261         for (int count = 0; count < legal.length; count ++) {\r
1262             StringBuffer str = new StringBuffer(legalStr);\r
1263             if (legal[count] < 0x10000) {\r
1264                 str.append((char)legal[count]);\r
1265             }\r
1266             else {\r
1267                 char lead = UTF16.getLeadSurrogate(legal[count]);\r
1268                 char trail = UTF16.getTrailSurrogate(legal[count]);\r
1269                 str.append(lead);\r
1270                 str.append(trail);\r
1271             }\r
1272             if (!UCharacter.isLegal(str.toString())) {\r
1273                 errln("FAIL " + hex(str.toString()) + " is a legal string");\r
1274             }\r
1275         }\r
1276     }\r
1277 \r
1278     /**\r
1279      * Test getCodePoint\r
1280      */\r
1281     public void TestCodePoint()\r
1282     {\r
1283         int ch = 0x10000;\r
1284         for (char i = 0xD800; i < 0xDC00; i ++) {\r
1285             for (char j = 0xDC00; j <= 0xDFFF; j ++) {\r
1286                 if (UCharacter.getCodePoint(i, j) != ch) {\r
1287                     errln("Error getting codepoint for surrogate " +\r
1288                           "characters \\u"\r
1289                           + Integer.toHexString(i) + " \\u" +\r
1290                           Integer.toHexString(j));\r
1291                 }\r
1292                 ch ++;\r
1293             }\r
1294         }\r
1295         try\r
1296         {\r
1297             UCharacter.getCodePoint((char)0xD7ff, (char)0xDC00);\r
1298             errln("Invalid surrogate characters should not form a " +\r
1299                   "supplementary");\r
1300         } catch(Exception e) {\r
1301         }\r
1302         for (char i = 0; i < 0xFFFF; i++) {\r
1303             if (i == 0xFFFE ||\r
1304                 (i >= 0xD800 && i <= 0xDFFF) ||\r
1305                 (i >= 0xFDD0 && i <= 0xFDEF)) {\r
1306                 // not a character\r
1307                 try {\r
1308                     UCharacter.getCodePoint(i);\r
1309                     errln("Not a character is not a valid codepoint");\r
1310                 } catch (Exception e) {\r
1311                 }\r
1312             }\r
1313             else {\r
1314                 if (UCharacter.getCodePoint(i) != i) {\r
1315                     errln("A valid codepoint should return itself");\r
1316                 }\r
1317             }\r
1318         }\r
1319     }\r
1320 \r
1321     /**\r
1322     * This method is alittle different from the type test in icu4c.\r
1323     * But combined with testUnicodeData, they basically do the same thing.\r
1324     */\r
1325     public void TestIteration()\r
1326     {\r
1327         int limit     = 0;\r
1328         int prevtype  = -1;\r
1329         int shouldBeDir;\r
1330         int test[][]={{0x41, UCharacterCategory.UPPERCASE_LETTER},\r
1331                         {0x308, UCharacterCategory.NON_SPACING_MARK},\r
1332                         {0xfffe, UCharacterCategory.GENERAL_OTHER_TYPES},\r
1333                         {0xe0041, UCharacterCategory.FORMAT},\r
1334                         {0xeffff, UCharacterCategory.UNASSIGNED}};\r
1335 \r
1336         // default Bidi classes for unassigned code points\r
1337         int defaultBidi[][]={{ 0x0590, UCharacterDirection.LEFT_TO_RIGHT },\r
1338             { 0x0600, UCharacterDirection.RIGHT_TO_LEFT },\r
1339             { 0x07C0, UCharacterDirection.RIGHT_TO_LEFT_ARABIC },\r
1340             { 0x0900, UCharacterDirection.RIGHT_TO_LEFT },\r
1341             { 0xFB1D, UCharacterDirection.LEFT_TO_RIGHT },\r
1342             { 0xFB50, UCharacterDirection.RIGHT_TO_LEFT },\r
1343             { 0xFE00, UCharacterDirection.RIGHT_TO_LEFT_ARABIC },\r
1344             { 0xFE70, UCharacterDirection.LEFT_TO_RIGHT },\r
1345             { 0xFF00, UCharacterDirection.RIGHT_TO_LEFT_ARABIC },\r
1346             { 0x10800, UCharacterDirection.LEFT_TO_RIGHT },\r
1347             { 0x11000, UCharacterDirection.RIGHT_TO_LEFT },\r
1348             { 0x110000, UCharacterDirection.LEFT_TO_RIGHT }};\r
1349 \r
1350         RangeValueIterator iterator = UCharacter.getTypeIterator();\r
1351         RangeValueIterator.Element result = new RangeValueIterator.Element();\r
1352         while (iterator.next(result)) {\r
1353             if (result.start != limit) {\r
1354                 errln("UCharacterIteration failed: Ranges not continuous " +\r
1355                         "0x" + Integer.toHexString(result.start));\r
1356             }\r
1357 \r
1358             limit = result.limit;\r
1359             if (result.value == prevtype) {\r
1360                 errln("Type of the next set of enumeration should be different");\r
1361             }\r
1362             prevtype = result.value;\r
1363 \r
1364             for (int i = result.start; i < limit; i ++) {\r
1365                 int temptype = UCharacter.getType(i);\r
1366                 if (temptype != result.value) {\r
1367                     errln("UCharacterIteration failed: Codepoint \\u" +\r
1368                             Integer.toHexString(i) + " should be of type " +\r
1369                             temptype + " not " + result.value);\r
1370                 }\r
1371             }\r
1372 \r
1373             for (int i = 0; i < test.length; ++ i) {\r
1374                 if (result.start <= test[i][0] && test[i][0] < result.limit) {\r
1375                     if (result.value != test[i][1]) {\r
1376                         errln("error: getTypes() has range ["\r
1377                               + Integer.toHexString(result.start) + ", "\r
1378                               + Integer.toHexString(result.limit)\r
1379                               + "] with type " + result.value\r
1380                               + " instead of ["\r
1381                               + Integer.toHexString(test[i][0]) + ", "\r
1382                               + Integer.toHexString(test[i][1]));\r
1383                     }\r
1384                 }\r
1385             }\r
1386 \r
1387             // LineBreak.txt specifies:\r
1388             //   #  - Assigned characters that are not listed explicitly are given the value\r
1389             //   #    "AL".\r
1390             //   #  - Unassigned characters are given the value "XX".\r
1391             //\r
1392             // PUA characters are listed explicitly with "XX".\r
1393             // Verify that no assigned character has "XX".\r
1394             if (result.value != UCharacterCategory.UNASSIGNED\r
1395                 && result.value != UCharacterCategory.PRIVATE_USE) {\r
1396                 int c = result.start;\r
1397                 while (c < result.limit) {\r
1398                     if (0 == UCharacter.getIntPropertyValue(c,\r
1399                                                 UProperty.LINE_BREAK)) {\r
1400                         logln("error UProperty.LINE_BREAK(assigned \\u"\r
1401                               + Utility.hex(c, 4) + ")=XX");\r
1402                     }\r
1403                     ++ c;\r
1404                 }\r
1405             }\r
1406 \r
1407             /*\r
1408              * Verify default Bidi classes.\r
1409              * For recent Unicode versions, see UCD.html.\r
1410              *\r
1411              * For older Unicode versions:\r
1412              * See table 3-7 "Bidirectional Character Types" in UAX #9.\r
1413              * http://www.unicode.org/reports/tr9/\r
1414              *\r
1415              * See also DerivedBidiClass.txt for Cn code points!\r
1416              *\r
1417              * Unicode 4.0.1/Public Review Issue #28 (http://www.unicode.org/review/resolved-pri.html)\r
1418              * changed some default values.\r
1419              * In particular, non-characters and unassigned Default Ignorable Code Points\r
1420              * change from L to BN.\r
1421              *\r
1422              * UCD.html version 4.0.1 does not yet reflect these changes.\r
1423              */\r
1424             if (result.value == UCharacterCategory.UNASSIGNED\r
1425                 || result.value == UCharacterCategory.PRIVATE_USE) {\r
1426                 int c = result.start;\r
1427                 for (int i = 0; i < defaultBidi.length && c < result.limit;\r
1428                      ++ i) {\r
1429                     if (c < defaultBidi[i][0]) {\r
1430                         while (c < result.limit && c < defaultBidi[i][0]) {\r
1431                             // TODO change to public UCharacter.isNonCharacter(c) once it's available\r
1432                             if(com.ibm.icu.impl.UCharacterUtility.isNonCharacter(c) || UCharacter.hasBinaryProperty(c, UProperty.DEFAULT_IGNORABLE_CODE_POINT)) {\r
1433                                 shouldBeDir=UCharacter.BOUNDARY_NEUTRAL;\r
1434                             } else {\r
1435                                 shouldBeDir=defaultBidi[i][1];\r
1436                             }\r
1437 \r
1438                             if (UCharacter.getDirection(c) != shouldBeDir\r
1439                                 || UCharacter.getIntPropertyValue(c,\r
1440                                                           UProperty.BIDI_CLASS)\r
1441                                    != shouldBeDir) {\r
1442                                 errln("error: getDirection(unassigned/PUA "\r
1443                                       + Integer.toHexString(c)\r
1444                                       + ") should be "\r
1445                                       + shouldBeDir);\r
1446                             }\r
1447                             ++ c;\r
1448                         }\r
1449                     }\r
1450                 }\r
1451             }\r
1452         }\r
1453 \r
1454         iterator.reset();\r
1455         if (iterator.next(result) == false || result.start != 0) {\r
1456             System.out.println("result " + result.start);\r
1457             errln("UCharacterIteration reset() failed");\r
1458         }\r
1459     }\r
1460 \r
1461     /**\r
1462      * Testing getAge\r
1463      */\r
1464     public void TestGetAge()\r
1465     {\r
1466         int ages[] = {0x41,    1, 1, 0, 0,\r
1467                       0xffff,  1, 1, 0, 0,\r
1468                       0x20ab,  2, 0, 0, 0,\r
1469                       0x2fffe, 2, 0, 0, 0,\r
1470                       0x20ac,  2, 1, 0, 0,\r
1471                       0xfb1d,  3, 0, 0, 0,\r
1472                       0x3f4,   3, 1, 0, 0,\r
1473                       0x10300, 3, 1, 0, 0,\r
1474                       0x220,   3, 2, 0, 0,\r
1475                       0xff60,  3, 2, 0, 0};\r
1476         for (int i = 0; i < ages.length; i += 5) {\r
1477             VersionInfo age = UCharacter.getAge(ages[i]);\r
1478             if (age != VersionInfo.getInstance(ages[i + 1], ages[i + 2],\r
1479                                                ages[i + 3], ages[i + 4])) {\r
1480                 errln("error: getAge(\\u" + Integer.toHexString(ages[i]) +\r
1481                       ") == " + age.toString() + " instead of " +\r
1482                       ages[i + 1] + "." + ages[i + 2] + "." + ages[i + 3] +\r
1483                       "." + ages[i + 4]);\r
1484             }\r
1485         }\r
1486     }\r
1487 \r
1488     /**\r
1489      * Test binary non core properties\r
1490      */\r
1491     public void TestAdditionalProperties()\r
1492     {\r
1493         // test data for hasBinaryProperty()\r
1494         int props[][] = { // code point, property\r
1495             { 0x0627, UProperty.ALPHABETIC, 1 },\r
1496             { 0x1034a, UProperty.ALPHABETIC, 1 },\r
1497             { 0x2028, UProperty.ALPHABETIC, 0 },\r
1498 \r
1499             { 0x0066, UProperty.ASCII_HEX_DIGIT, 1 },\r
1500             { 0x0067, UProperty.ASCII_HEX_DIGIT, 0 },\r
1501 \r
1502             { 0x202c, UProperty.BIDI_CONTROL, 1 },\r
1503             { 0x202f, UProperty.BIDI_CONTROL, 0 },\r
1504 \r
1505             { 0x003c, UProperty.BIDI_MIRRORED, 1 },\r
1506             { 0x003d, UProperty.BIDI_MIRRORED, 0 },\r
1507 \r
1508             /* see Unicode Corrigendum #6 at http://www.unicode.org/versions/corrigendum6.html */\r
1509             { 0x2018, UProperty.BIDI_MIRRORED, 0 },\r
1510             { 0x201d, UProperty.BIDI_MIRRORED, 0 },\r
1511             { 0x201f, UProperty.BIDI_MIRRORED, 0 },\r
1512             { 0x301e, UProperty.BIDI_MIRRORED, 0 },\r
1513 \r
1514             { 0x058a, UProperty.DASH, 1 },\r
1515             { 0x007e, UProperty.DASH, 0 },\r
1516 \r
1517             { 0x0c4d, UProperty.DIACRITIC, 1 },\r
1518             { 0x3000, UProperty.DIACRITIC, 0 },\r
1519 \r
1520             { 0x0e46, UProperty.EXTENDER, 1 },\r
1521             { 0x0020, UProperty.EXTENDER, 0 },\r
1522 \r
1523             { 0xfb1d, UProperty.FULL_COMPOSITION_EXCLUSION, 1 },\r
1524             { 0x1d15f, UProperty.FULL_COMPOSITION_EXCLUSION, 1 },\r
1525             { 0xfb1e, UProperty.FULL_COMPOSITION_EXCLUSION, 0 },\r
1526 \r
1527             { 0x110a, UProperty.NFD_INERT, 1 },      /* Jamo L */\r
1528             { 0x0308, UProperty.NFD_INERT, 0 },\r
1529 \r
1530             { 0x1164, UProperty.NFKD_INERT, 1 },     /* Jamo V */\r
1531             { 0x1d79d, UProperty.NFKD_INERT, 0 },   /* math compat version of xi */\r
1532 \r
1533             { 0x0021, UProperty.NFC_INERT, 1 },      /* ! */\r
1534             { 0x0061, UProperty.NFC_INERT, 0 },     /* a */\r
1535             { 0x00e4, UProperty.NFC_INERT, 0 },     /* a-umlaut */\r
1536             { 0x0102, UProperty.NFC_INERT, 0 },     /* a-breve */\r
1537             { 0xac1c, UProperty.NFC_INERT, 0 },     /* Hangul LV */\r
1538             { 0xac1d, UProperty.NFC_INERT, 1 },      /* Hangul LVT */\r
1539 \r
1540             { 0x1d79d, UProperty.NFKC_INERT, 0 },   /* math compat version of xi */\r
1541             { 0x2a6d6, UProperty.NFKC_INERT, 1 },    /* Han, last of CJK ext. B */\r
1542 \r
1543             { 0x00e4, UProperty.SEGMENT_STARTER, 1 },\r
1544             { 0x0308, UProperty.SEGMENT_STARTER, 0 },\r
1545             { 0x110a, UProperty.SEGMENT_STARTER, 1 }, /* Jamo L */\r
1546             { 0x1164, UProperty.SEGMENT_STARTER, 0 },/* Jamo V */\r
1547             { 0xac1c, UProperty.SEGMENT_STARTER, 1 }, /* Hangul LV */\r
1548             { 0xac1d, UProperty.SEGMENT_STARTER, 1 }, /* Hangul LVT */\r
1549 \r
1550             { 0x0044, UProperty.HEX_DIGIT, 1 },\r
1551             { 0xff46, UProperty.HEX_DIGIT, 1 },\r
1552             { 0x0047, UProperty.HEX_DIGIT, 0 },\r
1553 \r
1554             { 0x30fb, UProperty.HYPHEN, 1 },\r
1555             { 0xfe58, UProperty.HYPHEN, 0 },\r
1556 \r
1557             { 0x2172, UProperty.ID_CONTINUE, 1 },\r
1558             { 0x0307, UProperty.ID_CONTINUE, 1 },\r
1559             { 0x005c, UProperty.ID_CONTINUE, 0 },\r
1560 \r
1561             { 0x2172, UProperty.ID_START, 1 },\r
1562             { 0x007a, UProperty.ID_START, 1 },\r
1563             { 0x0039, UProperty.ID_START, 0 },\r
1564 \r
1565             { 0x4db5, UProperty.IDEOGRAPHIC, 1 },\r
1566             { 0x2f999, UProperty.IDEOGRAPHIC, 1 },\r
1567             { 0x2f99, UProperty.IDEOGRAPHIC, 0 },\r
1568 \r
1569             { 0x200c, UProperty.JOIN_CONTROL, 1 },\r
1570             { 0x2029, UProperty.JOIN_CONTROL, 0 },\r
1571 \r
1572             { 0x1d7bc, UProperty.LOWERCASE, 1 },\r
1573             { 0x0345, UProperty.LOWERCASE, 1 },\r
1574             { 0x0030, UProperty.LOWERCASE, 0 },\r
1575 \r
1576             { 0x1d7a9, UProperty.MATH, 1 },\r
1577             { 0x2135, UProperty.MATH, 1 },\r
1578             { 0x0062, UProperty.MATH, 0 },\r
1579 \r
1580             { 0xfde1, UProperty.NONCHARACTER_CODE_POINT, 1 },\r
1581             { 0x10ffff, UProperty.NONCHARACTER_CODE_POINT, 1 },\r
1582             { 0x10fffd, UProperty.NONCHARACTER_CODE_POINT, 0 },\r
1583 \r
1584             { 0x0022, UProperty.QUOTATION_MARK, 1 },\r
1585             { 0xff62, UProperty.QUOTATION_MARK, 1 },\r
1586             { 0xd840, UProperty.QUOTATION_MARK, 0 },\r
1587 \r
1588             { 0x061f, UProperty.TERMINAL_PUNCTUATION, 1 },\r
1589             { 0xe003f, UProperty.TERMINAL_PUNCTUATION, 0 },\r
1590 \r
1591             { 0x1d44a, UProperty.UPPERCASE, 1 },\r
1592             { 0x2162, UProperty.UPPERCASE, 1 },\r
1593             { 0x0345, UProperty.UPPERCASE, 0 },\r
1594 \r
1595             { 0x0020, UProperty.WHITE_SPACE, 1 },\r
1596             { 0x202f, UProperty.WHITE_SPACE, 1 },\r
1597             { 0x3001, UProperty.WHITE_SPACE, 0 },\r
1598 \r
1599             { 0x0711, UProperty.XID_CONTINUE, 1 },\r
1600             { 0x1d1aa, UProperty.XID_CONTINUE, 1 },\r
1601             { 0x007c, UProperty.XID_CONTINUE, 0 },\r
1602 \r
1603             { 0x16ee, UProperty.XID_START, 1 },\r
1604             { 0x23456, UProperty.XID_START, 1 },\r
1605             { 0x1d1aa, UProperty.XID_START, 0 },\r
1606 \r
1607             /*\r
1608              * Version break:\r
1609              * The following properties are only supported starting with the\r
1610              * Unicode version indicated in the second field.\r
1611              */\r
1612             { -1, 0x320, 0 },\r
1613 \r
1614             { 0x180c, UProperty.DEFAULT_IGNORABLE_CODE_POINT, 1 },\r
1615             { 0xfe02, UProperty.DEFAULT_IGNORABLE_CODE_POINT, 1 },\r
1616             { 0x1801, UProperty.DEFAULT_IGNORABLE_CODE_POINT, 0 },\r
1617 \r
1618             { 0x0341, UProperty.DEPRECATED, 1 },\r
1619             { 0xe0041, UProperty.DEPRECATED, 1 },       /* Changed from Unicode 5 to 5.1 */\r
1620 \r
1621             { 0x00a0, UProperty.GRAPHEME_BASE, 1 },\r
1622             { 0x0a4d, UProperty.GRAPHEME_BASE, 0 },\r
1623             { 0xff9d, UProperty.GRAPHEME_BASE, 1 },\r
1624             { 0xff9f, UProperty.GRAPHEME_BASE, 0 },      /* changed from Unicode 3.2 to 4  and again 5 to 5.1 */\r
1625 \r
1626             { 0x0300, UProperty.GRAPHEME_EXTEND, 1 },\r
1627             { 0xff9d, UProperty.GRAPHEME_EXTEND, 0 }, \r
1628             { 0xff9f, UProperty.GRAPHEME_EXTEND, 1 },   /* changed from Unicode 3.2 to 4 and again 5 to 5.1 */\r
1629             { 0x0603, UProperty.GRAPHEME_EXTEND, 0 },\r
1630 \r
1631             { 0x0a4d, UProperty.GRAPHEME_LINK, 1 },\r
1632             { 0xff9f, UProperty.GRAPHEME_LINK, 0 },\r
1633 \r
1634             { 0x2ff7, UProperty.IDS_BINARY_OPERATOR, 1 },\r
1635             { 0x2ff3, UProperty.IDS_BINARY_OPERATOR, 0 },\r
1636 \r
1637             { 0x2ff3, UProperty.IDS_TRINARY_OPERATOR, 1 },\r
1638             { 0x2f03, UProperty.IDS_TRINARY_OPERATOR, 0 },\r
1639 \r
1640             { 0x0ec1, UProperty.LOGICAL_ORDER_EXCEPTION, 1 },\r
1641             { 0xdcba, UProperty.LOGICAL_ORDER_EXCEPTION, 0 },\r
1642 \r
1643             { 0x2e9b, UProperty.RADICAL, 1 },\r
1644             { 0x4e00, UProperty.RADICAL, 0 },\r
1645 \r
1646             { 0x012f, UProperty.SOFT_DOTTED, 1 },\r
1647             { 0x0049, UProperty.SOFT_DOTTED, 0 },\r
1648 \r
1649             { 0xfa11, UProperty.UNIFIED_IDEOGRAPH, 1 },\r
1650             { 0xfa12, UProperty.UNIFIED_IDEOGRAPH, 0 },\r
1651 \r
1652             { -1, 0x401, 0 }, /* version break for Unicode 4.0.1 */\r
1653 \r
1654             { 0x002e, UProperty.S_TERM, 1 },\r
1655             { 0x0061, UProperty.S_TERM, 0 },\r
1656 \r
1657             { 0x180c, UProperty.VARIATION_SELECTOR, 1 },\r
1658             { 0xfe03, UProperty.VARIATION_SELECTOR, 1 },\r
1659             { 0xe01ef, UProperty.VARIATION_SELECTOR, 1 },\r
1660             { 0xe0200, UProperty.VARIATION_SELECTOR, 0 },\r
1661 \r
1662             /* enum/integer type properties */\r
1663             /* test default Bidi classes for unassigned code points */\r
1664             { 0x0590, UProperty.BIDI_CLASS, UCharacterDirection.RIGHT_TO_LEFT },\r
1665             { 0x05cf, UProperty.BIDI_CLASS, UCharacterDirection.RIGHT_TO_LEFT },\r
1666             { 0x05ed, UProperty.BIDI_CLASS, UCharacterDirection.RIGHT_TO_LEFT },\r
1667             { 0x07f2, UProperty.BIDI_CLASS, UCharacterDirection.DIR_NON_SPACING_MARK }, /* Nko, new in Unicode 5.0 */\r
1668             { 0x07fe, UProperty.BIDI_CLASS, UCharacterDirection.RIGHT_TO_LEFT }, /* unassigned R */\r
1669             { 0x08ba, UProperty.BIDI_CLASS, UCharacterDirection.RIGHT_TO_LEFT },\r
1670             { 0xfb37, UProperty.BIDI_CLASS, UCharacterDirection.RIGHT_TO_LEFT },\r
1671             { 0xfb42, UProperty.BIDI_CLASS, UCharacterDirection.RIGHT_TO_LEFT },\r
1672             { 0x10806, UProperty.BIDI_CLASS, UCharacterDirection.RIGHT_TO_LEFT },\r
1673             { 0x10909, UProperty.BIDI_CLASS, UCharacterDirection.RIGHT_TO_LEFT },\r
1674             { 0x10fe4, UProperty.BIDI_CLASS, UCharacterDirection.RIGHT_TO_LEFT },\r
1675 \r
1676             { 0x0605, UProperty.BIDI_CLASS, UCharacterDirection.RIGHT_TO_LEFT_ARABIC },\r
1677             { 0x061c, UProperty.BIDI_CLASS, UCharacterDirection.RIGHT_TO_LEFT_ARABIC },\r
1678             { 0x063f, UProperty.BIDI_CLASS, UCharacterDirection.RIGHT_TO_LEFT_ARABIC },\r
1679             { 0x070e, UProperty.BIDI_CLASS, UCharacterDirection.RIGHT_TO_LEFT_ARABIC },\r
1680             { 0x0775, UProperty.BIDI_CLASS, UCharacterDirection.RIGHT_TO_LEFT_ARABIC },\r
1681             { 0xfbc2, UProperty.BIDI_CLASS, UCharacterDirection.RIGHT_TO_LEFT_ARABIC },\r
1682             { 0xfd90, UProperty.BIDI_CLASS, UCharacterDirection.RIGHT_TO_LEFT_ARABIC },\r
1683             { 0xfefe, UProperty.BIDI_CLASS, UCharacterDirection.RIGHT_TO_LEFT_ARABIC },\r
1684 \r
1685             { 0x02AF, UProperty.BLOCK, UCharacter.UnicodeBlock.IPA_EXTENSIONS.getID() },\r
1686             { 0x0C4E, UProperty.BLOCK, UCharacter.UnicodeBlock.TELUGU.getID()},\r
1687             { 0x155A, UProperty.BLOCK, UCharacter.UnicodeBlock.UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS.getID() },\r
1688             { 0x1717, UProperty.BLOCK, UCharacter.UnicodeBlock.TAGALOG.getID() },\r
1689             { 0x1900, UProperty.BLOCK, UCharacter.UnicodeBlock.LIMBU.getID() },\r
1690             { 0x1AFF, UProperty.BLOCK, UCharacter.UnicodeBlock.NO_BLOCK.getID()},\r
1691             { 0x3040, UProperty.BLOCK, UCharacter.UnicodeBlock.HIRAGANA.getID()},\r
1692             { 0x1D0FF, UProperty.BLOCK, UCharacter.UnicodeBlock.BYZANTINE_MUSICAL_SYMBOLS.getID()},\r
1693             { 0x50000, UProperty.BLOCK, UCharacter.UnicodeBlock.NO_BLOCK.getID() },\r
1694             { 0xEFFFF, UProperty.BLOCK, UCharacter.UnicodeBlock.NO_BLOCK.getID() },\r
1695             { 0x10D0FF, UProperty.BLOCK, UCharacter.UnicodeBlock.SUPPLEMENTARY_PRIVATE_USE_AREA_B.getID() },\r
1696 \r
1697             /* UProperty.CANONICAL_COMBINING_CLASS tested for assigned characters in TestUnicodeData() */\r
1698             { 0xd7d7, UProperty.CANONICAL_COMBINING_CLASS, 0 },\r
1699 \r
1700             { 0x00A0, UProperty.DECOMPOSITION_TYPE, UCharacter.DecompositionType.NOBREAK },\r
1701             { 0x00A8, UProperty.DECOMPOSITION_TYPE, UCharacter.DecompositionType.COMPAT },\r
1702             { 0x00bf, UProperty.DECOMPOSITION_TYPE, UCharacter.DecompositionType.NONE },\r
1703             { 0x00c0, UProperty.DECOMPOSITION_TYPE, UCharacter.DecompositionType.CANONICAL },\r
1704             { 0x1E9B, UProperty.DECOMPOSITION_TYPE, UCharacter.DecompositionType.CANONICAL },\r
1705             { 0xBCDE, UProperty.DECOMPOSITION_TYPE, UCharacter.DecompositionType.CANONICAL },\r
1706             { 0xFB5D, UProperty.DECOMPOSITION_TYPE, UCharacter.DecompositionType.MEDIAL },\r
1707             { 0x1D736, UProperty.DECOMPOSITION_TYPE, UCharacter.DecompositionType.FONT },\r
1708             { 0xe0033, UProperty.DECOMPOSITION_TYPE, UCharacter.DecompositionType.NONE },\r
1709 \r
1710             { 0x0009, UProperty.EAST_ASIAN_WIDTH, UCharacter.EastAsianWidth.NEUTRAL },\r
1711             { 0x0020, UProperty.EAST_ASIAN_WIDTH, UCharacter.EastAsianWidth.NARROW },\r
1712             { 0x00B1, UProperty.EAST_ASIAN_WIDTH, UCharacter.EastAsianWidth.AMBIGUOUS },\r
1713             { 0x20A9, UProperty.EAST_ASIAN_WIDTH, UCharacter.EastAsianWidth.HALFWIDTH },\r
1714             { 0x2FFB, UProperty.EAST_ASIAN_WIDTH, UCharacter.EastAsianWidth.WIDE },\r
1715             { 0x3000, UProperty.EAST_ASIAN_WIDTH, UCharacter.EastAsianWidth.FULLWIDTH },\r
1716             { 0x35bb, UProperty.EAST_ASIAN_WIDTH, UCharacter.EastAsianWidth.WIDE },\r
1717             { 0x58bd, UProperty.EAST_ASIAN_WIDTH, UCharacter.EastAsianWidth.WIDE },\r
1718             { 0xD7A3, UProperty.EAST_ASIAN_WIDTH, UCharacter.EastAsianWidth.WIDE },\r
1719             { 0xEEEE, UProperty.EAST_ASIAN_WIDTH, UCharacter.EastAsianWidth.AMBIGUOUS },\r
1720             { 0x1D198, UProperty.EAST_ASIAN_WIDTH, UCharacter.EastAsianWidth.NEUTRAL },\r
1721             { 0x20000, UProperty.EAST_ASIAN_WIDTH, UCharacter.EastAsianWidth.WIDE },\r
1722             { 0x2F8C7, UProperty.EAST_ASIAN_WIDTH, UCharacter.EastAsianWidth.WIDE },\r
1723             { 0x3a5bd, UProperty.EAST_ASIAN_WIDTH, UCharacter.EastAsianWidth.WIDE },\r
1724             { 0x5a5bd, UProperty.EAST_ASIAN_WIDTH, UCharacter.EastAsianWidth.NEUTRAL },\r
1725             { 0xFEEEE, UProperty.EAST_ASIAN_WIDTH, UCharacter.EastAsianWidth.AMBIGUOUS },\r
1726             { 0x10EEEE, UProperty.EAST_ASIAN_WIDTH, UCharacter.EastAsianWidth.AMBIGUOUS },\r
1727 \r
1728             /* UProperty.GENERAL_CATEGORY tested for assigned characters in TestUnicodeData() */\r
1729             { 0xd7d7, UProperty.GENERAL_CATEGORY, 0 },\r
1730 \r
1731             { 0x0444, UProperty.JOINING_GROUP, UCharacter.JoiningGroup.NO_JOINING_GROUP },\r
1732             { 0x0639, UProperty.JOINING_GROUP, UCharacter.JoiningGroup.AIN },\r
1733             { 0x072A, UProperty.JOINING_GROUP, UCharacter.JoiningGroup.DALATH_RISH },\r
1734             { 0x0647, UProperty.JOINING_GROUP, UCharacter.JoiningGroup.HEH },\r
1735             { 0x06C1, UProperty.JOINING_GROUP, UCharacter.JoiningGroup.HEH_GOAL },\r
1736             { 0x06C3, UProperty.JOINING_GROUP, UCharacter.JoiningGroup.HAMZA_ON_HEH_GOAL },\r
1737 \r
1738             { 0x200C, UProperty.JOINING_TYPE, UCharacter.JoiningType.NON_JOINING },\r
1739             { 0x200D, UProperty.JOINING_TYPE, UCharacter.JoiningType.JOIN_CAUSING },\r
1740             { 0x0639, UProperty.JOINING_TYPE, UCharacter.JoiningType.DUAL_JOINING },\r
1741             { 0x0640, UProperty.JOINING_TYPE, UCharacter.JoiningType.JOIN_CAUSING },\r
1742             { 0x06C3, UProperty.JOINING_TYPE, UCharacter.JoiningType.RIGHT_JOINING },\r
1743             { 0x0300, UProperty.JOINING_TYPE, UCharacter.JoiningType.TRANSPARENT },\r
1744             { 0x070F, UProperty.JOINING_TYPE, UCharacter.JoiningType.TRANSPARENT },\r
1745             { 0xe0033, UProperty.JOINING_TYPE, UCharacter.JoiningType.TRANSPARENT },\r
1746 \r
1747             /* TestUnicodeData() verifies that no assigned character has "XX" (unknown) */\r
1748             { 0xe7e7, UProperty.LINE_BREAK, UCharacter.LineBreak.UNKNOWN },\r
1749             { 0x10fffd, UProperty.LINE_BREAK, UCharacter.LineBreak.UNKNOWN },\r
1750             { 0x0028, UProperty.LINE_BREAK, UCharacter.LineBreak.OPEN_PUNCTUATION },\r
1751             { 0x232A, UProperty.LINE_BREAK, UCharacter.LineBreak.CLOSE_PUNCTUATION },\r
1752             { 0x3401, UProperty.LINE_BREAK, UCharacter.LineBreak.IDEOGRAPHIC },\r
1753             { 0x4e02, UProperty.LINE_BREAK, UCharacter.LineBreak.IDEOGRAPHIC },\r
1754             { 0x20004, UProperty.LINE_BREAK, UCharacter.LineBreak.IDEOGRAPHIC },\r
1755             { 0xf905, UProperty.LINE_BREAK, UCharacter.LineBreak.IDEOGRAPHIC },\r
1756             { 0xdb7e, UProperty.LINE_BREAK, UCharacter.LineBreak.SURROGATE },\r
1757             { 0xdbfd, UProperty.LINE_BREAK, UCharacter.LineBreak.SURROGATE },\r
1758             { 0xdffc, UProperty.LINE_BREAK, UCharacter.LineBreak.SURROGATE },\r
1759             { 0x2762, UProperty.LINE_BREAK, UCharacter.LineBreak.EXCLAMATION },\r
1760             { 0x002F, UProperty.LINE_BREAK, UCharacter.LineBreak.BREAK_SYMBOLS },\r
1761             { 0x1D49C, UProperty.LINE_BREAK, UCharacter.LineBreak.ALPHABETIC },\r
1762             { 0x1731, UProperty.LINE_BREAK, UCharacter.LineBreak.ALPHABETIC },\r
1763 \r
1764             /* UProperty.NUMERIC_TYPE tested in TestNumericProperties() */\r
1765 \r
1766             /* UProperty.SCRIPT tested in TestUScriptCodeAPI() */\r
1767 \r
1768             { 0x1100, UProperty.HANGUL_SYLLABLE_TYPE, UCharacter.HangulSyllableType.LEADING_JAMO },\r
1769             { 0x1111, UProperty.HANGUL_SYLLABLE_TYPE, UCharacter.HangulSyllableType.LEADING_JAMO },\r
1770             { 0x1159, UProperty.HANGUL_SYLLABLE_TYPE, UCharacter.HangulSyllableType.LEADING_JAMO },\r
1771             { 0x115f, UProperty.HANGUL_SYLLABLE_TYPE, UCharacter.HangulSyllableType.LEADING_JAMO },\r
1772 \r
1773             { 0x1160, UProperty.HANGUL_SYLLABLE_TYPE, UCharacter.HangulSyllableType.VOWEL_JAMO },\r
1774             { 0x1161, UProperty.HANGUL_SYLLABLE_TYPE, UCharacter.HangulSyllableType.VOWEL_JAMO },\r
1775             { 0x1172, UProperty.HANGUL_SYLLABLE_TYPE, UCharacter.HangulSyllableType.VOWEL_JAMO },\r
1776             { 0x11a2, UProperty.HANGUL_SYLLABLE_TYPE, UCharacter.HangulSyllableType.VOWEL_JAMO },\r
1777 \r
1778             { 0x11a8, UProperty.HANGUL_SYLLABLE_TYPE, UCharacter.HangulSyllableType.TRAILING_JAMO },\r
1779             { 0x11b8, UProperty.HANGUL_SYLLABLE_TYPE, UCharacter.HangulSyllableType.TRAILING_JAMO },\r
1780             { 0x11c8, UProperty.HANGUL_SYLLABLE_TYPE, UCharacter.HangulSyllableType.TRAILING_JAMO },\r
1781             { 0x11f9, UProperty.HANGUL_SYLLABLE_TYPE, UCharacter.HangulSyllableType.TRAILING_JAMO },\r
1782 \r
1783             { 0x115a, UProperty.HANGUL_SYLLABLE_TYPE, 0 },\r
1784             { 0x115e, UProperty.HANGUL_SYLLABLE_TYPE, 0 },\r
1785             { 0x11a3, UProperty.HANGUL_SYLLABLE_TYPE, 0 },\r
1786             { 0x11a7, UProperty.HANGUL_SYLLABLE_TYPE, 0 },\r
1787             { 0x11fa, UProperty.HANGUL_SYLLABLE_TYPE, 0 },\r
1788             { 0x11ff, UProperty.HANGUL_SYLLABLE_TYPE, 0 },\r
1789 \r
1790             { 0xac00, UProperty.HANGUL_SYLLABLE_TYPE, UCharacter.HangulSyllableType.LV_SYLLABLE },\r
1791             { 0xac1c, UProperty.HANGUL_SYLLABLE_TYPE, UCharacter.HangulSyllableType.LV_SYLLABLE },\r
1792             { 0xc5ec, UProperty.HANGUL_SYLLABLE_TYPE, UCharacter.HangulSyllableType.LV_SYLLABLE },\r
1793             { 0xd788, UProperty.HANGUL_SYLLABLE_TYPE, UCharacter.HangulSyllableType.LV_SYLLABLE },\r
1794 \r
1795             { 0xac01, UProperty.HANGUL_SYLLABLE_TYPE, UCharacter.HangulSyllableType.LVT_SYLLABLE },\r
1796             { 0xac1b, UProperty.HANGUL_SYLLABLE_TYPE, UCharacter.HangulSyllableType.LVT_SYLLABLE },\r
1797             { 0xac1d, UProperty.HANGUL_SYLLABLE_TYPE, UCharacter.HangulSyllableType.LVT_SYLLABLE },\r
1798             { 0xc5ee, UProperty.HANGUL_SYLLABLE_TYPE, UCharacter.HangulSyllableType.LVT_SYLLABLE },\r
1799             { 0xd7a3, UProperty.HANGUL_SYLLABLE_TYPE, UCharacter.HangulSyllableType.LVT_SYLLABLE },\r
1800 \r
1801             { 0xd7a4, UProperty.HANGUL_SYLLABLE_TYPE, 0 },\r
1802 \r
1803             { -1, 0x410, 0 }, /* version break for Unicode 4.1 */\r
1804 \r
1805             { 0x00d7, UProperty.PATTERN_SYNTAX, 1 },\r
1806             { 0xfe45, UProperty.PATTERN_SYNTAX, 1 },\r
1807             { 0x0061, UProperty.PATTERN_SYNTAX, 0 },\r
1808 \r
1809             { 0x0020, UProperty.PATTERN_WHITE_SPACE, 1 },\r
1810             { 0x0085, UProperty.PATTERN_WHITE_SPACE, 1 },\r
1811             { 0x200f, UProperty.PATTERN_WHITE_SPACE, 1 },\r
1812             { 0x00a0, UProperty.PATTERN_WHITE_SPACE, 0 },\r
1813             { 0x3000, UProperty.PATTERN_WHITE_SPACE, 0 },\r
1814 \r
1815             { 0x1d200, UProperty.BLOCK, UCharacter.UnicodeBlock.ANCIENT_GREEK_MUSICAL_NOTATION_ID },\r
1816             { 0x2c8e,  UProperty.BLOCK, UCharacter.UnicodeBlock.COPTIC_ID },\r
1817             { 0xfe17,  UProperty.BLOCK, UCharacter.UnicodeBlock.VERTICAL_FORMS_ID },\r
1818 \r
1819             { 0x1a00,  UProperty.SCRIPT, UScript.BUGINESE },\r
1820             { 0x2cea,  UProperty.SCRIPT, UScript.COPTIC },\r
1821             { 0xa82b,  UProperty.SCRIPT, UScript.SYLOTI_NAGRI },\r
1822             { 0x103d0, UProperty.SCRIPT, UScript.OLD_PERSIAN },\r
1823 \r
1824             { 0xcc28, UProperty.LINE_BREAK, UCharacter.LineBreak.H2 },\r
1825             { 0xcc29, UProperty.LINE_BREAK, UCharacter.LineBreak.H3 },\r
1826             { 0xac03, UProperty.LINE_BREAK, UCharacter.LineBreak.H3 },\r
1827             { 0x115f, UProperty.LINE_BREAK, UCharacter.LineBreak.JL },\r
1828             { 0x11aa, UProperty.LINE_BREAK, UCharacter.LineBreak.JT },\r
1829             { 0x11a1, UProperty.LINE_BREAK, UCharacter.LineBreak.JV },\r
1830 \r
1831             { 0xb2c9, UProperty.GRAPHEME_CLUSTER_BREAK, UCharacter.GraphemeClusterBreak.LVT },\r
1832             { 0x036f, UProperty.GRAPHEME_CLUSTER_BREAK, UCharacter.GraphemeClusterBreak.EXTEND },\r
1833             { 0x0000, UProperty.GRAPHEME_CLUSTER_BREAK, UCharacter.GraphemeClusterBreak.CONTROL },\r
1834             { 0x1160, UProperty.GRAPHEME_CLUSTER_BREAK, UCharacter.GraphemeClusterBreak.V },\r
1835 \r
1836             { 0x05f4, UProperty.WORD_BREAK, UCharacter.WordBreak.MIDLETTER },\r
1837             { 0x4ef0, UProperty.WORD_BREAK, UCharacter.WordBreak.OTHER },\r
1838             { 0x19d9, UProperty.WORD_BREAK, UCharacter.WordBreak.NUMERIC },\r
1839             { 0x2044, UProperty.WORD_BREAK, UCharacter.WordBreak.MIDNUM },\r
1840 \r
1841             { 0xfffd, UProperty.SENTENCE_BREAK, UCharacter.SentenceBreak.OTHER },\r
1842             { 0x1ffc, UProperty.SENTENCE_BREAK, UCharacter.SentenceBreak.UPPER },\r
1843             { 0xff63, UProperty.SENTENCE_BREAK, UCharacter.SentenceBreak.CLOSE },\r
1844             { 0x2028, UProperty.SENTENCE_BREAK, UCharacter.SentenceBreak.SEP },\r
1845 \r
1846             /* undefined UProperty values */\r
1847             { 0x61, 0x4a7, 0 },\r
1848             { 0x234bc, 0x15ed, 0 }\r
1849         };\r
1850 \r
1851 \r
1852         if (UCharacter.getIntPropertyMinValue(UProperty.DASH) != 0\r
1853             || UCharacter.getIntPropertyMinValue(UProperty.BIDI_CLASS) != 0\r
1854             || UCharacter.getIntPropertyMinValue(UProperty.BLOCK)!= 0  /* j2478 */\r
1855             || UCharacter.getIntPropertyMinValue(UProperty.SCRIPT)!= 0 /* JB#2410 */\r
1856             || UCharacter.getIntPropertyMinValue(0x2345) != 0) {\r
1857             errln("error: UCharacter.getIntPropertyMinValue() wrong");\r
1858         }\r
1859 \r
1860         if( UCharacter.getIntPropertyMaxValue(UProperty.DASH)!=1) {\r
1861             errln("error: UCharacter.getIntPropertyMaxValue(UProperty.DASH) wrong\n");\r
1862         }\r
1863         if( UCharacter.getIntPropertyMaxValue(UProperty.ID_CONTINUE)!=1) {\r
1864             errln("error: UCharacter.getIntPropertyMaxValue(UProperty.ID_CONTINUE) wrong\n");\r
1865         }\r
1866         if( UCharacter.getIntPropertyMaxValue(UProperty.BINARY_LIMIT-1)!=1) {\r
1867             errln("error: UCharacter.getIntPropertyMaxValue(UProperty.BINARY_LIMIT-1) wrong\n");\r
1868         }\r
1869 \r
1870         if( UCharacter.getIntPropertyMaxValue(UProperty.BIDI_CLASS)!=UCharacterDirection.CHAR_DIRECTION_COUNT-1 ) {\r
1871             errln("error: UCharacter.getIntPropertyMaxValue(UProperty.BIDI_CLASS) wrong\n");\r
1872         }\r
1873         if( UCharacter.getIntPropertyMaxValue(UProperty.BLOCK)!=UCharacter.UnicodeBlock.COUNT-1 ) {\r
1874             errln("error: UCharacter.getIntPropertyMaxValue(UProperty.BLOCK) wrong\n");\r
1875         }\r
1876         if(UCharacter.getIntPropertyMaxValue(UProperty.LINE_BREAK)!=UCharacter.LineBreak.COUNT-1) {\r
1877             errln("error: UCharacter.getIntPropertyMaxValue(UProperty.LINE_BREAK) wrong\n");\r
1878         }\r
1879         if(UCharacter.getIntPropertyMaxValue(UProperty.SCRIPT)!=UScript.CODE_LIMIT-1) {\r
1880             errln("error: UCharacter.getIntPropertyMaxValue(UProperty.SCRIPT) wrong\n");\r
1881         }\r
1882         if(UCharacter.getIntPropertyMaxValue(UProperty.NUMERIC_TYPE)!=UCharacter.NumericType.COUNT-1) {\r
1883             errln("error: UCharacter.getIntPropertyMaxValue(UProperty.NUMERIC_TYPE) wrong\n");\r
1884         }\r
1885         if(UCharacter.getIntPropertyMaxValue(UProperty.GENERAL_CATEGORY)!=UCharacterCategory.CHAR_CATEGORY_COUNT-1) {\r
1886             errln("error: UCharacter.getIntPropertyMaxValue(UProperty.GENERAL_CATEGORY) wrong\n");\r
1887         }\r
1888         if(UCharacter.getIntPropertyMaxValue(UProperty.HANGUL_SYLLABLE_TYPE)!=UCharacter.HangulSyllableType.COUNT-1) {\r
1889             errln("error: UCharacter.getIntPropertyMaxValue(UProperty.HANGUL_SYLLABLE_TYPE) wrong\n");\r
1890         }\r
1891         if(UCharacter.getIntPropertyMaxValue(UProperty.GRAPHEME_CLUSTER_BREAK)!=UCharacter.GraphemeClusterBreak.COUNT-1) {\r
1892             errln("error: UCharacter.getIntPropertyMaxValue(UProperty.GRAPHEME_CLUSTER_BREAK) wrong\n");\r
1893         }\r
1894         if(UCharacter.getIntPropertyMaxValue(UProperty.SENTENCE_BREAK)!=UCharacter.SentenceBreak.COUNT-1) {\r
1895             errln("error: UCharacter.getIntPropertyMaxValue(UProperty.SENTENCE_BREAK) wrong\n");\r
1896         }\r
1897         if(UCharacter.getIntPropertyMaxValue(UProperty.WORD_BREAK)!=UCharacter.WordBreak.COUNT-1) {\r
1898             errln("error: UCharacter.getIntPropertyMaxValue(UProperty.WORD_BREAK) wrong\n");\r
1899         }\r
1900         /*JB#2410*/\r
1901         if( UCharacter.getIntPropertyMaxValue(0x2345)!=-1) {\r
1902             errln("error: UCharacter.getIntPropertyMaxValue(0x2345) wrong\n");\r
1903         }\r
1904         if( UCharacter.getIntPropertyMaxValue(UProperty.DECOMPOSITION_TYPE) !=  (UCharacter.DecompositionType.COUNT - 1)) {\r
1905             errln("error: UCharacter.getIntPropertyMaxValue(UProperty.DECOMPOSITION_TYPE) wrong\n");\r
1906         }\r
1907         if( UCharacter.getIntPropertyMaxValue(UProperty.JOINING_GROUP) !=   (UCharacter.JoiningGroup.COUNT -1)) {\r
1908             errln("error: UCharacter.getIntPropertyMaxValue(UProperty.JOINING_GROUP) wrong\n");\r
1909         }\r
1910         if( UCharacter.getIntPropertyMaxValue(UProperty.JOINING_TYPE) !=  (UCharacter.JoiningType.COUNT -1)) {\r
1911             errln("error: UCharacter.getIntPropertyMaxValue(UProperty.JOINING_TYPE) wrong\n");\r
1912         }\r
1913         if( UCharacter.getIntPropertyMaxValue(UProperty.EAST_ASIAN_WIDTH) !=  (UCharacter.EastAsianWidth.COUNT -1)) {\r
1914             errln("error: UCharacter.getIntPropertyMaxValue(UProperty.EAST_ASIAN_WIDTH) wrong\n");\r
1915         }\r
1916 \r
1917         VersionInfo version = UCharacter.getUnicodeVersion();\r
1918 \r
1919         // test hasBinaryProperty()\r
1920         for (int i = 0; i < props.length; ++ i) {\r
1921             if (props[i][0] < 0) {\r
1922                 if (version.compareTo(VersionInfo.getInstance(props[i][1] >> 8,\r
1923                                                           (props[i][1] >> 4) & 0xF,\r
1924                                                           props[i][1] & 0xF,\r
1925                                                           0)) < 0) {\r
1926                     break;\r
1927                 }\r
1928                 continue;\r
1929             }\r
1930             boolean expect = true;\r
1931             if (props[i][2] == 0) {\r
1932                 expect = false;\r
1933             }\r
1934             if (props[i][1] < UProperty.INT_START) {\r
1935                 if (UCharacter.hasBinaryProperty(props[i][0], props[i][1])\r
1936                     != expect) {\r
1937                     errln("error: UCharacter.hasBinaryProperty(\\u" +\r
1938                           Integer.toHexString(props[i][0]) + ", " +\r
1939                           Integer.toHexString(props[i][1])\r
1940                           + ") has an error expected " + props[i][2]);\r
1941                 }\r
1942             }\r
1943 \r
1944             int retVal = UCharacter.getIntPropertyValue(props[i][0], props[i][1]);\r
1945             if (retVal != props[i][2]) {\r
1946                 errln("error: UCharacter.getIntPropertyValue(\\u" +\r
1947                       Utility.hex(props[i][0], 4) +\r
1948                       ", " + props[i][1] + " is wrong, should be "\r
1949                       + props[i][2] + " not " + retVal);\r
1950             }\r
1951 \r
1952             // test separate functions, too\r
1953             switch (props[i][1]) {\r
1954             case UProperty.ALPHABETIC:\r
1955                 if (UCharacter.isUAlphabetic(props[i][0]) != expect) {\r
1956                     errln("error: UCharacter.isUAlphabetic(\\u" +\r
1957                           Integer.toHexString(props[i][0]) +\r
1958                           ") is wrong expected " + props[i][2]);\r
1959                 }\r
1960                 break;\r
1961             case UProperty.LOWERCASE:\r
1962                 if (UCharacter.isULowercase(props[i][0]) != expect) {\r
1963                     errln("error: UCharacter.isULowercase(\\u" +\r
1964                           Integer.toHexString(props[i][0]) +\r
1965                           ") is wrong expected " +props[i][2]);\r
1966                 }\r
1967                 break;\r
1968             case UProperty.UPPERCASE:\r
1969                 if (UCharacter.isUUppercase(props[i][0]) != expect) {\r
1970                     errln("error: UCharacter.isUUppercase(\\u" +\r
1971                           Integer.toHexString(props[i][0]) +\r
1972                           ") is wrong expected " + props[i][2]);\r
1973                 }\r
1974                 break;\r
1975             case UProperty.WHITE_SPACE:\r
1976                 if (UCharacter.isUWhiteSpace(props[i][0]) != expect) {\r
1977                     errln("error: UCharacter.isUWhiteSpace(\\u" +\r
1978                           Integer.toHexString(props[i][0]) +\r
1979                           ") is wrong expected " + props[i][2]);\r
1980                 }\r
1981                 break;\r
1982             default:\r
1983                 break;\r
1984             }\r
1985         }\r
1986     }\r
1987 \r
1988     public void TestNumericProperties()\r
1989     {\r
1990         // see UnicodeData.txt, DerivedNumericValues.txt\r
1991         int testvar[][] = {\r
1992             { 0x0F33, UCharacter.NumericType.NUMERIC },\r
1993             { 0x0C66, UCharacter.NumericType.DECIMAL },\r
1994             { 0x2159, UCharacter.NumericType.NUMERIC },\r
1995             { 0x00BD, UCharacter.NumericType.NUMERIC },\r
1996             { 0x0031, UCharacter.NumericType.DECIMAL },\r
1997             { 0x10320, UCharacter.NumericType.NUMERIC },\r
1998             { 0x0F2B, UCharacter.NumericType.NUMERIC },\r
1999             { 0x00B2, UCharacter.NumericType.DIGIT }, /* Unicode 4.0 change */\r
2000             { 0x1813, UCharacter.NumericType.DECIMAL },\r
2001             { 0x2173, UCharacter.NumericType.NUMERIC },\r
2002             { 0x278E, UCharacter.NumericType.DIGIT },\r
2003             { 0x1D7F2, UCharacter.NumericType.DECIMAL },\r
2004             { 0x247A, UCharacter.NumericType.DIGIT },\r
2005             { 0x1372, UCharacter.NumericType.NUMERIC },\r
2006             { 0x216B, UCharacter.NumericType.NUMERIC },\r
2007             { 0x16EE, UCharacter.NumericType.NUMERIC },\r
2008             { 0x249A, UCharacter.NumericType.NUMERIC },\r
2009             { 0x303A, UCharacter.NumericType.NUMERIC },\r
2010             { 0x32B2, UCharacter.NumericType.NUMERIC },\r
2011             { 0x1375, UCharacter.NumericType.NUMERIC },\r
2012             { 0x10323, UCharacter.NumericType.NUMERIC },\r
2013             { 0x0BF1, UCharacter.NumericType.NUMERIC },\r
2014             { 0x217E, UCharacter.NumericType.NUMERIC },\r
2015             { 0x2180, UCharacter.NumericType.NUMERIC },\r
2016             { 0x2181, UCharacter.NumericType.NUMERIC },\r
2017             { 0x137C, UCharacter.NumericType.NUMERIC },\r
2018             { 0x61, UCharacter.NumericType.NONE },\r
2019             { 0x3000, UCharacter.NumericType.NONE },\r
2020             { 0xfffe, UCharacter.NumericType.NONE },\r
2021             { 0x10301, UCharacter.NumericType.NONE },\r
2022             { 0xe0033, UCharacter.NumericType.NONE },\r
2023             { 0x10ffff, UCharacter.NumericType.NONE },\r
2024             /* Unicode 4.0 Changes */\r
2025             { 0x96f6,  UCharacter.NumericType.NUMERIC },\r
2026             { 0x4e00,  UCharacter.NumericType.NUMERIC },\r
2027             { 0x58f1,  UCharacter.NumericType.NUMERIC },\r
2028             { 0x5f10,  UCharacter.NumericType.NUMERIC },\r
2029             { 0x5f0e,  UCharacter.NumericType.NUMERIC },\r
2030             { 0x8086,  UCharacter.NumericType.NUMERIC },\r
2031             { 0x7396,  UCharacter.NumericType.NUMERIC },\r
2032             { 0x5345,  UCharacter.NumericType.NUMERIC },\r
2033             { 0x964c,  UCharacter.NumericType.NUMERIC },\r
2034             { 0x4edf,  UCharacter.NumericType.NUMERIC },\r
2035             { 0x4e07,  UCharacter.NumericType.NUMERIC },\r
2036             { 0x4ebf,  UCharacter.NumericType.NUMERIC },\r
2037             { 0x5146,  UCharacter.NumericType.NUMERIC }\r
2038         };\r
2039 \r
2040         double expected[] = {-1/(double)2,\r
2041                              0,\r
2042                              1/(double)6,\r
2043                              1/(double)2,\r
2044                              1,\r
2045                              1,\r
2046                              3/(double)2,\r
2047                              2,\r
2048                              3,\r
2049                              4,\r
2050                              5,\r
2051                              6,\r
2052                              7,\r
2053                              10,\r
2054                              12,\r
2055                              17,\r
2056                              19,\r
2057                              30,\r
2058                              37,\r
2059                              40,\r
2060                              50,\r
2061                              100,\r
2062                              500,\r
2063                              1000,\r
2064                              5000,\r
2065                              10000,\r
2066                              UCharacter.NO_NUMERIC_VALUE,\r
2067                              UCharacter.NO_NUMERIC_VALUE,\r
2068                              UCharacter.NO_NUMERIC_VALUE,\r
2069                              UCharacter.NO_NUMERIC_VALUE,\r
2070                              UCharacter.NO_NUMERIC_VALUE,\r
2071                              UCharacter.NO_NUMERIC_VALUE,\r
2072                              0 ,\r
2073                              1 ,\r
2074                              1 ,\r
2075                              2 ,\r
2076                              3 ,\r
2077                              4 ,\r
2078                              9 ,\r
2079                              30 ,\r
2080                              100 ,\r
2081                              1000 ,\r
2082                              10000 ,\r
2083                              100000000 ,\r
2084                              1000000000000.00\r
2085         };\r
2086 \r
2087 \r
2088         for (int i = 0; i < testvar.length; ++ i) {\r
2089             int c = testvar[i][0];\r
2090             int type = UCharacter.getIntPropertyValue(c,\r
2091                                                       UProperty.NUMERIC_TYPE);\r
2092             double nv = UCharacter.getUnicodeNumericValue(c);\r
2093 \r
2094             if (type != testvar[i][1]) {\r
2095                 errln("UProperty.NUMERIC_TYPE(\\u" + Utility.hex(c, 4)\r
2096                        + ") = " + type + " should be " + testvar[i][1]);\r
2097             }\r
2098             if (0.000001 <= Math.abs(nv - expected[i])) {\r
2099                 errln("UCharacter.getNumericValue(\\u" + Utility.hex(c, 4)\r
2100                         + ") = " + nv + " should be " + expected[i]);\r
2101             }\r
2102         }\r
2103     }\r
2104 \r
2105     /**\r
2106      * Test the property values API.  See JB#2410.\r
2107      */\r
2108     public void TestPropertyValues() {\r
2109         int i, p, min, max;\r
2110 \r
2111         /* Min should be 0 for everything. */\r
2112         /* Until JB#2478 is fixed, the one exception is UProperty.BLOCK. */\r
2113         for (p=UProperty.INT_START; p<UProperty.INT_LIMIT; ++p) {\r
2114             min = UCharacter.getIntPropertyMinValue(p);\r
2115             if (min != 0) {\r
2116                 if (p == UProperty.BLOCK) {\r
2117                     /* This is okay...for now.  See JB#2487.\r
2118                        TODO Update this for JB#2487. */\r
2119                 } else {\r
2120                     String name;\r
2121                     name = UCharacter.getPropertyName(p, UProperty.NameChoice.LONG);\r
2122                     errln("FAIL: UCharacter.getIntPropertyMinValue(" + name + ") = " +\r
2123                           min + ", exp. 0");\r
2124                 }\r
2125             }\r
2126         }\r
2127 \r
2128         if (UCharacter.getIntPropertyMinValue(UProperty.GENERAL_CATEGORY_MASK)\r
2129             != 0\r
2130             || UCharacter.getIntPropertyMaxValue(\r
2131                                                UProperty.GENERAL_CATEGORY_MASK)\r
2132                != -1) {\r
2133             errln("error: UCharacter.getIntPropertyMin/MaxValue("\r
2134                   + "UProperty.GENERAL_CATEGORY_MASK) is wrong");\r
2135         }\r
2136 \r
2137         /* Max should be -1 for invalid properties. */\r
2138         max = UCharacter.getIntPropertyMaxValue(-1);\r
2139         if (max != -1) {\r
2140             errln("FAIL: UCharacter.getIntPropertyMaxValue(-1) = " +\r
2141                   max + ", exp. -1");\r
2142         }\r
2143 \r
2144         /* Script should return 0 for an invalid code point. If the API\r
2145            throws an exception then that's fine too. */\r
2146         for (i=0; i<2; ++i) {\r
2147             try {\r
2148                 int script = 0;\r
2149                 String desc = null;\r
2150                 switch (i) {\r
2151                 case 0:\r
2152                     script = UScript.getScript(-1);\r
2153                     desc = "UScript.getScript(-1)";\r
2154                     break;\r
2155                 case 1:\r
2156                     script = UCharacter.getIntPropertyValue(-1, UProperty.SCRIPT);\r
2157                     desc = "UCharacter.getIntPropertyValue(-1, UProperty.SCRIPT)";\r
2158                     break;\r
2159                 }\r
2160                 if (script != 0) {\r
2161                     errln("FAIL: " + desc + " = " + script + ", exp. 0");\r
2162                 }\r
2163             } catch (IllegalArgumentException e) {}\r
2164         }\r
2165     }\r
2166 \r
2167     public void TestIsBMP()\r
2168     {\r
2169         int ch[] = {0x0, -1, 0xffff, 0x10ffff, 0xff, 0x1ffff};\r
2170         boolean flag[] = {true, false, true, false, true, false};\r
2171         for (int i = 0; i < ch.length; i ++) {\r
2172             if (UCharacter.isBMP(ch[i]) != flag[i]) {\r
2173                 errln("Fail: \\u" + Utility.hex(ch[i], 8)\r
2174                       + " failed at UCharacter.isBMP");\r
2175             }\r
2176         }\r
2177     }\r
2178 \r
2179     /* add characters from a serialized set to a normal one */\r
2180     private static void _setAddSerialized(UnicodeSet set, USerializedSet sset) {\r
2181      //  int start, end;\r
2182        int i, count;\r
2183 \r
2184        count=sset.countRanges();\r
2185        int[] range = new int[2];\r
2186        for(i=0; i<count; ++i) {\r
2187            sset.getRange(i,range);\r
2188            set.add(range[0],range[1]);\r
2189        }\r
2190     }\r
2191 \r
2192     private boolean showADiffB(UnicodeSet a, UnicodeSet b,\r
2193                                         String a_name, String b_name,\r
2194                                         boolean expect,\r
2195                                         boolean diffIsError){\r
2196         int i, start, end, length;\r
2197         boolean equal;\r
2198         equal=true;\r
2199         i=0;\r
2200         for(;;) {\r
2201             start  = a.getRangeStart(i);\r
2202             length = (i < a.getRangeCount()) ? 0 : a.getRangeCount();\r
2203             end    = a.getRangeEnd(i);\r
2204 \r
2205             if(length!=0) {\r
2206                 return equal; /* done with code points, got a string or -1 */\r
2207             }\r
2208 \r
2209             if(expect!=b.contains(start, end)) {\r
2210                 equal=false;\r
2211                 while(start<=end) {\r
2212                     if(expect!=b.contains(start)) {\r
2213                         if(diffIsError) {\r
2214                             if(expect) {\r
2215                                 errln("error: "+ a_name +" contains "+ hex(start)+" but "+ b_name +" does not");\r
2216                             } else {\r
2217                                 errln("error: "+a_name +" and "+ b_name+" both contain "+hex(start) +" but should not intersect");\r
2218                             }\r
2219                         } else {\r
2220                             if(expect) {\r
2221                                 logln("info: "+a_name +" contains "+hex(start)+ "but " + b_name +" does not");\r
2222                             } else {\r
2223                                 logln("info: "+a_name +" and "+b_name+" both contain "+hex(start)+" but should not intersect");\r
2224                             }\r
2225                         }\r
2226                     }\r
2227                     ++start;\r
2228                 }\r
2229             }\r
2230 \r
2231             ++i;\r
2232         }\r
2233     }\r
2234     private boolean showAMinusB(UnicodeSet a, UnicodeSet b,\r
2235                                         String a_name, String b_name,\r
2236                                         boolean diffIsError) {\r
2237 \r
2238         return showADiffB(a, b, a_name, b_name, true, diffIsError);\r
2239     }\r
2240 \r
2241     private boolean showAIntersectB(UnicodeSet a, UnicodeSet b,\r
2242                                             String a_name, String b_name,\r
2243                                             boolean diffIsError) {\r
2244         return showADiffB(a, b, a_name, b_name, false, diffIsError);\r
2245     }\r
2246 \r
2247     private boolean compareUSets(UnicodeSet a, UnicodeSet b,\r
2248                                          String a_name, String b_name,\r
2249                                          boolean diffIsError) {\r
2250         return\r
2251             showAMinusB(a, b, a_name, b_name, diffIsError) &&\r
2252             showAMinusB(b, a, b_name, a_name, diffIsError);\r
2253     }\r
2254 \r
2255    /* various tests for consistency of UCD data and API behavior */\r
2256    public void TestConsistency() {\r
2257        char[] buffer16 = new char[300];\r
2258        char[] buffer   = new char[300];\r
2259        UnicodeSet set1, set2, set3, set4;\r
2260 \r
2261        USerializedSet sset;\r
2262        int start, end;\r
2263        int i, length;\r
2264 \r
2265        String hyphenPattern = "[:Hyphen:]";\r
2266        String dashPattern = "[:Dash:]";\r
2267        String lowerPattern = "[:Lowercase:]";\r
2268        String formatPattern = "[:Cf:]";\r
2269        String alphaPattern  =  "[:Alphabetic:]";\r
2270 \r
2271        /*\r
2272         * It used to be that UCD.html and its precursors said\r
2273         * "Those dashes used to mark connections between pieces of words,\r
2274         *  plus the Katakana middle dot."\r
2275         *\r
2276         * Unicode 4 changed 00AD Soft Hyphen to Cf and removed it from Dash\r
2277         * but not from Hyphen.\r
2278         * UTC 94 (2003mar) decided to leave it that way and to changed UCD.html.\r
2279         * Therefore, do not show errors when testing the Hyphen property.\r
2280         */\r
2281        logln("Starting with Unicode 4, inconsistencies with [:Hyphen:] are\n"\r
2282                    + "known to the UTC and not considered errors.\n");\r
2283 \r
2284        set1=new UnicodeSet(hyphenPattern);\r
2285        set2=new UnicodeSet(dashPattern);\r
2286 \r
2287            /* remove the Katakana middle dot(s) from set1 */\r
2288            set1.remove(0x30fb);\r
2289            set2.remove (0xff65); /* halfwidth variant */\r
2290            showAMinusB(set1, set2, "[:Hyphen:]", "[:Dash:]", false);\r
2291 \r
2292 \r
2293        /* check that Cf is neither Hyphen nor Dash nor Alphabetic */\r
2294        set3=new UnicodeSet(formatPattern);\r
2295        set4=new UnicodeSet(alphaPattern);\r
2296 \r
2297        showAIntersectB(set3, set1, "[:Cf:]", "[:Hyphen:]", false);\r
2298        showAIntersectB(set3, set2, "[:Cf:]", "[:Dash:]", true);\r
2299        showAIntersectB(set3, set4, "[:Cf:]", "[:Alphabetic:]", true);\r
2300        /*\r
2301         * Check that each lowercase character has "small" in its name\r
2302         * and not "capital".\r
2303         * There are some such characters, some of which seem odd.\r
2304         * Use the verbose flag to see these notices.\r
2305         */\r
2306        set1=new UnicodeSet(lowerPattern);\r
2307 \r
2308        for(i=0;; ++i) {\r
2309 //               try{\r
2310 //                   length=set1.getItem(set1, i, &start, &end, NULL, 0, &errorCode);\r
2311 //               }catch(Exception e){\r
2312 //                   break;\r
2313 //               }\r
2314             start = set1.getRangeStart(i);\r
2315             end = set1.getRangeEnd(i);\r
2316             length = i<set1.getRangeCount() ? set1.getRangeCount() : 0;\r
2317            if(length!=0) {\r
2318                break; /* done with code points, got a string or -1 */\r
2319            }\r
2320 \r
2321            while(start<=end) {\r
2322                String name=UCharacter.getName(start);\r
2323 \r
2324                if( (name.indexOf("SMALL")< 0 || name.indexOf("CAPITAL")<-1) &&\r
2325                    name.indexOf("SMALL CAPITAL")==-1\r
2326                ) {\r
2327                    logln("info: [:Lowercase:] contains U+"+hex(start) + " whose name does not suggest lowercase: " + name);\r
2328                }\r
2329                ++start;\r
2330            }\r
2331        }\r
2332 \r
2333 \r
2334        /*\r
2335         * Test for an example that unorm_getCanonStartSet() delivers\r
2336         * all characters that compose from the input one,\r
2337         * even in multiple steps.\r
2338         * For example, the set for "I" (0049) should contain both\r
2339         * I-diaeresis (00CF) and I-diaeresis-acute (1E2E).\r
2340         * In general, the set for the middle such character should be a subset\r
2341         * of the set for the first.\r
2342         */\r
2343        set1=new UnicodeSet();\r
2344        set2=new UnicodeSet();\r
2345        sset = new USerializedSet();\r
2346        NormalizerImpl.getCanonStartSet(0x49,sset);\r
2347        _setAddSerialized(set1, sset);\r
2348 \r
2349        /* enumerate all characters that are plausible to be latin letters */\r
2350        for(start=0xa0; start<0x2000; ++start) {\r
2351            if(NormalizerImpl.getDecomposition(start, false, buffer16,0,buffer16.length) > 1 && buffer[0]==0x0049) {\r
2352                set2.add(start);\r
2353            }\r
2354        }\r
2355 \r
2356        compareUSets(set1, set2,\r
2357                     "[canon start set of 0049]", "[all c with canon decomp with 0049]",\r
2358                     false);\r
2359 \r
2360    }\r
2361 \r
2362     public void TestCoverage() {\r
2363         //cover forDigit\r
2364         char ch1 = UCharacter.forDigit(7, 11);\r
2365         assertEquals("UCharacter.forDigit ", "7", String.valueOf(ch1));\r
2366         char ch2 = UCharacter.forDigit(17, 20);\r
2367         assertEquals("UCharacter.forDigit ", "h", String.valueOf(ch2));\r
2368 \r
2369         //Jitterbug 4451, for coverage\r
2370         for (int i = 0x0041; i < 0x005B; i++) {\r
2371             if (!UCharacter.isJavaLetter(i))\r
2372                 errln("FAIL \\u" + hex(i) + " expected to be a letter");\r
2373             if (!UCharacter.isJavaIdentifierStart(i))\r
2374                 errln("FAIL \\u" + hex(i) + " expected to be a Java identifier start character");\r
2375             if (!UCharacter.isJavaLetterOrDigit(i))\r
2376                 errln("FAIL \\u" + hex(i) + " expected not to be a Java letter");\r
2377             if (!UCharacter.isJavaIdentifierPart(i))\r
2378                 errln("FAIL \\u" + hex(i) + " expected to be a Java identifier part character");\r
2379         }\r
2380         char[] spaces = {'\t','\n','\f','\r',' '};\r
2381         for (int i = 0; i < spaces.length; i++){\r
2382             if (!UCharacter.isSpace(spaces[i]))\r
2383                 errln("FAIL \\u" + hex(spaces[i]) + " expected to be a Java space");\r
2384         }\r
2385         if (!UCharacter.getStringPropertyValue(UProperty.AGE,'\u3400',0).equals("3.0.0.0")){\r
2386             errln("FAIL \\u3400 expected to be 3.0.0.0");\r
2387         }\r
2388     }\r
2389 \r
2390     public void TestCasePropsDummy() {\r
2391         // code coverage for UCaseProps.getDummy() \r
2392         if(UCaseProps.getDummy().tolower(0x41)!=0x41) {\r
2393             errln("UCaseProps.getDummy().tolower(0x41)!=0x41");\r
2394         }\r
2395     }\r
2396 \r
2397     public void TestBiDiPropsDummy() {\r
2398         // code coverage for UBiDiProps.getDummy() \r
2399         if(UBiDiProps.getDummy().getClass(0x20)!=0) {\r
2400             errln("UBiDiProps.getDummy().getClass(0x20)!=0");\r
2401         }\r
2402     }\r
2403     \r
2404     public void TestBlockData()\r
2405     {\r
2406         Class ubc = UCharacter.UnicodeBlock.class;\r
2407         \r
2408         for (int b = 1; b < UCharacter.UnicodeBlock.COUNT; b += 1) {\r
2409             UCharacter.UnicodeBlock blk = UCharacter.UnicodeBlock.getInstance(b);\r
2410             int id = blk.getID();\r
2411             String name = blk.toString();\r
2412             \r
2413             if (id != b) {\r
2414                 errln("UCharacter.UnicodeBlock.getInstance(" + b + ") returned a block with id = " + id);\r
2415             }\r
2416             \r
2417             try {\r
2418                 if (ubc.getField(name + "_ID").getInt(blk) != b) {\r
2419                     errln("UCharacter.UnicodeBlock.getInstance(" + b + ") returned a block with a name of " + name +\r
2420                           " which does not match the block id.");\r
2421                 }\r
2422             } catch (Exception e) {\r
2423                 errln("Couldn't get the id name for id " + b);\r
2424             }\r
2425         }\r
2426     }\r
2427 }\r