2 *******************************************************************************
\r
3 * Copyright (C) 1996-2009, International Business Machines Corporation and *
\r
4 * others. All Rights Reserved. *
\r
5 *******************************************************************************
\r
8 package com.ibm.icu.dev.test.lang;
\r
10 import com.ibm.icu.impl.UBiDiProps;
\r
11 import com.ibm.icu.impl.UCaseProps;
\r
13 import com.ibm.icu.dev.test.TestFmwk;
\r
14 import com.ibm.icu.dev.test.TestUtil;
\r
15 import com.ibm.icu.lang.UCharacter;
\r
16 import com.ibm.icu.lang.UCharacterCategory;
\r
17 import com.ibm.icu.lang.UCharacterDirection;
\r
18 import com.ibm.icu.lang.UProperty;
\r
19 import com.ibm.icu.lang.UScript;
\r
20 import com.ibm.icu.text.UTF16;
\r
21 import com.ibm.icu.text.UnicodeSet;
\r
22 import com.ibm.icu.text.UnicodeSetIterator;
\r
23 import com.ibm.icu.util.RangeValueIterator;
\r
24 import com.ibm.icu.util.ValueIterator;
\r
25 import com.ibm.icu.util.VersionInfo;
\r
26 import com.ibm.icu.impl.UCharacterName;
\r
27 import com.ibm.icu.impl.Utility;
\r
28 import com.ibm.icu.impl.USerializedSet;
\r
29 import com.ibm.icu.impl.NormalizerImpl;
\r
30 import com.ibm.icu.impl.UCharacterProperty;
\r
31 import java.io.BufferedReader;
\r
32 import java.util.Arrays;
\r
35 * Testing class for UCharacter
\r
36 * Mostly following the test cases for ICU
\r
37 * @author Syn Wee Quek
\r
38 * @since nov 04 2000
\r
40 public final class UCharacterTest extends TestFmwk
\r
42 // private variables =============================================
\r
45 * ICU4J data version number
\r
47 private final VersionInfo VERSION_ = VersionInfo.getInstance("5.1.0.0");
\r
49 // constructor ===================================================
\r
54 public UCharacterTest()
\r
58 // public methods ================================================
\r
60 public static void main(String[] arg)
\r
64 UCharacterTest test = new UCharacterTest();
\r
69 e.printStackTrace();
\r
74 * Testing the letter and number determination in UCharacter
\r
76 public void TestLetterNumber()
\r
78 for (int i = 0x0041; i < 0x005B; i ++)
\r
79 if (!UCharacter.isLetter(i))
\r
80 errln("FAIL \\u" + hex(i) + " expected to be a letter");
\r
82 for (int i = 0x0660; i < 0x066A; i ++)
\r
83 if (UCharacter.isLetter(i))
\r
84 errln("FAIL \\u" + hex(i) + " expected not to be a letter");
\r
86 for (int i = 0x0660; i < 0x066A; i ++)
\r
87 if (!UCharacter.isDigit(i))
\r
88 errln("FAIL \\u" + hex(i) + " expected to be a digit");
\r
90 for (int i = 0x0041; i < 0x005B; i ++)
\r
91 if (!UCharacter.isLetterOrDigit(i))
\r
92 errln("FAIL \\u" + hex(i) + " expected not to be a digit");
\r
94 for (int i = 0x0660; i < 0x066A; i ++)
\r
95 if (!UCharacter.isLetterOrDigit(i))
\r
96 errln("FAIL \\u" + hex(i) +
\r
97 "expected to be either a letter or a digit");
\r
100 * The following checks work only starting from Unicode 4.0.
\r
101 * Check the version number here.
\r
103 VersionInfo version = UCharacter.getUnicodeVersion();
\r
104 if(version.getMajor()<4 || version.equals(VersionInfo.getInstance(4, 0, 1))) {
\r
112 * Verify that exactly the digit characters have decimal digit values.
\r
113 * This assumption is used in the implementation of u_digit()
\r
114 * (which checks nt=de)
\r
115 * compared with the parallel java.lang.Character.digit()
\r
116 * (which checks Nd).
\r
118 * This was not true in Unicode 3.2 and earlier.
\r
119 * Unicode 4.0 fixed discrepancies.
\r
120 * Unicode 4.0.1 re-introduced problems in this area due to an
\r
121 * unintentionally incomplete last-minute change.
\r
123 String digitsPattern = "[:Nd:]";
\r
124 String decimalValuesPattern = "[:Numeric_Type=Decimal:]";
\r
126 UnicodeSet digits, decimalValues;
\r
128 digits= new UnicodeSet(digitsPattern);
\r
129 decimalValues=new UnicodeSet(decimalValuesPattern);
\r
132 compareUSets(digits, decimalValues, "[:Nd:]", "[:Numeric_Type=Decimal:]", true);
\r
138 * Tests for space determination in UCharacter
\r
140 public void TestSpaces()
\r
142 int spaces[] = {0x0020, 0x00a0, 0x2000, 0x2001, 0x2005};
\r
143 int nonspaces[] = {0x0061, 0x0062, 0x0063, 0x0064, 0x0074};
\r
144 int whitespaces[] = {0x2008, 0x2009, 0x200a, 0x001c, 0x000c /* ,0x200b */}; // 0x200b was "Zs" in Unicode 4.0, but it is "Cf" in Unicode 4.1
\r
145 int nonwhitespaces[] = {0x0061, 0x0062, 0x003c, 0x0028, 0x003f, 0x00a0, 0x2007, 0x202f, 0xfefe, 0x200b};
\r
147 int size = spaces.length;
\r
148 for (int i = 0; i < size; i ++)
\r
150 if (!UCharacter.isSpaceChar(spaces[i]))
\r
152 errln("FAIL \\u" + hex(spaces[i]) +
\r
153 " expected to be a space character");
\r
157 if (UCharacter.isSpaceChar(nonspaces[i]))
\r
159 errln("FAIL \\u" + hex(nonspaces[i]) +
\r
160 " expected not to be space character");
\r
164 if (!UCharacter.isWhitespace(whitespaces[i]))
\r
166 errln("FAIL \\u" + hex(whitespaces[i]) +
\r
167 " expected to be a white space character");
\r
170 if (UCharacter.isWhitespace(nonwhitespaces[i]))
\r
172 errln("FAIL \\u" + hex(nonwhitespaces[i]) +
\r
173 " expected not to be a space character");
\r
176 logln("Ok \\u" + hex(spaces[i]) + " and \\u" +
\r
177 hex(nonspaces[i]) + " and \\u" + hex(whitespaces[i]) +
\r
178 " and \\u" + hex(nonwhitespaces[i]));
\r
181 int rulewhitespace[] = {0x9, 0xd, 0x20, 0x85,
\r
182 0x200e, 0x200f, 0x2028, 0x2029};
\r
183 int nonrulewhitespace[] = {0x8, 0xe, 0x21, 0x86, 0xa0, 0xa1,
\r
184 0x1680, 0x1681, 0x180e, 0x180f,
\r
185 0x1FFF, 0x2000, 0x200a, 0x200b,
\r
186 0x2010, 0x202f, 0x2030, 0x205f,
\r
187 0x2060, 0x3000, 0x3001};
\r
188 for (int i = 0; i < rulewhitespace.length; i ++) {
\r
189 if (!UCharacterProperty.isRuleWhiteSpace(rulewhitespace[i])) {
\r
190 errln("\\u" + Utility.hex(rulewhitespace[i], 4)
\r
191 + " expected to be a rule white space");
\r
194 for (int i = 0; i < nonrulewhitespace.length; i ++) {
\r
195 if (UCharacterProperty.isRuleWhiteSpace(nonrulewhitespace[i])) {
\r
196 errln("\\u" + Utility.hex(nonrulewhitespace[i], 4)
\r
197 + " expected to be a non rule white space");
\r
203 * Tests for defined and undefined characters
\r
205 public void TestDefined()
\r
207 int undefined[] = {0xfff1, 0xfff7, 0xfa6b};
\r
208 int defined[] = {0x523E, 0x004f88, 0x00fffd};
\r
210 int size = undefined.length;
\r
211 for (int i = 0; i < size; i ++)
\r
213 if (UCharacter.isDefined(undefined[i]))
\r
215 errln("FAIL \\u" + hex(undefined[i]) +
\r
216 " expected not to be defined");
\r
219 if (!UCharacter.isDefined(defined[i]))
\r
221 errln("FAIL \\u" + hex(defined[i]) + " expected defined");
\r
228 * Tests for base characters and their cellwidth
\r
230 public void TestBase()
\r
232 int base[] = {0x0061, 0x000031, 0x0003d2};
\r
233 int nonbase[] = {0x002B, 0x000020, 0x00203B};
\r
234 int size = base.length;
\r
235 for (int i = 0; i < size; i ++)
\r
237 if (UCharacter.isBaseForm(nonbase[i]))
\r
239 errln("FAIL \\u" + hex(nonbase[i]) +
\r
240 " expected not to be a base character");
\r
243 if (!UCharacter.isBaseForm(base[i]))
\r
245 errln("FAIL \\u" + hex(base[i]) +
\r
246 " expected to be a base character");
\r
253 * Tests for digit characters
\r
255 public void TestDigits()
\r
257 int digits[] = {0x0030, 0x000662, 0x000F23, 0x000ED5, 0x002160};
\r
259 //special characters not in the properties table
\r
260 int digits2[] = {0x3007, 0x004e00, 0x004e8c, 0x004e09, 0x0056d8,
\r
261 0x004e94, 0x00516d, 0x4e03, 0x00516b, 0x004e5d};
\r
262 int nondigits[] = {0x0010, 0x000041, 0x000122, 0x0068FE};
\r
264 int digitvalues[] = {0, 2, 3, 5, 1};
\r
265 int digitvalues2[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9};
\r
267 int size = digits.length;
\r
268 for (int i = 0; i < size; i ++) {
\r
269 if (UCharacter.isDigit(digits[i]) &&
\r
270 UCharacter.digit(digits[i]) != digitvalues[i])
\r
272 errln("FAIL \\u" + hex(digits[i]) +
\r
273 " expected digit with value " + digitvalues[i]);
\r
277 size = nondigits.length;
\r
278 for (int i = 0; i < size; i ++)
\r
279 if (UCharacter.isDigit(nondigits[i]))
\r
281 errln("FAIL \\u" + hex(nondigits[i]) + " expected nondigit");
\r
285 size = digits2.length;
\r
286 for (int i = 0; i < 10; i ++) {
\r
287 if (UCharacter.isDigit(digits2[i]) &&
\r
288 UCharacter.digit(digits2[i]) != digitvalues2[i])
\r
290 errln("FAIL \\u" + hex(digits2[i]) +
\r
291 " expected digit with value " + digitvalues2[i]);
\r
298 * Tests for numeric characters
\r
300 public void TestNumeric()
\r
302 if (UCharacter.getNumericValue(0x00BC) != -2) {
\r
303 errln("Numeric value of 0x00BC expected to be -2");
\r
306 for (int i = '0'; i < '9'; i ++) {
\r
307 int n1 = UCharacter.getNumericValue(i);
\r
308 double n2 = UCharacter.getUnicodeNumericValue(i);
\r
309 if (n1 != n2 || n1 != (i - '0')) {
\r
310 errln("Numeric value of " + (char)i + " expected to be " +
\r
314 for (int i = 'A'; i < 'F'; i ++) {
\r
315 int n1 = UCharacter.getNumericValue(i);
\r
316 double n2 = UCharacter.getUnicodeNumericValue(i);
\r
317 if (n2 != UCharacter.NO_NUMERIC_VALUE || n1 != (i - 'A' + 10)) {
\r
318 errln("Numeric value of " + (char)i + " expected to be " +
\r
322 for (int i = 0xFF21; i < 0xFF26; i ++) {
\r
323 // testing fullwidth Latin characters A-F
\r
324 int n1 = UCharacter.getNumericValue(i);
\r
325 double n2 = UCharacter.getUnicodeNumericValue(i);
\r
326 if (n2 != UCharacter.NO_NUMERIC_VALUE || n1 != (i - 0xFF21 + 10)) {
\r
327 errln("Numeric value of " + (char)i + " expected to be " +
\r
328 (i - 0xFF21 + 10));
\r
331 // testing han numbers
\r
332 int han[] = {0x96f6, 0, 0x58f9, 1, 0x8cb3, 2, 0x53c3, 3,
\r
333 0x8086, 4, 0x4f0d, 5, 0x9678, 6, 0x67d2, 7,
\r
334 0x634c, 8, 0x7396, 9, 0x5341, 10, 0x62fe, 10,
\r
335 0x767e, 100, 0x4f70, 100, 0x5343, 1000, 0x4edf, 1000,
\r
336 0x824c, 10000, 0x5104, 100000000};
\r
337 for (int i = 0; i < han.length; i += 2) {
\r
338 if (UCharacter.getHanNumericValue(han[i]) != han[i + 1]) {
\r
339 errln("Numeric value of \\u" +
\r
340 Integer.toHexString(han[i]) + " expected to be " +
\r
347 * Tests for version
\r
349 public void TestVersion()
\r
351 if (!UCharacter.getUnicodeVersion().equals(VERSION_))
\r
352 errln("FAIL expected: " + VERSION_ + "got: " + UCharacter.getUnicodeVersion());
\r
356 * Tests for control characters
\r
358 public void TestISOControl()
\r
360 int control[] = {0x001b, 0x000097, 0x000082};
\r
361 int noncontrol[] = {0x61, 0x000031, 0x0000e2};
\r
363 int size = control.length;
\r
364 for (int i = 0; i < size; i ++)
\r
366 if (!UCharacter.isISOControl(control[i]))
\r
368 errln("FAIL 0x" + Integer.toHexString(control[i]) +
\r
369 " expected to be a control character");
\r
372 if (UCharacter.isISOControl(noncontrol[i]))
\r
374 errln("FAIL 0x" + Integer.toHexString(noncontrol[i]) +
\r
375 " expected to be not a control character");
\r
379 logln("Ok 0x" + Integer.toHexString(control[i]) + " and 0x" +
\r
380 Integer.toHexString(noncontrol[i]));
\r
385 * Test Supplementary
\r
387 public void TestSupplementary()
\r
389 for (int i = 0; i < 0x10000; i ++) {
\r
390 if (UCharacter.isSupplementary(i)) {
\r
391 errln("Codepoint \\u" + Integer.toHexString(i) +
\r
392 " is not supplementary");
\r
395 for (int i = 0x10000; i < 0x10FFFF; i ++) {
\r
396 if (!UCharacter.isSupplementary(i)) {
\r
397 errln("Codepoint \\u" + Integer.toHexString(i) +
\r
398 " is supplementary");
\r
406 public void TestMirror()
\r
408 if (!(UCharacter.isMirrored(0x28) && UCharacter.isMirrored(0xbb) &&
\r
409 UCharacter.isMirrored(0x2045) && UCharacter.isMirrored(0x232a)
\r
410 && !UCharacter.isMirrored(0x27) &&
\r
411 !UCharacter.isMirrored(0x61) && !UCharacter.isMirrored(0x284)
\r
412 && !UCharacter.isMirrored(0x3400))) {
\r
413 errln("isMirrored() does not work correctly");
\r
416 if (!(UCharacter.getMirror(0x3c) == 0x3e &&
\r
417 UCharacter.getMirror(0x5d) == 0x5b &&
\r
418 UCharacter.getMirror(0x208d) == 0x208e &&
\r
419 UCharacter.getMirror(0x3017) == 0x3016 &&
\r
421 UCharacter.getMirror(0xbb) == 0xab &&
\r
422 UCharacter.getMirror(0x2215) == 0x29F5 &&
\r
423 UCharacter.getMirror(0x29F5) == 0x2215 && /* large delta between the code points */
\r
425 UCharacter.getMirror(0x2e) == 0x2e &&
\r
426 UCharacter.getMirror(0x6f3) == 0x6f3 &&
\r
427 UCharacter.getMirror(0x301c) == 0x301c &&
\r
428 UCharacter.getMirror(0xa4ab) == 0xa4ab &&
\r
430 /* see Unicode Corrigendum #6 at http://www.unicode.org/versions/corrigendum6.html */
\r
431 UCharacter.getMirror(0x2018) == 0x2018 &&
\r
432 UCharacter.getMirror(0x201b) == 0x201b &&
\r
433 UCharacter.getMirror(0x301d) == 0x301d)) {
\r
434 errln("getMirror() does not work correctly");
\r
437 /* verify that Bidi_Mirroring_Glyph roundtrips */
\r
438 UnicodeSet set=new UnicodeSet("[:Bidi_Mirrored:]");
\r
439 UnicodeSetIterator iter=new UnicodeSetIterator(set);
\r
440 int start, end, c2, c3;
\r
441 while(iter.nextRange() && (start=iter.codepoint)>=0) {
\r
442 end=iter.codepointEnd;
\r
444 c2=UCharacter.getMirror(start);
\r
445 c3=UCharacter.getMirror(c2);
\r
447 errln("getMirror() does not roundtrip: U+"+hex(start)+"->U+"+hex(c2)+"->U+"+hex(c3));
\r
449 } while(++start<=end);
\r
452 // verify that Unicode Corrigendum #6 reverts mirrored status of the following
\r
453 if (UCharacter.isMirrored(0x2018) ||
\r
454 UCharacter.isMirrored(0x201d) ||
\r
455 UCharacter.isMirrored(0x201f) ||
\r
456 UCharacter.isMirrored(0x301e)) {
\r
457 errln("Unicode Corrigendum #6 conflict, one or more of 2018/201d/201f/301e has mirrored property");
\r
462 * Tests for printable characters
\r
464 public void TestPrint()
\r
466 int printable[] = {0x0042, 0x00005f, 0x002014};
\r
467 int nonprintable[] = {0x200c, 0x00009f, 0x00001b};
\r
469 int size = printable.length;
\r
470 for (int i = 0; i < size; i ++)
\r
472 if (!UCharacter.isPrintable(printable[i]))
\r
474 errln("FAIL \\u" + hex(printable[i]) +
\r
475 " expected to be a printable character");
\r
478 if (UCharacter.isPrintable(nonprintable[i]))
\r
480 errln("FAIL \\u" + hex(nonprintable[i]) +
\r
481 " expected not to be a printable character");
\r
484 logln("Ok \\u" + hex(printable[i]) + " and \\u" +
\r
485 hex(nonprintable[i]));
\r
488 // test all ISO 8 controls
\r
489 for (int ch = 0; ch <= 0x9f; ++ ch) {
\r
491 // skip ASCII graphic characters and continue with DEL
\r
494 if (UCharacter.isPrintable(ch)) {
\r
495 errln("Fail \\u" + hex(ch) +
\r
496 " is a ISO 8 control character hence not printable\n");
\r
500 /* test all Latin-1 graphic characters */
\r
501 for (int ch = 0x20; ch <= 0xff; ++ ch) {
\r
505 if (!UCharacter.isPrintable(ch)
\r
506 && ch != 0x00AD/* Unicode 4.0 changed the defintion of soft hyphen to be a Cf*/) {
\r
507 errln("Fail \\u" + hex(ch) +
\r
508 " is a Latin-1 graphic character\n");
\r
514 * Testing for identifier characters
\r
516 public void TestIdentifier()
\r
518 int unicodeidstart[] = {0x0250, 0x0000e2, 0x000061};
\r
519 int nonunicodeidstart[] = {0x2000, 0x00000a, 0x002019};
\r
520 int unicodeidpart[] = {0x005f, 0x000032, 0x000045};
\r
521 int nonunicodeidpart[] = {0x2030, 0x0000a3, 0x000020};
\r
522 int idignore[] = {0x0006, 0x0010, 0x206b};
\r
523 int nonidignore[] = {0x0075, 0x0000a3, 0x000061};
\r
525 int size = unicodeidstart.length;
\r
526 for (int i = 0; i < size; i ++)
\r
528 if (!UCharacter.isUnicodeIdentifierStart(unicodeidstart[i]))
\r
530 errln("FAIL \\u" + hex(unicodeidstart[i]) +
\r
531 " expected to be a unicode identifier start character");
\r
534 if (UCharacter.isUnicodeIdentifierStart(nonunicodeidstart[i]))
\r
536 errln("FAIL \\u" + hex(nonunicodeidstart[i]) +
\r
537 " expected not to be a unicode identifier start " +
\r
541 if (!UCharacter.isUnicodeIdentifierPart(unicodeidpart[i]))
\r
543 errln("FAIL \\u" + hex(unicodeidpart[i]) +
\r
544 " expected to be a unicode identifier part character");
\r
547 if (UCharacter.isUnicodeIdentifierPart(nonunicodeidpart[i]))
\r
549 errln("FAIL \\u" + hex(nonunicodeidpart[i]) +
\r
550 " expected not to be a unicode identifier part " +
\r
554 if (!UCharacter.isIdentifierIgnorable(idignore[i]))
\r
556 errln("FAIL \\u" + hex(idignore[i]) +
\r
557 " expected to be a ignorable unicode character");
\r
560 if (UCharacter.isIdentifierIgnorable(nonidignore[i]))
\r
562 errln("FAIL \\u" + hex(nonidignore[i]) +
\r
563 " expected not to be a ignorable unicode character");
\r
566 logln("Ok \\u" + hex(unicodeidstart[i]) + " and \\u" +
\r
567 hex(nonunicodeidstart[i]) + " and \\u" +
\r
568 hex(unicodeidpart[i]) + " and \\u" +
\r
569 hex(nonunicodeidpart[i]) + " and \\u" +
\r
570 hex(idignore[i]) + " and \\u" + hex(nonidignore[i]));
\r
575 * Tests for the character types, direction.<br>
\r
576 * This method reads in UnicodeData.txt file for testing purposes. A
\r
577 * default path is provided relative to the src path, however the user
\r
578 * could set a system property to change the directory path.<br>
\r
579 * e.g. java -DUnicodeData="data_directory_path"
\r
580 * com.ibm.icu.dev.test.lang.UCharacterTest
\r
582 public void TestUnicodeData()
\r
584 // these are the 2-char category types used in the UnicodeData file
\r
585 final String TYPE =
\r
586 "LuLlLtLmLoMnMeMcNdNlNoZsZlZpCcCfCoCsPdPsPePcPoSmScSkSoPiPf";
\r
588 // direction types used in the UnicodeData file
\r
589 // padded by spaces to make each type size 4
\r
591 "L R EN ES ET AN CS B S WS ON LRE LRO AL RLE RLO PDF NSM BN ";
\r
593 final int LASTUNICODECHAR = 0xFFFD;
\r
601 BufferedReader input = TestUtil.getDataReader(
\r
602 "unicode/UnicodeData.txt");
\r
605 while (ch != LASTUNICODECHAR)
\r
607 String s = input.readLine();
\r
608 if(s.length()<4 || s.startsWith("#")) {
\r
611 // geting the unicode character, its type and its direction
\r
612 ch = Integer.parseInt(s.substring(0, 4), 16);
\r
613 index = s.indexOf(';', 5);
\r
614 String t = s.substring(index + 1, index + 3);
\r
616 int oldindex = index;
\r
617 index = s.indexOf(';', index);
\r
618 int cc = Integer.parseInt(s.substring(oldindex, index));
\r
619 oldindex = index + 1;
\r
620 index = s.indexOf(';', oldindex);
\r
621 String d = s.substring(oldindex, index);
\r
623 for (int i = 0; i < 6; i ++) {
\r
624 index = s.indexOf(';', index + 1);
\r
625 // skipping to the 11th field
\r
628 oldindex = index + 1;
\r
629 index = s.indexOf(';', oldindex);
\r
630 String isocomment = s.substring(oldindex, index);
\r
632 oldindex = index + 1;
\r
633 index = s.indexOf(';', oldindex);
\r
634 String upper = s.substring(oldindex, index);
\r
636 oldindex = index + 1;
\r
637 index = s.indexOf(';', oldindex);
\r
638 String lower = s.substring(oldindex, index);
\r
639 // titlecase last element
\r
640 oldindex = index + 1;
\r
641 String title = s.substring(oldindex);
\r
643 // testing the category
\r
644 // we override the general category of some control
\r
646 type = TYPE.indexOf(t);
\r
650 type = (type >> 1) + 1;
\r
651 if (UCharacter.getType(ch) != type)
\r
653 errln("FAIL \\u" + hex(ch) + " expected type " +
\r
658 if (UCharacter.getIntPropertyValue(ch,
\r
659 UProperty.GENERAL_CATEGORY_MASK) != (1 << type)) {
\r
660 errln("error: getIntPropertyValue(\\u" +
\r
661 Integer.toHexString(ch) +
\r
662 ", UProperty.GENERAL_CATEGORY_MASK) != " +
\r
663 "getMask(getType(ch))");
\r
666 // testing combining class
\r
667 if (UCharacter.getCombiningClass(ch) != cc)
\r
669 errln("FAIL \\u" + hex(ch) + " expected combining " +
\r
674 // testing the direction
\r
675 if (d.length() == 1)
\r
678 dir = DIR.indexOf(d) >> 2;
\r
679 if (UCharacter.getDirection(ch) != dir)
\r
681 errln("FAIL \\u" + hex(ch) +
\r
682 " expected direction " + dir + " but got " +
\r
683 UCharacter.getDirection(ch));
\r
687 byte bdir = (byte)dir;
\r
688 if (UCharacter.getDirectionality(ch) != bdir)
\r
690 errln("FAIL \\u" + hex(ch) +
\r
691 " expected directionality " + bdir + " but got " +
\r
692 UCharacter.getDirectionality(ch));
\r
696 // testing iso comment
\r
698 String comment = UCharacter.getISOComment(ch);
\r
699 if (comment == null) {
\r
702 if (!comment.equals(isocomment)) {
\r
703 errln("FAIL \\u" + hex(ch) +
\r
704 " expected iso comment " + isocomment);
\r
707 }catch(Exception e){
\r
708 if(e.getMessage().indexOf("unames.icu") >= 0){
\r
716 if (upper.length() > 0) {
\r
717 tempchar = Integer.parseInt(upper, 16);
\r
719 if (UCharacter.toUpperCase(ch) != tempchar) {
\r
720 errln("FAIL \\u" + Utility.hex(ch, 4)
\r
721 + " expected uppercase \\u"
\r
722 + Utility.hex(tempchar, 4));
\r
726 if (lower.length() > 0) {
\r
727 tempchar = Integer.parseInt(lower, 16);
\r
729 if (UCharacter.toLowerCase(ch) != tempchar) {
\r
730 errln("FAIL \\u" + Utility.hex(ch, 4)
\r
731 + " expected lowercase \\u"
\r
732 + Utility.hex(tempchar, 4));
\r
736 if (title.length() > 0) {
\r
737 tempchar = Integer.parseInt(title, 16);
\r
739 if (UCharacter.toTitleCase(ch) != tempchar) {
\r
740 errln("FAIL \\u" + Utility.hex(ch, 4)
\r
741 + " expected titlecase \\u"
\r
742 + Utility.hex(tempchar, 4));
\r
748 warnln("Could not find unames.icu");
\r
751 catch (Exception e)
\r
753 e.printStackTrace();
\r
757 if (UCharacter.UnicodeBlock.of(0x0041)
\r
758 != UCharacter.UnicodeBlock.BASIC_LATIN
\r
759 || UCharacter.getIntPropertyValue(0x41, UProperty.BLOCK)
\r
760 != UCharacter.UnicodeBlock.BASIC_LATIN.getID()) {
\r
761 errln("UCharacter.UnicodeBlock.of(\\u0041) property failed! "
\r
763 + UCharacter.UnicodeBlock.BASIC_LATIN.getID() + " got "
\r
764 + UCharacter.UnicodeBlock.of(0x0041));
\r
767 // sanity check on repeated properties
\r
768 for (ch = 0xfffe; ch <= 0x10ffff;) {
\r
769 type = UCharacter.getType(ch);
\r
770 if (UCharacter.getIntPropertyValue(ch,
\r
771 UProperty.GENERAL_CATEGORY_MASK)
\r
773 errln("error: UCharacter.getIntPropertyValue(\\u"
\r
774 + Integer.toHexString(ch)
\r
775 + ", UProperty.GENERAL_CATEGORY_MASK) != "
\r
776 + "getMask(getType())");
\r
778 if (type != UCharacterCategory.UNASSIGNED) {
\r
779 errln("error: UCharacter.getType(\\u" + Utility.hex(ch, 4)
\r
780 + " != UCharacterCategory.UNASSIGNED (returns "
\r
781 + UCharacterCategory.toString(UCharacter.getType(ch))
\r
784 if ((ch & 0xffff) == 0xfffe) {
\r
792 // test that PUA is not "unassigned"
\r
793 for(ch = 0xe000; ch <= 0x10fffd;) {
\r
794 type = UCharacter.getType(ch);
\r
795 if (UCharacter.getIntPropertyValue(ch,
\r
796 UProperty.GENERAL_CATEGORY_MASK)
\r
798 errln("error: UCharacter.getIntPropertyValue(\\u"
\r
799 + Integer.toHexString(ch)
\r
800 + ", UProperty.GENERAL_CATEGORY_MASK) != "
\r
801 + "getMask(getType())");
\r
804 if (type == UCharacterCategory.UNASSIGNED) {
\r
805 errln("error: UCharacter.getType(\\u"
\r
806 + Utility.hex(ch, 4)
\r
807 + ") == UCharacterCategory.UNASSIGNED");
\r
809 else if (type != UCharacterCategory.PRIVATE_USE) {
\r
810 logln("PUA override: UCharacter.getType(\\u"
\r
811 + Utility.hex(ch, 4) + ")=" + type);
\r
813 if (ch == 0xf8ff) {
\r
816 else if (ch == 0xffffd) {
\r
827 * Test for the character names
\r
829 public void TestNames()
\r
832 int length = UCharacterName.getInstance().getMaxCharNameLength();
\r
833 if (length < 83) { // Unicode 3.2 max char name length
\r
834 errln("getMaxCharNameLength()=" + length + " is too short");
\r
836 // ### TODO same tests for max ISO comment length as for max name length
\r
838 int c[] = {0x0061, //LATIN SMALL LETTER A
\r
839 0x000284, //LATIN SMALL LETTER DOTLESS J WITH STROKE AND HOOK
\r
840 0x003401, //CJK UNIFIED IDEOGRAPH-3401
\r
841 0x007fed, //CJK UNIFIED IDEOGRAPH-7FED
\r
842 0x00ac00, //HANGUL SYLLABLE GA
\r
843 0x00d7a3, //HANGUL SYLLABLE HIH
\r
844 0x00d800, 0x00dc00, //LINEAR B SYLLABLE B008 A
\r
845 0xff08, //FULLWIDTH LEFT PARENTHESIS
\r
846 0x00ffe5, //FULLWIDTH YEN SIGN
\r
848 0x0023456 //CJK UNIFIED IDEOGRAPH-23456
\r
851 "LATIN SMALL LETTER A",
\r
852 "LATIN SMALL LETTER DOTLESS J WITH STROKE AND HOOK",
\r
853 "CJK UNIFIED IDEOGRAPH-3401",
\r
854 "CJK UNIFIED IDEOGRAPH-7FED",
\r
855 "HANGUL SYLLABLE GA",
\r
856 "HANGUL SYLLABLE HIH",
\r
859 "FULLWIDTH LEFT PARENTHESIS",
\r
860 "FULLWIDTH YEN SIGN",
\r
862 "CJK UNIFIED IDEOGRAPH-23456"
\r
864 String oldname[] = {"", "LATIN SMALL LETTER DOTLESS J BAR HOOK", "",
\r
866 "", "", "", "", "FULLWIDTH OPENING PARENTHESIS", "",
\r
868 String extendedname[] = {"LATIN SMALL LETTER A",
\r
869 "LATIN SMALL LETTER DOTLESS J WITH STROKE AND HOOK",
\r
870 "CJK UNIFIED IDEOGRAPH-3401",
\r
871 "CJK UNIFIED IDEOGRAPH-7FED",
\r
872 "HANGUL SYLLABLE GA",
\r
873 "HANGUL SYLLABLE HIH",
\r
874 "<lead surrogate-D800>",
\r
875 "<trail surrogate-DC00>",
\r
876 "FULLWIDTH LEFT PARENTHESIS",
\r
877 "FULLWIDTH YEN SIGN",
\r
878 "<noncharacter-FFFF>",
\r
879 "CJK UNIFIED IDEOGRAPH-23456"};
\r
881 int size = c.length;
\r
885 for (int i = 0; i < size; i ++)
\r
887 // modern Unicode character name
\r
888 str = UCharacter.getName(c[i]);
\r
889 if ((str == null && name[i].length() > 0) ||
\r
890 (str != null && !str.equals(name[i])))
\r
892 errln("FAIL \\u" + hex(c[i]) + " expected name " +
\r
897 // 1.0 Unicode character name
\r
898 str = UCharacter.getName1_0(c[i]);
\r
899 if ((str == null && oldname[i].length() > 0) ||
\r
900 (str != null && !str.equals(oldname[i])))
\r
902 errln("FAIL \\u" + hex(c[i]) + " expected 1.0 name " +
\r
907 // extended character name
\r
908 str = UCharacter.getExtendedName(c[i]);
\r
909 if (str == null || !str.equals(extendedname[i]))
\r
911 errln("FAIL \\u" + hex(c[i]) + " expected extended name " +
\r
916 // retrieving unicode character from modern name
\r
917 uc = UCharacter.getCharFromName(name[i]);
\r
918 if (uc != c[i] && name[i].length() != 0)
\r
920 errln("FAIL " + name[i] + " expected character \\u" +
\r
925 //retrieving unicode character from 1.0 name
\r
926 uc = UCharacter.getCharFromName1_0(oldname[i]);
\r
927 if (uc != c[i] && oldname[i].length() != 0)
\r
929 errln("FAIL " + oldname[i] + " expected 1.0 character \\u" +
\r
934 //retrieving unicode character from extended name
\r
935 uc = UCharacter.getCharFromExtendedName(extendedname[i]);
\r
936 if (uc != c[i] && i != 0 && (i == 1 || i == 6))
\r
938 errln("FAIL " + extendedname[i] +
\r
939 " expected extended character \\u" + hex(c[i]));
\r
944 // test getName works with mixed-case names (new in 2.0)
\r
945 if (0x61 != UCharacter.getCharFromName("LATin smALl letTER A")) {
\r
946 errln("FAIL: 'LATin smALl letTER A' should result in character "
\r
950 if (getInclusion() >= 5) {
\r
951 // extra testing different from icu
\r
952 for (int i = UCharacter.MIN_VALUE; i < UCharacter.MAX_VALUE; i ++)
\r
954 str = UCharacter.getName(i);
\r
955 if (str != null && UCharacter.getCharFromName(str) != i)
\r
957 errln("FAIL \\u" + hex(i) + " " + str +
\r
958 " retrieval of name and vice versa" );
\r
964 // Test getCharNameCharacters
\r
965 if (getInclusion() >= 10) {
\r
966 boolean map[] = new boolean[256];
\r
968 UnicodeSet set = new UnicodeSet(1, 0); // empty set
\r
969 UnicodeSet dumb = new UnicodeSet(1, 0); // empty set
\r
971 // uprv_getCharNameCharacters() will likely return more lowercase
\r
972 // letters than actual character names contain because
\r
973 // it includes all the characters in lowercased names of
\r
974 // general categories, for the full possible set of extended names.
\r
975 UCharacterName.getInstance().getCharNameCharacters(set);
\r
977 // build set the dumb (but sure-fire) way
\r
978 Arrays.fill(map, false);
\r
981 for (int cp = 0; cp < 0x110000; ++ cp) {
\r
982 String n = UCharacter.getExtendedName(cp);
\r
983 int len = n.length();
\r
984 if (len > maxLength) {
\r
988 for (int i = 0; i < len; ++ i) {
\r
989 char ch = n.charAt(i);
\r
990 if (!map[ch & 0xff]) {
\r
992 map[ch & 0xff] = true;
\r
997 length = UCharacterName.getInstance().getMaxCharNameLength();
\r
998 if (length != maxLength) {
\r
999 errln("getMaxCharNameLength()=" + length
\r
1000 + " differs from the maximum length " + maxLength
\r
1001 + " of all extended names");
\r
1004 // compare the sets. Where is my uset_equals?!!
\r
1005 boolean ok = true;
\r
1006 for (int i = 0; i < 256; ++ i) {
\r
1007 if (set.contains(i) != dumb.contains(i)) {
\r
1008 if (0x61 <= i && i <= 0x7a // a-z
\r
1009 && set.contains(i) && !dumb.contains(i)) {
\r
1010 // ignore lowercase a-z that are in set but not in dumb
\r
1020 String pattern1 = set.toPattern(true);
\r
1021 String pattern2 = dumb.toPattern(true);
\r
1024 errln("FAIL: getCharNameCharacters() returned " + pattern1
\r
1025 + " expected " + pattern2
\r
1026 + " (too many lowercase a-z are ok)");
\r
1028 logln("Ok: getCharNameCharacters() returned " + pattern1);
\r
1031 // improve code coverage
\r
1032 String expected = "LATIN SMALL LETTER A|LATIN SMALL LETTER DOTLESS J WITH STROKE AND HOOK|"+
\r
1033 "CJK UNIFIED IDEOGRAPH-3401|CJK UNIFIED IDEOGRAPH-7FED|HANGUL SYLLABLE GA|"+
\r
1034 "HANGUL SYLLABLE HIH|LINEAR B SYLLABLE B008 A|FULLWIDTH LEFT PARENTHESIS|"+
\r
1035 "FULLWIDTH YEN SIGN|"+
\r
1036 "null|"+ // getName returns null because 0xFFFF does not have a name, but has an extended name!
\r
1037 "CJK UNIFIED IDEOGRAPH-23456";
\r
1038 String separator= "|";
\r
1039 String source = Utility.valueOf(c);
\r
1040 String result = UCharacter.getName(source, separator);
\r
1041 if(!result.equals(expected)){
\r
1042 errln("UCharacter.getName did not return the expected result.\n\t Expected: "+ expected+"\n\t Got: "+ result);
\r
1045 }catch(IllegalArgumentException e){
\r
1046 if(e.getMessage().indexOf("unames.icu") >= 0){
\r
1047 warnln("Could not find unames.icu");
\r
1057 * Testing name iteration
\r
1059 public void TestNameIteration()throws Exception
\r
1062 ValueIterator iterator = UCharacter.getExtendedNameIterator();
\r
1063 ValueIterator.Element element = new ValueIterator.Element();
\r
1064 ValueIterator.Element old = new ValueIterator.Element();
\r
1065 // testing subrange
\r
1066 iterator.setRange(-10, -5);
\r
1067 if (iterator.next(element)) {
\r
1068 errln("Fail, expected iterator to return false when range is set outside the meaningful range");
\r
1070 iterator.setRange(0x110000, 0x111111);
\r
1071 if (iterator.next(element)) {
\r
1072 errln("Fail, expected iterator to return false when range is set outside the meaningful range");
\r
1075 iterator.setRange(50, 10);
\r
1076 errln("Fail, expected exception when encountered invalid range");
\r
1077 } catch (Exception e) {
\r
1080 iterator.setRange(-10, 10);
\r
1081 if (!iterator.next(element) || element.integer != 0) {
\r
1082 errln("Fail, expected iterator to return 0 when range start limit is set outside the meaningful range");
\r
1085 iterator.setRange(0x10FFFE, 0x200000);
\r
1087 while (iterator.next(element)) {
\r
1088 last = element.integer;
\r
1090 if (last != 0x10FFFF) {
\r
1091 errln("Fail, expected iterator to return 0x10FFFF when range end limit is set outside the meaningful range");
\r
1094 iterator = UCharacter.getNameIterator();
\r
1095 iterator.setRange(0xF, 0x45);
\r
1096 while (iterator.next(element)) {
\r
1097 if (element.integer <= old.integer) {
\r
1098 errln("FAIL next returned a less codepoint \\u" +
\r
1099 Integer.toHexString(element.integer) + " than \\u" +
\r
1100 Integer.toHexString(old.integer));
\r
1103 if (!UCharacter.getName(element.integer).equals(element.value))
\r
1105 errln("FAIL next codepoint \\u" +
\r
1106 Integer.toHexString(element.integer) +
\r
1107 " does not have the expected name " +
\r
1108 UCharacter.getName(element.integer) +
\r
1109 " instead have the name " + (String)element.value);
\r
1112 old.integer = element.integer;
\r
1116 iterator.next(element);
\r
1117 if (element.integer != 0x20) {
\r
1118 errln("FAIL reset in iterator");
\r
1121 iterator.setRange(0, 0x110000);
\r
1123 while (iterator.next(element)) {
\r
1124 if (element.integer != 0 && element.integer <= old.integer) {
\r
1125 errln("FAIL next returned a less codepoint \\u" +
\r
1126 Integer.toHexString(element.integer) + " than \\u" +
\r
1127 Integer.toHexString(old.integer));
\r
1130 if (!UCharacter.getName(element.integer).equals(element.value))
\r
1132 errln("FAIL next codepoint \\u" +
\r
1133 Integer.toHexString(element.integer) +
\r
1134 " does not have the expected name " +
\r
1135 UCharacter.getName(element.integer) +
\r
1136 " instead have the name " + (String)element.value);
\r
1139 for (int i = old.integer + 1; i < element.integer; i ++) {
\r
1140 if (UCharacter.getName(i) != null) {
\r
1141 errln("FAIL between codepoints are not null \\u" +
\r
1142 Integer.toHexString(old.integer) + " and " +
\r
1143 Integer.toHexString(element.integer) + " has " +
\r
1144 Integer.toHexString(i) + " with a name " +
\r
1145 UCharacter.getName(i));
\r
1149 old.integer = element.integer;
\r
1152 iterator = UCharacter.getExtendedNameIterator();
\r
1154 while (iterator.next(element)) {
\r
1155 if (element.integer != 0 && element.integer != old.integer) {
\r
1156 errln("FAIL next returned a codepoint \\u" +
\r
1157 Integer.toHexString(element.integer) +
\r
1158 " different from \\u" +
\r
1159 Integer.toHexString(old.integer));
\r
1162 if (!UCharacter.getExtendedName(element.integer).equals(
\r
1164 errln("FAIL next codepoint \\u" +
\r
1165 Integer.toHexString(element.integer) +
\r
1166 " name should be "
\r
1167 + UCharacter.getExtendedName(element.integer) +
\r
1168 " instead of " + (String)element.value);
\r
1173 iterator = UCharacter.getName1_0Iterator();
\r
1175 while (iterator.next(element)) {
\r
1176 logln(Integer.toHexString(element.integer) + " " +
\r
1177 (String)element.value);
\r
1178 if (element.integer != 0 && element.integer <= old.integer) {
\r
1179 errln("FAIL next returned a less codepoint \\u" +
\r
1180 Integer.toHexString(element.integer) + " than \\u" +
\r
1181 Integer.toHexString(old.integer));
\r
1184 if (!element.value.equals(UCharacter.getName1_0(
\r
1185 element.integer))) {
\r
1186 errln("FAIL next codepoint \\u" +
\r
1187 Integer.toHexString(element.integer) +
\r
1188 " name cannot be null");
\r
1191 for (int i = old.integer + 1; i < element.integer; i ++) {
\r
1192 if (UCharacter.getName1_0(i) != null) {
\r
1193 errln("FAIL between codepoints are not null \\u" +
\r
1194 Integer.toHexString(old.integer) + " and " +
\r
1195 Integer.toHexString(element.integer) + " has " +
\r
1196 Integer.toHexString(i) + " with a name " +
\r
1197 UCharacter.getName1_0(i));
\r
1201 old.integer = element.integer;
\r
1203 } catch(Exception e){
\r
1204 // !!! wouldn't preflighting be simpler? This looks like
\r
1205 // it is effectively doing that. It seems that for every
\r
1206 // true error the code will call errln, which will throw the error, which
\r
1207 // this will catch, which this will then rethrow the error. Just seems
\r
1209 if(e.getMessage().indexOf("unames.icu") >= 0){
\r
1210 warnln("Could not find unames.icu");
\r
1212 errln(e.getMessage());
\r
1218 * Testing the for illegal characters
\r
1220 public void TestIsLegal()
\r
1222 int illegal[] = {0xFFFE, 0x00FFFF, 0x005FFFE, 0x005FFFF, 0x0010FFFE,
\r
1223 0x0010FFFF, 0x110000, 0x00FDD0, 0x00FDDF, 0x00FDE0,
\r
1224 0x00FDEF, 0xD800, 0xDC00, -1};
\r
1225 int legal[] = {0x61, 0x00FFFD, 0x0010000, 0x005FFFD, 0x0060000,
\r
1226 0x0010FFFD, 0xFDCF, 0x00FDF0};
\r
1227 for (int count = 0; count < illegal.length; count ++) {
\r
1228 if (UCharacter.isLegal(illegal[count])) {
\r
1229 errln("FAIL \\u" + hex(illegal[count]) +
\r
1230 " is not a legal character");
\r
1234 for (int count = 0; count < legal.length; count ++) {
\r
1235 if (!UCharacter.isLegal(legal[count])) {
\r
1236 errln("FAIL \\u" + hex(legal[count]) +
\r
1237 " is a legal character");
\r
1241 String illegalStr = "This is an illegal string ";
\r
1242 String legalStr = "This is a legal string ";
\r
1244 for (int count = 0; count < illegal.length; count ++) {
\r
1245 StringBuffer str = new StringBuffer(illegalStr);
\r
1246 if (illegal[count] < 0x10000) {
\r
1247 str.append((char)illegal[count]);
\r
1250 char lead = UTF16.getLeadSurrogate(illegal[count]);
\r
1251 char trail = UTF16.getTrailSurrogate(illegal[count]);
\r
1253 str.append(trail);
\r
1255 if (UCharacter.isLegal(str.toString())) {
\r
1256 errln("FAIL " + hex(str.toString()) +
\r
1257 " is not a legal string");
\r
1261 for (int count = 0; count < legal.length; count ++) {
\r
1262 StringBuffer str = new StringBuffer(legalStr);
\r
1263 if (legal[count] < 0x10000) {
\r
1264 str.append((char)legal[count]);
\r
1267 char lead = UTF16.getLeadSurrogate(legal[count]);
\r
1268 char trail = UTF16.getTrailSurrogate(legal[count]);
\r
1270 str.append(trail);
\r
1272 if (!UCharacter.isLegal(str.toString())) {
\r
1273 errln("FAIL " + hex(str.toString()) + " is a legal string");
\r
1279 * Test getCodePoint
\r
// Exercises UCharacter.getCodePoint(char, char) and UCharacter.getCodePoint(char).
// NOTE(review): this extract skips several original lines (the embedded
// numbers jump, e.g. 1282-1283, 1288, 1291-1296, 1299); the declaration and
// update of `ch` and parts of two message strings are among the lines not
// visible here.
1281 public void TestCodePoint()
\r
// Pair every lead value 0xD800..0xDBFF with every trail value 0xDC00..0xDFFF
// and compare the combined code point with the running expectation `ch`.
1284 for (char i = 0xD800; i < 0xDC00; i ++) {
\r
1285 for (char j = 0xDC00; j <= 0xDFFF; j ++) {
\r
1286 if (UCharacter.getCodePoint(i, j) != ch) {
\r
1287 errln("Error getting codepoint for surrogate " +
\r
1289 + Integer.toHexString(i) + " \\u" +
\r
1290 Integer.toHexString(j));
\r
// 0xD7FF lies just below the lead range iterated above, so this pair is the
// invalid case; reaching the errln on the following line means the call
// returned normally instead of raising an exception.
1297 UCharacter.getCodePoint((char)0xD7ff, (char)0xDC00);
\r
1298 errln("Invalid surrogate characters should not form a " +
\r
// NOTE(review): this handler catches every Exception and no handler body is
// visible in this extract. If errln reports a failure by throwing, that
// failure would be caught here as well - confirm against TestFmwk.
1300 } catch(Exception e) {
\r
// Single-char overload, for 0x0000..0xFFFE (the loop bound excludes 0xFFFF).
1302 for (char i = 0; i < 0xFFFF; i++) {
\r
// 0xFFFE, the surrogate range 0xD800..0xDFFF and 0xFDD0..0xFDEF are the
// values for which getCodePoint(char) is expected to raise an exception.
1303 if (i == 0xFFFE ||
\r
1304 (i >= 0xD800 && i <= 0xDFFF) ||
\r
1305 (i >= 0xFDD0 && i <= 0xFDEF)) {
\r
1306 // not a character
\r
1308 UCharacter.getCodePoint(i);
\r
1309 errln("Not a character is not a valid codepoint");
\r
// NOTE(review): same catch-all pattern as above around an errln call.
1310 } catch (Exception e) {
\r
// Every other value must be returned unchanged.
1314 if (UCharacter.getCodePoint(i) != i) {
\r
1315 errln("A valid codepoint should return itself");
\r
1322 * This method is a little different from the type test in icu4c.
\r
1323 * But combined with testUnicodeData, they basically do the same thing.
\r
// Walks the ranges returned by UCharacter.getTypeIterator() and cross-checks
// each range against UCharacter.getType(), a few spot-check code points,
// the LINE_BREAK property and the default Bidi classes.
// NOTE(review): this extract skips a number of original lines (the embedded
// numbers jump); the declarations of `limit` and `shouldBeDir`, some loop
// increments and parts of two error messages are not visible here.
1325 public void TestIteration()
\r
// Type of the previously returned range; -1 means no range seen yet.
1328 int prevtype = -1;
\r
// Spot checks: {code point, general category expected for the range containing it}.
1330 int test[][]={{0x41, UCharacterCategory.UPPERCASE_LETTER},
\r
1331 {0x308, UCharacterCategory.NON_SPACING_MARK},
\r
1332 {0xfffe, UCharacterCategory.GENERAL_OTHER_TYPES},
\r
1333 {0xe0041, UCharacterCategory.FORMAT},
\r
1334 {0xeffff, UCharacterCategory.UNASSIGNED}};
\r
1336 // default Bidi classes for unassigned code points
\r
// Each row is {exclusive upper bound, direction}: in the loop further down a
// code point c is matched to the first row with c < row[0].
1337 int defaultBidi[][]={{ 0x0590, UCharacterDirection.LEFT_TO_RIGHT },
\r
1338 { 0x0600, UCharacterDirection.RIGHT_TO_LEFT },
\r
1339 { 0x07C0, UCharacterDirection.RIGHT_TO_LEFT_ARABIC },
\r
1340 { 0x0900, UCharacterDirection.RIGHT_TO_LEFT },
\r
1341 { 0xFB1D, UCharacterDirection.LEFT_TO_RIGHT },
\r
1342 { 0xFB50, UCharacterDirection.RIGHT_TO_LEFT },
\r
1343 { 0xFE00, UCharacterDirection.RIGHT_TO_LEFT_ARABIC },
\r
1344 { 0xFE70, UCharacterDirection.LEFT_TO_RIGHT },
\r
1345 { 0xFF00, UCharacterDirection.RIGHT_TO_LEFT_ARABIC },
\r
1346 { 0x10800, UCharacterDirection.LEFT_TO_RIGHT },
\r
1347 { 0x11000, UCharacterDirection.RIGHT_TO_LEFT },
\r
1348 { 0x110000, UCharacterDirection.LEFT_TO_RIGHT }};
\r
1350 RangeValueIterator iterator = UCharacter.getTypeIterator();
\r
1351 RangeValueIterator.Element result = new RangeValueIterator.Element();
\r
1352 while (iterator.next(result)) {
\r
// Ranges must be contiguous: each one starts where the previous one ended.
1353 if (result.start != limit) {
\r
1354 errln("UCharacterIteration failed: Ranges not continuous " +
\r
1355 "0x" + Integer.toHexString(result.start));
\r
1358 limit = result.limit;
\r
// Two consecutive ranges must not carry the same type value.
1359 if (result.value == prevtype) {
\r
1360 errln("Type of the next set of enumeration should be different");
\r
1362 prevtype = result.value;
\r
// Every code point inside the range must report the range's type via getType().
1364 for (int i = result.start; i < limit; i ++) {
\r
1365 int temptype = UCharacter.getType(i);
\r
1366 if (temptype != result.value) {
\r
1367 errln("UCharacterIteration failed: Codepoint \\u" +
\r
1368 Integer.toHexString(i) + " should be of type " +
\r
1369 temptype + " not " + result.value);
\r
// Spot checks: if a test[] code point falls inside this range, the range's
// value must equal the category listed for it.
1373 for (int i = 0; i < test.length; ++ i) {
\r
1374 if (result.start <= test[i][0] && test[i][0] < result.limit) {
\r
1375 if (result.value != test[i][1]) {
\r
1376 errln("error: getTypes() has range ["
\r
1377 + Integer.toHexString(result.start) + ", "
\r
1378 + Integer.toHexString(result.limit)
\r
1379 + "] with type " + result.value
\r
1381 + Integer.toHexString(test[i][0]) + ", "
\r
1382 + Integer.toHexString(test[i][1]));
\r
1387 // LineBreak.txt specifies:
\r
1388 // # - Assigned characters that are not listed explicitly are given the value
\r
1390 // # - Unassigned characters are given the value "XX".
\r
1392 // PUA characters are listed explicitly with "XX".
\r
1393 // Verify that no assigned character has "XX".
\r
1394 if (result.value != UCharacterCategory.UNASSIGNED
\r
1395 && result.value != UCharacterCategory.PRIVATE_USE) {
\r
1396 int c = result.start;
\r
1397 while (c < result.limit) {
\r
// A LINE_BREAK value of 0 is reported through logln rather than errln.
// NOTE(review): presumably informational only despite the "error" wording - confirm.
1398 if (0 == UCharacter.getIntPropertyValue(c,
\r
1399 UProperty.LINE_BREAK)) {
\r
1400 logln("error UProperty.LINE_BREAK(assigned \\u"
\r
1401 + Utility.hex(c, 4) + ")=XX");
\r
1408 * Verify default Bidi classes.
\r
1409 * For recent Unicode versions, see UCD.html.
\r
1411 * For older Unicode versions:
\r
1412 * See table 3-7 "Bidirectional Character Types" in UAX #9.
\r
1413 * http://www.unicode.org/reports/tr9/
\r
1415 * See also DerivedBidiClass.txt for Cn code points!
\r
1417 * Unicode 4.0.1/Public Review Issue #28 (http://www.unicode.org/review/resolved-pri.html)
\r
1418 * changed some default values.
\r
1419 * In particular, non-characters and unassigned Default Ignorable Code Points
\r
1420 * change from L to BN.
\r
1422 * UCD.html version 4.0.1 does not yet reflect these changes.
\r
// Only unassigned and private-use ranges are checked against defaultBidi.
1424 if (result.value == UCharacterCategory.UNASSIGNED
\r
1425 || result.value == UCharacterCategory.PRIVATE_USE) {
\r
1426 int c = result.start;
\r
1427 for (int i = 0; i < defaultBidi.length && c < result.limit;
\r
1429 if (c < defaultBidi[i][0]) {
\r
1430 while (c < result.limit && c < defaultBidi[i][0]) {
\r
1431 // TODO change to public UCharacter.isNonCharacter(c) once it's available
\r
// Noncharacters and default-ignorable code points are expected to be
// BOUNDARY_NEUTRAL; all others take the direction from the matching table row.
1432 if(com.ibm.icu.impl.UCharacterUtility.isNonCharacter(c) || UCharacter.hasBinaryProperty(c, UProperty.DEFAULT_IGNORABLE_CODE_POINT)) {
\r
1433 shouldBeDir=UCharacter.BOUNDARY_NEUTRAL;
\r
1435 shouldBeDir=defaultBidi[i][1];
\r
// The direction is checked through both getDirection() and the BIDI_CLASS
// property value.
1438 if (UCharacter.getDirection(c) != shouldBeDir
\r
1439 || UCharacter.getIntPropertyValue(c,
\r
1440 UProperty.BIDI_CLASS)
\r
1442 errln("error: getDirection(unassigned/PUA "
\r
1443 + Integer.toHexString(c)
\r
// NOTE(review): presumably preceded by an iterator.reset() call that is not
// visible in this extract; after it, next() must succeed and yield a range
// starting at 0.
1455 if (iterator.next(result) == false || result.start != 0) {
\r
1456 System.out.println("result " + result.start);
\r
1457 errln("UCharacterIteration reset() failed");
\r
1464 public void TestGetAge()
\r
1466 int ages[] = {0x41, 1, 1, 0, 0,
\r
1467 0xffff, 1, 1, 0, 0,
\r
1468 0x20ab, 2, 0, 0, 0,
\r
1469 0x2fffe, 2, 0, 0, 0,
\r
1470 0x20ac, 2, 1, 0, 0,
\r
1471 0xfb1d, 3, 0, 0, 0,
\r
1472 0x3f4, 3, 1, 0, 0,
\r
1473 0x10300, 3, 1, 0, 0,
\r
1474 0x220, 3, 2, 0, 0,
\r
1475 0xff60, 3, 2, 0, 0};
\r
1476 for (int i = 0; i < ages.length; i += 5) {
\r
1477 VersionInfo age = UCharacter.getAge(ages[i]);
\r
1478 if (age != VersionInfo.getInstance(ages[i + 1], ages[i + 2],
\r
1479 ages[i + 3], ages[i + 4])) {
\r
1480 errln("error: getAge(\\u" + Integer.toHexString(ages[i]) +
\r
1481 ") == " + age.toString() + " instead of " +
\r
1482 ages[i + 1] + "." + ages[i + 2] + "." + ages[i + 3] +
\r
1483 "." + ages[i + 4]);
\r
1489 * Test binary non core properties
\r
1491 public void TestAdditionalProperties()
\r
1493 // test data for hasBinaryProperty()
\r
1494 int props[][] = { // code point, property
\r
1495 { 0x0627, UProperty.ALPHABETIC, 1 },
\r
1496 { 0x1034a, UProperty.ALPHABETIC, 1 },
\r
1497 { 0x2028, UProperty.ALPHABETIC, 0 },
\r
1499 { 0x0066, UProperty.ASCII_HEX_DIGIT, 1 },
\r
1500 { 0x0067, UProperty.ASCII_HEX_DIGIT, 0 },
\r
1502 { 0x202c, UProperty.BIDI_CONTROL, 1 },
\r
1503 { 0x202f, UProperty.BIDI_CONTROL, 0 },
\r
1505 { 0x003c, UProperty.BIDI_MIRRORED, 1 },
\r
1506 { 0x003d, UProperty.BIDI_MIRRORED, 0 },
\r
1508 /* see Unicode Corrigendum #6 at http://www.unicode.org/versions/corrigendum6.html */
\r
1509 { 0x2018, UProperty.BIDI_MIRRORED, 0 },
\r
1510 { 0x201d, UProperty.BIDI_MIRRORED, 0 },
\r
1511 { 0x201f, UProperty.BIDI_MIRRORED, 0 },
\r
1512 { 0x301e, UProperty.BIDI_MIRRORED, 0 },
\r
1514 { 0x058a, UProperty.DASH, 1 },
\r
1515 { 0x007e, UProperty.DASH, 0 },
\r
1517 { 0x0c4d, UProperty.DIACRITIC, 1 },
\r
1518 { 0x3000, UProperty.DIACRITIC, 0 },
\r
1520 { 0x0e46, UProperty.EXTENDER, 1 },
\r
1521 { 0x0020, UProperty.EXTENDER, 0 },
\r
1523 { 0xfb1d, UProperty.FULL_COMPOSITION_EXCLUSION, 1 },
\r
1524 { 0x1d15f, UProperty.FULL_COMPOSITION_EXCLUSION, 1 },
\r
1525 { 0xfb1e, UProperty.FULL_COMPOSITION_EXCLUSION, 0 },
\r
1527 { 0x110a, UProperty.NFD_INERT, 1 }, /* Jamo L */
\r
1528 { 0x0308, UProperty.NFD_INERT, 0 },
\r
1530 { 0x1164, UProperty.NFKD_INERT, 1 }, /* Jamo V */
\r
1531 { 0x1d79d, UProperty.NFKD_INERT, 0 }, /* math compat version of xi */
\r
1533 { 0x0021, UProperty.NFC_INERT, 1 }, /* ! */
\r
1534 { 0x0061, UProperty.NFC_INERT, 0 }, /* a */
\r
1535 { 0x00e4, UProperty.NFC_INERT, 0 }, /* a-umlaut */
\r
1536 { 0x0102, UProperty.NFC_INERT, 0 }, /* a-breve */
\r
1537 { 0xac1c, UProperty.NFC_INERT, 0 }, /* Hangul LV */
\r
1538 { 0xac1d, UProperty.NFC_INERT, 1 }, /* Hangul LVT */
\r
1540 { 0x1d79d, UProperty.NFKC_INERT, 0 }, /* math compat version of xi */
\r
1541 { 0x2a6d6, UProperty.NFKC_INERT, 1 }, /* Han, last of CJK ext. B */
\r
1543 { 0x00e4, UProperty.SEGMENT_STARTER, 1 },
\r
1544 { 0x0308, UProperty.SEGMENT_STARTER, 0 },
\r
1545 { 0x110a, UProperty.SEGMENT_STARTER, 1 }, /* Jamo L */
\r
1546 { 0x1164, UProperty.SEGMENT_STARTER, 0 },/* Jamo V */
\r
1547 { 0xac1c, UProperty.SEGMENT_STARTER, 1 }, /* Hangul LV */
\r
1548 { 0xac1d, UProperty.SEGMENT_STARTER, 1 }, /* Hangul LVT */
\r
1550 { 0x0044, UProperty.HEX_DIGIT, 1 },
\r
1551 { 0xff46, UProperty.HEX_DIGIT, 1 },
\r
1552 { 0x0047, UProperty.HEX_DIGIT, 0 },
\r
1554 { 0x30fb, UProperty.HYPHEN, 1 },
\r
1555 { 0xfe58, UProperty.HYPHEN, 0 },
\r
1557 { 0x2172, UProperty.ID_CONTINUE, 1 },
\r
1558 { 0x0307, UProperty.ID_CONTINUE, 1 },
\r
1559 { 0x005c, UProperty.ID_CONTINUE, 0 },
\r
1561 { 0x2172, UProperty.ID_START, 1 },
\r
1562 { 0x007a, UProperty.ID_START, 1 },
\r
1563 { 0x0039, UProperty.ID_START, 0 },
\r
1565 { 0x4db5, UProperty.IDEOGRAPHIC, 1 },
\r
1566 { 0x2f999, UProperty.IDEOGRAPHIC, 1 },
\r
1567 { 0x2f99, UProperty.IDEOGRAPHIC, 0 },
\r
1569 { 0x200c, UProperty.JOIN_CONTROL, 1 },
\r
1570 { 0x2029, UProperty.JOIN_CONTROL, 0 },
\r
1572 { 0x1d7bc, UProperty.LOWERCASE, 1 },
\r
1573 { 0x0345, UProperty.LOWERCASE, 1 },
\r
1574 { 0x0030, UProperty.LOWERCASE, 0 },
\r
1576 { 0x1d7a9, UProperty.MATH, 1 },
\r
1577 { 0x2135, UProperty.MATH, 1 },
\r
1578 { 0x0062, UProperty.MATH, 0 },
\r
1580 { 0xfde1, UProperty.NONCHARACTER_CODE_POINT, 1 },
\r
1581 { 0x10ffff, UProperty.NONCHARACTER_CODE_POINT, 1 },
\r
1582 { 0x10fffd, UProperty.NONCHARACTER_CODE_POINT, 0 },
\r
1584 { 0x0022, UProperty.QUOTATION_MARK, 1 },
\r
1585 { 0xff62, UProperty.QUOTATION_MARK, 1 },
\r
1586 { 0xd840, UProperty.QUOTATION_MARK, 0 },
\r
1588 { 0x061f, UProperty.TERMINAL_PUNCTUATION, 1 },
\r
1589 { 0xe003f, UProperty.TERMINAL_PUNCTUATION, 0 },
\r
1591 { 0x1d44a, UProperty.UPPERCASE, 1 },
\r
1592 { 0x2162, UProperty.UPPERCASE, 1 },
\r
1593 { 0x0345, UProperty.UPPERCASE, 0 },
\r
1595 { 0x0020, UProperty.WHITE_SPACE, 1 },
\r
1596 { 0x202f, UProperty.WHITE_SPACE, 1 },
\r
1597 { 0x3001, UProperty.WHITE_SPACE, 0 },
\r
1599 { 0x0711, UProperty.XID_CONTINUE, 1 },
\r
1600 { 0x1d1aa, UProperty.XID_CONTINUE, 1 },
\r
1601 { 0x007c, UProperty.XID_CONTINUE, 0 },
\r
1603 { 0x16ee, UProperty.XID_START, 1 },
\r
1604 { 0x23456, UProperty.XID_START, 1 },
\r
1605 { 0x1d1aa, UProperty.XID_START, 0 },
\r
1609 * The following properties are only supported starting with the
\r
1610 * Unicode version indicated in the second field.
\r
1614 { 0x180c, UProperty.DEFAULT_IGNORABLE_CODE_POINT, 1 },
\r
1615 { 0xfe02, UProperty.DEFAULT_IGNORABLE_CODE_POINT, 1 },
\r
1616 { 0x1801, UProperty.DEFAULT_IGNORABLE_CODE_POINT, 0 },
\r
1618 { 0x0341, UProperty.DEPRECATED, 1 },
\r
1619 { 0xe0041, UProperty.DEPRECATED, 1 }, /* Changed from Unicode 5 to 5.1 */
\r
1621 { 0x00a0, UProperty.GRAPHEME_BASE, 1 },
\r
1622 { 0x0a4d, UProperty.GRAPHEME_BASE, 0 },
\r
1623 { 0xff9d, UProperty.GRAPHEME_BASE, 1 },
\r
1624 { 0xff9f, UProperty.GRAPHEME_BASE, 0 }, /* changed from Unicode 3.2 to 4 and again 5 to 5.1 */
\r
1626 { 0x0300, UProperty.GRAPHEME_EXTEND, 1 },
\r
1627 { 0xff9d, UProperty.GRAPHEME_EXTEND, 0 },
\r
1628 { 0xff9f, UProperty.GRAPHEME_EXTEND, 1 }, /* changed from Unicode 3.2 to 4 and again 5 to 5.1 */
\r
1629 { 0x0603, UProperty.GRAPHEME_EXTEND, 0 },
\r
1631 { 0x0a4d, UProperty.GRAPHEME_LINK, 1 },
\r
1632 { 0xff9f, UProperty.GRAPHEME_LINK, 0 },
\r
1634 { 0x2ff7, UProperty.IDS_BINARY_OPERATOR, 1 },
\r
1635 { 0x2ff3, UProperty.IDS_BINARY_OPERATOR, 0 },
\r
1637 { 0x2ff3, UProperty.IDS_TRINARY_OPERATOR, 1 },
\r
1638 { 0x2f03, UProperty.IDS_TRINARY_OPERATOR, 0 },
\r
1640 { 0x0ec1, UProperty.LOGICAL_ORDER_EXCEPTION, 1 },
\r
1641 { 0xdcba, UProperty.LOGICAL_ORDER_EXCEPTION, 0 },
\r
1643 { 0x2e9b, UProperty.RADICAL, 1 },
\r
1644 { 0x4e00, UProperty.RADICAL, 0 },
\r
1646 { 0x012f, UProperty.SOFT_DOTTED, 1 },
\r
1647 { 0x0049, UProperty.SOFT_DOTTED, 0 },
\r
1649 { 0xfa11, UProperty.UNIFIED_IDEOGRAPH, 1 },
\r
1650 { 0xfa12, UProperty.UNIFIED_IDEOGRAPH, 0 },
\r
1652 { -1, 0x401, 0 }, /* version break for Unicode 4.0.1 */
\r
1654 { 0x002e, UProperty.S_TERM, 1 },
\r
1655 { 0x0061, UProperty.S_TERM, 0 },
\r
1657 { 0x180c, UProperty.VARIATION_SELECTOR, 1 },
\r
1658 { 0xfe03, UProperty.VARIATION_SELECTOR, 1 },
\r
1659 { 0xe01ef, UProperty.VARIATION_SELECTOR, 1 },
\r
1660 { 0xe0200, UProperty.VARIATION_SELECTOR, 0 },
\r
1662 /* enum/integer type properties */
\r
1663 /* test default Bidi classes for unassigned code points */
\r
1664 { 0x0590, UProperty.BIDI_CLASS, UCharacterDirection.RIGHT_TO_LEFT },
\r
1665 { 0x05cf, UProperty.BIDI_CLASS, UCharacterDirection.RIGHT_TO_LEFT },
\r
1666 { 0x05ed, UProperty.BIDI_CLASS, UCharacterDirection.RIGHT_TO_LEFT },
\r
1667 { 0x07f2, UProperty.BIDI_CLASS, UCharacterDirection.DIR_NON_SPACING_MARK }, /* Nko, new in Unicode 5.0 */
\r
1668 { 0x07fe, UProperty.BIDI_CLASS, UCharacterDirection.RIGHT_TO_LEFT }, /* unassigned R */
\r
1669 { 0x08ba, UProperty.BIDI_CLASS, UCharacterDirection.RIGHT_TO_LEFT },
\r
1670 { 0xfb37, UProperty.BIDI_CLASS, UCharacterDirection.RIGHT_TO_LEFT },
\r
1671 { 0xfb42, UProperty.BIDI_CLASS, UCharacterDirection.RIGHT_TO_LEFT },
\r
1672 { 0x10806, UProperty.BIDI_CLASS, UCharacterDirection.RIGHT_TO_LEFT },
\r
1673 { 0x10909, UProperty.BIDI_CLASS, UCharacterDirection.RIGHT_TO_LEFT },
\r
1674 { 0x10fe4, UProperty.BIDI_CLASS, UCharacterDirection.RIGHT_TO_LEFT },
\r
1676 { 0x0605, UProperty.BIDI_CLASS, UCharacterDirection.RIGHT_TO_LEFT_ARABIC },
\r
1677 { 0x061c, UProperty.BIDI_CLASS, UCharacterDirection.RIGHT_TO_LEFT_ARABIC },
\r
1678 { 0x063f, UProperty.BIDI_CLASS, UCharacterDirection.RIGHT_TO_LEFT_ARABIC },
\r
1679 { 0x070e, UProperty.BIDI_CLASS, UCharacterDirection.RIGHT_TO_LEFT_ARABIC },
\r
1680 { 0x0775, UProperty.BIDI_CLASS, UCharacterDirection.RIGHT_TO_LEFT_ARABIC },
\r
1681 { 0xfbc2, UProperty.BIDI_CLASS, UCharacterDirection.RIGHT_TO_LEFT_ARABIC },
\r
1682 { 0xfd90, UProperty.BIDI_CLASS, UCharacterDirection.RIGHT_TO_LEFT_ARABIC },
\r
1683 { 0xfefe, UProperty.BIDI_CLASS, UCharacterDirection.RIGHT_TO_LEFT_ARABIC },
\r
1685 { 0x02AF, UProperty.BLOCK, UCharacter.UnicodeBlock.IPA_EXTENSIONS.getID() },
\r
1686 { 0x0C4E, UProperty.BLOCK, UCharacter.UnicodeBlock.TELUGU.getID()},
\r
1687 { 0x155A, UProperty.BLOCK, UCharacter.UnicodeBlock.UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS.getID() },
\r
1688 { 0x1717, UProperty.BLOCK, UCharacter.UnicodeBlock.TAGALOG.getID() },
\r
1689 { 0x1900, UProperty.BLOCK, UCharacter.UnicodeBlock.LIMBU.getID() },
\r
1690 { 0x1AFF, UProperty.BLOCK, UCharacter.UnicodeBlock.NO_BLOCK.getID()},
\r
1691 { 0x3040, UProperty.BLOCK, UCharacter.UnicodeBlock.HIRAGANA.getID()},
\r
1692 { 0x1D0FF, UProperty.BLOCK, UCharacter.UnicodeBlock.BYZANTINE_MUSICAL_SYMBOLS.getID()},
\r
1693 { 0x50000, UProperty.BLOCK, UCharacter.UnicodeBlock.NO_BLOCK.getID() },
\r
1694 { 0xEFFFF, UProperty.BLOCK, UCharacter.UnicodeBlock.NO_BLOCK.getID() },
\r
1695 { 0x10D0FF, UProperty.BLOCK, UCharacter.UnicodeBlock.SUPPLEMENTARY_PRIVATE_USE_AREA_B.getID() },
\r
1697 /* UProperty.CANONICAL_COMBINING_CLASS tested for assigned characters in TestUnicodeData() */
\r
1698 { 0xd7d7, UProperty.CANONICAL_COMBINING_CLASS, 0 },
\r
1700 { 0x00A0, UProperty.DECOMPOSITION_TYPE, UCharacter.DecompositionType.NOBREAK },
\r
1701 { 0x00A8, UProperty.DECOMPOSITION_TYPE, UCharacter.DecompositionType.COMPAT },
\r
1702 { 0x00bf, UProperty.DECOMPOSITION_TYPE, UCharacter.DecompositionType.NONE },
\r
1703 { 0x00c0, UProperty.DECOMPOSITION_TYPE, UCharacter.DecompositionType.CANONICAL },
\r
1704 { 0x1E9B, UProperty.DECOMPOSITION_TYPE, UCharacter.DecompositionType.CANONICAL },
\r
1705 { 0xBCDE, UProperty.DECOMPOSITION_TYPE, UCharacter.DecompositionType.CANONICAL },
\r
1706 { 0xFB5D, UProperty.DECOMPOSITION_TYPE, UCharacter.DecompositionType.MEDIAL },
\r
1707 { 0x1D736, UProperty.DECOMPOSITION_TYPE, UCharacter.DecompositionType.FONT },
\r
1708 { 0xe0033, UProperty.DECOMPOSITION_TYPE, UCharacter.DecompositionType.NONE },
\r
1710 { 0x0009, UProperty.EAST_ASIAN_WIDTH, UCharacter.EastAsianWidth.NEUTRAL },
\r
1711 { 0x0020, UProperty.EAST_ASIAN_WIDTH, UCharacter.EastAsianWidth.NARROW },
\r
1712 { 0x00B1, UProperty.EAST_ASIAN_WIDTH, UCharacter.EastAsianWidth.AMBIGUOUS },
\r
1713 { 0x20A9, UProperty.EAST_ASIAN_WIDTH, UCharacter.EastAsianWidth.HALFWIDTH },
\r
1714 { 0x2FFB, UProperty.EAST_ASIAN_WIDTH, UCharacter.EastAsianWidth.WIDE },
\r
1715 { 0x3000, UProperty.EAST_ASIAN_WIDTH, UCharacter.EastAsianWidth.FULLWIDTH },
\r
1716 { 0x35bb, UProperty.EAST_ASIAN_WIDTH, UCharacter.EastAsianWidth.WIDE },
\r
1717 { 0x58bd, UProperty.EAST_ASIAN_WIDTH, UCharacter.EastAsianWidth.WIDE },
\r
1718 { 0xD7A3, UProperty.EAST_ASIAN_WIDTH, UCharacter.EastAsianWidth.WIDE },
\r
1719 { 0xEEEE, UProperty.EAST_ASIAN_WIDTH, UCharacter.EastAsianWidth.AMBIGUOUS },
\r
1720 { 0x1D198, UProperty.EAST_ASIAN_WIDTH, UCharacter.EastAsianWidth.NEUTRAL },
\r
1721 { 0x20000, UProperty.EAST_ASIAN_WIDTH, UCharacter.EastAsianWidth.WIDE },
\r
1722 { 0x2F8C7, UProperty.EAST_ASIAN_WIDTH, UCharacter.EastAsianWidth.WIDE },
\r
1723 { 0x3a5bd, UProperty.EAST_ASIAN_WIDTH, UCharacter.EastAsianWidth.WIDE },
\r
1724 { 0x5a5bd, UProperty.EAST_ASIAN_WIDTH, UCharacter.EastAsianWidth.NEUTRAL },
\r
1725 { 0xFEEEE, UProperty.EAST_ASIAN_WIDTH, UCharacter.EastAsianWidth.AMBIGUOUS },
\r
1726 { 0x10EEEE, UProperty.EAST_ASIAN_WIDTH, UCharacter.EastAsianWidth.AMBIGUOUS },
\r
1728 /* UProperty.GENERAL_CATEGORY tested for assigned characters in TestUnicodeData() */
\r
1729 { 0xd7d7, UProperty.GENERAL_CATEGORY, 0 },
\r
1731 { 0x0444, UProperty.JOINING_GROUP, UCharacter.JoiningGroup.NO_JOINING_GROUP },
\r
1732 { 0x0639, UProperty.JOINING_GROUP, UCharacter.JoiningGroup.AIN },
\r
1733 { 0x072A, UProperty.JOINING_GROUP, UCharacter.JoiningGroup.DALATH_RISH },
\r
1734 { 0x0647, UProperty.JOINING_GROUP, UCharacter.JoiningGroup.HEH },
\r
1735 { 0x06C1, UProperty.JOINING_GROUP, UCharacter.JoiningGroup.HEH_GOAL },
\r
1736 { 0x06C3, UProperty.JOINING_GROUP, UCharacter.JoiningGroup.HAMZA_ON_HEH_GOAL },
\r
1738 { 0x200C, UProperty.JOINING_TYPE, UCharacter.JoiningType.NON_JOINING },
\r
1739 { 0x200D, UProperty.JOINING_TYPE, UCharacter.JoiningType.JOIN_CAUSING },
\r
1740 { 0x0639, UProperty.JOINING_TYPE, UCharacter.JoiningType.DUAL_JOINING },
\r
1741 { 0x0640, UProperty.JOINING_TYPE, UCharacter.JoiningType.JOIN_CAUSING },
\r
1742 { 0x06C3, UProperty.JOINING_TYPE, UCharacter.JoiningType.RIGHT_JOINING },
\r
1743 { 0x0300, UProperty.JOINING_TYPE, UCharacter.JoiningType.TRANSPARENT },
\r
1744 { 0x070F, UProperty.JOINING_TYPE, UCharacter.JoiningType.TRANSPARENT },
\r
1745 { 0xe0033, UProperty.JOINING_TYPE, UCharacter.JoiningType.TRANSPARENT },
\r
1747 /* TestUnicodeData() verifies that no assigned character has "XX" (unknown) */
\r
1748 { 0xe7e7, UProperty.LINE_BREAK, UCharacter.LineBreak.UNKNOWN },
\r
1749 { 0x10fffd, UProperty.LINE_BREAK, UCharacter.LineBreak.UNKNOWN },
\r
1750 { 0x0028, UProperty.LINE_BREAK, UCharacter.LineBreak.OPEN_PUNCTUATION },
\r
1751 { 0x232A, UProperty.LINE_BREAK, UCharacter.LineBreak.CLOSE_PUNCTUATION },
\r
1752 { 0x3401, UProperty.LINE_BREAK, UCharacter.LineBreak.IDEOGRAPHIC },
\r
1753 { 0x4e02, UProperty.LINE_BREAK, UCharacter.LineBreak.IDEOGRAPHIC },
\r
1754 { 0x20004, UProperty.LINE_BREAK, UCharacter.LineBreak.IDEOGRAPHIC },
\r
1755 { 0xf905, UProperty.LINE_BREAK, UCharacter.LineBreak.IDEOGRAPHIC },
\r
1756 { 0xdb7e, UProperty.LINE_BREAK, UCharacter.LineBreak.SURROGATE },
\r
1757 { 0xdbfd, UProperty.LINE_BREAK, UCharacter.LineBreak.SURROGATE },
\r
1758 { 0xdffc, UProperty.LINE_BREAK, UCharacter.LineBreak.SURROGATE },
\r
1759 { 0x2762, UProperty.LINE_BREAK, UCharacter.LineBreak.EXCLAMATION },
\r
1760 { 0x002F, UProperty.LINE_BREAK, UCharacter.LineBreak.BREAK_SYMBOLS },
\r
1761 { 0x1D49C, UProperty.LINE_BREAK, UCharacter.LineBreak.ALPHABETIC },
\r
1762 { 0x1731, UProperty.LINE_BREAK, UCharacter.LineBreak.ALPHABETIC },
\r
1764 /* UProperty.NUMERIC_TYPE tested in TestNumericProperties() */
\r
1766 /* UProperty.SCRIPT tested in TestUScriptCodeAPI() */
\r
1768 { 0x1100, UProperty.HANGUL_SYLLABLE_TYPE, UCharacter.HangulSyllableType.LEADING_JAMO },
\r
1769 { 0x1111, UProperty.HANGUL_SYLLABLE_TYPE, UCharacter.HangulSyllableType.LEADING_JAMO },
\r
1770 { 0x1159, UProperty.HANGUL_SYLLABLE_TYPE, UCharacter.HangulSyllableType.LEADING_JAMO },
\r
1771 { 0x115f, UProperty.HANGUL_SYLLABLE_TYPE, UCharacter.HangulSyllableType.LEADING_JAMO },
\r
1773 { 0x1160, UProperty.HANGUL_SYLLABLE_TYPE, UCharacter.HangulSyllableType.VOWEL_JAMO },
\r
1774 { 0x1161, UProperty.HANGUL_SYLLABLE_TYPE, UCharacter.HangulSyllableType.VOWEL_JAMO },
\r
1775 { 0x1172, UProperty.HANGUL_SYLLABLE_TYPE, UCharacter.HangulSyllableType.VOWEL_JAMO },
\r
1776 { 0x11a2, UProperty.HANGUL_SYLLABLE_TYPE, UCharacter.HangulSyllableType.VOWEL_JAMO },
\r
1778 { 0x11a8, UProperty.HANGUL_SYLLABLE_TYPE, UCharacter.HangulSyllableType.TRAILING_JAMO },
\r
1779 { 0x11b8, UProperty.HANGUL_SYLLABLE_TYPE, UCharacter.HangulSyllableType.TRAILING_JAMO },
\r
1780 { 0x11c8, UProperty.HANGUL_SYLLABLE_TYPE, UCharacter.HangulSyllableType.TRAILING_JAMO },
\r
1781 { 0x11f9, UProperty.HANGUL_SYLLABLE_TYPE, UCharacter.HangulSyllableType.TRAILING_JAMO },
\r
1783 { 0x115a, UProperty.HANGUL_SYLLABLE_TYPE, 0 },
\r
1784 { 0x115e, UProperty.HANGUL_SYLLABLE_TYPE, 0 },
\r
1785 { 0x11a3, UProperty.HANGUL_SYLLABLE_TYPE, 0 },
\r
1786 { 0x11a7, UProperty.HANGUL_SYLLABLE_TYPE, 0 },
\r
1787 { 0x11fa, UProperty.HANGUL_SYLLABLE_TYPE, 0 },
\r
1788 { 0x11ff, UProperty.HANGUL_SYLLABLE_TYPE, 0 },
\r
1790 { 0xac00, UProperty.HANGUL_SYLLABLE_TYPE, UCharacter.HangulSyllableType.LV_SYLLABLE },
\r
1791 { 0xac1c, UProperty.HANGUL_SYLLABLE_TYPE, UCharacter.HangulSyllableType.LV_SYLLABLE },
\r
1792 { 0xc5ec, UProperty.HANGUL_SYLLABLE_TYPE, UCharacter.HangulSyllableType.LV_SYLLABLE },
\r
1793 { 0xd788, UProperty.HANGUL_SYLLABLE_TYPE, UCharacter.HangulSyllableType.LV_SYLLABLE },
\r
1795 { 0xac01, UProperty.HANGUL_SYLLABLE_TYPE, UCharacter.HangulSyllableType.LVT_SYLLABLE },
\r
1796 { 0xac1b, UProperty.HANGUL_SYLLABLE_TYPE, UCharacter.HangulSyllableType.LVT_SYLLABLE },
\r
1797 { 0xac1d, UProperty.HANGUL_SYLLABLE_TYPE, UCharacter.HangulSyllableType.LVT_SYLLABLE },
\r
1798 { 0xc5ee, UProperty.HANGUL_SYLLABLE_TYPE, UCharacter.HangulSyllableType.LVT_SYLLABLE },
\r
1799 { 0xd7a3, UProperty.HANGUL_SYLLABLE_TYPE, UCharacter.HangulSyllableType.LVT_SYLLABLE },
\r
1801 { 0xd7a4, UProperty.HANGUL_SYLLABLE_TYPE, 0 },
\r
1803 { -1, 0x410, 0 }, /* version break for Unicode 4.1 */
\r
1805 { 0x00d7, UProperty.PATTERN_SYNTAX, 1 },
\r
1806 { 0xfe45, UProperty.PATTERN_SYNTAX, 1 },
\r
1807 { 0x0061, UProperty.PATTERN_SYNTAX, 0 },
\r
1809 { 0x0020, UProperty.PATTERN_WHITE_SPACE, 1 },
\r
1810 { 0x0085, UProperty.PATTERN_WHITE_SPACE, 1 },
\r
1811 { 0x200f, UProperty.PATTERN_WHITE_SPACE, 1 },
\r
1812 { 0x00a0, UProperty.PATTERN_WHITE_SPACE, 0 },
\r
1813 { 0x3000, UProperty.PATTERN_WHITE_SPACE, 0 },
\r
1815 { 0x1d200, UProperty.BLOCK, UCharacter.UnicodeBlock.ANCIENT_GREEK_MUSICAL_NOTATION_ID },
\r
1816 { 0x2c8e, UProperty.BLOCK, UCharacter.UnicodeBlock.COPTIC_ID },
\r
1817 { 0xfe17, UProperty.BLOCK, UCharacter.UnicodeBlock.VERTICAL_FORMS_ID },
\r
1819 { 0x1a00, UProperty.SCRIPT, UScript.BUGINESE },
\r
1820 { 0x2cea, UProperty.SCRIPT, UScript.COPTIC },
\r
1821 { 0xa82b, UProperty.SCRIPT, UScript.SYLOTI_NAGRI },
\r
1822 { 0x103d0, UProperty.SCRIPT, UScript.OLD_PERSIAN },
\r
1824 { 0xcc28, UProperty.LINE_BREAK, UCharacter.LineBreak.H2 },
\r
1825 { 0xcc29, UProperty.LINE_BREAK, UCharacter.LineBreak.H3 },
\r
1826 { 0xac03, UProperty.LINE_BREAK, UCharacter.LineBreak.H3 },
\r
1827 { 0x115f, UProperty.LINE_BREAK, UCharacter.LineBreak.JL },
\r
1828 { 0x11aa, UProperty.LINE_BREAK, UCharacter.LineBreak.JT },
\r
1829 { 0x11a1, UProperty.LINE_BREAK, UCharacter.LineBreak.JV },
\r
1831 { 0xb2c9, UProperty.GRAPHEME_CLUSTER_BREAK, UCharacter.GraphemeClusterBreak.LVT },
\r
1832 { 0x036f, UProperty.GRAPHEME_CLUSTER_BREAK, UCharacter.GraphemeClusterBreak.EXTEND },
\r
1833 { 0x0000, UProperty.GRAPHEME_CLUSTER_BREAK, UCharacter.GraphemeClusterBreak.CONTROL },
\r
1834 { 0x1160, UProperty.GRAPHEME_CLUSTER_BREAK, UCharacter.GraphemeClusterBreak.V },
\r
1836 { 0x05f4, UProperty.WORD_BREAK, UCharacter.WordBreak.MIDLETTER },
\r
1837 { 0x4ef0, UProperty.WORD_BREAK, UCharacter.WordBreak.OTHER },
\r
1838 { 0x19d9, UProperty.WORD_BREAK, UCharacter.WordBreak.NUMERIC },
\r
1839 { 0x2044, UProperty.WORD_BREAK, UCharacter.WordBreak.MIDNUM },
\r
1841 { 0xfffd, UProperty.SENTENCE_BREAK, UCharacter.SentenceBreak.OTHER },
\r
1842 { 0x1ffc, UProperty.SENTENCE_BREAK, UCharacter.SentenceBreak.UPPER },
\r
1843 { 0xff63, UProperty.SENTENCE_BREAK, UCharacter.SentenceBreak.CLOSE },
\r
1844 { 0x2028, UProperty.SENTENCE_BREAK, UCharacter.SentenceBreak.SEP },
\r
1846 /* undefined UProperty values */
\r
1847 { 0x61, 0x4a7, 0 },
\r
1848 { 0x234bc, 0x15ed, 0 }
\r
1852 if (UCharacter.getIntPropertyMinValue(UProperty.DASH) != 0
\r
1853 || UCharacter.getIntPropertyMinValue(UProperty.BIDI_CLASS) != 0
\r
1854 || UCharacter.getIntPropertyMinValue(UProperty.BLOCK)!= 0 /* j2478 */
\r
1855 || UCharacter.getIntPropertyMinValue(UProperty.SCRIPT)!= 0 /* JB#2410 */
\r
1856 || UCharacter.getIntPropertyMinValue(0x2345) != 0) {
\r
1857 errln("error: UCharacter.getIntPropertyMinValue() wrong");
\r
1860 if( UCharacter.getIntPropertyMaxValue(UProperty.DASH)!=1) {
\r
1861 errln("error: UCharacter.getIntPropertyMaxValue(UProperty.DASH) wrong\n");
\r
1863 if( UCharacter.getIntPropertyMaxValue(UProperty.ID_CONTINUE)!=1) {
\r
1864 errln("error: UCharacter.getIntPropertyMaxValue(UProperty.ID_CONTINUE) wrong\n");
\r
1866 if( UCharacter.getIntPropertyMaxValue(UProperty.BINARY_LIMIT-1)!=1) {
\r
1867 errln("error: UCharacter.getIntPropertyMaxValue(UProperty.BINARY_LIMIT-1) wrong\n");
\r
1870 if( UCharacter.getIntPropertyMaxValue(UProperty.BIDI_CLASS)!=UCharacterDirection.CHAR_DIRECTION_COUNT-1 ) {
\r
1871 errln("error: UCharacter.getIntPropertyMaxValue(UProperty.BIDI_CLASS) wrong\n");
\r
1873 if( UCharacter.getIntPropertyMaxValue(UProperty.BLOCK)!=UCharacter.UnicodeBlock.COUNT-1 ) {
\r
1874 errln("error: UCharacter.getIntPropertyMaxValue(UProperty.BLOCK) wrong\n");
\r
1876 if(UCharacter.getIntPropertyMaxValue(UProperty.LINE_BREAK)!=UCharacter.LineBreak.COUNT-1) {
\r
1877 errln("error: UCharacter.getIntPropertyMaxValue(UProperty.LINE_BREAK) wrong\n");
\r
1879 if(UCharacter.getIntPropertyMaxValue(UProperty.SCRIPT)!=UScript.CODE_LIMIT-1) {
\r
1880 errln("error: UCharacter.getIntPropertyMaxValue(UProperty.SCRIPT) wrong\n");
\r
1882 if(UCharacter.getIntPropertyMaxValue(UProperty.NUMERIC_TYPE)!=UCharacter.NumericType.COUNT-1) {
\r
1883 errln("error: UCharacter.getIntPropertyMaxValue(UProperty.NUMERIC_TYPE) wrong\n");
\r
1885 if(UCharacter.getIntPropertyMaxValue(UProperty.GENERAL_CATEGORY)!=UCharacterCategory.CHAR_CATEGORY_COUNT-1) {
\r
1886 errln("error: UCharacter.getIntPropertyMaxValue(UProperty.GENERAL_CATEGORY) wrong\n");
\r
1888 if(UCharacter.getIntPropertyMaxValue(UProperty.HANGUL_SYLLABLE_TYPE)!=UCharacter.HangulSyllableType.COUNT-1) {
\r
1889 errln("error: UCharacter.getIntPropertyMaxValue(UProperty.HANGUL_SYLLABLE_TYPE) wrong\n");
\r
1891 if(UCharacter.getIntPropertyMaxValue(UProperty.GRAPHEME_CLUSTER_BREAK)!=UCharacter.GraphemeClusterBreak.COUNT-1) {
\r
1892 errln("error: UCharacter.getIntPropertyMaxValue(UProperty.GRAPHEME_CLUSTER_BREAK) wrong\n");
\r
1894 if(UCharacter.getIntPropertyMaxValue(UProperty.SENTENCE_BREAK)!=UCharacter.SentenceBreak.COUNT-1) {
\r
1895 errln("error: UCharacter.getIntPropertyMaxValue(UProperty.SENTENCE_BREAK) wrong\n");
\r
1897 if(UCharacter.getIntPropertyMaxValue(UProperty.WORD_BREAK)!=UCharacter.WordBreak.COUNT-1) {
\r
1898 errln("error: UCharacter.getIntPropertyMaxValue(UProperty.WORD_BREAK) wrong\n");
\r
1901 if( UCharacter.getIntPropertyMaxValue(0x2345)!=-1) {
\r
1902 errln("error: UCharacter.getIntPropertyMaxValue(0x2345) wrong\n");
\r
1904 if( UCharacter.getIntPropertyMaxValue(UProperty.DECOMPOSITION_TYPE) != (UCharacter.DecompositionType.COUNT - 1)) {
\r
1905 errln("error: UCharacter.getIntPropertyMaxValue(UProperty.DECOMPOSITION_TYPE) wrong\n");
\r
1907 if( UCharacter.getIntPropertyMaxValue(UProperty.JOINING_GROUP) != (UCharacter.JoiningGroup.COUNT -1)) {
\r
1908 errln("error: UCharacter.getIntPropertyMaxValue(UProperty.JOINING_GROUP) wrong\n");
\r
1910 if( UCharacter.getIntPropertyMaxValue(UProperty.JOINING_TYPE) != (UCharacter.JoiningType.COUNT -1)) {
\r
1911 errln("error: UCharacter.getIntPropertyMaxValue(UProperty.JOINING_TYPE) wrong\n");
\r
1913 if( UCharacter.getIntPropertyMaxValue(UProperty.EAST_ASIAN_WIDTH) != (UCharacter.EastAsianWidth.COUNT -1)) {
\r
1914 errln("error: UCharacter.getIntPropertyMaxValue(UProperty.EAST_ASIAN_WIDTH) wrong\n");
\r
1917 VersionInfo version = UCharacter.getUnicodeVersion();
\r
1919 // test hasBinaryProperty()
\r
1920 for (int i = 0; i < props.length; ++ i) {
\r
1921 if (props[i][0] < 0) {
\r
1922 if (version.compareTo(VersionInfo.getInstance(props[i][1] >> 8,
\r
1923 (props[i][1] >> 4) & 0xF,
\r
1924 props[i][1] & 0xF,
\r
1930 boolean expect = true;
\r
1931 if (props[i][2] == 0) {
\r
1934 if (props[i][1] < UProperty.INT_START) {
\r
1935 if (UCharacter.hasBinaryProperty(props[i][0], props[i][1])
\r
1937 errln("error: UCharacter.hasBinaryProperty(\\u" +
\r
1938 Integer.toHexString(props[i][0]) + ", " +
\r
1939 Integer.toHexString(props[i][1])
\r
1940 + ") has an error expected " + props[i][2]);
\r
1944 int retVal = UCharacter.getIntPropertyValue(props[i][0], props[i][1]);
\r
1945 if (retVal != props[i][2]) {
\r
1946 errln("error: UCharacter.getIntPropertyValue(\\u" +
\r
1947 Utility.hex(props[i][0], 4) +
\r
1948 ", " + props[i][1] + " is wrong, should be "
\r
1949 + props[i][2] + " not " + retVal);
\r
1952 // test separate functions, too
\r
1953 switch (props[i][1]) {
\r
1954 case UProperty.ALPHABETIC:
\r
1955 if (UCharacter.isUAlphabetic(props[i][0]) != expect) {
\r
1956 errln("error: UCharacter.isUAlphabetic(\\u" +
\r
1957 Integer.toHexString(props[i][0]) +
\r
1958 ") is wrong expected " + props[i][2]);
\r
1961 case UProperty.LOWERCASE:
\r
1962 if (UCharacter.isULowercase(props[i][0]) != expect) {
\r
1963 errln("error: UCharacter.isULowercase(\\u" +
\r
1964 Integer.toHexString(props[i][0]) +
\r
1965 ") is wrong expected " +props[i][2]);
\r
1968 case UProperty.UPPERCASE:
\r
1969 if (UCharacter.isUUppercase(props[i][0]) != expect) {
\r
1970 errln("error: UCharacter.isUUppercase(\\u" +
\r
1971 Integer.toHexString(props[i][0]) +
\r
1972 ") is wrong expected " + props[i][2]);
\r
1975 case UProperty.WHITE_SPACE:
\r
1976 if (UCharacter.isUWhiteSpace(props[i][0]) != expect) {
\r
1977 errln("error: UCharacter.isUWhiteSpace(\\u" +
\r
1978 Integer.toHexString(props[i][0]) +
\r
1979 ") is wrong expected " + props[i][2]);
\r
// Table-driven check of the numeric properties of selected code points.
// For every row of testvar[] the test reads the UProperty.NUMERIC_TYPE int
// property and UCharacter.getUnicodeNumericValue(c), and reports a failure
// through errln when either differs from the expectation.
// testvar[] and expected[] are parallel arrays: both are indexed with the
// same loop variable i, so they must keep the same length and order.
1988 public void TestNumericProperties()
\r
1990 // see UnicodeData.txt, DerivedNumericValues.txt
\r
// Each row is { code point, expected UCharacter.NumericType constant }.
1991 int testvar[][] = {
\r
1992 { 0x0F33, UCharacter.NumericType.NUMERIC },
\r
1993 { 0x0C66, UCharacter.NumericType.DECIMAL },
\r
1994 { 0x2159, UCharacter.NumericType.NUMERIC },
\r
1995 { 0x00BD, UCharacter.NumericType.NUMERIC },
\r
1996 { 0x0031, UCharacter.NumericType.DECIMAL },
\r
1997 { 0x10320, UCharacter.NumericType.NUMERIC },
\r
1998 { 0x0F2B, UCharacter.NumericType.NUMERIC },
\r
1999 { 0x00B2, UCharacter.NumericType.DIGIT }, /* Unicode 4.0 change */
\r
2000 { 0x1813, UCharacter.NumericType.DECIMAL },
\r
2001 { 0x2173, UCharacter.NumericType.NUMERIC },
\r
2002 { 0x278E, UCharacter.NumericType.DIGIT },
\r
2003 { 0x1D7F2, UCharacter.NumericType.DECIMAL },
\r
2004 { 0x247A, UCharacter.NumericType.DIGIT },
\r
2005 { 0x1372, UCharacter.NumericType.NUMERIC },
\r
2006 { 0x216B, UCharacter.NumericType.NUMERIC },
\r
2007 { 0x16EE, UCharacter.NumericType.NUMERIC },
\r
2008 { 0x249A, UCharacter.NumericType.NUMERIC },
\r
2009 { 0x303A, UCharacter.NumericType.NUMERIC },
\r
2010 { 0x32B2, UCharacter.NumericType.NUMERIC },
\r
2011 { 0x1375, UCharacter.NumericType.NUMERIC },
\r
2012 { 0x10323, UCharacter.NumericType.NUMERIC },
\r
2013 { 0x0BF1, UCharacter.NumericType.NUMERIC },
\r
2014 { 0x217E, UCharacter.NumericType.NUMERIC },
\r
2015 { 0x2180, UCharacter.NumericType.NUMERIC },
\r
2016 { 0x2181, UCharacter.NumericType.NUMERIC },
\r
2017 { 0x137C, UCharacter.NumericType.NUMERIC },
\r
2018 { 0x61, UCharacter.NumericType.NONE },
\r
2019 { 0x3000, UCharacter.NumericType.NONE },
\r
2020 { 0xfffe, UCharacter.NumericType.NONE },
\r
2021 { 0x10301, UCharacter.NumericType.NONE },
\r
2022 { 0xe0033, UCharacter.NumericType.NONE },
\r
2023 { 0x10ffff, UCharacter.NumericType.NONE },
\r
2024 /* Unicode 4.0 Changes */
\r
2025 { 0x96f6, UCharacter.NumericType.NUMERIC },
\r
2026 { 0x4e00, UCharacter.NumericType.NUMERIC },
\r
2027 { 0x58f1, UCharacter.NumericType.NUMERIC },
\r
2028 { 0x5f10, UCharacter.NumericType.NUMERIC },
\r
2029 { 0x5f0e, UCharacter.NumericType.NUMERIC },
\r
2030 { 0x8086, UCharacter.NumericType.NUMERIC },
\r
2031 { 0x7396, UCharacter.NumericType.NUMERIC },
\r
2032 { 0x5345, UCharacter.NumericType.NUMERIC },
\r
2033 { 0x964c, UCharacter.NumericType.NUMERIC },
\r
2034 { 0x4edf, UCharacter.NumericType.NUMERIC },
\r
2035 { 0x4e07, UCharacter.NumericType.NUMERIC },
\r
2036 { 0x4ebf, UCharacter.NumericType.NUMERIC },
\r
2037 { 0x5146, UCharacter.NumericType.NUMERIC }
\r
// Expected numeric value for the testvar[] row with the same index; the
// entries given as UCharacter.NO_NUMERIC_VALUE are the ones compared against
// rows whose code point is expected to carry no numeric value.
2040 double expected[] = {-1/(double)2,
\r
2066 UCharacter.NO_NUMERIC_VALUE,
\r
2067 UCharacter.NO_NUMERIC_VALUE,
\r
2068 UCharacter.NO_NUMERIC_VALUE,
\r
2069 UCharacter.NO_NUMERIC_VALUE,
\r
2070 UCharacter.NO_NUMERIC_VALUE,
\r
2071 UCharacter.NO_NUMERIC_VALUE,
\r
// Verify both the numeric type and the numeric value of every table row.
2088 for (int i = 0; i < testvar.length; ++ i) {
\r
2089 int c = testvar[i][0];
\r
2090 int type = UCharacter.getIntPropertyValue(c,
\r
2091 UProperty.NUMERIC_TYPE);
\r
2092 double nv = UCharacter.getUnicodeNumericValue(c);
\r
2094 if (type != testvar[i][1]) {
\r
2095 errln("UProperty.NUMERIC_TYPE(\\u" + Utility.hex(c, 4)
\r
2096 + ") = " + type + " should be " + testvar[i][1]);
\r
// The values are doubles, so they are compared with an absolute tolerance
// of 0.000001 instead of ==.
// NOTE(review): the message below names UCharacter.getNumericValue although
// the value was obtained from UCharacter.getUnicodeNumericValue - confirm
// whether the message should be updated.
2098 if (0.000001 <= Math.abs(nv - expected[i])) {
\r
2099 errln("UCharacter.getNumericValue(\\u" + Utility.hex(c, 4)
\r
2100 + ") = " + nv + " should be " + expected[i]);
\r
2106 * Test the property values API. See JB#2410.
\r
// Exercises the int-property range API of UCharacter:
//   1. getIntPropertyMinValue for every property in
//      [UProperty.INT_START, UProperty.INT_LIMIT),
//   2. min/max for UProperty.GENERAL_CATEGORY_MASK,
//   3. getIntPropertyMaxValue for the invalid property -1,
//   4. script lookup for the invalid code point -1, via two different APIs.
// Failures are reported through errln.
2108 public void TestPropertyValues() {
\r
2109 int i, p, min, max;
\r
2111 /* Min should be 0 for everything. */
\r
2112 /* Until JB#2478 is fixed, the one exception is UProperty.BLOCK. */
\r
// NOTE(review): the comment above cites JB#2478 while the one inside the
// loop cites JB#2487 - one of the two ticket numbers is presumably a typo.
2113 for (p=UProperty.INT_START; p<UProperty.INT_LIMIT; ++p) {
\r
2114 min = UCharacter.getIntPropertyMinValue(p);
\r
2116 if (p == UProperty.BLOCK) {
\r
2117 /* This is okay...for now. See JB#2487.
\r
2118 TODO Update this for JB#2487. */
\r
// For any other property the long property name is looked up so that the
// failure message identifies the offending property.
2121 name = UCharacter.getPropertyName(p, UProperty.NameChoice.LONG);
\r
2122 errln("FAIL: UCharacter.getIntPropertyMinValue(" + name + ") = " +
\r
2123 min + ", exp. 0");
\r
// GENERAL_CATEGORY_MASK: both its min and its max value are checked in a
// single condition (the expected operands are not visible in this extract).
2128 if (UCharacter.getIntPropertyMinValue(UProperty.GENERAL_CATEGORY_MASK)
\r
2130 || UCharacter.getIntPropertyMaxValue(
\r
2131 UProperty.GENERAL_CATEGORY_MASK)
\r
2133 errln("error: UCharacter.getIntPropertyMin/MaxValue("
\r
2134 + "UProperty.GENERAL_CATEGORY_MASK) is wrong");
\r
2137 /* Max should be -1 for invalid properties. */
\r
2138 max = UCharacter.getIntPropertyMaxValue(-1);
\r
2140 errln("FAIL: UCharacter.getIntPropertyMaxValue(-1) = " +
\r
2141 max + ", exp. -1");
\r
2144 /* Script should return 0 for an invalid code point. If the API
\r
2145 throws an exception then that's fine too. */
\r
// Two passes, one per API: UScript.getScript(-1) and
// UCharacter.getIntPropertyValue(-1, UProperty.SCRIPT). desc records which
// call was made so that the failure message can name it.
2146 for (i=0; i<2; ++i) {
\r
2149 String desc = null;
\r
2152 script = UScript.getScript(-1);
\r
2153 desc = "UScript.getScript(-1)";
\r
2156 script = UCharacter.getIntPropertyValue(-1, UProperty.SCRIPT);
\r
2157 desc = "UCharacter.getIntPropertyValue(-1, UProperty.SCRIPT)";
\r
2160 if (script != 0) {
\r
2161 errln("FAIL: " + desc + " = " + script + ", exp. 0");
\r
// The empty catch is deliberate: per the comment above, throwing
// IllegalArgumentException for an invalid code point is acceptable.
2163 } catch (IllegalArgumentException e) {}
\r
2167 public void TestIsBMP()
\r
2169 int ch[] = {0x0, -1, 0xffff, 0x10ffff, 0xff, 0x1ffff};
\r
2170 boolean flag[] = {true, false, true, false, true, false};
\r
2171 for (int i = 0; i < ch.length; i ++) {
\r
2172 if (UCharacter.isBMP(ch[i]) != flag[i]) {
\r
2173 errln("Fail: \\u" + Utility.hex(ch[i], 8)
\r
2174 + " failed at UCharacter.isBMP");
\r
2179 /* add characters from a serialized set to a normal one */
\r
2180 private static void _setAddSerialized(UnicodeSet set, USerializedSet sset) {
\r
2181 // int start, end;
\r
2184 count=sset.countRanges();
\r
2185 int[] range = new int[2];
\r
2186 for(i=0; i<count; ++i) {
\r
2187 sset.getRange(i,range);
\r
2188 set.add(range[0],range[1]);
\r
// Walks the code point ranges of set a and compares, range by range and then
// code point by code point, whether b contains them against the value of
// expect. Code points whose membership in b differs from expect are reported
// with one of the "error:" (errln) or "info:" (logln) messages below;
// presumably diffIsError selects between the two, but the selecting branches
// are not visible in this extract.
// Returns the local flag equal once the range enumeration is finished.
2192 private boolean showADiffB(UnicodeSet a, UnicodeSet b,
\r
2193 String a_name, String b_name,
\r
2195 boolean diffIsError){
\r
2196 int i, start, end, length;
\r
// NOTE(review): getRangeStart(i) and getRangeEnd(i) are called before the
// i < getRangeCount() test has any effect - confirm they tolerate
// i == getRangeCount(). length is 0 while i is a valid range index and
// a.getRangeCount() otherwise, which is also 0 for an empty set.
2201 start = a.getRangeStart(i);
\r
2202 length = (i < a.getRangeCount()) ? 0 : a.getRangeCount();
\r
2203 end = a.getRangeEnd(i);
\r
2206 return equal; /* done with code points, got a string or -1 */
\r
// Cheap whole-range test first; individual code points are only examined
// when the range as a whole does not match the expectation.
2209 if(expect!=b.contains(start, end)) {
\r
2211 while(start<=end) {
\r
2212 if(expect!=b.contains(start)) {
\r
2215 errln("error: "+ a_name +" contains "+ hex(start)+" but "+ b_name +" does not");
\r
2217 errln("error: "+a_name +" and "+ b_name+" both contain "+hex(start) +" but should not intersect");
\r
// NOTE(review): the info message below concatenates hex(start) directly with
// "but " (no separating space), unlike the matching error message above.
2221 logln("info: "+a_name +" contains "+hex(start)+ "but " + b_name +" does not");
\r
2223 logln("info: "+a_name +" and "+b_name+" both contain "+hex(start)+" but should not intersect");
\r
2234 private boolean showAMinusB(UnicodeSet a, UnicodeSet b,
\r
2235 String a_name, String b_name,
\r
2236 boolean diffIsError) {
\r
2238 return showADiffB(a, b, a_name, b_name, true, diffIsError);
\r
2241 private boolean showAIntersectB(UnicodeSet a, UnicodeSet b,
\r
2242 String a_name, String b_name,
\r
2243 boolean diffIsError) {
\r
2244 return showADiffB(a, b, a_name, b_name, false, diffIsError);
\r
2247 private boolean compareUSets(UnicodeSet a, UnicodeSet b,
\r
2248 String a_name, String b_name,
\r
2249 boolean diffIsError) {
\r
2251 showAMinusB(a, b, a_name, b_name, diffIsError) &&
\r
2252 showAMinusB(b, a, b_name, a_name, diffIsError);
\r
2255 /* various tests for consistency of UCD data and API behavior */
\r
// Consistency checks between several Unicode property sets and API results:
//   - [:Hyphen:] versus [:Dash:] (informational only),
//   - [:Cf:] must not intersect [:Hyphen:], [:Dash:] or [:Alphabetic:],
//   - names of [:Lowercase:] characters are scanned for SMALL / CAPITAL,
//   - the canonical start set of U+0049 is compared with the characters in
//     0xa0..0x1fff whose decomposition starts with U+0049.
2256 public void TestConsistency() {
\r
2257 char[] buffer16 = new char[300];
\r
2258 char[] buffer = new char[300];
\r
2259 UnicodeSet set1, set2, set3, set4;
\r
2261 USerializedSet sset;
\r
2265 String hyphenPattern = "[:Hyphen:]";
\r
2266 String dashPattern = "[:Dash:]";
\r
2267 String lowerPattern = "[:Lowercase:]";
\r
2268 String formatPattern = "[:Cf:]";
\r
2269 String alphaPattern = "[:Alphabetic:]";
\r
2272 * It used to be that UCD.html and its precursors said
\r
2273 * "Those dashes used to mark connections between pieces of words,
\r
2274 * plus the Katakana middle dot."
\r
2276 * Unicode 4 changed 00AD Soft Hyphen to Cf and removed it from Dash
\r
2277 * but not from Hyphen.
\r
2278 * UTC 94 (2003mar) decided to leave it that way and to changed UCD.html.
\r
2279 * Therefore, do not show errors when testing the Hyphen property.
\r
2281 logln("Starting with Unicode 4, inconsistencies with [:Hyphen:] are\n"
\r
2282 + "known to the UTC and not considered errors.\n");
\r
2284 set1=new UnicodeSet(hyphenPattern);
\r
2285 set2=new UnicodeSet(dashPattern);
\r
2287 /* remove the Katakana middle dot(s) from set1 */
\r
2288 set1.remove(0x30fb);
\r
// NOTE(review): the comment above speaks of removing the middle dot(s) from
// set1, yet the halfwidth variant is removed from set2 - confirm which set
// was intended.
2289 set2.remove (0xff65); /* halfwidth variant */
\r
// diffIsError is false here, matching the note above that Hyphen
// inconsistencies are not treated as errors.
2290 showAMinusB(set1, set2, "[:Hyphen:]", "[:Dash:]", false);
\r
2293 /* check that Cf is neither Hyphen nor Dash nor Alphabetic */
\r
2294 set3=new UnicodeSet(formatPattern);
\r
2295 set4=new UnicodeSet(alphaPattern);
\r
2297 showAIntersectB(set3, set1, "[:Cf:]", "[:Hyphen:]", false);
\r
2298 showAIntersectB(set3, set2, "[:Cf:]", "[:Dash:]", true);
\r
2299 showAIntersectB(set3, set4, "[:Cf:]", "[:Alphabetic:]", true);
\r
2301 * Check that each lowercase character has "small" in its name
\r
2302 * and not "capital".
\r
2303 * There are some such characters, some of which seem odd.
\r
2304 * Use the verbose flag to see these notices.
\r
2306 set1=new UnicodeSet(lowerPattern);
\r
2310 // length=set1.getItem(set1, i, &start, &end, NULL, 0, &errorCode);
\r
2311 // }catch(Exception e){
\r
2314 start = set1.getRangeStart(i);
\r
2315 end = set1.getRangeEnd(i);
\r
2316 length = i<set1.getRangeCount() ? set1.getRangeCount() : 0;
\r
2318 break; /* done with code points, got a string or -1 */
\r
2321 while(start<=end) {
\r
2322 String name=UCharacter.getName(start);
\r
// NOTE(review): String.indexOf never returns a value below -1, so the
// comparison indexOf("CAPITAL")<-1 is always false and only the SMALL test
// contributes to this condition - confirm whether ">-1" was intended.
2324 if( (name.indexOf("SMALL")< 0 || name.indexOf("CAPITAL")<-1) &&
\r
2325 name.indexOf("SMALL CAPITAL")==-1
\r
2327 logln("info: [:Lowercase:] contains U+"+hex(start) + " whose name does not suggest lowercase: " + name);
\r
2335 * Test for an example that unorm_getCanonStartSet() delivers
\r
2336 * all characters that compose from the input one,
\r
2337 * even in multiple steps.
\r
2338 * For example, the set for "I" (0049) should contain both
\r
2339 * I-diaeresis (00CF) and I-diaeresis-acute (1E2E).
\r
2340 * In general, the set for the middle such character should be a subset
\r
2341 * of the set for the first.
\r
2343 set1=new UnicodeSet();
\r
2344 set2=new UnicodeSet();
\r
2345 sset = new USerializedSet();
\r
2346 NormalizerImpl.getCanonStartSet(0x49,sset);
\r
2347 _setAddSerialized(set1, sset);
\r
2349 /* enumerate all characters that are plausible to be latin letters */
\r
// NOTE(review): the decomposition is written into buffer16, but the first
// character is then read from buffer, which no visible line ever fills -
// confirm whether buffer16[0] was intended.
2350 for(start=0xa0; start<0x2000; ++start) {
\r
2351 if(NormalizerImpl.getDecomposition(start, false, buffer16,0,buffer16.length) > 1 && buffer[0]==0x0049) {
\r
2356 compareUSets(set1, set2,
\r
2357 "[canon start set of 0049]", "[all c with canon decomp with 0049]",
\r
2362 public void TestCoverage() {
\r
2364 char ch1 = UCharacter.forDigit(7, 11);
\r
2365 assertEquals("UCharacter.forDigit ", "7", String.valueOf(ch1));
\r
2366 char ch2 = UCharacter.forDigit(17, 20);
\r
2367 assertEquals("UCharacter.forDigit ", "h", String.valueOf(ch2));
\r
2369 //Jitterbug 4451, for coverage
\r
2370 for (int i = 0x0041; i < 0x005B; i++) {
\r
2371 if (!UCharacter.isJavaLetter(i))
\r
2372 errln("FAIL \\u" + hex(i) + " expected to be a letter");
\r
2373 if (!UCharacter.isJavaIdentifierStart(i))
\r
2374 errln("FAIL \\u" + hex(i) + " expected to be a Java identifier start character");
\r
2375 if (!UCharacter.isJavaLetterOrDigit(i))
\r
2376 errln("FAIL \\u" + hex(i) + " expected not to be a Java letter");
\r
2377 if (!UCharacter.isJavaIdentifierPart(i))
\r
2378 errln("FAIL \\u" + hex(i) + " expected to be a Java identifier part character");
\r
2380 char[] spaces = {'\t','\n','\f','\r',' '};
\r
2381 for (int i = 0; i < spaces.length; i++){
\r
2382 if (!UCharacter.isSpace(spaces[i]))
\r
2383 errln("FAIL \\u" + hex(spaces[i]) + " expected to be a Java space");
\r
2385 if (!UCharacter.getStringPropertyValue(UProperty.AGE,'\u3400',0).equals("3.0.0.0")){
\r
2386 errln("FAIL \\u3400 expected to be 3.0.0.0");
\r
2390 public void TestCasePropsDummy() {
\r
2391 // code coverage for UCaseProps.getDummy()
\r
2392 if(UCaseProps.getDummy().tolower(0x41)!=0x41) {
\r
2393 errln("UCaseProps.getDummy().tolower(0x41)!=0x41");
\r
2397 public void TestBiDiPropsDummy() {
\r
2398 // code coverage for UBiDiProps.getDummy()
\r
2399 if(UBiDiProps.getDummy().getClass(0x20)!=0) {
\r
2400 errln("UBiDiProps.getDummy().getClass(0x20)!=0");
\r
2404 public void TestBlockData()
\r
2406 Class ubc = UCharacter.UnicodeBlock.class;
\r
2408 for (int b = 1; b < UCharacter.UnicodeBlock.COUNT; b += 1) {
\r
2409 UCharacter.UnicodeBlock blk = UCharacter.UnicodeBlock.getInstance(b);
\r
2410 int id = blk.getID();
\r
2411 String name = blk.toString();
\r
2414 errln("UCharacter.UnicodeBlock.getInstance(" + b + ") returned a block with id = " + id);
\r
2418 if (ubc.getField(name + "_ID").getInt(blk) != b) {
\r
2419 errln("UCharacter.UnicodeBlock.getInstance(" + b + ") returned a block with a name of " + name +
\r
2420 " which does not match the block id.");
\r
2422 } catch (Exception e) {
\r
2423 errln("Couldn't get the id name for id " + b);
\r