2 *******************************************************************************
3 * Copyright (C) 1996-2013, International Business Machines Corporation and
4 * others. All Rights Reserved.
5 *******************************************************************************
8 package com.ibm.icu.dev.test.lang;
10 import java.io.BufferedReader;
11 import java.util.Arrays;
12 import java.util.Locale;
14 import com.ibm.icu.dev.test.TestFmwk;
15 import com.ibm.icu.dev.test.TestUtil;
16 import com.ibm.icu.impl.Norm2AllModes;
17 import com.ibm.icu.impl.Normalizer2Impl;
18 import com.ibm.icu.impl.PatternProps;
19 import com.ibm.icu.impl.UCharacterName;
20 import com.ibm.icu.impl.Utility;
21 import com.ibm.icu.lang.UCharacter;
22 import com.ibm.icu.lang.UCharacterCategory;
23 import com.ibm.icu.lang.UCharacterDirection;
24 import com.ibm.icu.lang.UCharacterEnums;
25 import com.ibm.icu.lang.UProperty;
26 import com.ibm.icu.lang.UScript;
27 import com.ibm.icu.text.Normalizer2;
28 import com.ibm.icu.text.UTF16;
29 import com.ibm.icu.text.UnicodeSet;
30 import com.ibm.icu.text.UnicodeSetIterator;
31 import com.ibm.icu.util.RangeValueIterator;
32 import com.ibm.icu.util.ULocale;
33 import com.ibm.icu.util.ValueIterator;
34 import com.ibm.icu.util.VersionInfo;
37 * Testing class for UCharacter
38 * Mostly following the test cases for ICU
39 * @author Syn Wee Quek
42 public final class UCharacterTest extends TestFmwk
44 // private variables =============================================
47 * ICU4J data version number
// Version that TestVersion() expects UCharacter.getUnicodeVersion() to return.
49 private final VersionInfo VERSION_ = VersionInfo.getInstance("6.3.0.0");
51 // constructor ===================================================
// Public no-argument constructor.
56 public UCharacterTest()
60 // public methods ================================================
// Command-line entry point; creates the UCharacterTest instance.
62 public static void main(String[] arg)
66 UCharacterTest test = new UCharacterTest();
76 * Testing the letter and number determination in UCharacter
// Verifies UCharacter.isLetter(), isDigit() and isLetterOrDigit() on U+0041..U+005A and
// U+0660..U+0669, then compares the [:Nd:] and [:Numeric_Type=Decimal:] UnicodeSets.
78 public void TestLetterNumber()
// Every code point in U+0041..U+005A must be a letter.
80 for (int i = 0x0041; i < 0x005B; i ++)
81 if (!UCharacter.isLetter(i))
82 errln("FAIL \\u" + hex(i) + " expected to be a letter");
// U+0660..U+0669 must not be letters ...
84 for (int i = 0x0660; i < 0x066A; i ++)
85 if (UCharacter.isLetter(i))
86 errln("FAIL \\u" + hex(i) + " expected not to be a letter");
// ... but each of them must be a digit.
88 for (int i = 0x0660; i < 0x066A; i ++)
89 if (!UCharacter.isDigit(i))
90 errln("FAIL \\u" + hex(i) + " expected to be a digit");
// NOTE(review): this loop fails when isLetterOrDigit() is false for U+0041..U+005A, yet the
// message reads "expected not to be a digit" -- the wording looks wrong; confirm.
92 for (int i = 0x0041; i < 0x005B; i ++)
93 if (!UCharacter.isLetterOrDigit(i))
94 errln("FAIL \\u" + hex(i) + " expected not to be a digit");
// NOTE(review): the message below is concatenated without a space before "expected".
96 for (int i = 0x0660; i < 0x066A; i ++)
97 if (!UCharacter.isLetterOrDigit(i))
98 errln("FAIL \\u" + hex(i) +
99 "expected to be either a letter or a digit");
102 * The following checks work only starting from Unicode 4.0.
103 * Check the version number here.
105 VersionInfo version = UCharacter.getUnicodeVersion();
// The condition is true for a Unicode major version below 4 or for exactly 4.0.1.
106 if(version.getMajor()<4 || version.equals(VersionInfo.getInstance(4, 0, 1))) {
114 * Verify that exactly the digit characters have decimal digit values.
115 * This assumption is used in the implementation of u_digit()
116 * (which checks nt=de)
117 * compared with the parallel java.lang.Character.digit()
120 * This was not true in Unicode 3.2 and earlier.
121 * Unicode 4.0 fixed discrepancies.
122 * Unicode 4.0.1 re-introduced problems in this area due to an
123 * unintentionally incomplete last-minute change.
// Build both sets from their patterns and hand them to compareUSets().
125 String digitsPattern = "[:Nd:]";
126 String decimalValuesPattern = "[:Numeric_Type=Decimal:]";
128 UnicodeSet digits, decimalValues;
130 digits= new UnicodeSet(digitsPattern);
131 decimalValues=new UnicodeSet(decimalValuesPattern);
134 compareUSets(digits, decimalValues, "[:Nd:]", "[:Numeric_Type=Decimal:]", true);
140 * Tests for space determination in UCharacter
// Exercises UCharacter.isSpaceChar()/isWhitespace() and PatternProps.isWhiteSpace() on fixed
// code point tables, then compares the JDK and ICU white space predicates code point by code point.
142 public void TestSpaces()
// Sample tables: accepted / rejected by isSpaceChar(), accepted / rejected by isWhitespace().
144 int spaces[] = {0x0020, 0x00a0, 0x2000, 0x2001, 0x2005};
145 int nonspaces[] = {0x0061, 0x0062, 0x0063, 0x0064, 0x0074};
146 int whitespaces[] = {0x2008, 0x2009, 0x200a, 0x001c, 0x000c /* ,0x200b */}; // 0x200b was "Zs" in Unicode 4.0, but it is "Cf" in Unicode 4.1
147 int nonwhitespaces[] = {0x0061, 0x0062, 0x003c, 0x0028, 0x003f, 0x00a0, 0x2007, 0x202f, 0xfefe, 0x200b};
// NOTE(review): the loop bound is spaces.length (5) while nonwhitespaces has 10 entries, so
// nonwhitespaces[5..9] (0x00a0, 0x2007, 0x202f, 0xfefe, 0x200b) are never reached by this loop -- confirm intent.
149 int size = spaces.length;
150 for (int i = 0; i < size; i ++)
152 if (!UCharacter.isSpaceChar(spaces[i]))
154 errln("FAIL \\u" + hex(spaces[i]) +
155 " expected to be a space character");
159 if (UCharacter.isSpaceChar(nonspaces[i]))
161 errln("FAIL \\u" + hex(nonspaces[i]) +
162 " expected not to be space character");
166 if (!UCharacter.isWhitespace(whitespaces[i]))
168 errln("FAIL \\u" + hex(whitespaces[i]) +
169 " expected to be a white space character");
172 if (UCharacter.isWhitespace(nonwhitespaces[i]))
174 errln("FAIL \\u" + hex(nonwhitespaces[i]) +
175 " expected not to be a space character");
178 logln("Ok \\u" + hex(spaces[i]) + " and \\u" +
179 hex(nonspaces[i]) + " and \\u" + hex(whitespaces[i]) +
180 " and \\u" + hex(nonwhitespaces[i]));
// Code points that PatternProps.isWhiteSpace() must accept ...
183 int patternWhiteSpace[] = {0x9, 0xd, 0x20, 0x85,
184 0x200e, 0x200f, 0x2028, 0x2029};
// ... and code points that it must reject.
185 int nonPatternWhiteSpace[] = {0x8, 0xe, 0x21, 0x86, 0xa0, 0xa1,
186 0x1680, 0x1681, 0x180e, 0x180f,
187 0x1FFF, 0x2000, 0x200a, 0x200b,
188 0x2010, 0x202f, 0x2030, 0x205f,
189 0x2060, 0x3000, 0x3001};
190 for (int i = 0; i < patternWhiteSpace.length; i ++) {
191 if (!PatternProps.isWhiteSpace(patternWhiteSpace[i])) {
192 errln("\\u" + Utility.hex(patternWhiteSpace[i], 4)
193 + " expected to be a Pattern_White_Space");
196 for (int i = 0; i < nonPatternWhiteSpace.length; i ++) {
197 if (PatternProps.isWhiteSpace(nonPatternWhiteSpace[i])) {
198 errln("\\u" + Utility.hex(nonPatternWhiteSpace[i], 4)
199 + " expected to be a non-Pattern_White_Space");
// The expression below ORs one bit each for the SPACE_SEPARATOR, LINE_SEPARATOR and
// PARAGRAPH_SEPARATOR general categories.
203 // TODO: propose public API for constants like uchar.h's U_GC_*_MASK
204 // (http://bugs.icu-project.org/trac/ticket/7461)
206 (1 << UCharacter.SPACE_SEPARATOR) |
207 (1 << UCharacter.LINE_SEPARATOR) |
208 (1 << UCharacter.PARAGRAPH_SEPARATOR);
210 // UCharacter.isWhitespace(c) should be the same as Character.isWhitespace().
211 // This uses logln() because Character.isWhitespace() differs between Java versions, thus
212 // it is not necessarily an error if there is a difference between
213 // particular Java and ICU versions.
214 // However, you need to run tests with -v to see the output.
215 // Also note that, at least as of Unicode 5.2,
216 // there are no supplementary white space characters.
// Per code point in U+0000..U+FFFF: j = JDK isWhitespace(), i = ICU isWhitespace(),
// u = ICU isUWhiteSpace(), z = derived from the GENERAL_CATEGORY_MASK property value.
217 for (int c = 0; c <= 0xffff; ++c) {
218 boolean j = Character.isWhitespace(c);
219 boolean i = UCharacter.isWhitespace(c);
220 boolean u = UCharacter.isUWhiteSpace(c);
221 boolean z = (UCharacter.getIntPropertyValue(c, UProperty.GENERAL_CATEGORY_MASK) &
225 "isWhitespace(U+%04x) difference: JDK %5b ICU %5b Unicode WS %5b Z Separator %5b",
227 } else if (j || i || u || z) {
229 "isWhitespace(U+%04x) FYI: JDK %5b ICU %5b Unicode WS %5b Z Separator %5b",
// Same style of comparison for Character.isSpace() vs. UCharacter.isSpace() over U+0000..U+00FF.
233 for (char c = 0; c <= 0xff; ++c) {
234 boolean j = Character.isSpace(c);
235 boolean i = UCharacter.isSpace(c);
236 boolean z = (UCharacter.getIntPropertyValue(c, UProperty.GENERAL_CATEGORY_MASK) &
240 "isSpace(U+%04x) difference: JDK %5b ICU %5b Z Separator %5b",
242 } else if (j || i || z) {
244 "isSpace(U+%04x) FYI: JDK %5b ICU %5b Z Separator %5b",
251 * Test various implementations of Pattern_Syntax & Pattern_White_Space.
// Cross-checks PatternProps.isSyntax(), isWhiteSpace() and isSyntaxOrWhiteSpace() against the
// [:Pattern_Syntax:] / [:Pattern_White_Space:] property sets and against hard-coded range lists.
253 public void TestPatternProperties() {
// Naming: *_pp sets start empty (presumably filled from PatternProps in the loop below -- confirm),
// *_prop sets come from the Unicode property, *_list sets from literal ranges.
254 UnicodeSet syn_pp = new UnicodeSet();
255 UnicodeSet syn_prop = new UnicodeSet("[:Pattern_Syntax:]");
256 UnicodeSet syn_list = new UnicodeSet(
257 "[!-/\\:-@\\[-\\^`\\{-~"+
258 "\u00A1-\u00A7\u00A9\u00AB\u00AC\u00AE\u00B0\u00B1\u00B6\u00BB\u00BF\u00D7\u00F7"+
259 "\u2010-\u2027\u2030-\u203E\u2041-\u2053\u2055-\u205E\u2190-\u245F\u2500-\u2775"+
260 "\u2794-\u2BFF\u2E00-\u2E7F\u3001-\u3003\u3008-\u3020\u3030\uFD3E\uFD3F\uFE45\uFE46]");
261 UnicodeSet ws_pp = new UnicodeSet();
262 UnicodeSet ws_prop = new UnicodeSet("[:Pattern_White_Space:]");
263 UnicodeSet ws_list = new UnicodeSet("[\\u0009-\\u000D\\ \\u0085\\u200E\\u200F\\u2028\\u2029]");
264 UnicodeSet syn_ws_pp = new UnicodeSet();
// Union of the two property sets, used as the reference for isSyntaxOrWhiteSpace().
265 UnicodeSet syn_ws_prop = new UnicodeSet(syn_prop).addAll(ws_prop);
// Query the three PatternProps predicates for every code point in U+0000..U+FFFF.
266 for(int c=0; c<=0xffff; ++c) {
267 if(PatternProps.isSyntax(c)) {
270 if(PatternProps.isWhiteSpace(c)) {
273 if(PatternProps.isSyntaxOrWhiteSpace(c)) {
// Each PatternProps-derived set is compared with its property set and, where available, its literal list.
277 compareUSets(syn_pp, syn_prop,
278 "PatternProps.isSyntax()", "[:Pattern_Syntax:]", true);
279 compareUSets(syn_pp, syn_list,
280 "PatternProps.isSyntax()", "[Pattern_Syntax ranges]", true);
281 compareUSets(ws_pp, ws_prop,
282 "PatternProps.isWhiteSpace()", "[:Pattern_White_Space:]", true);
283 compareUSets(ws_pp, ws_list,
284 "PatternProps.isWhiteSpace()", "[Pattern_White_Space ranges]", true);
285 compareUSets(syn_ws_pp, syn_ws_prop,
286 "PatternProps.isSyntaxOrWhiteSpace()",
287 "[[:Pattern_Syntax:][:Pattern_White_Space:]]", true);
291 * Tests for defined and undefined characters
// Checks UCharacter.isDefined() against sample code points expected to be undefined and defined.
293 public void TestDefined()
295 int undefined[] = {0xfff1, 0xfff7, 0xfa6e};
296 int defined[] = {0x523E, 0x004f88, 0x00fffd};
// Both tables hold three entries, so a single index walks them in parallel.
298 int size = undefined.length;
299 for (int i = 0; i < size; i ++)
301 if (UCharacter.isDefined(undefined[i]))
303 errln("FAIL \\u" + hex(undefined[i]) +
304 " expected not to be defined");
307 if (!UCharacter.isDefined(defined[i]))
309 errln("FAIL \\u" + hex(defined[i]) + " expected defined");
316 * Tests for base characters and their cellwidth
// Checks UCharacter.isBaseForm() against sample base and non-base code points.
318 public void TestBase()
320 int base[] = {0x0061, 0x000031, 0x0003d2};
321 int nonbase[] = {0x002B, 0x000020, 0x00203B};
// Both tables hold three entries, so a single index walks them in parallel.
322 int size = base.length;
323 for (int i = 0; i < size; i ++)
325 if (UCharacter.isBaseForm(nonbase[i]))
327 errln("FAIL \\u" + hex(nonbase[i]) +
328 " expected not to be a base character");
331 if (!UCharacter.isBaseForm(base[i]))
333 errln("FAIL \\u" + hex(base[i]) +
334 " expected to be a base character");
341 * Tests for digit characters
// Checks UCharacter.isDigit() and UCharacter.digit() against tables of code points and expected values.
343 public void TestDigits()
345 int digits[] = {0x0030, 0x000662, 0x000F23, 0x000ED5, 0x002160};
347 //special characters not in the properties table
348 int digits2[] = {0x3007, 0x004e00, 0x004e8c, 0x004e09, 0x0056d8,
349 0x004e94, 0x00516d, 0x4e03, 0x00516b, 0x004e5d};
350 int nondigits[] = {0x0010, 0x000041, 0x000122, 0x0068FE};
// digitvalues[i] is the expected value for digits[i]; digitvalues2[i] for digits2[i].
352 int digitvalues[] = {0, 2, 3, 5, 1};
353 int digitvalues2[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9};
// NOTE(review): the condition is "isDigit(x) && digit(x) != expected", so an entry for which
// isDigit() returns false is never reported -- confirm that this is intended.
355 int size = digits.length;
356 for (int i = 0; i < size; i ++) {
357 if (UCharacter.isDigit(digits[i]) &&
358 UCharacter.digit(digits[i]) != digitvalues[i])
360 errln("FAIL \\u" + hex(digits[i]) +
361 " expected digit with value " + digitvalues[i]);
// None of the nondigits entries may be reported as a digit.
365 size = nondigits.length;
366 for (int i = 0; i < size; i ++)
367 if (UCharacter.isDigit(nondigits[i]))
369 errln("FAIL \\u" + hex(nondigits[i]) + " expected nondigit");
// NOTE(review): size is reassigned here but the loop uses the literal bound 10 (which equals
// digits2.length today); the same "isDigit(x) && ..." guard as above applies.
373 size = digits2.length;
374 for (int i = 0; i < 10; i ++) {
375 if (UCharacter.isDigit(digits2[i]) &&
376 UCharacter.digit(digits2[i]) != digitvalues2[i])
378 errln("FAIL \\u" + hex(digits2[i]) +
379 " expected digit with value " + digitvalues2[i]);
386 * Tests for numeric characters
// Checks UCharacter.getNumericValue(), getUnicodeNumericValue() and getHanNumericValue()
// against expected values for ASCII digits, ASCII/fullwidth letters and Han numerals.
388 public void TestNumeric()
// U+00BC is expected to yield -2 from getNumericValue().
390 if (UCharacter.getNumericValue(0x00BC) != -2) {
391 errln("Numeric value of 0x00BC expected to be -2");
// NOTE(review): the bound "i < '9'" stops at '8', so '9' itself is not checked -- confirm.
394 for (int i = '0'; i < '9'; i ++) {
395 int n1 = UCharacter.getNumericValue(i);
396 double n2 = UCharacter.getUnicodeNumericValue(i);
397 if (n1 != n2 || n1 != (i - '0')) {
398 errln("Numeric value of " + (char)i + " expected to be " +
// Letters: getUnicodeNumericValue() must be NO_NUMERIC_VALUE while getNumericValue() is 10 + offset from 'A'.
// NOTE(review): the bound "i < 'F'" stops at 'E' -- confirm.
402 for (int i = 'A'; i < 'F'; i ++) {
403 int n1 = UCharacter.getNumericValue(i);
404 double n2 = UCharacter.getUnicodeNumericValue(i);
405 if (n2 != UCharacter.NO_NUMERIC_VALUE || n1 != (i - 'A' + 10)) {
406 errln("Numeric value of " + (char)i + " expected to be " +
// NOTE(review): the bound "i < 0xFF26" stops at U+FF25, one short of the A-F range named below -- confirm.
410 for (int i = 0xFF21; i < 0xFF26; i ++) {
411 // testing full-width latin characters A-F
412 int n1 = UCharacter.getNumericValue(i);
413 double n2 = UCharacter.getUnicodeNumericValue(i);
414 if (n2 != UCharacter.NO_NUMERIC_VALUE || n1 != (i - 0xFF21 + 10)) {
415 errln("Numeric value of " + (char)i + " expected to be " +
419 // testing han numbers
// Flat list of (code point, expected value) pairs; the loop below steps through it two at a time.
420 int han[] = {0x96f6, 0, 0x58f9, 1, 0x8cb3, 2, 0x53c3, 3,
421 0x8086, 4, 0x4f0d, 5, 0x9678, 6, 0x67d2, 7,
422 0x634c, 8, 0x7396, 9, 0x5341, 10, 0x62fe, 10,
423 0x767e, 100, 0x4f70, 100, 0x5343, 1000, 0x4edf, 1000,
424 0x824c, 10000, 0x5104, 100000000};
425 for (int i = 0; i < han.length; i += 2) {
426 if (UCharacter.getHanNumericValue(han[i]) != han[i + 1]) {
427 errln("Numeric value of \\u" +
428 Integer.toHexString(han[i]) + " expected to be " +
// Reports an error when UCharacter.getUnicodeVersion() does not equal the VERSION_ field.
437 public void TestVersion()
439 if (!UCharacter.getUnicodeVersion().equals(VERSION_))
440 errln("FAIL expected: " + VERSION_ + " got: " + UCharacter.getUnicodeVersion());
444 * Tests for control characters
// Checks UCharacter.isISOControl() against sample control and non-control code points.
446 public void TestISOControl()
448 int control[] = {0x001b, 0x000097, 0x000082};
449 int noncontrol[] = {0x61, 0x000031, 0x0000e2};
// Both tables hold three entries, so a single index walks them in parallel.
451 int size = control.length;
452 for (int i = 0; i < size; i ++)
454 if (!UCharacter.isISOControl(control[i]))
456 errln("FAIL 0x" + Integer.toHexString(control[i]) +
457 " expected to be a control character");
460 if (UCharacter.isISOControl(noncontrol[i]))
462 errln("FAIL 0x" + Integer.toHexString(noncontrol[i]) +
463 " expected to be not a control character");
467 logln("Ok 0x" + Integer.toHexString(control[i]) + " and 0x" +
468 Integer.toHexString(noncontrol[i]));
// Checks that UCharacter.isSupplementary() is false below U+10000 and true from U+10000 upward.
// The error messages state the expected classification of the offending code point.
475 public void TestSupplementary()
477 for (int i = 0; i < 0x10000; i ++) {
478 if (UCharacter.isSupplementary(i)) {
479 errln("Codepoint \\u" + Integer.toHexString(i) +
480 " is not supplementary");
// NOTE(review): the bound "i < 0x10FFFF" leaves U+10FFFF itself unchecked -- confirm.
483 for (int i = 0x10000; i < 0x10FFFF; i ++) {
484 if (!UCharacter.isSupplementary(i)) {
485 errln("Codepoint \\u" + Integer.toHexString(i) +
486 " is supplementary");
// Checks UCharacter.isMirrored(), getMirror() and getBidiPairedBracket() on fixed samples and
// over every range of the [:Bidi_Mirrored:] set.
494 public void TestMirror()
// The first four samples must be mirrored, the last four must not.
496 if (!(UCharacter.isMirrored(0x28) && UCharacter.isMirrored(0xbb) &&
497 UCharacter.isMirrored(0x2045) && UCharacter.isMirrored(0x232a)
498 && !UCharacter.isMirrored(0x27) &&
499 !UCharacter.isMirrored(0x61) && !UCharacter.isMirrored(0x284)
500 && !UCharacter.isMirrored(0x3400))) {
501 errln("isMirrored() does not work correctly");
// Expected mirror pairs first, then code points that must map to themselves.
504 if (!(UCharacter.getMirror(0x3c) == 0x3e &&
505 UCharacter.getMirror(0x5d) == 0x5b &&
506 UCharacter.getMirror(0x208d) == 0x208e &&
507 UCharacter.getMirror(0x3017) == 0x3016 &&
509 UCharacter.getMirror(0xbb) == 0xab &&
510 UCharacter.getMirror(0x2215) == 0x29F5 &&
511 UCharacter.getMirror(0x29F5) == 0x2215 && /* large delta between the code points */
513 UCharacter.getMirror(0x2e) == 0x2e &&
514 UCharacter.getMirror(0x6f3) == 0x6f3 &&
515 UCharacter.getMirror(0x301c) == 0x301c &&
516 UCharacter.getMirror(0xa4ab) == 0xa4ab &&
518 /* see Unicode Corrigendum #6 at http://www.unicode.org/versions/corrigendum6.html */
519 UCharacter.getMirror(0x2018) == 0x2018 &&
520 UCharacter.getMirror(0x201b) == 0x201b &&
521 UCharacter.getMirror(0x301d) == 0x301d)) {
522 errln("getMirror() does not work correctly");
525 /* verify that Bidi_Mirroring_Glyph roundtrips */
526 UnicodeSet set=new UnicodeSet("[:Bidi_Mirrored:]");
527 UnicodeSetIterator iter=new UnicodeSetIterator(set);
528 int start, end, c2, c3;
// Iterate range by range; start..end are the bounds of the current range.
529 while(iter.nextRange() && (start=iter.codepoint)>=0) {
530 end=iter.codepointEnd;
// c2 = mirror of start, c3 = mirror of that mirror (used in the round-trip message).
532 c2=UCharacter.getMirror(start);
533 c3=UCharacter.getMirror(c2);
535 errln("getMirror() does not roundtrip: U+"+hex(start)+"->U+"+hex(c2)+"->U+"+hex(c3));
// c3 is reused for the paired bracket; the expectation depends on whether Bidi_Paired_Bracket_Type is NONE.
537 c3=UCharacter.getBidiPairedBracket(start);
538 if(UCharacter.getIntPropertyValue(start, UProperty.BIDI_PAIRED_BRACKET_TYPE)==UCharacter.BidiPairedBracketType.NONE) {
540 errln("u_getBidiPairedBracket(U+"+hex(start)+") != self for bpt(c)==None");
544 errln("u_getBidiPairedBracket(U+"+hex(start)+") != U+"+hex(c2)+" = bmg(c)'");
// Advance through every code point of the current range.
547 } while(++start<=end);
550 // verify that Unicode Corrigendum #6 reverts mirrored status of the following
551 if (UCharacter.isMirrored(0x2018) ||
552 UCharacter.isMirrored(0x201d) ||
553 UCharacter.isMirrored(0x201f) ||
554 UCharacter.isMirrored(0x301e)) {
555 errln("Unicode Corrigendum #6 conflict, one or more of 2018/201d/201f/301e has mirrored property");
560 * Tests for printable characters
// Checks UCharacter.isPrintable() on sample tables, on the control range U+0000..U+009F and on
// the Latin-1 range U+0020..U+00FF.
562 public void TestPrint()
564 int printable[] = {0x0042, 0x00005f, 0x002014};
565 int nonprintable[] = {0x200c, 0x00009f, 0x00001b};
// Both tables hold three entries, so a single index walks them in parallel.
567 int size = printable.length;
568 for (int i = 0; i < size; i ++)
570 if (!UCharacter.isPrintable(printable[i]))
572 errln("FAIL \\u" + hex(printable[i]) +
573 " expected to be a printable character");
576 if (UCharacter.isPrintable(nonprintable[i]))
578 errln("FAIL \\u" + hex(nonprintable[i]) +
579 " expected not to be a printable character");
582 logln("Ok \\u" + hex(printable[i]) + " and \\u" +
583 hex(nonprintable[i]));
586 // test all ISO 8 controls
587 for (int ch = 0; ch <= 0x9f; ++ ch) {
589 // skip ASCII graphic characters and continue with DEL
592 if (UCharacter.isPrintable(ch)) {
593 errln("Fail \\u" + hex(ch) +
594 " is a ISO 8 control character hence not printable\n");
598 /* test all Latin-1 graphic characters */
599 for (int ch = 0x20; ch <= 0xff; ++ ch) {
// U+00AD is exempt from the printable requirement.
603 if (!UCharacter.isPrintable(ch)
604 && ch != 0x00AD/* Unicode 4.0 changed the definition of soft hyphen to be a Cf*/) {
605 errln("Fail \\u" + hex(ch) +
606 " is a Latin-1 graphic character\n");
612 * Testing for identifier characters
// Checks UCharacter.isUnicodeIdentifierStart(), isUnicodeIdentifierPart() and
// isIdentifierIgnorable() against positive and negative sample tables.
614 public void TestIdentifier()
// Each predicate has a table of expected matches and a table of expected non-matches.
616 int unicodeidstart[] = {0x0250, 0x0000e2, 0x000061};
617 int nonunicodeidstart[] = {0x2000, 0x00000a, 0x002019};
618 int unicodeidpart[] = {0x005f, 0x000032, 0x000045};
619 int nonunicodeidpart[] = {0x2030, 0x0000a3, 0x000020};
620 int idignore[] = {0x0006, 0x0010, 0x206b};
621 int nonidignore[] = {0x0075, 0x0000a3, 0x000061};
// All six tables hold three entries, so a single index walks them in parallel.
623 int size = unicodeidstart.length;
624 for (int i = 0; i < size; i ++)
626 if (!UCharacter.isUnicodeIdentifierStart(unicodeidstart[i]))
628 errln("FAIL \\u" + hex(unicodeidstart[i]) +
629 " expected to be a unicode identifier start character");
632 if (UCharacter.isUnicodeIdentifierStart(nonunicodeidstart[i]))
634 errln("FAIL \\u" + hex(nonunicodeidstart[i]) +
635 " expected not to be a unicode identifier start " +
639 if (!UCharacter.isUnicodeIdentifierPart(unicodeidpart[i]))
641 errln("FAIL \\u" + hex(unicodeidpart[i]) +
642 " expected to be a unicode identifier part character");
645 if (UCharacter.isUnicodeIdentifierPart(nonunicodeidpart[i]))
647 errln("FAIL \\u" + hex(nonunicodeidpart[i]) +
648 " expected not to be a unicode identifier part " +
652 if (!UCharacter.isIdentifierIgnorable(idignore[i]))
654 errln("FAIL \\u" + hex(idignore[i]) +
655 " expected to be a ignorable unicode character");
658 if (UCharacter.isIdentifierIgnorable(nonidignore[i]))
660 errln("FAIL \\u" + hex(nonidignore[i]) +
661 " expected not to be a ignorable unicode character");
664 logln("Ok \\u" + hex(unicodeidstart[i]) + " and \\u" +
665 hex(nonunicodeidstart[i]) + " and \\u" +
666 hex(unicodeidpart[i]) + " and \\u" +
667 hex(nonunicodeidpart[i]) + " and \\u" +
668 hex(idignore[i]) + " and \\u" + hex(nonidignore[i]));
673 * Tests for the character types, direction.<br>
674 * This method reads in UnicodeData.txt file for testing purposes. A
675 * default path is provided relative to the src path, however the user
676 * could set a system property to change the directory path.<br>
677 * e.g. java -DUnicodeData="data_directory_path"
678 * com.ibm.icu.dev.test.lang.UCharacterTest
// Reads unicode/UnicodeData.txt through TestUtil.getDataReader() and compares UCharacter and
// Normalizer2 results with the fields of each line (general category, combining class,
// direction, decomposition, ISO comment, case mappings). Afterwards it runs checks on the
// block of U+0041 and on general categories in two further code point loops.
680 public void TestUnicodeData()
682 // this is the 2 char category types used in the UnicodeData file
684 "LuLlLtLmLoMnMeMcNdNlNoZsZlZpCcCfCoCsPdPsPePcPoSmScSkSoPiPf";
686 // directionality types used in the UnicodeData file
687 // padded by spaces to make each type size 4
689 "L R EN ES ET AN CS B S WS ON LRE LRO AL RLE RLO PDF NSM BN FSI LRI RLI PDI ";
691 Normalizer2 nfc = Normalizer2.getNFCInstance();
692 Normalizer2 nfkc = Normalizer2.getNFKCInstance();
696 BufferedReader input = TestUtil.getDataReader("unicode/UnicodeData.txt");
700 String s = input.readLine();
// Lines shorter than 4 characters or starting with '#' get separate handling.
704 if(s.length()<4 || s.startsWith("#")) {
// Limit -1 keeps trailing empty fields, so a complete line yields 15 fields.
707 String[] fields = s.split(";", -1);
// This check only runs when Java assertions are enabled.
708 assert (fields.length == 15 ) : "Number of fields is " + fields.length + ": " + s;
// Field 0 is the code point in hexadecimal.
710 int ch = Integer.parseInt(fields[0], 16);
712 // testing the general category
// Category abbreviations are 2 characters wide, so (index / 2) + 1 is the expected type value.
713 int type = TYPE.indexOf(fields[2]);
717 type = (type >> 1) + 1;
718 if (UCharacter.getType(ch) != type)
720 errln("FAIL \\u" + hex(ch) + " expected type " + type);
// The GENERAL_CATEGORY_MASK property must be the single bit for that type.
724 if (UCharacter.getIntPropertyValue(ch,
725 UProperty.GENERAL_CATEGORY_MASK) != (1 << type)) {
726 errln("error: getIntPropertyValue(\\u" +
727 Integer.toHexString(ch) +
728 ", UProperty.GENERAL_CATEGORY_MASK) != " +
729 "getMask(getType(ch))");
732 // testing combining class
// Field 3 is the decimal combining class; UCharacter and the NFKC normalizer must both report it.
733 int cc = Integer.parseInt(fields[3]);
734 if (UCharacter.getCombiningClass(ch) != cc)
736 errln("FAIL \\u" + hex(ch) + " expected combining " +
740 if (nfkc.getCombiningClass(ch) != cc)
742 errln("FAIL \\u" + hex(ch) + " expected NFKC combining " +
747 // testing the direction
748 String d = fields[4];
// DIR entries are 4 characters wide, so (index / 4) is the expected direction value.
752 int dir = DIR.indexOf(d) >> 2;
753 if (UCharacter.getDirection(ch) != dir)
755 errln("FAIL \\u" + hex(ch) +
756 " expected direction " + dir + " but got " + UCharacter.getDirection(ch));
// The same value, narrowed to byte, is compared with getDirectionality().
760 byte bdir = (byte)dir;
761 if (UCharacter.getDirectionality(ch) != bdir)
763 errln("FAIL \\u" + hex(ch) +
764 " expected directionality " + bdir + " but got " +
765 UCharacter.getDirectionality(ch));
769 /* get Decomposition_Type & Decomposition_Mapping, field 5 */
771 if(fields[5].length()==0) {
772 /* no decomposition, except UnicodeData.txt omits Hangul syllable decompositions */
773 if(ch==0xac00 || ch==0xd7a3) {
774 dt=UCharacter.DecompositionType.CANONICAL;
776 dt=UCharacter.DecompositionType.NONE;
// A leading "<tag>" names the decomposition type; without a tag the type is canonical.
781 if(d.charAt(0)=='<') {
782 int end=d.indexOf('>', 1);
784 dt=UCharacter.getPropertyValueEnum(UProperty.DECOMPOSITION_TYPE, d.substring(1, end));
785 while(d.charAt(++end)==' ') {} // skip spaces
789 dt=UCharacter.DecompositionType.CANONICAL;
793 if(dt>UCharacter.DecompositionType.NONE) {
796 } else if(ch==0xd7a3) {
// Parse the space-separated hexadecimal code points of the mapping into one string.
799 String[] dmChars=d.split(" +");
800 StringBuilder dmb=new StringBuilder(dmChars.length);
801 for(String dmc : dmChars) {
802 dmb.appendCodePoint(Integer.parseInt(dmc, 16));
// NOTE(review): "%04lX" is a C-style specifier; java.util.Formatter has no 'l' length modifier,
// so String.format() would throw UnknownFormatConversionException if this line runs -- confirm and fix.
810 errln(String.format("error in UnicodeData.txt: syntax error in U+%04lX decomposition field", ch));
813 int i=UCharacter.getIntPropertyValue(ch, UProperty.DECOMPOSITION_TYPE);
815 String.format("error: UCharacter.getIntPropertyValue(U+%04x, UProperty.DECOMPOSITION_TYPE) is wrong", ch),
817 /* Expect Decomposition_Mapping=nfkc.getRawDecomposition(c). */
818 String mapping=nfkc.getRawDecomposition(ch);
820 String.format("error: nfkc.getRawDecomposition(U+%04x) is wrong", ch),
822 /* For canonical decompositions only, expect Decomposition_Mapping=nfc.getRawDecomposition(c). */
823 if(dt!=UCharacter.DecompositionType.CANONICAL) {
826 mapping=nfc.getRawDecomposition(ch);
828 String.format("error: nfc.getRawDecomposition(U+%04x) is wrong", ch),
// Canonical mappings outside Full_Composition_Exclusion: pass the first and last code points
// of the mapping to nfc.composePair().
831 if(dt==UCharacter.DecompositionType.CANONICAL
832 && !UCharacter.hasBinaryProperty(ch, UProperty.FULL_COMPOSITION_EXCLUSION)) {
833 int a=dm.codePointAt(0);
834 int b=dm.codePointBefore(dm.length());
835 int composite=nfc.composePair(a, b);
838 "error: nfc U+%04X decomposes to U+%04X+U+%04X "+
839 "but does not compose back (instead U+%04X)",
840 ch, a, b, composite),
843 * Note: NFKC has fewer round-trip mappings than NFC,
844 * so we can't just test nfkc.composePair(a, b) here without further data.
848 // testing iso comment
// Field 11 is the expected ISO comment.
850 String isocomment = fields[11];
851 String comment = UCharacter.getISOComment(ch);
852 if (comment == null) {
855 if (!comment.equals(isocomment)) {
856 errln("FAIL \\u" + hex(ch) +
857 " expected iso comment " + isocomment);
861 if(e.getMessage().indexOf("unames.icu") >= 0){
// Fields 12, 13 and 14 hold the simple uppercase, lowercase and titlecase mappings in hexadecimal.
868 String upper = fields[12];
870 if (upper.length() > 0) {
871 tempchar = Integer.parseInt(upper, 16);
873 int resultCp = UCharacter.toUpperCase(ch);
874 if (resultCp != tempchar) {
875 errln("FAIL \\u" + Utility.hex(ch, 4)
876 + " expected uppercase \\u"
877 + Utility.hex(tempchar, 4)
879 + Utility.hex(resultCp, 4));
883 String lower = fields[13];
885 if (lower.length() > 0) {
886 tempchar = Integer.parseInt(lower, 16);
888 if (UCharacter.toLowerCase(ch) != tempchar) {
889 errln("FAIL \\u" + Utility.hex(ch, 4)
890 + " expected lowercase \\u"
891 + Utility.hex(tempchar, 4));
897 String title = fields[14];
899 if (title.length() > 0) {
900 tempchar = Integer.parseInt(title, 16);
902 if (UCharacter.toTitleCase(ch) != tempchar) {
903 errln("FAIL \\u" + Utility.hex(ch, 4)
904 + " expected titlecase \\u"
905 + Utility.hex(tempchar, 4));
911 warnln("Could not find unames.icu");
// U+0041 must be in the BASIC_LATIN block, both via UnicodeBlock.of() and via the BLOCK property.
920 if (UCharacter.UnicodeBlock.of(0x0041)
921 != UCharacter.UnicodeBlock.BASIC_LATIN
922 || UCharacter.getIntPropertyValue(0x41, UProperty.BLOCK)
923 != UCharacter.UnicodeBlock.BASIC_LATIN.getID()) {
924 errln("UCharacter.UnicodeBlock.of(\\u0041) property failed! "
926 + UCharacter.UnicodeBlock.BASIC_LATIN.getID() + " got "
927 + UCharacter.UnicodeBlock.of(0x0041));
930 // sanity check on repeated properties
// Starts at U+FFFE; the increment is inside the body. Visited code points are expected to be UNASSIGNED.
931 for (int ch = 0xfffe; ch <= 0x10ffff;) {
932 int type = UCharacter.getType(ch);
933 if (UCharacter.getIntPropertyValue(ch,
934 UProperty.GENERAL_CATEGORY_MASK)
936 errln("error: UCharacter.getIntPropertyValue(\\u"
937 + Integer.toHexString(ch)
938 + ", UProperty.GENERAL_CATEGORY_MASK) != "
939 + "getMask(getType())");
941 if (type != UCharacterCategory.UNASSIGNED) {
942 errln("error: UCharacter.getType(\\u" + Utility.hex(ch, 4)
943 + " != UCharacterCategory.UNASSIGNED (returns "
944 + UCharacterCategory.toString(UCharacter.getType(ch))
947 if ((ch & 0xffff) == 0xfffe) {
955 // test that PUA is not "unassigned"
// Starts at U+E000; the increment is inside the body. A type other than PRIVATE_USE is only logged.
956 for(int ch = 0xe000; ch <= 0x10fffd;) {
957 int type = UCharacter.getType(ch);
958 if (UCharacter.getIntPropertyValue(ch,
959 UProperty.GENERAL_CATEGORY_MASK)
961 errln("error: UCharacter.getIntPropertyValue(\\u"
962 + Integer.toHexString(ch)
963 + ", UProperty.GENERAL_CATEGORY_MASK) != "
964 + "getMask(getType())");
967 if (type == UCharacterCategory.UNASSIGNED) {
968 errln("error: UCharacter.getType(\\u"
970 + ") == UCharacterCategory.UNASSIGNED");
972 else if (type != UCharacterCategory.PRIVATE_USE) {
973 logln("PUA override: UCharacter.getType(\\u"
974 + Utility.hex(ch, 4) + ")=" + type);
// NOTE(review): 0xffffd is U+FFFFD (five hex digits), presumably the end of a private-use plane -- verify the literal.
979 else if (ch == 0xffffd) {
990 * Test for the character names
// Checks character-name lookups in both directions (modern, Unicode 1.0 and extended names),
// case-insensitive lookup, UCharacterName.getMaxCharNameLength() and getCharNameCharacters(),
// and UCharacter.getName(String, String). A missing unames.icu is reported with warnln().
992 public void TestNames()
// The maximum name length must be at least 83.
995 int length = UCharacterName.INSTANCE.getMaxCharNameLength();
996 if (length < 83) { // Unicode 3.2 max char name length
997 errln("getMaxCharNameLength()=" + length + " is too short");
// Code points under test; the name tables below are indexed in parallel with this array.
// 0xd800 and 0xdc00 are separate entries here, while the expected string for
// getName(String, String) further down lists LINEAR B SYLLABLE B008 A in their place.
1000 int c[] = {0x0061, //LATIN SMALL LETTER A
1001 0x000284, //LATIN SMALL LETTER DOTLESS J WITH STROKE AND HOOK
1002 0x003401, //CJK UNIFIED IDEOGRAPH-3401
1003 0x007fed, //CJK UNIFIED IDEOGRAPH-7FED
1004 0x00ac00, //HANGUL SYLLABLE GA
1005 0x00d7a3, //HANGUL SYLLABLE HIH
1006 0x00d800, 0x00dc00, //LINEAR B SYLLABLE B008 A
1007 0xff08, //FULLWIDTH LEFT PARENTHESIS
1008 0x00ffe5, //FULLWIDTH YEN SIGN
1010 0x0023456 //CJK UNIFIED IDEOGRAPH-23456
1013 "LATIN SMALL LETTER A",
1014 "LATIN SMALL LETTER DOTLESS J WITH STROKE AND HOOK",
1015 "CJK UNIFIED IDEOGRAPH-3401",
1016 "CJK UNIFIED IDEOGRAPH-7FED",
1017 "HANGUL SYLLABLE GA",
1018 "HANGUL SYLLABLE HIH",
1021 "FULLWIDTH LEFT PARENTHESIS",
1022 "FULLWIDTH YEN SIGN",
1024 "CJK UNIFIED IDEOGRAPH-23456"
1026 String oldname[] = {"", "", "",
1028 "", "", "", "", "", "",
1030 String extendedname[] = {"LATIN SMALL LETTER A",
1031 "LATIN SMALL LETTER DOTLESS J WITH STROKE AND HOOK",
1032 "CJK UNIFIED IDEOGRAPH-3401",
1033 "CJK UNIFIED IDEOGRAPH-7FED",
1034 "HANGUL SYLLABLE GA",
1035 "HANGUL SYLLABLE HIH",
1036 "<lead surrogate-D800>",
1037 "<trail surrogate-DC00>",
1038 "FULLWIDTH LEFT PARENTHESIS",
1039 "FULLWIDTH YEN SIGN",
1040 "<noncharacter-FFFF>",
1041 "CJK UNIFIED IDEOGRAPH-23456"};
1043 int size = c.length;
1047 for (int i = 0; i < size; i ++)
1049 // modern Unicode character name
// A null result is accepted only where the expected name is the empty string.
1050 str = UCharacter.getName(c[i]);
1051 if ((str == null && name[i].length() > 0) ||
1052 (str != null && !str.equals(name[i])))
1054 errln("FAIL \\u" + hex(c[i]) + " expected name " +
1059 // 1.0 Unicode character name
1060 str = UCharacter.getName1_0(c[i]);
1061 if ((str == null && oldname[i].length() > 0) ||
1062 (str != null && !str.equals(oldname[i])))
1064 errln("FAIL \\u" + hex(c[i]) + " expected 1.0 name " +
1069 // extended character name
1070 str = UCharacter.getExtendedName(c[i]);
1071 if (str == null || !str.equals(extendedname[i]))
1073 errln("FAIL \\u" + hex(c[i]) + " expected extended name " +
1078 // retrieving unicode character from modern name
// Entries with an empty expected name are not checked in the reverse direction.
1079 uc = UCharacter.getCharFromName(name[i]);
1080 if (uc != c[i] && name[i].length() != 0)
1082 errln("FAIL " + name[i] + " expected character \\u" +
1087 //retrieving unicode character from 1.0 name
1088 uc = UCharacter.getCharFromName1_0(oldname[i]);
1089 if (uc != c[i] && oldname[i].length() != 0)
1091 errln("FAIL " + oldname[i] + " expected 1.0 character \\u" +
1096 //retrieving unicode character from extended name
// NOTE(review): "i != 0 && (i == 1 || i == 6)" can only be true for i == 1 or i == 6, so the
// remaining entries are never verified here -- confirm the intended condition.
1097 uc = UCharacter.getCharFromExtendedName(extendedname[i]);
1098 if (uc != c[i] && i != 0 && (i == 1 || i == 6))
1100 errln("FAIL " + extendedname[i] +
1101 " expected extended character \\u" + hex(c[i]));
1106 // test getName works with mixed-case names (new in 2.0)
1107 if (0x61 != UCharacter.getCharFromName("LATin smALl letTER A")) {
1108 errln("FAIL: 'LATin smALl letTER A' should result in character "
// The name -> code point round trip over the whole range runs only at inclusion level 5 or higher.
// NOTE(review): the bound "i < UCharacter.MAX_VALUE" leaves MAX_VALUE itself unchecked -- confirm.
1112 if (getInclusion() >= 5) {
1113 // extra testing different from icu
1114 for (int i = UCharacter.MIN_VALUE; i < UCharacter.MAX_VALUE; i ++)
1116 str = UCharacter.getName(i);
1117 if (str != null && UCharacter.getCharFromName(str) != i)
1119 errln("FAIL \\u" + hex(i) + " " + str +
1120 " retrieval of name and vice versa" );
1126 // Test getCharNameCharacters
// Runs only at inclusion level 10 or higher.
1127 if (getInclusion() >= 10) {
1128 boolean map[] = new boolean[256];
1130 UnicodeSet set = new UnicodeSet(1, 0); // empty set
1131 UnicodeSet dumb = new UnicodeSet(1, 0); // empty set
1133 // uprv_getCharNameCharacters() will likely return more lowercase
1134 // letters than actual character names contain because
1135 // it includes all the characters in lowercased names of
1136 // general categories, for the full possible set of extended names.
1137 UCharacterName.INSTANCE.getCharNameCharacters(set);
1139 // build set the dumb (but sure-fire) way
1140 Arrays.fill(map, false);
// Walk the extended name of every code point; map[] is indexed by the low 8 bits of each name character.
1143 for (int cp = 0; cp < 0x110000; ++ cp) {
1144 String n = UCharacter.getExtendedName(cp);
1145 int len = n.length();
1146 if (len > maxLength) {
1150 for (int i = 0; i < len; ++ i) {
1151 char ch = n.charAt(i);
1152 if (!map[ch & 0xff]) {
1154 map[ch & 0xff] = true;
// The reported maximum must equal the longest extended name found above.
1159 length = UCharacterName.INSTANCE.getMaxCharNameLength();
1160 if (length != maxLength) {
1161 errln("getMaxCharNameLength()=" + length
1162 + " differs from the maximum length " + maxLength
1163 + " of all extended names");
1166 // compare the sets. Where is my uset_equals?!!
// Only values 0..255 are compared.
1168 for (int i = 0; i < 256; ++ i) {
1169 if (set.contains(i) != dumb.contains(i)) {
1170 if (0x61 <= i && i <= 0x7a // a-z
1171 && set.contains(i) && !dumb.contains(i)) {
1172 // ignore lowercase a-z that are in set but not in dumb
1182 String pattern1 = set.toPattern(true);
1183 String pattern2 = dumb.toPattern(true);
1186 errln("FAIL: getCharNameCharacters() returned " + pattern1
1187 + " expected " + pattern2
1188 + " (too many lowercase a-z are ok)");
1190 logln("Ok: getCharNameCharacters() returned " + pattern1);
1193 // improve code coverage
// Expected output of getName(String, String): one name per code point of the source string, joined by "|".
1194 String expected = "LATIN SMALL LETTER A|LATIN SMALL LETTER DOTLESS J WITH STROKE AND HOOK|"+
1195 "CJK UNIFIED IDEOGRAPH-3401|CJK UNIFIED IDEOGRAPH-7FED|HANGUL SYLLABLE GA|"+
1196 "HANGUL SYLLABLE HIH|LINEAR B SYLLABLE B008 A|FULLWIDTH LEFT PARENTHESIS|"+
1197 "FULLWIDTH YEN SIGN|"+
1198 "null|"+ // getName returns null because 0xFFFF does not have a name, but has an extended name!
1199 "CJK UNIFIED IDEOGRAPH-23456";
1200 String separator= "|";
1201 String source = Utility.valueOf(c);
1202 String result = UCharacter.getName(source, separator);
1203 if(!result.equals(expected)){
1204 errln("UCharacter.getName did not return the expected result.\n\t Expected: "+ expected+"\n\t Got: "+ result);
// An IllegalArgumentException whose message mentions unames.icu is reported as a warning.
1207 }catch(IllegalArgumentException e){
1208 if(e.getMessage().indexOf("unames.icu") >= 0){
1209 warnln("Could not find unames.icu");
1219 * Testing name iteration
// Exercises the three name iterators obtained from UCharacter
// (getExtendedNameIterator, getNameIterator, getName1_0Iterator):
// behaviour of setRange() at and beyond the code point boundaries, ordering
// of the returned code points, and agreement of each returned name with the
// matching UCharacter.getExtendedName/getName/getName1_0 lookup.
1221 public void TestNameIteration()throws Exception
1224 ValueIterator iterator = UCharacter.getExtendedNameIterator();
1225 ValueIterator.Element element = new ValueIterator.Element();
1226 ValueIterator.Element old = new ValueIterator.Element();
// A range lying entirely below 0 must yield no element.
1228 iterator.setRange(-10, -5);
1229 if (iterator.next(element)) {
1230 errln("Fail, expected iterator to return false when range is set outside the meaningful range");
// Likewise for a range lying entirely above 0x10FFFF.
1232 iterator.setRange(0x110000, 0x111111);
1233 if (iterator.next(element)) {
1234 errln("Fail, expected iterator to return false when range is set outside the meaningful range");
// A reversed range (start greater than limit) is expected to throw; reaching
// the errln below means no exception was raised.
1237 iterator.setRange(50, 10);
1238 errln("Fail, expected exception when encountered invalid range");
1239 } catch (Exception e) {
// A range that starts below 0 is expected to begin at code point 0.
1242 iterator.setRange(-10, 10);
1243 if (!iterator.next(element) || element.integer != 0) {
1244 errln("Fail, expected iterator to return 0 when range start limit is set outside the meaningful range");
// A range that ends beyond the code space: drain the iterator and check that
// the last code point returned is 0x10FFFF.
1247 iterator.setRange(0x10FFFE, 0x200000);
1249 while (iterator.next(element)) {
1250 last = element.integer;
1252 if (last != 0x10FFFF) {
1253 errln("Fail, expected iterator to return 0x10FFFF when range end limit is set outside the meaningful range");
// Modern-name iterator over [0xF, 0x45): code points must strictly increase
// and every returned value must equal UCharacter.getName() for that code point.
1256 iterator = UCharacter.getNameIterator();
1257 iterator.setRange(0xF, 0x45);
1258 while (iterator.next(element)) {
1259 if (element.integer <= old.integer) {
1260 errln("FAIL next returned a less codepoint \\u" +
1261 Integer.toHexString(element.integer) + " than \\u" +
1262 Integer.toHexString(old.integer));
1265 if (!UCharacter.getName(element.integer).equals(element.value))
1267 errln("FAIL next codepoint \\u" +
1268 Integer.toHexString(element.integer) +
1269 " does not have the expected name " +
1270 UCharacter.getName(element.integer) +
1271 " instead have the name " + (String)element.value);
1274 old.integer = element.integer;
// The next element is expected to be U+0020.
// NOTE(review): the failure message speaks of a reset, but the reset call
// itself is not among the lines visible here - confirm it precedes this check.
1278 iterator.next(element);
1279 if (element.integer != 0x20) {
1280 errln("FAIL reset in iterator");
// Pass over [0, 0x110000) with the modern-name iterator: same ordering and name
// checks as above, plus a check that every code point skipped between two
// consecutive elements has no name (getName() returns null).
1283 iterator.setRange(0, 0x110000);
1285 while (iterator.next(element)) {
1286 if (element.integer != 0 && element.integer <= old.integer) {
1287 errln("FAIL next returned a less codepoint \\u" +
1288 Integer.toHexString(element.integer) + " than \\u" +
1289 Integer.toHexString(old.integer));
1292 if (!UCharacter.getName(element.integer).equals(element.value))
1294 errln("FAIL next codepoint \\u" +
1295 Integer.toHexString(element.integer) +
1296 " does not have the expected name " +
1297 UCharacter.getName(element.integer) +
1298 " instead have the name " + (String)element.value);
1301 for (int i = old.integer + 1; i < element.integer; i ++) {
1302 if (UCharacter.getName(i) != null) {
1303 errln("FAIL between codepoints are not null \\u" +
1304 Integer.toHexString(old.integer) + " and " +
1305 Integer.toHexString(element.integer) + " has " +
1306 Integer.toHexString(i) + " with a name " +
1307 UCharacter.getName(i));
1311 old.integer = element.integer;
// Extended-name iterator: each returned code point is compared for equality
// with old.integer, and its value with UCharacter.getExtendedName().
// NOTE(review): presumably old.integer is advanced by one per element so that
// no code point may be skipped - the update is not visible here, confirm.
1314 iterator = UCharacter.getExtendedNameIterator();
1316 while (iterator.next(element)) {
1317 if (element.integer != 0 && element.integer != old.integer) {
1318 errln("FAIL next returned a codepoint \\u" +
1319 Integer.toHexString(element.integer) +
1320 " different from \\u" +
1321 Integer.toHexString(old.integer));
1324 if (!UCharacter.getExtendedName(element.integer).equals(
1326 errln("FAIL next codepoint \\u" +
1327 Integer.toHexString(element.integer) +
1329 + UCharacter.getExtendedName(element.integer) +
1330 " instead of " + (String)element.value);
// Unicode 1.0 name iterator: every element is logged, then subjected to the
// same ordering, value and gap checks, this time against getName1_0().
1335 iterator = UCharacter.getName1_0Iterator();
1337 while (iterator.next(element)) {
1338 logln(Integer.toHexString(element.integer) + " " +
1339 (String)element.value);
1340 if (element.integer != 0 && element.integer <= old.integer) {
1341 errln("FAIL next returned a less codepoint \\u" +
1342 Integer.toHexString(element.integer) + " than \\u" +
1343 Integer.toHexString(old.integer));
1346 if (!element.value.equals(UCharacter.getName1_0(
1347 element.integer))) {
1348 errln("FAIL next codepoint \\u" +
1349 Integer.toHexString(element.integer) +
1350 " name cannot be null");
1353 for (int i = old.integer + 1; i < element.integer; i ++) {
1354 if (UCharacter.getName1_0(i) != null) {
1355 errln("FAIL between codepoints are not null \\u" +
1356 Integer.toHexString(old.integer) + " and " +
1357 Integer.toHexString(element.integer) + " has " +
1358 Integer.toHexString(i) + " with a name " +
1359 UCharacter.getName1_0(i));
1363 old.integer = element.integer;
1365 } catch(Exception e){
1366 // !!! wouldn't preflighting be simpler? This looks like
1367 // it is effectively be doing that. It seems that for every
1368 // true error the code will call errln, which will throw the error, which
1369 // this will catch, which this will then rethrow the error. Just seems
// An exception whose message mentions "unames.icu" is downgraded to a warning;
// any other message is reported through errln.
// NOTE(review): Throwable.getMessage() may return null, in which case the
// indexOf call below would itself throw a NullPointerException and hide the
// original failure - consider a null check.
1371 if(e.getMessage().indexOf("unames.icu") >= 0){
1372 warnln("Could not find unames.icu");
1374 errln(e.getMessage());
1380 * Testing for illegal characters
// Checks UCharacter.isLegal(int) and UCharacter.isLegal(String) against two
// fixed tables: code points that must be reported as illegal and code points
// that must be reported as legal.
1382 public void TestIsLegal()
// Expected-illegal values: code points ending in FFFE/FFFF, four values from
// U+FDD0..U+FDEF, the surrogate code units U+D800 and U+DC00, and the
// out-of-range values 0x110000 and -1.
1384 int illegal[] = {0xFFFE, 0x00FFFF, 0x005FFFE, 0x005FFFF, 0x0010FFFE,
1385 0x0010FFFF, 0x110000, 0x00FDD0, 0x00FDDF, 0x00FDE0,
1386 0x00FDEF, 0xD800, 0xDC00, -1};
// Expected-legal values; several sit directly next to an illegal value above
// (U+FFFD, U+5FFFD, U+10FFFD, U+FDCF, U+FDF0).
1387 int legal[] = {0x61, 0x00FFFD, 0x0010000, 0x005FFFD, 0x0060000,
1388 0x0010FFFD, 0xFDCF, 0x00FDF0};
// isLegal(int): nothing in the illegal table may be accepted ...
1389 for (int count = 0; count < illegal.length; count ++) {
1390 if (UCharacter.isLegal(illegal[count])) {
1391 errln("FAIL \\u" + hex(illegal[count]) +
1392 " is not a legal character");
// ... and nothing in the legal table may be rejected.
1396 for (int count = 0; count < legal.length; count ++) {
1397 if (!UCharacter.isLegal(legal[count])) {
1398 errln("FAIL \\u" + hex(legal[count]) +
1399 " is a legal character");
1403 String illegalStr = "This is an illegal string ";
1404 String legalStr = "This is a legal string ";
// isLegal(String): build a string from a fixed prefix plus the code point under
// test. Values below 0x10000 are appended as a single char; larger values are
// split into their lead and trail surrogates.
1406 for (int count = 0; count < illegal.length; count ++) {
1407 StringBuffer str = new StringBuffer(illegalStr);
// Note: -1 also satisfies this condition, and (char)-1 is 0xFFFF.
1408 if (illegal[count] < 0x10000) {
1409 str.append((char)illegal[count]);
1412 char lead = UTF16.getLeadSurrogate(illegal[count]);
1413 char trail = UTF16.getTrailSurrogate(illegal[count]);
1417 if (UCharacter.isLegal(str.toString())) {
1418 errln("FAIL " + hex(str.toString()) +
1419 " is not a legal string");
// Same construction for the legal table; every resulting string must be accepted.
1423 for (int count = 0; count < legal.length; count ++) {
1424 StringBuffer str = new StringBuffer(legalStr);
1425 if (legal[count] < 0x10000) {
1426 str.append((char)legal[count]);
1429 char lead = UTF16.getLeadSurrogate(legal[count]);
1430 char trail = UTF16.getTrailSurrogate(legal[count]);
1434 if (!UCharacter.isLegal(str.toString())) {
1435 errln("FAIL " + hex(str.toString()) + " is a legal string");
// Checks UCharacter.getCodePoint(char, char) for surrogate pairs and
// UCharacter.getCodePoint(char) for single UTF-16 code units.
1443 public void TestCodePoint()
// Every lead surrogate (0xD800..0xDBFF) is combined with every trail surrogate
// (0xDC00..0xDFFF) and the result compared with ch.
// NOTE(review): the declaration and update of ch are not visible here -
// presumably it counts up from 0x10000 by one per pair; confirm.
1446 for (char i = 0xD800; i < 0xDC00; i ++) {
1447 for (char j = 0xDC00; j <= 0xDFFF; j ++) {
1448 if (UCharacter.getCodePoint(i, j) != ch) {
1449 errln("Error getting codepoint for surrogate " +
1451 + Integer.toHexString(i) + " \\u" +
1452 Integer.toHexString(j));
// 0xD7FF lies just below the lead-surrogate range, so this pair is invalid and
// an exception is expected; reaching the errln means none was thrown.
1459 UCharacter.getCodePoint((char)0xD7ff, (char)0xDC00);
1460 errln("Invalid surrogate characters should not form a " +
1462 } catch(Exception e) {
// Single-unit variant. Surrogates and U+FDD0..U+FDEF are expected to be
// rejected with an exception; other code units are expected to come back
// unchanged. Note: i is a char, so a bound of "<= 0xFFFF" could never become
// false; this loop stops before 0xFFFF.
1464 for (char i = 0; i < 0xFFFF; i++) {
1466 (i >= 0xD800 && i <= 0xDFFF) ||
1467 (i >= 0xFDD0 && i <= 0xFDEF)) {
1470 UCharacter.getCodePoint(i);
1471 errln("Not a character is not a valid codepoint");
1472 } catch (Exception e) {
1476 if (UCharacter.getCodePoint(i) != i) {
1477 errln("A valid codepoint should return itself");
1484 * This method is a little different from the type test in icu4c.
1485 * But combined with testUnicodeData, they basically do the same thing.
// Walks the ranges produced by UCharacter.getTypeIterator() and cross-checks
// each one: ranges are contiguous, adjacent ranges differ in type, every code
// point in a range reports that type through getType(), selected code points
// have the expected category, and LINE_BREAK / default Bidi class values are
// consistent with the range's category.
1487 public void TestIteration()
// Spot checks: {code point, expected general category}. Applied to whichever
// iterator range contains the code point.
1492 int test[][]={{0x41, UCharacterCategory.UPPERCASE_LETTER},
1493 {0x308, UCharacterCategory.NON_SPACING_MARK},
1494 {0xfffe, UCharacterCategory.GENERAL_OTHER_TYPES},
1495 {0xe0041, UCharacterCategory.FORMAT},
1496 {0xeffff, UCharacterCategory.UNASSIGNED}};
// Each row is {exclusive upper bound, expected default class}; rows are
// consulted in order while the current code point is below the bound.
1498 // default Bidi classes for unassigned code points, from the DerivedBidiClass.txt header
1499 int defaultBidi[][]={
1500 { 0x0590, UCharacterDirection.LEFT_TO_RIGHT },
1501 { 0x0600, UCharacterDirection.RIGHT_TO_LEFT },
1502 { 0x07C0, UCharacterDirection.RIGHT_TO_LEFT_ARABIC },
1503 { 0x08A0, UCharacterDirection.RIGHT_TO_LEFT },
1504 { 0x0900, UCharacterDirection.RIGHT_TO_LEFT_ARABIC }, /* Unicode 6.1 changes U+08A0..U+08FF from R to AL */
1505 { 0x20A0, UCharacterDirection.LEFT_TO_RIGHT },
1506 { 0x20D0, UCharacterDirection.EUROPEAN_NUMBER_TERMINATOR }, /* Unicode 6.3 changes the currency symbols block U+20A0..U+20CF to default to ET not L */
1507 { 0xFB1D, UCharacterDirection.LEFT_TO_RIGHT },
1508 { 0xFB50, UCharacterDirection.RIGHT_TO_LEFT },
1509 { 0xFE00, UCharacterDirection.RIGHT_TO_LEFT_ARABIC },
1510 { 0xFE70, UCharacterDirection.LEFT_TO_RIGHT },
1511 { 0xFF00, UCharacterDirection.RIGHT_TO_LEFT_ARABIC },
1512 { 0x10800, UCharacterDirection.LEFT_TO_RIGHT },
1513 { 0x11000, UCharacterDirection.RIGHT_TO_LEFT },
1514 { 0x1E800, UCharacterDirection.LEFT_TO_RIGHT }, /* new default-R range in Unicode 5.2: U+1E800 - U+1EFFF */
1515 { 0x1EE00, UCharacterDirection.RIGHT_TO_LEFT },
1516 { 0x1EF00, UCharacterDirection.RIGHT_TO_LEFT_ARABIC }, /* Unicode 6.1 changes U+1EE00..U+1EEFF from R to AL */
1517 { 0x1F000, UCharacterDirection.RIGHT_TO_LEFT },
1518 { 0x110000, UCharacterDirection.LEFT_TO_RIGHT }
1521 RangeValueIterator iterator = UCharacter.getTypeIterator();
1522 RangeValueIterator.Element result = new RangeValueIterator.Element();
1523 while (iterator.next(result)) {
// Contiguity: each range must start exactly where the previous one ended.
1524 if (result.start != limit) {
1525 errln("UCharacterIteration failed: Ranges not continuous " +
1526 "0x" + Integer.toHexString(result.start));
1529 limit = result.limit;
// Two consecutive ranges must not carry the same type.
1530 if (result.value == prevtype) {
1531 errln("Type of the next set of enumeration should be different");
1533 prevtype = result.value;
// Every code point in the range must report the range's type via getType().
1535 for (int i = result.start; i < limit; i ++) {
1536 int temptype = UCharacter.getType(i);
1537 if (temptype != result.value) {
1538 errln("UCharacterIteration failed: Codepoint \\u" +
1539 Integer.toHexString(i) + " should be of type " +
1540 temptype + " not " + result.value);
// Apply the spot checks whose code point falls inside [start, limit).
1544 for (int i = 0; i < test.length; ++ i) {
1545 if (result.start <= test[i][0] && test[i][0] < result.limit) {
1546 if (result.value != test[i][1]) {
1547 errln("error: getTypes() has range ["
1548 + Integer.toHexString(result.start) + ", "
1549 + Integer.toHexString(result.limit)
1550 + "] with type " + result.value
1552 + Integer.toHexString(test[i][0]) + ", "
1553 + Integer.toHexString(test[i][1]));
1558 // LineBreak.txt specifies:
1559 // # - Assigned characters that are not listed explicitly are given the value
1561 // # - Unassigned characters are given the value "XX".
1563 // PUA characters are listed explicitly with "XX".
1564 // Verify that no assigned character has "XX".
1565 if (result.value != UCharacterCategory.UNASSIGNED
1566 && result.value != UCharacterCategory.PRIVATE_USE) {
1567 int c = result.start;
1568 while (c < result.limit) {
// The LINE_BREAK value is compared with the literal 0, which the message
// below labels "XX".
// NOTE(review): this "error" is reported through logln rather than errln, so
// presumably it does not fail the test - confirm whether that is intended.
1569 if (0 == UCharacter.getIntPropertyValue(c,
1570 UProperty.LINE_BREAK)) {
1571 logln("error UProperty.LINE_BREAK(assigned \\u"
1572 + Utility.hex(c, 4) + ")=XX");
1579 * Verify default Bidi classes.
1580 * For recent Unicode versions, see UCD.html.
1582 * For older Unicode versions:
1583 * See table 3-7 "Bidirectional Character Types" in UAX #9.
1584 * http://www.unicode.org/reports/tr9/
1586 * See also DerivedBidiClass.txt for Cn code points!
1588 * Unicode 4.0.1/Public Review Issue #28 (http://www.unicode.org/review/resolved-pri.html)
1589 * changed some default values.
1590 * In particular, non-characters and unassigned Default Ignorable Code Points
1591 * change from L to BN.
1593 * UCD.html version 4.0.1 does not yet reflect these changes.
// Only unassigned and private-use ranges are checked against the default Bidi table.
1595 if (result.value == UCharacterCategory.UNASSIGNED
1596 || result.value == UCharacterCategory.PRIVATE_USE) {
1597 int c = result.start;
1598 for (int i = 0; i < defaultBidi.length && c < result.limit;
1600 if (c < defaultBidi[i][0]) {
1601 while (c < result.limit && c < defaultBidi[i][0]) {
// Noncharacters and default-ignorable code points are expected to be
// BOUNDARY_NEUTRAL; everything else takes the class from the table row.
1602 // TODO change to public UCharacter.isNonCharacter(c) once it's available
1603 if(com.ibm.icu.impl.UCharacterUtility.isNonCharacter(c) || UCharacter.hasBinaryProperty(c, UProperty.DEFAULT_IGNORABLE_CODE_POINT)) {
1604 shouldBeDir=UCharacter.BOUNDARY_NEUTRAL;
1606 shouldBeDir=defaultBidi[i][1];
// Both getDirection() and the BIDI_CLASS property value are checked.
1609 if (UCharacter.getDirection(c) != shouldBeDir
1610 || UCharacter.getIntPropertyValue(c,
1611 UProperty.BIDI_CLASS)
1613 errln("error: getDirection(unassigned/PUA "
1614 + Integer.toHexString(c)
// Per the failure message this verifies reset(): the next range returned must
// start at 0 again.
// NOTE(review): the println below writes straight to System.out, unlike the
// logln/errln reporting used everywhere else in this method.
1626 if (iterator.next(result) == false || result.start != 0) {
1627 System.out.println("result " + result.start);
1628 errln("UCharacterIteration reset() failed");
// Checks UCharacter.getAge(int): first against a table of code points with
// known ages, then for its handling of arguments at and just outside the
// UCharacter.MIN_VALUE..UCharacter.MAX_VALUE boundaries.
1635 public void TestGetAge()
// Flat table read five ints at a time: a code point followed by the four
// version fields passed to VersionInfo.getInstance().
1637 int ages[] = {0x41, 1, 1, 0, 0,
1640 0x2fffe, 2, 0, 0, 0,
1644 0x10300, 3, 1, 0, 0,
1646 0xff60, 3, 2, 0, 0};
1647 for (int i = 0; i < ages.length; i += 5) {
1648 VersionInfo age = UCharacter.getAge(ages[i]);
// NOTE(review): the two VersionInfo objects are compared by identity (!=).
// That is only correct if getInstance() hands back shared instances for equal
// versions - confirm, or compare with equals() instead.
1649 if (age != VersionInfo.getInstance(ages[i + 1], ages[i + 2],
1650 ages[i + 3], ages[i + 4])) {
1651 errln("error: getAge(\\u" + Integer.toHexString(ages[i]) +
1652 ") == " + age.toString() + " instead of " +
1653 ages[i + 1] + "." + ages[i + 2] + "." + ages[i + 3] +
// Boundary values inside the valid code point range: getAge() must not throw.
1658 int[] valid_tests = {
1659 UCharacter.MIN_VALUE, UCharacter.MIN_VALUE+1,
1660 UCharacter.MAX_VALUE-1, UCharacter.MAX_VALUE};
// Values one and two steps outside the range: getAge() is expected to throw.
1661 int[] invalid_tests = {
1662 UCharacter.MIN_VALUE-1, UCharacter.MIN_VALUE-2,
1663 UCharacter.MAX_VALUE+1, UCharacter.MAX_VALUE+2};
1665 for(int i=0; i< valid_tests.length; i++){
1667 UCharacter.getAge(valid_tests[i]);
1668 } catch(Exception e){
1669 errln("UCharacter.getAge(int) was not suppose to have " +
1670 "an exception. Value passed: " + valid_tests[i]);
1674 for(int i=0; i< invalid_tests.length; i++){
1676 UCharacter.getAge(invalid_tests[i]);
// Reaching this line means no exception was thrown for an invalid argument.
1677 errln("UCharacter.getAge(int) was suppose to have " +
1678 "an exception. Value passed: " + invalid_tests[i]);
1679 } catch(Exception e){
1685 * Test binary non core properties
1687 public void TestAdditionalProperties()
1689 // test data for hasBinaryProperty()
1690 int props[][] = { // code point, property
1691 { 0x0627, UProperty.ALPHABETIC, 1 },
1692 { 0x1034a, UProperty.ALPHABETIC, 1 },
1693 { 0x2028, UProperty.ALPHABETIC, 0 },
1695 { 0x0066, UProperty.ASCII_HEX_DIGIT, 1 },
1696 { 0x0067, UProperty.ASCII_HEX_DIGIT, 0 },
1698 { 0x202c, UProperty.BIDI_CONTROL, 1 },
1699 { 0x202f, UProperty.BIDI_CONTROL, 0 },
1701 { 0x003c, UProperty.BIDI_MIRRORED, 1 },
1702 { 0x003d, UProperty.BIDI_MIRRORED, 0 },
1704 /* see Unicode Corrigendum #6 at http://www.unicode.org/versions/corrigendum6.html */
1705 { 0x2018, UProperty.BIDI_MIRRORED, 0 },
1706 { 0x201d, UProperty.BIDI_MIRRORED, 0 },
1707 { 0x201f, UProperty.BIDI_MIRRORED, 0 },
1708 { 0x301e, UProperty.BIDI_MIRRORED, 0 },
1710 { 0x058a, UProperty.DASH, 1 },
1711 { 0x007e, UProperty.DASH, 0 },
1713 { 0x0c4d, UProperty.DIACRITIC, 1 },
1714 { 0x3000, UProperty.DIACRITIC, 0 },
1716 { 0x0e46, UProperty.EXTENDER, 1 },
1717 { 0x0020, UProperty.EXTENDER, 0 },
1719 { 0xfb1d, UProperty.FULL_COMPOSITION_EXCLUSION, 1 },
1720 { 0x1d15f, UProperty.FULL_COMPOSITION_EXCLUSION, 1 },
1721 { 0xfb1e, UProperty.FULL_COMPOSITION_EXCLUSION, 0 },
1723 { 0x110a, UProperty.NFD_INERT, 1 }, /* Jamo L */
1724 { 0x0308, UProperty.NFD_INERT, 0 },
1726 { 0x1164, UProperty.NFKD_INERT, 1 }, /* Jamo V */
1727 { 0x1d79d, UProperty.NFKD_INERT, 0 }, /* math compat version of xi */
1729 { 0x0021, UProperty.NFC_INERT, 1 }, /* ! */
1730 { 0x0061, UProperty.NFC_INERT, 0 }, /* a */
1731 { 0x00e4, UProperty.NFC_INERT, 0 }, /* a-umlaut */
1732 { 0x0102, UProperty.NFC_INERT, 0 }, /* a-breve */
1733 { 0xac1c, UProperty.NFC_INERT, 0 }, /* Hangul LV */
1734 { 0xac1d, UProperty.NFC_INERT, 1 }, /* Hangul LVT */
1736 { 0x1d79d, UProperty.NFKC_INERT, 0 }, /* math compat version of xi */
1737 { 0x2a6d6, UProperty.NFKC_INERT, 1 }, /* Han, last of CJK ext. B */
1739 { 0x00e4, UProperty.SEGMENT_STARTER, 1 },
1740 { 0x0308, UProperty.SEGMENT_STARTER, 0 },
1741 { 0x110a, UProperty.SEGMENT_STARTER, 1 }, /* Jamo L */
1742 { 0x1164, UProperty.SEGMENT_STARTER, 0 },/* Jamo V */
1743 { 0xac1c, UProperty.SEGMENT_STARTER, 1 }, /* Hangul LV */
1744 { 0xac1d, UProperty.SEGMENT_STARTER, 1 }, /* Hangul LVT */
1746 { 0x0044, UProperty.HEX_DIGIT, 1 },
1747 { 0xff46, UProperty.HEX_DIGIT, 1 },
1748 { 0x0047, UProperty.HEX_DIGIT, 0 },
1750 { 0x30fb, UProperty.HYPHEN, 1 },
1751 { 0xfe58, UProperty.HYPHEN, 0 },
1753 { 0x2172, UProperty.ID_CONTINUE, 1 },
1754 { 0x0307, UProperty.ID_CONTINUE, 1 },
1755 { 0x005c, UProperty.ID_CONTINUE, 0 },
1757 { 0x2172, UProperty.ID_START, 1 },
1758 { 0x007a, UProperty.ID_START, 1 },
1759 { 0x0039, UProperty.ID_START, 0 },
1761 { 0x4db5, UProperty.IDEOGRAPHIC, 1 },
1762 { 0x2f999, UProperty.IDEOGRAPHIC, 1 },
1763 { 0x2f99, UProperty.IDEOGRAPHIC, 0 },
1765 { 0x200c, UProperty.JOIN_CONTROL, 1 },
1766 { 0x2029, UProperty.JOIN_CONTROL, 0 },
1768 { 0x1d7bc, UProperty.LOWERCASE, 1 },
1769 { 0x0345, UProperty.LOWERCASE, 1 },
1770 { 0x0030, UProperty.LOWERCASE, 0 },
1772 { 0x1d7a9, UProperty.MATH, 1 },
1773 { 0x2135, UProperty.MATH, 1 },
1774 { 0x0062, UProperty.MATH, 0 },
1776 { 0xfde1, UProperty.NONCHARACTER_CODE_POINT, 1 },
1777 { 0x10ffff, UProperty.NONCHARACTER_CODE_POINT, 1 },
1778 { 0x10fffd, UProperty.NONCHARACTER_CODE_POINT, 0 },
1780 { 0x0022, UProperty.QUOTATION_MARK, 1 },
1781 { 0xff62, UProperty.QUOTATION_MARK, 1 },
1782 { 0xd840, UProperty.QUOTATION_MARK, 0 },
1784 { 0x061f, UProperty.TERMINAL_PUNCTUATION, 1 },
1785 { 0xe003f, UProperty.TERMINAL_PUNCTUATION, 0 },
1787 { 0x1d44a, UProperty.UPPERCASE, 1 },
1788 { 0x2162, UProperty.UPPERCASE, 1 },
1789 { 0x0345, UProperty.UPPERCASE, 0 },
1791 { 0x0020, UProperty.WHITE_SPACE, 1 },
1792 { 0x202f, UProperty.WHITE_SPACE, 1 },
1793 { 0x3001, UProperty.WHITE_SPACE, 0 },
1795 { 0x0711, UProperty.XID_CONTINUE, 1 },
1796 { 0x1d1aa, UProperty.XID_CONTINUE, 1 },
1797 { 0x007c, UProperty.XID_CONTINUE, 0 },
1799 { 0x16ee, UProperty.XID_START, 1 },
1800 { 0x23456, UProperty.XID_START, 1 },
1801 { 0x1d1aa, UProperty.XID_START, 0 },
1805 * The following properties are only supported starting with the
1806 * Unicode version indicated in the second field.
1810 { 0x180c, UProperty.DEFAULT_IGNORABLE_CODE_POINT, 1 },
1811 { 0xfe02, UProperty.DEFAULT_IGNORABLE_CODE_POINT, 1 },
1812 { 0x1801, UProperty.DEFAULT_IGNORABLE_CODE_POINT, 0 },
1814 { 0x0149, UProperty.DEPRECATED, 1 }, /* changed in Unicode 5.2 */
1815 { 0x0341, UProperty.DEPRECATED, 0 }, /* changed in Unicode 5.2 */
1816 { 0xe0041, UProperty.DEPRECATED, 1 }, /* Changed from Unicode 5 to 5.1 */
1817 { 0xe0100, UProperty.DEPRECATED, 0 },
1819 { 0x00a0, UProperty.GRAPHEME_BASE, 1 },
1820 { 0x0a4d, UProperty.GRAPHEME_BASE, 0 },
1821 { 0xff9d, UProperty.GRAPHEME_BASE, 1 },
1822 { 0xff9f, UProperty.GRAPHEME_BASE, 0 }, /* changed from Unicode 3.2 to 4 and again 5 to 5.1 */
1824 { 0x0300, UProperty.GRAPHEME_EXTEND, 1 },
1825 { 0xff9d, UProperty.GRAPHEME_EXTEND, 0 },
1826 { 0xff9f, UProperty.GRAPHEME_EXTEND, 1 }, /* changed from Unicode 3.2 to 4 and again 5 to 5.1 */
1827 { 0x0603, UProperty.GRAPHEME_EXTEND, 0 },
1829 { 0x0a4d, UProperty.GRAPHEME_LINK, 1 },
1830 { 0xff9f, UProperty.GRAPHEME_LINK, 0 },
1832 { 0x2ff7, UProperty.IDS_BINARY_OPERATOR, 1 },
1833 { 0x2ff3, UProperty.IDS_BINARY_OPERATOR, 0 },
1835 { 0x2ff3, UProperty.IDS_TRINARY_OPERATOR, 1 },
1836 { 0x2f03, UProperty.IDS_TRINARY_OPERATOR, 0 },
1838 { 0x0ec1, UProperty.LOGICAL_ORDER_EXCEPTION, 1 },
1839 { 0xdcba, UProperty.LOGICAL_ORDER_EXCEPTION, 0 },
1841 { 0x2e9b, UProperty.RADICAL, 1 },
1842 { 0x4e00, UProperty.RADICAL, 0 },
1844 { 0x012f, UProperty.SOFT_DOTTED, 1 },
1845 { 0x0049, UProperty.SOFT_DOTTED, 0 },
1847 { 0xfa11, UProperty.UNIFIED_IDEOGRAPH, 1 },
1848 { 0xfa12, UProperty.UNIFIED_IDEOGRAPH, 0 },
1850 { -1, 0x401, 0 }, /* version break for Unicode 4.0.1 */
1852 { 0x002e, UProperty.S_TERM, 1 },
1853 { 0x0061, UProperty.S_TERM, 0 },
1855 { 0x180c, UProperty.VARIATION_SELECTOR, 1 },
1856 { 0xfe03, UProperty.VARIATION_SELECTOR, 1 },
1857 { 0xe01ef, UProperty.VARIATION_SELECTOR, 1 },
1858 { 0xe0200, UProperty.VARIATION_SELECTOR, 0 },
1860 /* enum/integer type properties */
1861 /* test default Bidi classes for unassigned code points */
1862 { 0x0590, UProperty.BIDI_CLASS, UCharacterDirection.RIGHT_TO_LEFT },
1863 { 0x05cf, UProperty.BIDI_CLASS, UCharacterDirection.RIGHT_TO_LEFT },
1864 { 0x05ed, UProperty.BIDI_CLASS, UCharacterDirection.RIGHT_TO_LEFT },
1865 { 0x07f2, UProperty.BIDI_CLASS, UCharacterDirection.DIR_NON_SPACING_MARK }, /* Nko, new in Unicode 5.0 */
1866 { 0x07fe, UProperty.BIDI_CLASS, UCharacterDirection.RIGHT_TO_LEFT }, /* unassigned R */
1867 { 0x089f, UProperty.BIDI_CLASS, UCharacterDirection.RIGHT_TO_LEFT },
1868 { 0xfb37, UProperty.BIDI_CLASS, UCharacterDirection.RIGHT_TO_LEFT },
1869 { 0xfb42, UProperty.BIDI_CLASS, UCharacterDirection.RIGHT_TO_LEFT },
1870 { 0x10806, UProperty.BIDI_CLASS, UCharacterDirection.RIGHT_TO_LEFT },
1871 { 0x10909, UProperty.BIDI_CLASS, UCharacterDirection.RIGHT_TO_LEFT },
1872 { 0x10fe4, UProperty.BIDI_CLASS, UCharacterDirection.RIGHT_TO_LEFT },
1874 { 0x0605, UProperty.BIDI_CLASS, UCharacterDirection.RIGHT_TO_LEFT_ARABIC },
1875 { 0x061c, UProperty.BIDI_CLASS, UCharacterDirection.RIGHT_TO_LEFT_ARABIC },
1876 { 0x063f, UProperty.BIDI_CLASS, UCharacterDirection.RIGHT_TO_LEFT_ARABIC },
1877 { 0x070e, UProperty.BIDI_CLASS, UCharacterDirection.RIGHT_TO_LEFT_ARABIC },
1878 { 0x0775, UProperty.BIDI_CLASS, UCharacterDirection.RIGHT_TO_LEFT_ARABIC },
1879 { 0xfbc2, UProperty.BIDI_CLASS, UCharacterDirection.RIGHT_TO_LEFT_ARABIC },
1880 { 0xfd90, UProperty.BIDI_CLASS, UCharacterDirection.RIGHT_TO_LEFT_ARABIC },
1881 { 0xfefe, UProperty.BIDI_CLASS, UCharacterDirection.RIGHT_TO_LEFT_ARABIC },
1883 { 0x02AF, UProperty.BLOCK, UCharacter.UnicodeBlock.IPA_EXTENSIONS.getID() },
1884 { 0x0C4E, UProperty.BLOCK, UCharacter.UnicodeBlock.TELUGU.getID()},
1885 { 0x155A, UProperty.BLOCK, UCharacter.UnicodeBlock.UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS.getID() },
1886 { 0x1717, UProperty.BLOCK, UCharacter.UnicodeBlock.TAGALOG.getID() },
1887 { 0x1900, UProperty.BLOCK, UCharacter.UnicodeBlock.LIMBU.getID() },
1888 { 0x1AFF, UProperty.BLOCK, UCharacter.UnicodeBlock.NO_BLOCK.getID()},
1889 { 0x3040, UProperty.BLOCK, UCharacter.UnicodeBlock.HIRAGANA.getID()},
1890 { 0x1D0FF, UProperty.BLOCK, UCharacter.UnicodeBlock.BYZANTINE_MUSICAL_SYMBOLS.getID()},
1891 { 0x50000, UProperty.BLOCK, UCharacter.UnicodeBlock.NO_BLOCK.getID() },
1892 { 0xEFFFF, UProperty.BLOCK, UCharacter.UnicodeBlock.NO_BLOCK.getID() },
1893 { 0x10D0FF, UProperty.BLOCK, UCharacter.UnicodeBlock.SUPPLEMENTARY_PRIVATE_USE_AREA_B.getID() },
1895 /* UProperty.CANONICAL_COMBINING_CLASS tested for assigned characters in TestUnicodeData() */
1896 { 0xd7d7, UProperty.CANONICAL_COMBINING_CLASS, 0 },
1898 { 0x00A0, UProperty.DECOMPOSITION_TYPE, UCharacter.DecompositionType.NOBREAK },
1899 { 0x00A8, UProperty.DECOMPOSITION_TYPE, UCharacter.DecompositionType.COMPAT },
1900 { 0x00bf, UProperty.DECOMPOSITION_TYPE, UCharacter.DecompositionType.NONE },
1901 { 0x00c0, UProperty.DECOMPOSITION_TYPE, UCharacter.DecompositionType.CANONICAL },
1902 { 0x1E9B, UProperty.DECOMPOSITION_TYPE, UCharacter.DecompositionType.CANONICAL },
1903 { 0xBCDE, UProperty.DECOMPOSITION_TYPE, UCharacter.DecompositionType.CANONICAL },
1904 { 0xFB5D, UProperty.DECOMPOSITION_TYPE, UCharacter.DecompositionType.MEDIAL },
1905 { 0x1D736, UProperty.DECOMPOSITION_TYPE, UCharacter.DecompositionType.FONT },
1906 { 0xe0033, UProperty.DECOMPOSITION_TYPE, UCharacter.DecompositionType.NONE },
1908 { 0x0009, UProperty.EAST_ASIAN_WIDTH, UCharacter.EastAsianWidth.NEUTRAL },
1909 { 0x0020, UProperty.EAST_ASIAN_WIDTH, UCharacter.EastAsianWidth.NARROW },
1910 { 0x00B1, UProperty.EAST_ASIAN_WIDTH, UCharacter.EastAsianWidth.AMBIGUOUS },
1911 { 0x20A9, UProperty.EAST_ASIAN_WIDTH, UCharacter.EastAsianWidth.HALFWIDTH },
1912 { 0x2FFB, UProperty.EAST_ASIAN_WIDTH, UCharacter.EastAsianWidth.WIDE },
1913 { 0x3000, UProperty.EAST_ASIAN_WIDTH, UCharacter.EastAsianWidth.FULLWIDTH },
1914 { 0x35bb, UProperty.EAST_ASIAN_WIDTH, UCharacter.EastAsianWidth.WIDE },
1915 { 0x58bd, UProperty.EAST_ASIAN_WIDTH, UCharacter.EastAsianWidth.WIDE },
1916 { 0xD7A3, UProperty.EAST_ASIAN_WIDTH, UCharacter.EastAsianWidth.WIDE },
1917 { 0xEEEE, UProperty.EAST_ASIAN_WIDTH, UCharacter.EastAsianWidth.AMBIGUOUS },
1918 { 0x1D198, UProperty.EAST_ASIAN_WIDTH, UCharacter.EastAsianWidth.NEUTRAL },
1919 { 0x20000, UProperty.EAST_ASIAN_WIDTH, UCharacter.EastAsianWidth.WIDE },
1920 { 0x2F8C7, UProperty.EAST_ASIAN_WIDTH, UCharacter.EastAsianWidth.WIDE },
1921 { 0x3a5bd, UProperty.EAST_ASIAN_WIDTH, UCharacter.EastAsianWidth.WIDE },
1922 { 0x5a5bd, UProperty.EAST_ASIAN_WIDTH, UCharacter.EastAsianWidth.NEUTRAL },
1923 { 0xFEEEE, UProperty.EAST_ASIAN_WIDTH, UCharacter.EastAsianWidth.AMBIGUOUS },
1924 { 0x10EEEE, UProperty.EAST_ASIAN_WIDTH, UCharacter.EastAsianWidth.AMBIGUOUS },
1926 /* UProperty.GENERAL_CATEGORY tested for assigned characters in TestUnicodeData() */
1927 { 0xd7c7, UProperty.GENERAL_CATEGORY, 0 },
1928 { 0xd7d7, UProperty.GENERAL_CATEGORY, UCharacterEnums.ECharacterCategory.OTHER_LETTER }, /* changed in Unicode 5.2 */
1930 { 0x0444, UProperty.JOINING_GROUP, UCharacter.JoiningGroup.NO_JOINING_GROUP },
1931 { 0x0639, UProperty.JOINING_GROUP, UCharacter.JoiningGroup.AIN },
1932 { 0x072A, UProperty.JOINING_GROUP, UCharacter.JoiningGroup.DALATH_RISH },
1933 { 0x0647, UProperty.JOINING_GROUP, UCharacter.JoiningGroup.HEH },
1934 { 0x06C1, UProperty.JOINING_GROUP, UCharacter.JoiningGroup.HEH_GOAL },
1936 { 0x200C, UProperty.JOINING_TYPE, UCharacter.JoiningType.NON_JOINING },
1937 { 0x200D, UProperty.JOINING_TYPE, UCharacter.JoiningType.JOIN_CAUSING },
1938 { 0x0639, UProperty.JOINING_TYPE, UCharacter.JoiningType.DUAL_JOINING },
1939 { 0x0640, UProperty.JOINING_TYPE, UCharacter.JoiningType.JOIN_CAUSING },
1940 { 0x06C3, UProperty.JOINING_TYPE, UCharacter.JoiningType.RIGHT_JOINING },
1941 { 0x0300, UProperty.JOINING_TYPE, UCharacter.JoiningType.TRANSPARENT },
1942 { 0x070F, UProperty.JOINING_TYPE, UCharacter.JoiningType.TRANSPARENT },
1943 { 0xe0033, UProperty.JOINING_TYPE, UCharacter.JoiningType.TRANSPARENT },
1945 /* TestUnicodeData() verifies that no assigned character has "XX" (unknown) */
1946 { 0xe7e7, UProperty.LINE_BREAK, UCharacter.LineBreak.UNKNOWN },
1947 { 0x10fffd, UProperty.LINE_BREAK, UCharacter.LineBreak.UNKNOWN },
1948 { 0x0028, UProperty.LINE_BREAK, UCharacter.LineBreak.OPEN_PUNCTUATION },
1949 { 0x232A, UProperty.LINE_BREAK, UCharacter.LineBreak.CLOSE_PUNCTUATION },
1950 { 0x3401, UProperty.LINE_BREAK, UCharacter.LineBreak.IDEOGRAPHIC },
1951 { 0x4e02, UProperty.LINE_BREAK, UCharacter.LineBreak.IDEOGRAPHIC },
1952 { 0x20004, UProperty.LINE_BREAK, UCharacter.LineBreak.IDEOGRAPHIC },
1953 { 0xf905, UProperty.LINE_BREAK, UCharacter.LineBreak.IDEOGRAPHIC },
1954 { 0xdb7e, UProperty.LINE_BREAK, UCharacter.LineBreak.SURROGATE },
1955 { 0xdbfd, UProperty.LINE_BREAK, UCharacter.LineBreak.SURROGATE },
1956 { 0xdffc, UProperty.LINE_BREAK, UCharacter.LineBreak.SURROGATE },
1957 { 0x2762, UProperty.LINE_BREAK, UCharacter.LineBreak.EXCLAMATION },
1958 { 0x002F, UProperty.LINE_BREAK, UCharacter.LineBreak.BREAK_SYMBOLS },
1959 { 0x1D49C, UProperty.LINE_BREAK, UCharacter.LineBreak.ALPHABETIC },
1960 { 0x1731, UProperty.LINE_BREAK, UCharacter.LineBreak.ALPHABETIC },
1962 /* UProperty.NUMERIC_TYPE tested in TestNumericProperties() */
1964 /* UProperty.SCRIPT tested in TestUScriptCodeAPI() */
1966 { 0x10ff, UProperty.HANGUL_SYLLABLE_TYPE, 0 },
1967 { 0x1100, UProperty.HANGUL_SYLLABLE_TYPE, UCharacter.HangulSyllableType.LEADING_JAMO },
1968 { 0x1111, UProperty.HANGUL_SYLLABLE_TYPE, UCharacter.HangulSyllableType.LEADING_JAMO },
1969 { 0x1159, UProperty.HANGUL_SYLLABLE_TYPE, UCharacter.HangulSyllableType.LEADING_JAMO },
1970 { 0x115a, UProperty.HANGUL_SYLLABLE_TYPE, UCharacter.HangulSyllableType.LEADING_JAMO }, /* changed in Unicode 5.2 */
1971 { 0x115e, UProperty.HANGUL_SYLLABLE_TYPE, UCharacter.HangulSyllableType.LEADING_JAMO }, /* changed in Unicode 5.2 */
1972 { 0x115f, UProperty.HANGUL_SYLLABLE_TYPE, UCharacter.HangulSyllableType.LEADING_JAMO },
1974 { 0xa95f, UProperty.HANGUL_SYLLABLE_TYPE, 0 },
1975 { 0xa960, UProperty.HANGUL_SYLLABLE_TYPE, UCharacter.HangulSyllableType.LEADING_JAMO }, /* changed in Unicode 5.2 */
1976 { 0xa97c, UProperty.HANGUL_SYLLABLE_TYPE, UCharacter.HangulSyllableType.LEADING_JAMO }, /* changed in Unicode 5.2 */
1977 { 0xa97d, UProperty.HANGUL_SYLLABLE_TYPE, 0 },
1979 { 0x1160, UProperty.HANGUL_SYLLABLE_TYPE, UCharacter.HangulSyllableType.VOWEL_JAMO },
1980 { 0x1161, UProperty.HANGUL_SYLLABLE_TYPE, UCharacter.HangulSyllableType.VOWEL_JAMO },
1981 { 0x1172, UProperty.HANGUL_SYLLABLE_TYPE, UCharacter.HangulSyllableType.VOWEL_JAMO },
1982 { 0x11a2, UProperty.HANGUL_SYLLABLE_TYPE, UCharacter.HangulSyllableType.VOWEL_JAMO },
1983 { 0x11a3, UProperty.HANGUL_SYLLABLE_TYPE, UCharacter.HangulSyllableType.VOWEL_JAMO }, /* changed in Unicode 5.2 */
1984 { 0x11a7, UProperty.HANGUL_SYLLABLE_TYPE, UCharacter.HangulSyllableType.VOWEL_JAMO }, /* changed in Unicode 5.2 */
1986 { 0xd7af, UProperty.HANGUL_SYLLABLE_TYPE, 0 },
1987 { 0xd7b0, UProperty.HANGUL_SYLLABLE_TYPE, UCharacter.HangulSyllableType.VOWEL_JAMO }, /* changed in Unicode 5.2 */
1988 { 0xd7c6, UProperty.HANGUL_SYLLABLE_TYPE, UCharacter.HangulSyllableType.VOWEL_JAMO }, /* changed in Unicode 5.2 */
1989 { 0xd7c7, UProperty.HANGUL_SYLLABLE_TYPE, 0 },
1991 { 0x11a8, UProperty.HANGUL_SYLLABLE_TYPE, UCharacter.HangulSyllableType.TRAILING_JAMO },
1992 { 0x11b8, UProperty.HANGUL_SYLLABLE_TYPE, UCharacter.HangulSyllableType.TRAILING_JAMO },
1993 { 0x11c8, UProperty.HANGUL_SYLLABLE_TYPE, UCharacter.HangulSyllableType.TRAILING_JAMO },
1994 { 0x11f9, UProperty.HANGUL_SYLLABLE_TYPE, UCharacter.HangulSyllableType.TRAILING_JAMO },
1995 { 0x11fa, UProperty.HANGUL_SYLLABLE_TYPE, UCharacter.HangulSyllableType.TRAILING_JAMO }, /* changed in Unicode 5.2 */
1996 { 0x11ff, UProperty.HANGUL_SYLLABLE_TYPE, UCharacter.HangulSyllableType.TRAILING_JAMO }, /* changed in Unicode 5.2 */
1997 { 0x1200, UProperty.HANGUL_SYLLABLE_TYPE, 0 },
1999 { 0xd7ca, UProperty.HANGUL_SYLLABLE_TYPE, 0 },
2000 { 0xd7cb, UProperty.HANGUL_SYLLABLE_TYPE, UCharacter.HangulSyllableType.TRAILING_JAMO }, /* changed in Unicode 5.2 */
2001 { 0xd7fb, UProperty.HANGUL_SYLLABLE_TYPE, UCharacter.HangulSyllableType.TRAILING_JAMO }, /* changed in Unicode 5.2 */
2002 { 0xd7fc, UProperty.HANGUL_SYLLABLE_TYPE, 0 },
2004 { 0xac00, UProperty.HANGUL_SYLLABLE_TYPE, UCharacter.HangulSyllableType.LV_SYLLABLE },
2005 { 0xac1c, UProperty.HANGUL_SYLLABLE_TYPE, UCharacter.HangulSyllableType.LV_SYLLABLE },
2006 { 0xc5ec, UProperty.HANGUL_SYLLABLE_TYPE, UCharacter.HangulSyllableType.LV_SYLLABLE },
2007 { 0xd788, UProperty.HANGUL_SYLLABLE_TYPE, UCharacter.HangulSyllableType.LV_SYLLABLE },
2009 { 0xac01, UProperty.HANGUL_SYLLABLE_TYPE, UCharacter.HangulSyllableType.LVT_SYLLABLE },
2010 { 0xac1b, UProperty.HANGUL_SYLLABLE_TYPE, UCharacter.HangulSyllableType.LVT_SYLLABLE },
2011 { 0xac1d, UProperty.HANGUL_SYLLABLE_TYPE, UCharacter.HangulSyllableType.LVT_SYLLABLE },
2012 { 0xc5ee, UProperty.HANGUL_SYLLABLE_TYPE, UCharacter.HangulSyllableType.LVT_SYLLABLE },
2013 { 0xd7a3, UProperty.HANGUL_SYLLABLE_TYPE, UCharacter.HangulSyllableType.LVT_SYLLABLE },
2015 { 0xd7a4, UProperty.HANGUL_SYLLABLE_TYPE, 0 },
2017 { -1, 0x410, 0 }, /* version break for Unicode 4.1 */
2019 { 0x00d7, UProperty.PATTERN_SYNTAX, 1 },
2020 { 0xfe45, UProperty.PATTERN_SYNTAX, 1 },
2021 { 0x0061, UProperty.PATTERN_SYNTAX, 0 },
2023 { 0x0020, UProperty.PATTERN_WHITE_SPACE, 1 },
2024 { 0x0085, UProperty.PATTERN_WHITE_SPACE, 1 },
2025 { 0x200f, UProperty.PATTERN_WHITE_SPACE, 1 },
2026 { 0x00a0, UProperty.PATTERN_WHITE_SPACE, 0 },
2027 { 0x3000, UProperty.PATTERN_WHITE_SPACE, 0 },
2029 { 0x1d200, UProperty.BLOCK, UCharacter.UnicodeBlock.ANCIENT_GREEK_MUSICAL_NOTATION_ID },
2030 { 0x2c8e, UProperty.BLOCK, UCharacter.UnicodeBlock.COPTIC_ID },
2031 { 0xfe17, UProperty.BLOCK, UCharacter.UnicodeBlock.VERTICAL_FORMS_ID },
2033 { 0x1a00, UProperty.SCRIPT, UScript.BUGINESE },
2034 { 0x2cea, UProperty.SCRIPT, UScript.COPTIC },
2035 { 0xa82b, UProperty.SCRIPT, UScript.SYLOTI_NAGRI },
2036 { 0x103d0, UProperty.SCRIPT, UScript.OLD_PERSIAN },
2038 { 0xcc28, UProperty.LINE_BREAK, UCharacter.LineBreak.H2 },
2039 { 0xcc29, UProperty.LINE_BREAK, UCharacter.LineBreak.H3 },
2040 { 0xac03, UProperty.LINE_BREAK, UCharacter.LineBreak.H3 },
2041 { 0x115f, UProperty.LINE_BREAK, UCharacter.LineBreak.JL },
2042 { 0x11aa, UProperty.LINE_BREAK, UCharacter.LineBreak.JT },
2043 { 0x11a1, UProperty.LINE_BREAK, UCharacter.LineBreak.JV },
2045 { 0xb2c9, UProperty.GRAPHEME_CLUSTER_BREAK, UCharacter.GraphemeClusterBreak.LVT },
2046 { 0x036f, UProperty.GRAPHEME_CLUSTER_BREAK, UCharacter.GraphemeClusterBreak.EXTEND },
2047 { 0x0000, UProperty.GRAPHEME_CLUSTER_BREAK, UCharacter.GraphemeClusterBreak.CONTROL },
2048 { 0x1160, UProperty.GRAPHEME_CLUSTER_BREAK, UCharacter.GraphemeClusterBreak.V },
2050 { 0x05f4, UProperty.WORD_BREAK, UCharacter.WordBreak.MIDLETTER },
2051 { 0x4ef0, UProperty.WORD_BREAK, UCharacter.WordBreak.OTHER },
2052 { 0x19d9, UProperty.WORD_BREAK, UCharacter.WordBreak.NUMERIC },
2053 { 0x2044, UProperty.WORD_BREAK, UCharacter.WordBreak.MIDNUM },
2055 { 0xfffd, UProperty.SENTENCE_BREAK, UCharacter.SentenceBreak.OTHER },
2056 { 0x1ffc, UProperty.SENTENCE_BREAK, UCharacter.SentenceBreak.UPPER },
2057 { 0xff63, UProperty.SENTENCE_BREAK, UCharacter.SentenceBreak.CLOSE },
2058 { 0x2028, UProperty.SENTENCE_BREAK, UCharacter.SentenceBreak.SEP },
2060 { -1, 0x520, 0 }, /* version break for Unicode 5.2 */
2062 /* unassigned code points in new default Bidi R blocks */
2063 { 0x1ede4, UProperty.BIDI_CLASS, UCharacterDirection.RIGHT_TO_LEFT },
2064 { 0x1efe4, UProperty.BIDI_CLASS, UCharacterDirection.RIGHT_TO_LEFT },
2066 /* test some script codes >127 */
2067 { 0xa6e6, UProperty.SCRIPT, UScript.BAMUM },
2068 { 0xa4d0, UProperty.SCRIPT, UScript.LISU },
2069 { 0x10a7f, UProperty.SCRIPT, UScript.OLD_SOUTH_ARABIAN },
2071 { -1, 0x600, 0 }, /* version break for Unicode 6.0 */
2073 /* value changed in Unicode 6.0 */
2074 { 0x06C3, UProperty.JOINING_GROUP, UCharacter.JoiningGroup.TEH_MARBUTA_GOAL },
2076 { -1, 0x610, 0 }, /* version break for Unicode 6.1 */
2078 /* unassigned code points in new/changed default Bidi AL blocks */
2079 { 0x08ba, UProperty.BIDI_CLASS, UCharacterDirection.RIGHT_TO_LEFT_ARABIC },
2080 { 0x1eee4, UProperty.BIDI_CLASS, UCharacterDirection.RIGHT_TO_LEFT_ARABIC },
2082 { -1, 0x630, 0 }, /* version break for Unicode 6.3 */
2084 /* unassigned code points in the currency symbols block now default to ET */
2085 { 0x20C0, UProperty.BIDI_CLASS, UCharacterDirection.EUROPEAN_NUMBER_TERMINATOR },
2086 { 0x20CF, UProperty.BIDI_CLASS, UCharacterDirection.EUROPEAN_NUMBER_TERMINATOR },
2088 /* new property in Unicode 6.3 */
2089 { 0x0027, UProperty.BIDI_PAIRED_BRACKET_TYPE, UCharacter.BidiPairedBracketType.NONE },
2090 { 0x0028, UProperty.BIDI_PAIRED_BRACKET_TYPE, UCharacter.BidiPairedBracketType.OPEN },
2091 { 0x0029, UProperty.BIDI_PAIRED_BRACKET_TYPE, UCharacter.BidiPairedBracketType.CLOSE },
2092 { 0xFF5C, UProperty.BIDI_PAIRED_BRACKET_TYPE, UCharacter.BidiPairedBracketType.NONE },
2093 { 0xFF5B, UProperty.BIDI_PAIRED_BRACKET_TYPE, UCharacter.BidiPairedBracketType.OPEN },
2094 { 0xFF5D, UProperty.BIDI_PAIRED_BRACKET_TYPE, UCharacter.BidiPairedBracketType.CLOSE },
2096 /* undefined UProperty values */
2098 { 0x234bc, 0x15ed, 0 }
2102 if (UCharacter.getIntPropertyMinValue(UProperty.DASH) != 0
2103 || UCharacter.getIntPropertyMinValue(UProperty.BIDI_CLASS) != 0
2104 || UCharacter.getIntPropertyMinValue(UProperty.BLOCK)!= 0 /* j2478 */
2105 || UCharacter.getIntPropertyMinValue(UProperty.SCRIPT)!= 0 /* JB#2410 */
2106 || UCharacter.getIntPropertyMinValue(0x2345) != 0) {
2107 errln("error: UCharacter.getIntPropertyMinValue() wrong");
2110 if( UCharacter.getIntPropertyMaxValue(UProperty.DASH)!=1) {
2111 errln("error: UCharacter.getIntPropertyMaxValue(UProperty.DASH) wrong\n");
2113 if( UCharacter.getIntPropertyMaxValue(UProperty.ID_CONTINUE)!=1) {
2114 errln("error: UCharacter.getIntPropertyMaxValue(UProperty.ID_CONTINUE) wrong\n");
2116 if( UCharacter.getIntPropertyMaxValue(UProperty.BINARY_LIMIT-1)!=1) {
2117 errln("error: UCharacter.getIntPropertyMaxValue(UProperty.BINARY_LIMIT-1) wrong\n");
2120 if( UCharacter.getIntPropertyMaxValue(UProperty.BIDI_CLASS)!=UCharacterDirection.CHAR_DIRECTION_COUNT-1 ) {
2121 errln("error: UCharacter.getIntPropertyMaxValue(UProperty.BIDI_CLASS) wrong\n");
2123 if( UCharacter.getIntPropertyMaxValue(UProperty.BLOCK)!=UCharacter.UnicodeBlock.COUNT-1 ) {
2124 errln("error: UCharacter.getIntPropertyMaxValue(UProperty.BLOCK) wrong\n");
2126 if(UCharacter.getIntPropertyMaxValue(UProperty.LINE_BREAK)!=UCharacter.LineBreak.COUNT-1) {
2127 errln("error: UCharacter.getIntPropertyMaxValue(UProperty.LINE_BREAK) wrong\n");
2129 if(UCharacter.getIntPropertyMaxValue(UProperty.SCRIPT)!=UScript.CODE_LIMIT-1) {
2130 errln("error: UCharacter.getIntPropertyMaxValue(UProperty.SCRIPT) wrong\n");
2132 if(UCharacter.getIntPropertyMaxValue(UProperty.NUMERIC_TYPE)!=UCharacter.NumericType.COUNT-1) {
2133 errln("error: UCharacter.getIntPropertyMaxValue(UProperty.NUMERIC_TYPE) wrong\n");
2135 if(UCharacter.getIntPropertyMaxValue(UProperty.GENERAL_CATEGORY)!=UCharacterCategory.CHAR_CATEGORY_COUNT-1) {
2136 errln("error: UCharacter.getIntPropertyMaxValue(UProperty.GENERAL_CATEGORY) wrong\n");
2138 if(UCharacter.getIntPropertyMaxValue(UProperty.HANGUL_SYLLABLE_TYPE)!=UCharacter.HangulSyllableType.COUNT-1) {
2139 errln("error: UCharacter.getIntPropertyMaxValue(UProperty.HANGUL_SYLLABLE_TYPE) wrong\n");
2141 if(UCharacter.getIntPropertyMaxValue(UProperty.GRAPHEME_CLUSTER_BREAK)!=UCharacter.GraphemeClusterBreak.COUNT-1) {
2142 errln("error: UCharacter.getIntPropertyMaxValue(UProperty.GRAPHEME_CLUSTER_BREAK) wrong\n");
2144 if(UCharacter.getIntPropertyMaxValue(UProperty.SENTENCE_BREAK)!=UCharacter.SentenceBreak.COUNT-1) {
2145 errln("error: UCharacter.getIntPropertyMaxValue(UProperty.SENTENCE_BREAK) wrong\n");
2147 if(UCharacter.getIntPropertyMaxValue(UProperty.WORD_BREAK)!=UCharacter.WordBreak.COUNT-1) {
2148 errln("error: UCharacter.getIntPropertyMaxValue(UProperty.WORD_BREAK) wrong\n");
2150 if(UCharacter.getIntPropertyMaxValue(UProperty.BIDI_PAIRED_BRACKET_TYPE)!=UCharacter.BidiPairedBracketType.COUNT-1) {
2151 errln("error: UCharacter.getIntPropertyMaxValue(UProperty.BIDI_PAIRED_BRACKET_TYPE) wrong\n");
2154 if( UCharacter.getIntPropertyMaxValue(0x2345)!=-1) {
2155 errln("error: UCharacter.getIntPropertyMaxValue(0x2345) wrong\n");
2157 if( UCharacter.getIntPropertyMaxValue(UProperty.DECOMPOSITION_TYPE) != (UCharacter.DecompositionType.COUNT - 1)) {
2158 errln("error: UCharacter.getIntPropertyMaxValue(UProperty.DECOMPOSITION_TYPE) wrong\n");
2160 if( UCharacter.getIntPropertyMaxValue(UProperty.JOINING_GROUP) != (UCharacter.JoiningGroup.COUNT -1)) {
2161 errln("error: UCharacter.getIntPropertyMaxValue(UProperty.JOINING_GROUP) wrong\n");
2163 if( UCharacter.getIntPropertyMaxValue(UProperty.JOINING_TYPE) != (UCharacter.JoiningType.COUNT -1)) {
2164 errln("error: UCharacter.getIntPropertyMaxValue(UProperty.JOINING_TYPE) wrong\n");
2166 if( UCharacter.getIntPropertyMaxValue(UProperty.EAST_ASIAN_WIDTH) != (UCharacter.EastAsianWidth.COUNT -1)) {
2167 errln("error: UCharacter.getIntPropertyMaxValue(UProperty.EAST_ASIAN_WIDTH) wrong\n");
2170 VersionInfo version = UCharacter.getUnicodeVersion();
2172 // test hasBinaryProperty()
2173 for (int i = 0; i < props.length; ++ i) {
2174 int which = props[i][1];
2175 if (props[i][0] < 0) {
2176 if (version.compareTo(VersionInfo.getInstance(which >> 8,
2186 whichName = UCharacter.getPropertyName(which, UProperty.NameChoice.LONG);
2187 } catch(IllegalArgumentException e) {
2188 // There are intentionally invalid property integer values ("which").
2189 // Catch and ignore the exception from getPropertyName().
2190 whichName = "undefined UProperty value";
2192 boolean expect = true;
2193 if (props[i][2] == 0) {
2196 if (which < UProperty.INT_START) {
2197 if (UCharacter.hasBinaryProperty(props[i][0], which)
2199 errln("error: UCharacter.hasBinaryProperty(U+" +
2200 Utility.hex(props[i][0], 4) + ", " +
2201 whichName + ") has an error, expected=" + expect);
2205 int retVal = UCharacter.getIntPropertyValue(props[i][0], which);
2206 if (retVal != props[i][2]) {
2207 errln("error: UCharacter.getIntPropertyValue(U+" +
2208 Utility.hex(props[i][0], 4) +
2209 ", " + whichName + ") is wrong, expected="
2210 + props[i][2] + " actual=" + retVal);
2213 // test separate functions, too
2215 case UProperty.ALPHABETIC:
2216 if (UCharacter.isUAlphabetic(props[i][0]) != expect) {
2217 errln("error: UCharacter.isUAlphabetic(\\u" +
2218 Integer.toHexString(props[i][0]) +
2219 ") is wrong expected " + props[i][2]);
2222 case UProperty.LOWERCASE:
2223 if (UCharacter.isULowercase(props[i][0]) != expect) {
2224 errln("error: UCharacter.isULowercase(\\u" +
2225 Integer.toHexString(props[i][0]) +
2226 ") is wrong expected " +props[i][2]);
2229 case UProperty.UPPERCASE:
2230 if (UCharacter.isUUppercase(props[i][0]) != expect) {
2231 errln("error: UCharacter.isUUppercase(\\u" +
2232 Integer.toHexString(props[i][0]) +
2233 ") is wrong expected " + props[i][2]);
2236 case UProperty.WHITE_SPACE:
2237 if (UCharacter.isUWhiteSpace(props[i][0]) != expect) {
2238 errln("error: UCharacter.isUWhiteSpace(\\u" +
2239 Integer.toHexString(props[i][0]) +
2240 ") is wrong expected " + props[i][2]);
2249 public void TestNumericProperties()
2251 // see UnicodeData.txt, DerivedNumericValues.txt
2252 double values[][] = {
2253 // Code point, numeric type, numeric value.
2254 // If a fourth value is specified, it is the getNumericValue().
2255 // Otherwise it is expected to be the same as the getUnicodeNumericValue(),
2256 // where UCharacter.NO_NUMERIC_VALUE is turned into -1.
2257 // getNumericValue() returns -2 if the code point has a value
2258 // which is not a non-negative integer. (This is mostly auto-converted to -2.)
2259 { 0x0F33, UCharacter.NumericType.NUMERIC, -1./2. },
2260 { 0x0C66, UCharacter.NumericType.DECIMAL, 0 },
2261 { 0x96f6, UCharacter.NumericType.NUMERIC, 0 },
2262 { 0xa833, UCharacter.NumericType.NUMERIC, 1./16. },
2263 { 0x2152, UCharacter.NumericType.NUMERIC, 1./10. },
2264 { 0x2151, UCharacter.NumericType.NUMERIC, 1./9. },
2265 { 0x1245f, UCharacter.NumericType.NUMERIC, 1./8. },
2266 { 0x2150, UCharacter.NumericType.NUMERIC, 1./7. },
2267 { 0x2159, UCharacter.NumericType.NUMERIC, 1./6. },
2268 { 0x09f6, UCharacter.NumericType.NUMERIC, 3./16. },
2269 { 0x2155, UCharacter.NumericType.NUMERIC, 1./5. },
2270 { 0x00BD, UCharacter.NumericType.NUMERIC, 1./2. },
2271 { 0x0031, UCharacter.NumericType.DECIMAL, 1. },
2272 { 0x4e00, UCharacter.NumericType.NUMERIC, 1. },
2273 { 0x58f1, UCharacter.NumericType.NUMERIC, 1. },
2274 { 0x10320, UCharacter.NumericType.NUMERIC, 1. },
2275 { 0x0F2B, UCharacter.NumericType.NUMERIC, 3./2. },
2276 { 0x00B2, UCharacter.NumericType.DIGIT, 2. }, /* Unicode 4.0 change */
2277 { 0x5f10, UCharacter.NumericType.NUMERIC, 2. },
2278 { 0x1813, UCharacter.NumericType.DECIMAL, 3. },
2279 { 0x5f0e, UCharacter.NumericType.NUMERIC, 3. },
2280 { 0x2173, UCharacter.NumericType.NUMERIC, 4. },
2281 { 0x8086, UCharacter.NumericType.NUMERIC, 4. },
2282 { 0x278E, UCharacter.NumericType.DIGIT, 5. },
2283 { 0x1D7F2, UCharacter.NumericType.DECIMAL, 6. },
2284 { 0x247A, UCharacter.NumericType.DIGIT, 7. },
2285 { 0x7396, UCharacter.NumericType.NUMERIC, 9. },
2286 { 0x1372, UCharacter.NumericType.NUMERIC, 10. },
2287 { 0x216B, UCharacter.NumericType.NUMERIC, 12. },
2288 { 0x16EE, UCharacter.NumericType.NUMERIC, 17. },
2289 { 0x249A, UCharacter.NumericType.NUMERIC, 19. },
2290 { 0x303A, UCharacter.NumericType.NUMERIC, 30. },
2291 { 0x5345, UCharacter.NumericType.NUMERIC, 30. },
2292 { 0x32B2, UCharacter.NumericType.NUMERIC, 37. },
2293 { 0x1375, UCharacter.NumericType.NUMERIC, 40. },
2294 { 0x10323, UCharacter.NumericType.NUMERIC, 50. },
2295 { 0x0BF1, UCharacter.NumericType.NUMERIC, 100. },
2296 { 0x964c, UCharacter.NumericType.NUMERIC, 100. },
2297 { 0x217E, UCharacter.NumericType.NUMERIC, 500. },
2298 { 0x2180, UCharacter.NumericType.NUMERIC, 1000. },
2299 { 0x4edf, UCharacter.NumericType.NUMERIC, 1000. },
2300 { 0x2181, UCharacter.NumericType.NUMERIC, 5000. },
2301 { 0x137C, UCharacter.NumericType.NUMERIC, 10000. },
2302 { 0x4e07, UCharacter.NumericType.NUMERIC, 10000. },
2303 { 0x12432, UCharacter.NumericType.NUMERIC, 216000. },
2304 { 0x12433, UCharacter.NumericType.NUMERIC, 432000. },
2305 { 0x4ebf, UCharacter.NumericType.NUMERIC, 100000000. },
2306 { 0x5146, UCharacter.NumericType.NUMERIC, 1000000000000. },
2307 { -1, UCharacter.NumericType.NONE, UCharacter.NO_NUMERIC_VALUE },
2308 { 0x61, UCharacter.NumericType.NONE, UCharacter.NO_NUMERIC_VALUE, 10. },
2309 { 0x3000, UCharacter.NumericType.NONE, UCharacter.NO_NUMERIC_VALUE },
2310 { 0xfffe, UCharacter.NumericType.NONE, UCharacter.NO_NUMERIC_VALUE },
2311 { 0x10301, UCharacter.NumericType.NONE, UCharacter.NO_NUMERIC_VALUE },
2312 { 0xe0033, UCharacter.NumericType.NONE, UCharacter.NO_NUMERIC_VALUE },
2313 { 0x10ffff, UCharacter.NumericType.NONE, UCharacter.NO_NUMERIC_VALUE },
2314 { 0x110000, UCharacter.NumericType.NONE, UCharacter.NO_NUMERIC_VALUE }
2317 for (int i = 0; i < values.length; ++ i) {
2318 int c = (int)values[i][0];
2319 int type = UCharacter.getIntPropertyValue(c,
2320 UProperty.NUMERIC_TYPE);
2321 double nv = UCharacter.getUnicodeNumericValue(c);
2323 if (type != values[i][1]) {
2324 errln("UProperty.NUMERIC_TYPE(\\u" + Utility.hex(c, 4)
2325 + ") = " + type + " should be " + (int)values[i][1]);
2327 if (0.000001 <= Math.abs(nv - values[i][2])) {
2328 errln("UCharacter.getUnicodeNumericValue(\\u" + Utility.hex(c, 4)
2329 + ") = " + nv + " should be " + values[i][2]);
2332 // Test getNumericValue() as well.
2333 // It can only return the subset of numeric values that are
2334 // non-negative and fit into an int.
2336 if (values[i].length == 3) {
2337 if (values[i][2] == UCharacter.NO_NUMERIC_VALUE) {
2340 expectedInt = (int)values[i][2];
2341 if (expectedInt < 0 || expectedInt != values[i][2]) {
2342 // The numeric value is not a non-negative integer.
2347 expectedInt = (int)values[i][3];
2349 int nvInt = UCharacter.getNumericValue(c);
2350 if (nvInt != expectedInt) {
2351 errln("UCharacter.getNumericValue(\\u" + Utility.hex(c, 4)
2352 + ") = " + nvInt + " should be " + expectedInt);
2358 * Test the property values API. See JB#2410.
2360 public void TestPropertyValues() {
2363 /* Min should be 0 for everything. */
2364 /* Until JB#2478 is fixed, the one exception is UProperty.BLOCK. */
2365 for (p=UProperty.INT_START; p<UProperty.INT_LIMIT; ++p) {
2366 min = UCharacter.getIntPropertyMinValue(p);
2368 if (p == UProperty.BLOCK) {
2369 /* This is okay...for now. See JB#2487.
2370 TODO Update this for JB#2487. */
2373 name = UCharacter.getPropertyName(p, UProperty.NameChoice.LONG);
2374 errln("FAIL: UCharacter.getIntPropertyMinValue(" + name + ") = " +
2380 if (UCharacter.getIntPropertyMinValue(UProperty.GENERAL_CATEGORY_MASK)
2382 || UCharacter.getIntPropertyMaxValue(
2383 UProperty.GENERAL_CATEGORY_MASK)
2385 errln("error: UCharacter.getIntPropertyMin/MaxValue("
2386 + "UProperty.GENERAL_CATEGORY_MASK) is wrong");
2389 /* Max should be -1 for invalid properties. */
2390 max = UCharacter.getIntPropertyMaxValue(-1);
2392 errln("FAIL: UCharacter.getIntPropertyMaxValue(-1) = " +
2396 /* Script should return 0 for an invalid code point. If the API
2397 throws an exception then that's fine too. */
2398 for (i=0; i<2; ++i) {
2404 script = UScript.getScript(-1);
2405 desc = "UScript.getScript(-1)";
2408 script = UCharacter.getIntPropertyValue(-1, UProperty.SCRIPT);
2409 desc = "UCharacter.getIntPropertyValue(-1, UProperty.SCRIPT)";
2413 errln("FAIL: " + desc + " = " + script + ", exp. 0");
2415 } catch (IllegalArgumentException e) {}
2419 public void TestBidiPairedBracketType() {
2420 // BidiBrackets-6.3.0.txt says:
2422 // The set of code points listed in this file was originally derived
2423 // using the character properties General_Category (gc), Bidi_Class (bc),
2424 // Bidi_Mirrored (Bidi_M), and Bidi_Mirroring_Glyph (bmg), as follows:
2425 // two characters, A and B, form a pair if A has gc=Ps and B has gc=Pe,
2426 // both have bc=ON and Bidi_M=Y, and bmg of A is B. Bidi_Paired_Bracket
2427 // maps A to B and vice versa, and their Bidi_Paired_Bracket_Type
2428 // property values are Open and Close, respectively.
2429 UnicodeSet bpt = new UnicodeSet("[:^bpt=n:]");
2430 assertTrue("bpt!=None is not empty", !bpt.isEmpty());
2431 // The following should always be true.
2432 UnicodeSet mirrored = new UnicodeSet("[:Bidi_M:]");
2433 UnicodeSet other_neutral = new UnicodeSet("[:bc=ON:]");
2434 assertTrue("bpt!=None is a subset of Bidi_M", mirrored.containsAll(bpt));
2435 assertTrue("bpt!=None is a subset of bc=ON", other_neutral.containsAll(bpt));
2436 // The following are true at least initially in Unicode 6.3.
2437 UnicodeSet bpt_open = new UnicodeSet("[:bpt=o:]");
2438 UnicodeSet bpt_close = new UnicodeSet("[:bpt=c:]");
2439 UnicodeSet ps = new UnicodeSet("[:Ps:]");
2440 UnicodeSet pe = new UnicodeSet("[:Pe:]");
2441 assertTrue("bpt=Open is a subset of Ps", ps.containsAll(bpt_open));
2442 assertTrue("bpt=Close is a subset of Pe", pe.containsAll(bpt_close));
2445 public void TestIsBMP()
2447 int ch[] = {0x0, -1, 0xffff, 0x10ffff, 0xff, 0x1ffff};
2448 boolean flag[] = {true, false, true, false, true, false};
2449 for (int i = 0; i < ch.length; i ++) {
2450 if (UCharacter.isBMP(ch[i]) != flag[i]) {
2451 errln("Fail: \\u" + Utility.hex(ch[i], 8)
2452 + " failed at UCharacter.isBMP");
2457 private boolean showADiffB(UnicodeSet a, UnicodeSet b,
2458 String a_name, String b_name,
2460 boolean diffIsError){
2463 for(i=0; i < a.getRangeCount(); ++i) {
2464 start = a.getRangeStart(i);
2465 end = a.getRangeEnd(i);
2466 if(expect!=b.contains(start, end)) {
2469 if(expect!=b.contains(start)) {
2472 errln("error: "+ a_name +" contains "+ hex(start)+" but "+ b_name +" does not");
2474 errln("error: "+a_name +" and "+ b_name+" both contain "+hex(start) +" but should not intersect");
2478 logln("info: "+a_name +" contains "+hex(start)+ "but " + b_name +" does not");
2480 logln("info: "+a_name +" and "+b_name+" both contain "+hex(start)+" but should not intersect");
2490 private boolean showAMinusB(UnicodeSet a, UnicodeSet b,
2491 String a_name, String b_name,
2492 boolean diffIsError) {
2494 return showADiffB(a, b, a_name, b_name, true, diffIsError);
2497 private boolean showAIntersectB(UnicodeSet a, UnicodeSet b,
2498 String a_name, String b_name,
2499 boolean diffIsError) {
2500 return showADiffB(a, b, a_name, b_name, false, diffIsError);
2503 private boolean compareUSets(UnicodeSet a, UnicodeSet b,
2504 String a_name, String b_name,
2505 boolean diffIsError) {
2507 showAMinusB(a, b, a_name, b_name, diffIsError) &&
2508 showAMinusB(b, a, b_name, a_name, diffIsError);
2511 /* various tests for consistency of UCD data and API behavior */
2512 public void TestConsistency() {
2513 UnicodeSet set1, set2, set3, set4;
2518 String hyphenPattern = "[:Hyphen:]";
2519 String dashPattern = "[:Dash:]";
2520 String lowerPattern = "[:Lowercase:]";
2521 String formatPattern = "[:Cf:]";
2522 String alphaPattern = "[:Alphabetic:]";
2525 * It used to be that UCD.html and its precursors said
2526 * "Those dashes used to mark connections between pieces of words,
2527 * plus the Katakana middle dot."
2529 * Unicode 4 changed 00AD Soft Hyphen to Cf and removed it from Dash
2530 * but not from Hyphen.
2531 * UTC 94 (2003mar) decided to leave it that way and to change UCD.html.
2532 * Therefore, do not show errors when testing the Hyphen property.
2534 logln("Starting with Unicode 4, inconsistencies with [:Hyphen:] are\n"
2535 + "known to the UTC and not considered errors.\n");
2537 set1=new UnicodeSet(hyphenPattern);
2538 set2=new UnicodeSet(dashPattern);
2540 /* remove the Katakana middle dot(s) from set1 */
2541 set1.remove(0x30fb);
2542 set2.remove (0xff65); /* halfwidth variant */
2543 showAMinusB(set1, set2, "[:Hyphen:]", "[:Dash:]", false);
2546 /* check that Cf is neither Hyphen nor Dash nor Alphabetic */
2547 set3=new UnicodeSet(formatPattern);
2548 set4=new UnicodeSet(alphaPattern);
2550 showAIntersectB(set3, set1, "[:Cf:]", "[:Hyphen:]", false);
2551 showAIntersectB(set3, set2, "[:Cf:]", "[:Dash:]", true);
2552 showAIntersectB(set3, set4, "[:Cf:]", "[:Alphabetic:]", true);
2554 * Check that each lowercase character has "small" in its name
2555 * and not "capital".
2556 * There are some such characters, some of which seem odd.
2557 * Use the verbose flag to see these notices.
2559 set1=new UnicodeSet(lowerPattern);
2563 // length=set1.getItem(set1, i, &start, &end, NULL, 0, &errorCode);
2564 // }catch(Exception e){
2567 start = set1.getRangeStart(i);
2568 end = set1.getRangeEnd(i);
2569 length = i<set1.getRangeCount() ? set1.getRangeCount() : 0;
2571 break; /* done with code points, got a string or -1 */
2575 String name=UCharacter.getName(start);
2577 if( (name.indexOf("SMALL")< 0 || name.indexOf("CAPITAL")<-1) &&
2578 name.indexOf("SMALL CAPITAL")==-1
2580 logln("info: [:Lowercase:] contains U+"+hex(start) + " whose name does not suggest lowercase: " + name);
2588 * Test for an example that unorm_getCanonStartSet() delivers
2589 * all characters that compose from the input one,
2590 * even in multiple steps.
2591 * For example, the set for "I" (0049) should contain both
2592 * I-diaeresis (00CF) and I-diaeresis-acute (1E2E).
2593 * In general, the set for the middle such character should be a subset
2594 * of the set for the first.
2596 Normalizer2 norm2=Normalizer2.getNFDInstance();
2597 set1=new UnicodeSet();
2598 Norm2AllModes.getNFCInstance().impl.
2599 ensureCanonIterData().getCanonStartSet(0x49, set1);
2600 set2=new UnicodeSet();
2602 /* enumerate all characters that are plausible to be latin letters */
2603 for(start=0xa0; start<0x2000; ++start) {
2604 String decomp=norm2.normalize(UTF16.valueOf(start));
2605 if(decomp.length() > 1 && decomp.charAt(0)==0x49) {
2610 compareUSets(set1, set2,
2611 "[canon start set of 0049]", "[all c with canon decomp with 0049]",
2616 public void TestCoverage() {
2618 char ch1 = UCharacter.forDigit(7, 11);
2619 assertEquals("UCharacter.forDigit ", "7", String.valueOf(ch1));
2620 char ch2 = UCharacter.forDigit(17, 20);
2621 assertEquals("UCharacter.forDigit ", "h", String.valueOf(ch2));
2623 //Jitterbug 4451, for coverage
2624 for (int i = 0x0041; i < 0x005B; i++) {
2625 if (!UCharacter.isJavaLetter(i))
2626 errln("FAIL \\u" + hex(i) + " expected to be a letter");
2627 if (!UCharacter.isJavaIdentifierStart(i))
2628 errln("FAIL \\u" + hex(i) + " expected to be a Java identifier start character");
2629 if (!UCharacter.isJavaLetterOrDigit(i))
2630 errln("FAIL \\u" + hex(i) + " expected not to be a Java letter");
2631 if (!UCharacter.isJavaIdentifierPart(i))
2632 errln("FAIL \\u" + hex(i) + " expected to be a Java identifier part character");
2634 char[] spaces = {'\t','\n','\f','\r',' '};
2635 for (int i = 0; i < spaces.length; i++){
2636 if (!UCharacter.isSpace(spaces[i]))
2637 errln("FAIL \\u" + hex(spaces[i]) + " expected to be a Java space");
2641 public void TestBlockData()
2643 Class ubc = UCharacter.UnicodeBlock.class;
2645 for (int b = 1; b < UCharacter.UnicodeBlock.COUNT; b += 1) {
2646 UCharacter.UnicodeBlock blk = UCharacter.UnicodeBlock.getInstance(b);
2647 int id = blk.getID();
2648 String name = blk.toString();
2651 errln("UCharacter.UnicodeBlock.getInstance(" + b + ") returned a block with id = " + id);
2655 if (ubc.getField(name + "_ID").getInt(blk) != b) {
2656 errln("UCharacter.UnicodeBlock.getInstance(" + b + ") returned a block with a name of " + name +
2657 " which does not match the block id.");
2659 } catch (Exception e) {
2660 errln("Couldn't get the id name for id " + b);
2666 * The following method tests
2667 * public static UnicodeBlock getInstance(int id)
2669 public void TestGetInstance(){
2670 // Testing values for invalid and valid ID
2671 int[] invalid_test = {-1,-10,-100};
2672 for(int i=0; i< invalid_test.length; i++){
2673 if(UCharacter.UnicodeBlock.INVALID_CODE != UCharacter.UnicodeBlock.getInstance(invalid_test[i])){
2674 errln("UCharacter.UnicodeBlock.getInstance(invalid_test[i]) was " +
2675 "suppose to return UCharacter.UnicodeBlock.INVALID_CODE. Got " +
2676 UCharacter.UnicodeBlock.getInstance(invalid_test[i]) + ". Expected " +
2677 UCharacter.UnicodeBlock.INVALID_CODE);
2683 * The following method tests
2684 * public static UnicodeBlock of(int ch)
2686 public void TestOf(){
2687 if(UCharacter.UnicodeBlock.INVALID_CODE != UCharacter.UnicodeBlock.of(UTF16.CODEPOINT_MAX_VALUE+1)){
2688 errln("UCharacter.UnicodeBlock.of(UTF16.CODEPOINT_MAX_VALUE+1) was " +
2689 "suppose to return UCharacter.UnicodeBlock.INVALID_CODE. Got " +
2690 UCharacter.UnicodeBlock.of(UTF16.CODEPOINT_MAX_VALUE+1) + ". Expected " +
2691 UCharacter.UnicodeBlock.INVALID_CODE);
2696 * The following method tests
2697 * public static final UnicodeBlock forName(String blockName)
2699 public void TestForName(){
2700 //UCharacter.UnicodeBlock.forName("");
2701 //Tests when "if (b == null)" is true
2705 * The following method tests
2706 * public static int getNumericValue(int ch)
2708 public void TestGetNumericValue(){
2709 // The following tests the else statement when
2710 // if(numericType<NumericType.COUNT) is false
2711 // The following values were obtained by testing all values from
2712 // UTF16.CODEPOINT_MIN_VALUE to UTF16.CODEPOINT_MAX_VALUE inclusively
2713 // to obtain the value to go through the else statement.
2714 int[] valid_values =
2715 {3058,3442,4988,8558,8559,8574,8575,8576,8577,8578,8583,8584,19975,
2716 20159,20191,20740,20806,21315,33836,38433,65819,65820,65821,65822,
2717 65823,65824,65825,65826,65827,65828,65829,65830,65831,65832,65833,
2718 65834,65835,65836,65837,65838,65839,65840,65841,65842,65843,65861,
2719 65862,65863,65868,65869,65870,65875,65876,65877,65878,65899,65900,
2720 65901,65902,65903,65904,65905,65906,66378,68167};
2723 {1000,1000,10000,500,1000,500,1000,1000,5000,10000,50000,100000,
2724 10000,100000000,1000,100000000,-2,1000,10000,1000,300,400,500,
2725 600,700,800,900,1000,2000,3000,4000,5000,6000,7000,8000,9000,
2726 10000,20000,30000,40000,50000,60000,70000,80000,90000,500,5000,
2727 50000,500,1000,5000,500,1000,10000,50000,300,500,500,500,500,500,
2728 1000,5000,900,1000};
2730 if(valid_values.length != results.length){
2731 errln("The valid_values array and the results array need to be "+
2732 "the same length.");
2734 for(int i = 0; i < valid_values.length; i++){
2736 if(UCharacter.getNumericValue(valid_values[i]) != results[i]){
2737 errln("UCharacter.getNumericValue(i) returned a " +
2738 "different value from the expected result. " +
2739 "Got " + UCharacter.getNumericValue(valid_values[i]) +
2740 "Expected" + results[i]);
2742 } catch(Exception e){
2743 errln("UCharacter.getNumericValue(int) returned an exception " +
2744 "with the parameter value");
2751 * The following method tests
2752 * public static double getUnicodeNumericValue(int ch)
2754 // The following tests covers if(mant==0), else if(mant > 9), and default
2755 public void TestGetUnicodeNumericValue(){
2756 /* The code coverage for if(mant==0), else if(mant > 9), and default
2757 * could not be covered even with input values from UTF16.CODEPOINT_MIN_VALUE
2758 * to UTF16.CODEPOINT_MAX_VALUE. I also tested from UTF16.CODEPOINT_MAX_VALUE to
2759 * Integer.MAX_VALUE and didn't recieve any code coverage there too.
2760 * Therefore, the code could either be dead code or meaningless.
2765 * The following method tests
2766 * public static String toString(int ch)
2768 public void TestToString(){
2769 int[] valid_tests = {
2770 UCharacter.MIN_VALUE, UCharacter.MIN_VALUE+1,
2771 UCharacter.MAX_VALUE-1, UCharacter.MAX_VALUE};
2772 int[] invalid_tests = {
2773 UCharacter.MIN_VALUE-1, UCharacter.MIN_VALUE-2,
2774 UCharacter.MAX_VALUE+1, UCharacter.MAX_VALUE+2};
2776 for(int i=0; i< valid_tests.length; i++){
2777 if(UCharacter.toString(valid_tests[i]) == null){
2778 errln("UCharacter.toString(int) was not suppose to return " +
2779 "null because it was given a valid parameter. Value passed: " +
2780 valid_tests[i] + ". Got null.");
2784 for(int i=0; i< invalid_tests.length; i++){
2785 if(UCharacter.toString(invalid_tests[i]) != null){
2786 errln("UCharacter.toString(int) was suppose to return " +
2787 "null because it was given an invalid parameter. Value passed: " +
2788 invalid_tests[i] + ". Got: " + UCharacter.toString(invalid_tests[i]));
2794 * The following method tests
2795 * public static int getCombiningClass(int ch)
2797 public void TestGetCombiningClass(){
2798 int[] valid_tests = {
2799 UCharacter.MIN_VALUE, UCharacter.MIN_VALUE+1,
2800 UCharacter.MAX_VALUE-1, UCharacter.MAX_VALUE};
2801 int[] invalid_tests = {
2802 UCharacter.MIN_VALUE-1, UCharacter.MIN_VALUE-2,
2803 UCharacter.MAX_VALUE+1, UCharacter.MAX_VALUE+2};
2805 for(int i=0; i< valid_tests.length; i++){
2807 UCharacter.getCombiningClass(valid_tests[i]);
2808 } catch(Exception e){
2809 errln("UCharacter.getCombiningClass(int) was not supposed to have " +
2810 "an exception. Value passed: " + valid_tests[i]);
2814 for(int i=0; i< invalid_tests.length; i++){
2816 assertEquals("getCombiningClass(out of range)",
2817 0, UCharacter.getCombiningClass(invalid_tests[i]));
2818 } catch(Exception e){
2819 errln("UCharacter.getCombiningClass(int) was not supposed to have " +
2820 "an exception. Value passed: " + invalid_tests[i]);
2826 * The following method tests
2827 * public static String getName(int ch)
2829 public void TestGetName(){
2830 // Need to test on other "one characters" for the getName() method
2831 String[] data = {"a","z"};
2832 String[] results = {"LATIN SMALL LETTER A","LATIN SMALL LETTER Z"};
2833 if(data.length != results.length){
2834 errln("The data array and the results array need to be "+
2835 "the same length.");
2837 for(int i=0; i < data.length; i++){
2838 if(UCharacter.getName(data[i], "").compareTo(results[i]) != 0){
2839 errln("UCharacter.getName(String, String) was suppose " +
2840 "to have the same result for the data in the parameter. " +
2841 "Value passed: " + data[i] + ". Got: " +
2842 UCharacter.getName(data[i], "") + ". Expected: " +
2850 * The following method tests
2851 * public static String getISOComment(int ch)
2853 public void TestGetISOComment(){
2854 int[] invalid_tests = {
2855 UCharacter.MIN_VALUE-1, UCharacter.MIN_VALUE-2,
2856 UCharacter.MAX_VALUE+1, UCharacter.MAX_VALUE+2};
2858 for(int i=0; i< invalid_tests.length; i++){
2859 if(UCharacter.getISOComment(invalid_tests[i]) != null){
2860 errln("UCharacter.getISOComment(int) was suppose to return " +
2861 "null because it was given an invalid parameter. Value passed: " +
2862 invalid_tests[i] + ". Got: " + UCharacter.getISOComment(invalid_tests[i]));
2868 * The following method tests
2869 * public void setLimit(int lim)
2871 public void TestSetLimit(){
2872 // TODO: Tests when "if(0<=lim && lim<=s.length())" is false
2876 * The following method tests
2877 * public int nextCaseMapCP()
2879 public void TestNextCaseMapCP(){
2880 // TODO: Tests when "if(UTF16.LEAD_SURROGATE_MIN_VALUE<=c || c<=UTF16.TRAIL_SURROGATE_MAX_VALUE)" is false
2881 /* TODO: Tests when "if( c<=UTF16.LEAD_SURROGATE_MAX_VALUE && cpLimit<limit &&
2882 * UTF16.TRAIL_SURROGATE_MIN_VALUE<=(c2=s.charAt(cpLimit)) && c2<=UTF16.TRAIL_SURROGATE_MAX_VALUE)" is false
2887 * The following method tests
2888 * public void reset(int direction)
2890 public void TestReset(){
2891 // The method reset() is never called by another function
2892 // TODO: Tests when "else if(direction<0)" is false
2896 * The following method tests
2897 * public static String toTitleCase(Locale locale, String str, BreakIterator breakiter)
2899 public void TestToTitleCaseCoverage(){
2900 //Calls the function "toTitleCase(Locale locale, String str, BreakIterator breakiter)"
2901 String[] locale={"en","fr","zh","ko","ja","it","de",""};
2902 for(int i=0; i<locale.length; i++){
2903 UCharacter.toTitleCase(new Locale(locale[i]), "", null);
2906 // Calls the function "String toTitleCase(ULocale locale, String str, BreakIterator titleIter, int options)"
2907 // Tests when "if (locale == null)" is true
2908 UCharacter.toTitleCase(null, "", null, 0);
2910 // TODO: Tests when "if(index==BreakIterator.DONE || index>srcLength)" is true
2911 // TODO: Tests when "while((c=iter.nextCaseMapCP())>=0 && UCaseProps.NONE==gCsp.getType(c))" is false
2912 // TODO: Tests when "if(prev<titleStart)" is false
2913 // TODO: Tests when "if(c<=0xffff)" is false
2914 // TODO: Tests when "if(c<=0xffff)" is false
2915 // TODO: Tests when "if(titleLimit<index)" is false
2916 // TODO: Tests when "else if((nc=iter.nextCaseMapCP())>=0)" is false
2919 * The following method tests
2920 * public static String toUpperCase(ULocale locale, String str)
2922 public void TestToUpperCase(){
2923 // TODO: Tests when "while((c=iter.nextCaseMapCP())>=0)" is false
2927 * The following method tests
2928 * public static String toLowerCase(ULocale locale, String str)
2930 public void TestToLowerCase(){
2931 // Test when locale is null
2932 String[] cases = {"","a","A","z","Z","Dummy","DUMMY","dummy","a z","A Z",
2933 "'","\"","0","9","0a","a0","*","~!@#$%^&*()_+"};
2934 for(int i=0; i<cases.length; i++){
2936 UCharacter.toLowerCase((ULocale) null, cases[i]);
2937 } catch(Exception e){
2938 errln("UCharacter.toLowerCase was not suppose to return an " +
2939 "exception for input of null and string: " + cases[i]);
2942 // TODO: Tests when "while((c=iter.nextCaseMapCP())>=0)" is false
2946 * The following method tests
2947 * public static int getHanNumericValue(int ch)
2949 public void TestGetHanNumericValue(){
2951 0x3007, //IDEOGRAPHIC_NUMBER_ZERO_
2952 0x96f6, //CJK_IDEOGRAPH_COMPLEX_ZERO_
2953 0x4e00, //CJK_IDEOGRAPH_FIRST_
2954 0x58f9, //CJK_IDEOGRAPH_COMPLEX_ONE_
2955 0x4e8c, //CJK_IDEOGRAPH_SECOND_
2956 0x8cb3, //CJK_IDEOGRAPH_COMPLEX_TWO_
2957 0x4e09, //CJK_IDEOGRAPH_THIRD_
2958 0x53c3, //CJK_IDEOGRAPH_COMPLEX_THREE_
2959 0x56db, //CJK_IDEOGRAPH_FOURTH_
2960 0x8086, //CJK_IDEOGRAPH_COMPLEX_FOUR_
2961 0x4e94, //CJK_IDEOGRAPH_FIFTH_
2962 0x4f0d, //CJK_IDEOGRAPH_COMPLEX_FIVE_
2963 0x516d, //CJK_IDEOGRAPH_SIXTH_
2964 0x9678, //CJK_IDEOGRAPH_COMPLEX_SIX_
2965 0x4e03, //CJK_IDEOGRAPH_SEVENTH_
2966 0x67d2, //CJK_IDEOGRAPH_COMPLEX_SEVEN_
2967 0x516b, //CJK_IDEOGRAPH_EIGHTH_
2968 0x634c, //CJK_IDEOGRAPH_COMPLEX_EIGHT_
2969 0x4e5d, //CJK_IDEOGRAPH_NINETH_
2970 0x7396, //CJK_IDEOGRAPH_COMPLEX_NINE_
2971 0x5341, //CJK_IDEOGRAPH_TEN_
2972 0x62fe, //CJK_IDEOGRAPH_COMPLEX_TEN_
2973 0x767e, //CJK_IDEOGRAPH_HUNDRED_
2974 0x4f70, //CJK_IDEOGRAPH_COMPLEX_HUNDRED_
2975 0x5343, //CJK_IDEOGRAPH_THOUSAND_
2976 0x4edf, //CJK_IDEOGRAPH_COMPLEX_THOUSAND_
2977 0x824c, //CJK_IDEOGRAPH_TEN_THOUSAND_
2978 0x5104, //CJK_IDEOGRAPH_HUNDRED_MILLION_
2981 int[] invalid = {-5,-2,-1,0};
2983 int[] results = {0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,8,8,9,9,10,10,100,100,
2984 1000,1000,10000,100000000};
2986 if(valid.length != results.length){
2987 errln("The arrays valid and results are suppose to be the same length " +
2988 "to test getHanNumericValue(int ch).");
2990 for(int i=0; i<valid.length; i++){
2991 if(UCharacter.getHanNumericValue(valid[i]) != results[i]){
2992 errln("UCharacter.getHanNumericValue does not return the " +
2993 "same result as expected. Passed value: " + valid[i] +
2994 ". Got: " + UCharacter.getHanNumericValue(valid[i]) +
2995 ". Expected: " + results[i]);
3000 for(int i=0; i<invalid.length; i++){
3001 if(UCharacter.getHanNumericValue(invalid[i]) != -1){
3002 errln("UCharacter.getHanNumericValue does not return the " +
3003 "same result as expected. Passed value: " + invalid[i] +
3004 ". Got: " + UCharacter.getHanNumericValue(invalid[i]) +
3011 * The following method tests
3012 * public static boolean hasBinaryProperty(int ch, int property)
3014 public void TestHasBinaryProperty(){
3015 // Testing when "if (ch < MIN_VALUE || ch > MAX_VALUE)" is true
3017 UCharacter.MIN_VALUE-1, UCharacter.MIN_VALUE-2,
3018 UCharacter.MAX_VALUE+1, UCharacter.MAX_VALUE+2};
3020 UCharacter.MIN_VALUE, UCharacter.MIN_VALUE+1,
3021 UCharacter.MAX_VALUE, UCharacter.MAX_VALUE-1};
3023 for(int i=0; i<invalid.length; i++){
3025 if (UCharacter.hasBinaryProperty(invalid[i], 1)) {
3026 errln("UCharacter.hasBinaryProperty(ch, property) should return " +
3027 "false for out-of-range code points but " +
3028 "returns true for " + invalid[i]);
3030 } catch(Exception e) {
3031 errln("UCharacter.hasBinaryProperty(ch, property) should not " +
3032 "throw an exception for any input. Value passed: " +
3037 for(int i=0; i<valid.length; i++){
3039 UCharacter.hasBinaryProperty(valid[i], 1);
3040 } catch(Exception e) {
3041 errln("UCharacter.hasBinaryProperty(ch, property) should not " +
3042 "throw an exception for any input. Value passed: " +
3049 * The following method tests
3050 * public static int getIntPropertyValue(int ch, int type)
3052 public void TestGetIntPropertyValue(){
3053 /* Testing UCharacter.getIntPropertyValue(ch, type) */
3054 // Testing when "if (type < UProperty.BINARY_START)" is true
3055 int[] negative_cases = {-100,-50,-10,-5,-2,-1};
3056 for(int i=0; i<negative_cases.length; i++){
3057 if(UCharacter.getIntPropertyValue(0, negative_cases[i]) != 0){
3058 errln("UCharacter.getIntPropertyValue(ch, type) was suppose to return 0 " +
3059 "when passing a negative value of " + negative_cases[i]);
3064 // Testing when "if(ch<NormalizerImpl.JAMO_L_BASE)" is true
3065 for(int i=Normalizer2Impl.Hangul.JAMO_L_BASE-5; i<Normalizer2Impl.Hangul.JAMO_L_BASE; i++){
3066 if(UCharacter.getIntPropertyValue(i, UProperty.HANGUL_SYLLABLE_TYPE) != 0){
3067 errln("UCharacter.getIntPropertyValue(ch, type) was suppose to return 0 " +
3068 "when passing ch: " + i + "and type of Property.HANGUL_SYLLABLE_TYPE");
3073 // Testing when "else if((ch-=NormalizerImpl.HANGUL_BASE)<0)" is true
3074 for(int i=Normalizer2Impl.Hangul.HANGUL_BASE-5; i<Normalizer2Impl.Hangul.HANGUL_BASE; i++){
3075 if(UCharacter.getIntPropertyValue(i, UProperty.HANGUL_SYLLABLE_TYPE) != 0){
3076 errln("UCharacter.getIntPropertyValue(ch, type) was suppose to return 0 " +
3077 "when passing ch: " + i + "and type of Property.HANGUL_SYLLABLE_TYPE");
3084 * The following method tests
3085 * public static int getIntPropertyMaxValue(int type)
3087 public void TestGetIntPropertyMaxValue(){
3088 /* Testing UCharacter.getIntPropertyMaxValue(type) */
3089 // Testing when "else if (type < UProperty.INT_START)" is true
3090 int[] cases = {UProperty.BINARY_LIMIT, UProperty.BINARY_LIMIT+1,
3091 UProperty.INT_START-2, UProperty.INT_START-1};
3092 for(int i=0; i<cases.length; i++){
3093 if(UCharacter.getIntPropertyMaxValue(cases[i]) != -1){
3094 errln("UCharacter.getIntPropertyMaxValue was suppose to return -1 " +
3095 "but got " + UCharacter.getIntPropertyMaxValue(cases[i]));
3099 // TODO: Testing when the case statment reaches "default"
3100 // After testing between values of UProperty.INT_START and
3101 // UProperty.INT_LIMIT are covered, none of the values reaches default.
3105 * The following method tests
3106 * public static final int codePointAt(CharSequence seq, int index)
3107 * public static final int codePointAt(char[] text, int index, int limit)
3109 public void TestCodePointAt(){
3111 // {LEAD_SURROGATE_MIN_VALUE,
3112 // LEAD_SURROGATE_MAX_VALUE, LEAD_SURROGATE_MAX_VALUE-1
3113 String[] cases = {"\uD800","\uDBFF","\uDBFE"};
3114 int[] result = {55296,56319,56318};
3115 for(int i=0; i < cases.length; i++){
3116 /* Testing UCharacter.codePointAt(seq, index) */
3117 // Testing when "if (index < seq.length())" is false
3118 if(UCharacter.codePointAt((CharSequence) cases[i], 0) != result[i])
3119 errln("UCharacter.codePointAt(CharSequence ...) did not return as expected. " +
3120 "Passed value: " + cases[i] + ". Expected: " +
3121 result[i] + ". Got: " +
3122 UCharacter.codePointAt((CharSequence) cases[i], 0));
3124 /* Testing UCharacter.codePointAt(text, index) */
3125 // Testing when "if (index < text.length)" is false
3126 if(UCharacter.codePointAt(cases[i].toCharArray(), 0) != result[i])
3127 errln("UCharacter.codePointAt(char[] ...) did not return as expected. " +
3128 "Passed value: " + cases[i] + ". Expected: " +
3129 result[i] + ". Got: " +
3130 UCharacter.codePointAt(cases[i].toCharArray(), 0));
3132 /* Testing UCharacter.codePointAt(text, index, limit) */
3133 // Testing when "if (index < limit)" is false
3134 if(UCharacter.codePointAt(cases[i].toCharArray(), 0, 1) != result[i])
3135 errln("UCharacter.codePointAt(char[], int, int) did not return as expected. " +
3136 "Passed value: " + cases[i] + ". Expected: " +
3137 result[i] + ". Got: " +
3138 UCharacter.codePointAt(cases[i].toCharArray(), 0, 1));
3141 /* Testing UCharacter.codePointAt(text, index, limit) */
3142 // Testing when "if (index >= limit || limit > text.length)" is true
3143 char[] empty_text = {};
3144 char[] one_char_text = {'a'};
3145 char[] reg_text = {'d','u','m','m','y'};
3146 int[] limitCases = {2,3,5,10,25};
3148 // When index >= limit
3149 for(int i=0; i < limitCases.length; i++){
3151 UCharacter.codePointAt(reg_text, 100, limitCases[i]);
3152 errln("UCharacter.codePointAt was suppose to return an exception " +
3153 "but got " + UCharacter.codePointAt(reg_text, 100, limitCases[i]) +
3154 ". The following passed parameters were Text: " + String.valueOf(reg_text) + ", Start: " +
3155 100 + ", Limit: " + limitCases[i] + ".");
3156 } catch(Exception e){
3160 // When limit > text.length
3161 for(int i=0; i < limitCases.length; i++){
3163 UCharacter.codePointAt(empty_text, 0, limitCases[i]);
3164 errln("UCharacter.codePointAt was suppose to return an exception " +
3165 "but got " + UCharacter.codePointAt(empty_text, 0, limitCases[i]) +
3166 ". The following passed parameters were Text: " + String.valueOf(empty_text) + ", Start: " +
3167 0 + ", Limit: " + limitCases[i] + ".");
3168 } catch(Exception e){
3172 UCharacter.codePointCount(one_char_text, 0, limitCases[i]);
3173 errln("UCharacter.codePointCount was suppose to return an exception " +
3174 "but got " + UCharacter.codePointCount(one_char_text, 0, limitCases[i]) +
3175 ". The following passed parameters were Text: " + String.valueOf(one_char_text) + ", Start: " +
3176 0 + ", Limit: " + limitCases[i] + ".");
3177 } catch(Exception e){
3183 * The following method tests
3184 * public static final int codePointBefore(CharSequence seq, int index)
3185 * public static final int codePointBefore(char[] text, int index)
3186 * public static final int codePointBefore(char[] text, int index, int limit)
3188 public void TestCodePointBefore(){
3189 // {TRAIL_SURROGATE_MIN_VALUE,
3190 // TRAIL_SURROGATE_MAX_VALUE, TRAIL_SURROGATE_MAX_VALUE -1
3191 String[] cases = {"\uDC00","\uDFFF","\uDDFE"};
3192 int[] result = {56320,57343,56830};
3193 for(int i=0; i < cases.length; i++){
3194 /* Testing UCharacter.codePointBefore(seq, index) */
3195 // Testing when "if (index > 0)" is false
3196 if(UCharacter.codePointBefore((CharSequence) cases[i], 1) != result[i])
3197 errln("UCharacter.codePointBefore(CharSequence ...) did not return as expected. " +
3198 "Passed value: " + cases[i] + ". Expected: " +
3199 result[i] + ". Got: " +
3200 UCharacter.codePointBefore((CharSequence) cases[i], 1));
3202 /* Testing UCharacter.codePointBefore(text, index) */
3203 // Testing when "if (index > 0)" is false
3204 if(UCharacter.codePointBefore(cases[i].toCharArray(), 1) != result[i])
3205 errln("UCharacter.codePointBefore(char[] ...) did not return as expected. " +
3206 "Passed value: " + cases[i] + ". Expected: " +
3207 result[i] + ". Got: " +
3208 UCharacter.codePointBefore(cases[i].toCharArray(), 1));
3210 /* Testing UCharacter.codePointBefore(text, index, limit) */
3211 // Testing when "if (index > limit)" is false
3212 if(UCharacter.codePointBefore(cases[i].toCharArray(), 1, 0) != result[i])
3213 errln("UCharacter.codePointBefore(char[], int, int) did not return as expected. " +
3214 "Passed value: " + cases[i] + ". Expected: " +
3215 result[i] + ". Got: " +
3216 UCharacter.codePointBefore(cases[i].toCharArray(), 1, 0));
3219 /* Testing UCharacter.codePointBefore(text, index, limit) */
3220 char[] dummy = {'d','u','m','m','y'};
3221 // Testing when "if (index <= limit || limit < 0)" is true
3222 int[] negative_cases = {-100,-10,-5,-2,-1};
3223 int[] index_cases = {0,1,2,5,10,100};
3225 for(int i=0; i < negative_cases.length; i++){
3227 UCharacter.codePointBefore(dummy, 10000, negative_cases[i]);
3228 errln("UCharacter.codePointBefore(text, index, limit) was suppose to return an exception " +
3229 "when the parameter limit of " + negative_cases[i] + " is a negative number.");
3230 } catch(Exception e) {}
3233 for(int i=0; i < index_cases.length; i++){
3235 UCharacter.codePointBefore(dummy, index_cases[i], 101);
3236 errln("UCharacter.codePointBefore(text, index, limit) was suppose to return an exception " +
3237 "when the parameter index of " + index_cases[i] + " is a negative number.");
3238 } catch(Exception e) {}
3243 * The following method tests
3244 * public static final int toChars(int cp, char[] dst, int dstIndex)
3245 * public static final char[] toChars(int cp)
3247 public void TestToChars(){
3248 int[] positive_cases = {1,2,5,10,100};
3251 /* Testing UCharacter.toChars(cp, dst, dstIndex) */
3252 for(int i=0; i < positive_cases.length; i++){
3253 // Testing negative values when cp < 0 for if (cp >= 0)
3255 UCharacter.toChars(-1*positive_cases[i],dst,0);
3256 errln("UCharacter.toChars(int,char[],int) was suppose to return an exception " +
3257 "when the parameter " + (-1*positive_cases[i]) + " is a negative number.");
3258 } catch(Exception e){
3261 // Testing when "if (cp < MIN_SUPPLEMENTARY_CODE_POINT)" is true
3262 if(UCharacter.toChars(UCharacter.MIN_SUPPLEMENTARY_CODE_POINT-positive_cases[i], dst, 0) != 1){
3263 errln("UCharacter.toChars(int,char[],int) was suppose to return a value of 1. Got: " +
3264 UCharacter.toChars(UCharacter.MIN_SUPPLEMENTARY_CODE_POINT-positive_cases[i], dst, 0));
3267 // Testing when "if (cp < MIN_SUPPLEMENTARY_CODE_POINT)" is false and
3268 // when "if (cp <= MAX_CODE_POINT)" is false
3270 UCharacter.toChars(UCharacter.MAX_CODE_POINT+positive_cases[i],dst,0);
3271 errln("UCharacter.toChars(int,char[],int) was suppose to return an exception " +
3272 "when the parameter " + (UCharacter.MAX_CODE_POINT+positive_cases[i]) +
3273 " is a large number.");
3274 } catch(Exception e){
3279 /* Testing UCharacter.toChars(cp)*/
3280 for(int i=0; i<positive_cases.length; i++){
3281 // Testing negative values when cp < 0 for if (cp >= 0)
3283 UCharacter.toChars(-1*positive_cases[i]);
3284 errln("UCharacter.toChars(cint) was suppose to return an exception " +
3285 "when the parameter " + positive_cases[i] + " is a negative number.");
3286 } catch(Exception e){
3289 // Testing when "if (cp < MIN_SUPPLEMENTARY_CODE_POINT)" is true
3290 if(UCharacter.toChars(UCharacter.MIN_SUPPLEMENTARY_CODE_POINT-positive_cases[i]).length <= 0){
3291 errln("UCharacter.toChars(int) was suppose to return some result result when the parameter " +
3292 (UCharacter.MIN_SUPPLEMENTARY_CODE_POINT-positive_cases[i]) + "is passed.");
3295 // Testing when "if (cp < MIN_SUPPLEMENTARY_CODE_POINT)" is false and
3296 // when "if (cp <= MAX_CODE_POINT)" is false
3298 UCharacter.toChars(UCharacter.MAX_CODE_POINT+positive_cases[i]);
3299 errln("UCharacter.toChars(int) was suppose to return an exception " +
3300 "when the parameter " + positive_cases[i] + " is a large number.");
3301 } catch(Exception e){
3307 * The following method tests
3308 * public static int codePointCount(CharSequence text, int start, int limit)
3309 * public static int codePointCount(char[] text, int start, int limit)
3311 public void TestCodePointCount(){
3312 // The following tests the first if statement to make it true:
3313 // if (start < 0 || limit < start || limit > text.length)
3314 // which will throw an exception.
3315 char[] empty_text = {};
3316 char[] one_char_text = {'a'};
3317 char[] reg_text = {'d','u','m','m','y'};
3318 int[] invalid_startCases = {-1,-2,-5,-10,-100};
3319 int[] limitCases = {2,3,5,10,25};
3322 for(int i=0; i < invalid_startCases.length; i++){
3324 UCharacter.codePointCount(reg_text, invalid_startCases[i], 1);
3325 errln("UCharacter.codePointCount was suppose to return an exception " +
3326 "but got " + UCharacter.codePointCount(reg_text, invalid_startCases[i], 1) +
3327 ". The following passed parameters were Text: " + reg_text.toString() + ", Start: " +
3328 invalid_startCases[i] + ", Limit: " + 1 + ".");
3329 } catch(Exception e){
3333 // When limit < start
3334 for(int i=0; i < limitCases.length; i++){
3336 UCharacter.codePointCount(reg_text, 100, limitCases[i]);
3337 errln("UCharacter.codePointCount was suppose to return an exception " +
3338 "but got " + UCharacter.codePointCount(reg_text, 100, limitCases[i]) +
3339 ". The following passed parameters were Text: " + String.valueOf(reg_text) + ", Start: " +
3340 100 + ", Limit: " + limitCases[i] + ".");
3341 } catch(Exception e){
3345 // When limit > text.length
3346 for(int i=0; i < limitCases.length; i++){
3348 UCharacter.codePointCount(empty_text, 0, limitCases[i]);
3349 errln("UCharacter.codePointCount was suppose to return an exception " +
3350 "but got " + UCharacter.codePointCount(empty_text, 0, limitCases[i]) +
3351 ". The following passed parameters were Text: " + String.valueOf(empty_text) + ", Start: " +
3352 0 + ", Limit: " + limitCases[i] + ".");
3353 } catch(Exception e){
3357 UCharacter.codePointCount(one_char_text, 0, limitCases[i]);
3358 errln("UCharacter.codePointCount was suppose to return an exception " +
3359 "but got " + UCharacter.codePointCount(one_char_text, 0, limitCases[i]) +
3360 ". The following passed parameters were Text: " + String.valueOf(one_char_text) + ", Start: " +
3361 0 + ", Limit: " + limitCases[i] + ".");
3362 } catch(Exception e){
3368 * The following method tests
3369 * private static int getEuropeanDigit(int ch)
3370 * The method needs to use the method "digit" in order to access the
3371 * getEuropeanDigit method.
3373 public void TestGetEuropeanDigit(){
3374 //The number retrieved from 0xFF41 to 0xFF5A is due to
3375 // exhaustive testing from UTF16.CODEPOINT_MIN_VALUE to
3376 // UTF16.CODEPOINT_MAX_VALUE return a value of -1.
3378 int[] radixResult = {
3379 10,11,12,13,14,15,16,17,18,19,20,21,22,
3380 23,24,25,26,27,28,29,30,31,32,33,34,35};
3381 // Invalid and too-small-for-these-digits radix values.
3382 int[] radixCase1 = {0,1,5,10,100};
3383 // Radix values that work for at least some of the "digits".
3384 int[] radixCase2 = {12,16,20,36};
3386 for(int i=0xFF41; i<=0xFF5A; i++){
3387 for(int j=0; j < radixCase1.length; j++){
3388 if(UCharacter.digit(i, radixCase1[j]) != -1){
3389 errln("UCharacter.digit(int,int) was supposed to return -1 for radix " + radixCase1[j]
3390 + ". Value passed: U+" + Integer.toHexString(i) + ". Got: " + UCharacter.digit(i, radixCase1[j]));
3393 for(int j=0; j < radixCase2.length; j++){
3394 int radix = radixCase2[j];
3395 int expected = (radixResult[i-0xFF41] < radix) ? radixResult[i-0xFF41] : -1;
3396 int actual = UCharacter.digit(i, radix);
3397 if(actual != expected){
3398 errln("UCharacter.digit(int,int) was supposed to return " +
3399 expected + " for radix " + radix +
3400 ". Value passed: U+" + Integer.toHexString(i) + ". Got: " + actual);
3408 * private static final int getProperty(int ch)
3409 * from public static int getType(int ch)
3411 public void TestGetProperty(){
3412 int[] cases = {UTF16.CODEPOINT_MAX_VALUE+1, UTF16.CODEPOINT_MAX_VALUE+2};
3413 for(int i=0; i < cases.length; i++)
3414 if(UCharacter.getType(cases[i]) != 0)
3415 errln("UCharacter.getType for testing UCharacter.getProperty "
3416 + "did not return 0 for passed value of " + cases[i] +
3417 " but got " + UCharacter.getType(cases[i]));
3421 * abstract public static class XSymbolTable implements SymbolTable
3423 public void TestXSymbolTable(){
3424 class MyXSymbolTable extends UnicodeSet.XSymbolTable {}
3425 MyXSymbolTable st = new MyXSymbolTable();
3427 // Tests "public UnicodeMatcher lookupMatcher(int i)"
3428 if(st.lookupMatcher(0) != null)
3429 errln("XSymbolTable.lookupMatcher(int i) was suppose to return null.");
3431 // Tests "public boolean applyPropertyAlias(String propertyName, String propertyValue, UnicodeSet result)"
3432 if(st.applyPropertyAlias("", "", new UnicodeSet()) != false)
3433 errln("XSymbolTable.applyPropertyAlias(String propertyName, String propertyValue, UnicodeSet result) was suppose to return false.");
3435 // Tests "public char[] lookup(String s)"
3436 if(st.lookup("") != null)
3437 errln("XSymbolTable.lookup(String s) was suppose to return null.");
3439 // Tests "public String parseReference(String text, ParsePosition pos, int limit)"
3440 if(st.parseReference("", null, 0) != null)
3441 errln("XSymbolTable.parseReference(String text, ParsePosition pos, int limit) was suppose to return null.");
3445 * public boolean isFrozen()
3447 public void TestIsFrozen(){
3448 UnicodeSet us = new UnicodeSet();
3449 if(us.isFrozen() != false)
3450 errln("Unicode.isFrozen() was suppose to return false.");
3453 if(us.isFrozen() != true)
3454 errln("Unicode.isFrozen() was suppose to return true.");