jars/icu4j-4_8_1_1/main/classes/core/src/com/ibm/icu/lang/UCharacter.java

   1 //##header
   2 /**
   3 *******************************************************************************
   4 * Copyright (C) 1996-2011, International Business Machines Corporation and    *
   5 * others. All Rights Reserved.                                                *
   6 *******************************************************************************
   7 */
   8
   9 package com.ibm.icu.lang;
  10
  11 import java.lang.ref.SoftReference;
  12 import java.util.HashMap;
  13 import java.util.Iterator;
  14 import java.util.Locale;
  15 import java.util.Map;
  16
  17 import com.ibm.icu.impl.IllegalIcuArgumentException;
  18 import com.ibm.icu.impl.Norm2AllModes;
  19 import com.ibm.icu.impl.Normalizer2Impl;
  20 import com.ibm.icu.impl.Trie2;
  21 import com.ibm.icu.impl.UBiDiProps;
  22 import com.ibm.icu.impl.UCaseProps;
  23 import com.ibm.icu.impl.UCharacterName;
  24 import com.ibm.icu.impl.UCharacterNameChoice;
  25 import com.ibm.icu.impl.UCharacterProperty;
  26 import com.ibm.icu.impl.UCharacterUtility;
  27 import com.ibm.icu.impl.UPropertyAliases;
  28 import com.ibm.icu.lang.UCharacterEnums.ECharacterCategory;
  29 import com.ibm.icu.lang.UCharacterEnums.ECharacterDirection;
  30 import com.ibm.icu.text.BreakIterator;
  31 import com.ibm.icu.text.UTF16;
  32 import com.ibm.icu.util.RangeValueIterator;
  33 import com.ibm.icu.util.ULocale;
  34 import com.ibm.icu.util.ValueIterator;
  35 import com.ibm.icu.util.VersionInfo;
  36
  37 /**
  38  * {@icuenhanced java.lang.Character}.{@icu _usage_}
  39  *
  40  * <p>The UCharacter class provides extensions to the
  41  * <a href="http://java.sun.com/j2se/1.5/docs/api/java/lang/Character.html">
  42  * java.lang.Character</a> class. These extensions provide support for
  43  * more Unicode properties and together with the <a href=../text/UTF16.html>UTF16</a>
  44  * class, provide support for supplementary characters (those with code
  45  * points above U+FFFF).
  46  * Each ICU release supports the latest version of Unicode available at that time.
  47  *
  48  * <p>Code points are represented in these APIs using ints. While it would be
  49  * more convenient in Java to have a separate primitive datatype for them,
  50  * ints suffice in the meantime.
  51  *
  52  * <p>To use this class please add the jar file icu4j.jar to the
  53  * class path, since it contains data files which supply the information used
  54  * by this file.<br>
  55  * E.g. In Windows <br>
  56  * <code>set CLASSPATH=%CLASSPATH%;$JAR_FILE_PATH/icu4j.jar</code>.<br>
  57  * Otherwise, another method would be to copy the files uprops.dat and
  58  * unames.icu from the icu4j source subdirectory
  59  * <i>$ICU4J_SRC/src/com.ibm.icu.impl.data</i> to your class directory
  60  * <i>$ICU4J_CLASS/com.ibm.icu.impl.data</i>.
  61  *
  62  * <p>Aside from the additions for UTF-16 support, and the updated Unicode
  63  * properties, the main differences between UCharacter and Character are:
  64  * <ul>
  65  * <li> UCharacter is not designed to be a char wrapper and does not have
  66  *      APIs which involve management of that single char.<br>
  67  *      These include:
  68  *      <ul>
  69  *        <li> char charValue(),
  70  *        <li> int compareTo(java.lang.Character, java.lang.Character), etc.
  71  *      </ul>
  72  * <li> UCharacter does not include Character APIs that are deprecated, nor
  73  *      does it include the Java-specific character information, such as
  74  *      boolean isJavaIdentifierPart(char ch).
  75  * <li> Character maps characters 'A' - 'Z' and 'a' - 'z' to the numeric
  76  *      values '10' - '35'. UCharacter also does this in digit and
  77  *      getNumericValue, to adhere to the java semantics of these
  78  *      methods.  New methods unicodeDigit, and
  79  *      getUnicodeNumericValue do not treat the above code points
  80  *      as having numeric values.  This is a semantic change from ICU4J 1.3.1.
  81  * </ul>
  82  * <p>
  83  * Further detail on differences can be determined using the program
  84  *        <a href=
  85  * "http://source.icu-project.org/repos/icu/icu4j/trunk/src/com/ibm/icu/dev/test/lang/UCharacterCompare.java">
  86  *        com.ibm.icu.dev.test.lang.UCharacterCompare</a>
  87  * </p>
  88  * <p>
  89  * In addition to Java compatibility functions, which calculate derived properties,
  90  * this API provides low-level access to the Unicode Character Database.
  91  * </p>
  92  * <p>
  93  * Unicode assigns each code point (not just assigned character) values for
  94  * many properties.
  95  * Most of them are simple boolean flags, or constants from a small enumerated list.
  96  * For some properties, values are strings or other relatively more complex types.
  97  * </p>
  98  * <p>
  99  * For more information see
 100  * <a href="http://www.unicode.org/ucd/">"About the Unicode Character Database"</a>
 101  * (http://www.unicode.org/ucd/)
 102  * and the <a href="http://www.icu-project.org/userguide/properties.html">ICU
 103  * User Guide chapter on Properties</a>
 104  * (http://www.icu-project.org/userguide/properties.html).
 105  * </p>
 106  * <p>
 107  * There are also functions that provide easy migration from C/POSIX functions
 108  * like isblank(). Their use is generally discouraged because the C/POSIX
 109  * standards do not define their semantics beyond the ASCII range, which means
 110  * that different implementations exhibit very different behavior.
 111  * Instead, Unicode properties should be used directly.
 112  * </p>
 113  * <p>
 114  * There are also only a few, broad C/POSIX character classes, and they tend
 115  * to be used for conflicting purposes. For example, the "isalpha()" class
 116  * is sometimes used to determine word boundaries, while a more sophisticated
 117  * approach would at least distinguish initial letters from continuation
 118  * characters (the latter including combining marks).
 119  * (In ICU, BreakIterator is the most sophisticated API for word boundaries.)
 120  * Another example: There is no "istitle()" class for titlecase characters.
 121  * </p>
 122  * <p>
 123  * ICU 3.4 and later provides API access for all twelve C/POSIX character classes.
 124  * ICU implements them according to the Standard Recommendations in
 125  * Annex C: Compatibility Properties of UTS #18 Unicode Regular Expressions
 126  * (http://www.unicode.org/reports/tr18/#Compatibility_Properties).
 127  * </p>
 128  * <p>
 129  * API access for C/POSIX character classes is as follows:
 130  * <pre>{@code
 131  * - alpha:     isUAlphabetic(c) or hasBinaryProperty(c, UProperty.ALPHABETIC)
 132  * - lower:     isULowercase(c) or hasBinaryProperty(c, UProperty.LOWERCASE)
 133  * - upper:     isUUppercase(c) or hasBinaryProperty(c, UProperty.UPPERCASE)
 134  * - punct:     ((1<<getType(c)) & ((1<<DASH_PUNCTUATION)|(1<<START_PUNCTUATION)|
 135  *               (1<<END_PUNCTUATION)|(1<<CONNECTOR_PUNCTUATION)|(1<<OTHER_PUNCTUATION)|
 136  *               (1<<INITIAL_PUNCTUATION)|(1<<FINAL_PUNCTUATION)))!=0
 137  * - digit:     isDigit(c) or getType(c)==DECIMAL_DIGIT_NUMBER
 138  * - xdigit:    hasBinaryProperty(c, UProperty.POSIX_XDIGIT)
 139  * - alnum:     hasBinaryProperty(c, UProperty.POSIX_ALNUM)
 140  * - space:     isUWhiteSpace(c) or hasBinaryProperty(c, UProperty.WHITE_SPACE)
 141  * - blank:     hasBinaryProperty(c, UProperty.POSIX_BLANK)
 142  * - cntrl:     getType(c)==CONTROL
 143  * - graph:     hasBinaryProperty(c, UProperty.POSIX_GRAPH)
 144  * - print:     hasBinaryProperty(c, UProperty.POSIX_PRINT)}</pre>
 145  * </p>
 146  * <p>
 147  * The C/POSIX character classes are also available in UnicodeSet patterns,
 148  * using patterns like [:graph:] or \p{graph}.
 149  * </p>
 150  *
 151  * {@icunote} There are several ICU (and Java) whitespace functions.
 152  * Comparison:<ul>
 153  * <li> isUWhiteSpace=UCHAR_WHITE_SPACE: Unicode White_Space property;
 154  *       most of general categories "Z" (separators) + most whitespace ISO controls
 155  *       (including no-break spaces, but excluding IS1..IS4 and ZWSP)
 156  * <li> isWhitespace: Java isWhitespace; Z + whitespace ISO controls but excluding no-break spaces
 157  * <li> isSpaceChar: just Z (including no-break spaces)</ul>
 158  * </p>
 159  * <p>
 160  * This class is not subclassable.
 161  * </p>
 162  * @author Syn Wee Quek
 163  * @stable ICU 2.1
 164  * @see com.ibm.icu.lang.UCharacterEnums
 165  */
 166
 167 public final class UCharacter implements ECharacterCategory, ECharacterDirection
 168 {
 169     // public inner classes ----------------------------------------------
 170
 171     /**
 172      * {@icuenhanced java.lang.Character.UnicodeBlock}.{@icu _usage_}
 173      *
 174      * A family of character subsets representing the character blocks in the
 175      * Unicode specification, generated from Unicode Data file Blocks.txt.
 176      * Character blocks generally define characters used for a specific script
 177      * or purpose. A character is contained by at most one Unicode block.
 178      *
 179      * {@icunote} All fields named XXX_ID are specific to ICU.
 180      *
 181      * @stable ICU 2.4
 182      */
 183     public static final class UnicodeBlock extends Character.Subset
 184     {
 185         // block id corresponding to icu4c -----------------------------------
 186
 187         /**
 188          * @stable ICU 2.4
 189          */
 190         public static final int INVALID_CODE_ID = -1;
 191         /**
 192          * @stable ICU 2.4
 193          */
 194         public static final int BASIC_LATIN_ID = 1;
 195         /**
 196          * @stable ICU 2.4
 197          */
 198         public static final int LATIN_1_SUPPLEMENT_ID = 2;
 199         /**
 200          * @stable ICU 2.4
 201          */
 202         public static final int LATIN_EXTENDED_A_ID = 3;
 203         /**
 204          * @stable ICU 2.4
 205          */
 206         public static final int LATIN_EXTENDED_B_ID = 4;
 207         /**
 208          * @stable ICU 2.4
 209          */
 210         public static final int IPA_EXTENSIONS_ID = 5;
 211         /**
 212          * @stable ICU 2.4
 213          */
 214         public static final int SPACING_MODIFIER_LETTERS_ID = 6;
 215         /**
 216          * @stable ICU 2.4
 217          */
 218         public static final int COMBINING_DIACRITICAL_MARKS_ID = 7;
 219         /**
 220          * Unicode 3.2 renames this block to "Greek and Coptic".
 221          * @stable ICU 2.4
 222          */
 223         public static final int GREEK_ID = 8;
 224         /**
 225          * @stable ICU 2.4
 226          */
 227         public static final int CYRILLIC_ID = 9;
 228         /**
 229          * @stable ICU 2.4
 230          */
 231         public static final int ARMENIAN_ID = 10;
 232         /**
 233          * @stable ICU 2.4
 234          */
 235         public static final int HEBREW_ID = 11;
 236         /**
 237          * @stable ICU 2.4
 238          */
 239         public static final int ARABIC_ID = 12;
 240         /**
 241          * @stable ICU 2.4
 242          */
 243         public static final int SYRIAC_ID = 13;
 244         /**
 245          * @stable ICU 2.4
 246          */
 247         public static final int THAANA_ID = 14;
 248         /**
 249          * @stable ICU 2.4
 250          */
 251         public static final int DEVANAGARI_ID = 15;
 252         /**
 253          * @stable ICU 2.4
 254          */
 255         public static final int BENGALI_ID = 16;
 256         /**
 257          * @stable ICU 2.4
 258          */
 259         public static final int GURMUKHI_ID = 17;
 260         /**
 261          * @stable ICU 2.4
 262          */
 263         public static final int GUJARATI_ID = 18;
 264         /**
 265          * @stable ICU 2.4
 266          */
 267         public static final int ORIYA_ID = 19;
 268         /**
 269          * @stable ICU 2.4
 270          */
 271         public static final int TAMIL_ID = 20;
 272         /**
 273          * @stable ICU 2.4
 274          */
 275         public static final int TELUGU_ID = 21;
 276         /**
 277          * @stable ICU 2.4
 278          */
 279         public static final int KANNADA_ID = 22;
 280         /**
 281          * @stable ICU 2.4
 282          */
 283         public static final int MALAYALAM_ID = 23;
 284         /**
 285          * @stable ICU 2.4
 286          */
 287         public static final int SINHALA_ID = 24;
 288         /**
 289          * @stable ICU 2.4
 290          */
 291         public static final int THAI_ID = 25;
 292         /**
 293          * @stable ICU 2.4
 294          */
 295         public static final int LAO_ID = 26;
 296         /**
 297          * @stable ICU 2.4
 298          */
 299         public static final int TIBETAN_ID = 27;
 300         /**
 301          * @stable ICU 2.4
 302          */
 303         public static final int MYANMAR_ID = 28;
 304         /**
 305          * @stable ICU 2.4
 306          */
 307         public static final int GEORGIAN_ID = 29;
 308         /**
 309          * @stable ICU 2.4
 310          */
 311         public static final int HANGUL_JAMO_ID = 30;
 312         /**
 313          * @stable ICU 2.4
 314          */
 315         public static final int ETHIOPIC_ID = 31;
 316         /**
 317          * @stable ICU 2.4
 318          */
 319         public static final int CHEROKEE_ID = 32;
 320         /**
 321          * @stable ICU 2.4
 322          */
 323         public static final int UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_ID = 33;
 324         /**
 325          * @stable ICU 2.4
 326          */
 327         public static final int OGHAM_ID = 34;
 328         /**
 329          * @stable ICU 2.4
 330          */
 331         public static final int RUNIC_ID = 35;
 332         /**
 333          * @stable ICU 2.4
 334          */
 335         public static final int KHMER_ID = 36;
 336         /**
 337          * @stable ICU 2.4
 338          */
 339         public static final int MONGOLIAN_ID = 37;
 340         /**
 341          * @stable ICU 2.4
 342          */
 343         public static final int LATIN_EXTENDED_ADDITIONAL_ID = 38;
 344         /**
 345          * @stable ICU 2.4
 346          */
 347         public static final int GREEK_EXTENDED_ID = 39;
 348         /**
 349          * @stable ICU 2.4
 350          */
 351         public static final int GENERAL_PUNCTUATION_ID = 40;
 352         /**
 353          * @stable ICU 2.4
 354          */
 355         public static final int SUPERSCRIPTS_AND_SUBSCRIPTS_ID = 41;
 356         /**
 357          * @stable ICU 2.4
 358          */
 359         public static final int CURRENCY_SYMBOLS_ID = 42;
 360         /**
 361          * Unicode 3.2 renames this block to "Combining Diacritical Marks for
 362          * Symbols".
 363          * @stable ICU 2.4
 364          */
 365         public static final int COMBINING_MARKS_FOR_SYMBOLS_ID = 43;
 366         /**
 367          * @stable ICU 2.4
 368          */
 369         public static final int LETTERLIKE_SYMBOLS_ID = 44;
 370         /**
 371          * @stable ICU 2.4
 372          */
 373         public static final int NUMBER_FORMS_ID = 45;
 374         /**
 375          * @stable ICU 2.4
 376          */
 377         public static final int ARROWS_ID = 46;
 378         /**
 379          * @stable ICU 2.4
 380          */
 381         public static final int MATHEMATICAL_OPERATORS_ID = 47;
 382         /**
 383          * @stable ICU 2.4
 384          */
 385         public static final int MISCELLANEOUS_TECHNICAL_ID = 48;
 386         /**
 387          * @stable ICU 2.4
 388          */
 389         public static final int CONTROL_PICTURES_ID = 49;
 390         /**
 391          * @stable ICU 2.4
 392          */
 393         public static final int OPTICAL_CHARACTER_RECOGNITION_ID = 50;
 394         /**
 395          * @stable ICU 2.4
 396          */
 397         public static final int ENCLOSED_ALPHANUMERICS_ID = 51;
 398         /**
 399          * @stable ICU 2.4
 400          */
 401         public static final int BOX_DRAWING_ID = 52;
 402         /**
 403          * @stable ICU 2.4
 404          */
 405         public static final int BLOCK_ELEMENTS_ID = 53;
 406         /**
 407          * @stable ICU 2.4
 408          */
 409         public static final int GEOMETRIC_SHAPES_ID = 54;
 410         /**
 411          * @stable ICU 2.4
 412          */
 413         public static final int MISCELLANEOUS_SYMBOLS_ID = 55;
 414         /**
 415          * @stable ICU 2.4
 416          */
 417         public static final int DINGBATS_ID = 56;
 418         /**
 419          * @stable ICU 2.4
 420          */
 421         public static final int BRAILLE_PATTERNS_ID = 57;
 422         /**
 423          * @stable ICU 2.4
 424          */
 425         public static final int CJK_RADICALS_SUPPLEMENT_ID = 58;
 426         /**
 427          * @stable ICU 2.4
 428          */
 429         public static final int KANGXI_RADICALS_ID = 59;
 430         /**
 431          * @stable ICU 2.4
 432          */
 433         public static final int IDEOGRAPHIC_DESCRIPTION_CHARACTERS_ID = 60;
 434         /**
 435          * @stable ICU 2.4
 436          */
 437         public static final int CJK_SYMBOLS_AND_PUNCTUATION_ID = 61;
 438         /**
 439          * @stable ICU 2.4
 440          */
 441         public static final int HIRAGANA_ID = 62;
 442         /**
 443          * @stable ICU 2.4
 444          */
 445         public static final int KATAKANA_ID = 63;
 446         /**
 447          * @stable ICU 2.4
 448          */
 449         public static final int BOPOMOFO_ID = 64;
 450         /**
 451          * @stable ICU 2.4
 452          */
 453         public static final int HANGUL_COMPATIBILITY_JAMO_ID = 65;
 454         /**
 455          * @stable ICU 2.4
 456          */
 457         public static final int KANBUN_ID = 66;
 458         /**
 459          * @stable ICU 2.4
 460          */
 461         public static final int BOPOMOFO_EXTENDED_ID = 67;
 462         /**
 463          * @stable ICU 2.4
 464          */
 465         public static final int ENCLOSED_CJK_LETTERS_AND_MONTHS_ID = 68;
 466         /**
 467          * @stable ICU 2.4
 468          */
 469         public static final int CJK_COMPATIBILITY_ID = 69;
 470         /**
 471          * @stable ICU 2.4
 472          */
 473         public static final int CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A_ID = 70;
 474         /**
 475          * @stable ICU 2.4
 476          */
 477         public static final int CJK_UNIFIED_IDEOGRAPHS_ID = 71;
 478         /**
 479          * @stable ICU 2.4
 480          */
 481         public static final int YI_SYLLABLES_ID = 72;
 482         /**
 483          * @stable ICU 2.4
 484          */
 485         public static final int YI_RADICALS_ID = 73;
 486         /**
 487          * @stable ICU 2.4
 488          */
 489         public static final int HANGUL_SYLLABLES_ID = 74;
 490         /**
 491          * @stable ICU 2.4
 492          */
 493         public static final int HIGH_SURROGATES_ID = 75;
 494         /**
 495          * @stable ICU 2.4
 496          */
 497         public static final int HIGH_PRIVATE_USE_SURROGATES_ID = 76;
 498         /**
 499          * @stable ICU 2.4
 500          */
 501         public static final int LOW_SURROGATES_ID = 77;
 502         /**
 503          * Same as public static final int PRIVATE_USE_ID.
 504          * Until Unicode 3.1.1, the corresponding block name was "Private Use",
 505          * and multiple code point ranges had this block.
 506          * Unicode 3.2 renames the block for the BMP PUA to "Private Use Area"
 507          * and adds separate blocks for the supplementary PUAs.
 508          * @stable ICU 2.4
 509          */
 510         public static final int PRIVATE_USE_AREA_ID = 78;
 511         /**
 512          * Same as public static final int PRIVATE_USE_AREA_ID.
 513          * Until Unicode 3.1.1, the corresponding block name was "Private Use",
 514          * and multiple code point ranges had this block.
 515          * Unicode 3.2 renames the block for the BMP PUA to "Private Use Area"
 516          * and adds separate blocks for the supplementary PUAs.
 517          * @stable ICU 2.4
 518          */
 519         public static final int PRIVATE_USE_ID = PRIVATE_USE_AREA_ID;
 520         /**
 521          * @stable ICU 2.4
 522          */
 523         public static final int CJK_COMPATIBILITY_IDEOGRAPHS_ID = 79;
 524         /**
 525          * @stable ICU 2.4
 526          */
 527         public static final int ALPHABETIC_PRESENTATION_FORMS_ID = 80;
 528         /**
 529          * @stable ICU 2.4
 530          */
 531         public static final int ARABIC_PRESENTATION_FORMS_A_ID = 81;
 532         /**
 533          * @stable ICU 2.4
 534          */
 535         public static final int COMBINING_HALF_MARKS_ID = 82;
 536         /**
 537          * @stable ICU 2.4
 538          */
 539         public static final int CJK_COMPATIBILITY_FORMS_ID = 83;
 540         /**
 541          * @stable ICU 2.4
 542          */
 543         public static final int SMALL_FORM_VARIANTS_ID = 84;
 544         /**
 545          * @stable ICU 2.4
 546          */
 547         public static final int ARABIC_PRESENTATION_FORMS_B_ID = 85;
 548         /**
 549          * @stable ICU 2.4
 550          */
 551         public static final int SPECIALS_ID = 86;
 552         /**
 553          * @stable ICU 2.4
 554          */
 555         public static final int HALFWIDTH_AND_FULLWIDTH_FORMS_ID = 87;
 556         /**
 557          * @stable ICU 2.4
 558          */
 559         public static final int OLD_ITALIC_ID = 88;
 560         /**
 561          * @stable ICU 2.4
 562          */
 563         public static final int GOTHIC_ID = 89;
 564         /**
 565          * @stable ICU 2.4
 566          */
 567         public static final int DESERET_ID = 90;
 568         /**
 569          * @stable ICU 2.4
 570          */
 571         public static final int BYZANTINE_MUSICAL_SYMBOLS_ID = 91;
 572         /**
 573          * @stable ICU 2.4
 574          */
 575         public static final int MUSICAL_SYMBOLS_ID = 92;
 576         /**
 577          * @stable ICU 2.4
 578          */
 579         public static final int MATHEMATICAL_ALPHANUMERIC_SYMBOLS_ID = 93;
 580         /**
 581          * @stable ICU 2.4
 582          */
 583         public static final int CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B_ID = 94;
 584         /**
 585          * @stable ICU 2.4
 586          */
 587         public static final int
 588             CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT_ID = 95;
 589         /**
 590          * @stable ICU 2.4
 591          */
 592         public static final int TAGS_ID = 96;
 593
 594         // New blocks in Unicode 3.2
 595
 596         /**
 597          * Unicode 4.0.1 renames the "Cyrillic Supplementary" block to "Cyrillic Supplement".
 598          * @stable ICU 2.4
 599          */
 600         public static final int CYRILLIC_SUPPLEMENTARY_ID = 97;
 601         /**
 602          * Unicode 4.0.1 renames the "Cyrillic Supplementary" block to "Cyrillic Supplement".
 603          * @stable ICU 3.0
 604          */
 605
 606         public static final int CYRILLIC_SUPPLEMENT_ID = 97;
 607         /**
 608          * @stable ICU 2.4
 609          */
 610         public static final int TAGALOG_ID = 98;
 611         /**
 612          * @stable ICU 2.4
 613          */
 614         public static final int HANUNOO_ID = 99;
 615         /**
 616          * @stable ICU 2.4
 617          */
 618         public static final int BUHID_ID = 100;
 619         /**
 620          * @stable ICU 2.4
 621          */
 622         public static final int TAGBANWA_ID = 101;
 623         /**
 624          * @stable ICU 2.4
 625          */
 626         public static final int MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A_ID = 102;
 627         /**
 628          * @stable ICU 2.4
 629          */
 630         public static final int SUPPLEMENTAL_ARROWS_A_ID = 103;
 631         /**
 632          * @stable ICU 2.4
 633          */
 634         public static final int SUPPLEMENTAL_ARROWS_B_ID = 104;
 635         /**
 636          * @stable ICU 2.4
 637          */
 638         public static final int MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B_ID = 105;
 639         /**
 640          * @stable ICU 2.4
 641          */
 642         public static final int SUPPLEMENTAL_MATHEMATICAL_OPERATORS_ID = 106;
 643         /**
 644          * @stable ICU 2.4
 645          */
 646         public static final int KATAKANA_PHONETIC_EXTENSIONS_ID = 107;
 647         /**
 648          * @stable ICU 2.4
 649          */
 650         public static final int VARIATION_SELECTORS_ID = 108;
 651         /**
 652          * @stable ICU 2.4
 653          */
 654         public static final int SUPPLEMENTARY_PRIVATE_USE_AREA_A_ID = 109;
 655         /**
 656          * @stable ICU 2.4
 657          */
 658         public static final int SUPPLEMENTARY_PRIVATE_USE_AREA_B_ID = 110;
 659
 660         /**
 661          * @stable ICU 2.6
 662          */
 663         public static final int LIMBU_ID = 111; /*[1900]*/
 664         /**
 665          * @stable ICU 2.6
 666          */
 667         public static final int TAI_LE_ID = 112; /*[1950]*/
 668         /**
 669          * @stable ICU 2.6
 670          */
 671         public static final int KHMER_SYMBOLS_ID = 113; /*[19E0]*/
 672         /**
 673          * @stable ICU 2.6
 674          */
 675         public static final int PHONETIC_EXTENSIONS_ID = 114; /*[1D00]*/
 676         /**
 677          * @stable ICU 2.6
 678          */
 679         public static final int MISCELLANEOUS_SYMBOLS_AND_ARROWS_ID = 115; /*[2B00]*/
 680         /**
 681          * @stable ICU 2.6
 682          */
 683         public static final int YIJING_HEXAGRAM_SYMBOLS_ID = 116; /*[4DC0]*/
 684         /**
 685          * @stable ICU 2.6
 686          */
 687         public static final int LINEAR_B_SYLLABARY_ID = 117; /*[10000]*/
 688         /**
 689          * @stable ICU 2.6
 690          */
 691         public static final int LINEAR_B_IDEOGRAMS_ID = 118; /*[10080]*/
 692         /**
 693          * @stable ICU 2.6
 694          */
 695         public static final int AEGEAN_NUMBERS_ID = 119; /*[10100]*/
 696         /**
 697          * @stable ICU 2.6
 698          */
 699         public static final int UGARITIC_ID = 120; /*[10380]*/
 700         /**
 701          * @stable ICU 2.6
 702          */
 703         public static final int SHAVIAN_ID = 121; /*[10450]*/
 704         /**
 705          * @stable ICU 2.6
 706          */
 707         public static final int OSMANYA_ID = 122; /*[10480]*/
 708         /**
 709          * @stable ICU 2.6
 710          */
 711         public static final int CYPRIOT_SYLLABARY_ID = 123; /*[10800]*/
 712         /**
 713          * @stable ICU 2.6
 714          */
 715         public static final int TAI_XUAN_JING_SYMBOLS_ID = 124; /*[1D300]*/
 716         /**
 717          * @stable ICU 2.6
 718          */
 719         public static final int VARIATION_SELECTORS_SUPPLEMENT_ID = 125; /*[E0100]*/
 720
 721         /* New blocks in Unicode 4.1 */
 722
 723         /**
 724          * @stable ICU 3.4
 725          */
 726         public static final int ANCIENT_GREEK_MUSICAL_NOTATION_ID = 126; /*[1D200]*/
 727
 728         /**
 729          * @stable ICU 3.4
 730          */
 731         public static final int ANCIENT_GREEK_NUMBERS_ID = 127; /*[10140]*/
 732
 733         /**
 734          * @stable ICU 3.4
 735          */
 736         public static final int ARABIC_SUPPLEMENT_ID = 128; /*[0750]*/
 737
 738         /**
 739          * @stable ICU 3.4
 740          */
 741         public static final int BUGINESE_ID = 129; /*[1A00]*/
 742
 743         /**
 744          * @stable ICU 3.4
 745          */
 746         public static final int CJK_STROKES_ID = 130; /*[31C0]*/
 747
 748         /**
 749          * @stable ICU 3.4
 750          */
 751         public static final int COMBINING_DIACRITICAL_MARKS_SUPPLEMENT_ID = 131; /*[1DC0]*/
 752
 753         /**
 754          * @stable ICU 3.4
 755          */
 756         public static final int COPTIC_ID = 132; /*[2C80]*/
 757
 758         /**
 759          * @stable ICU 3.4
 760          */
 761         public static final int ETHIOPIC_EXTENDED_ID = 133; /*[2D80]*/
 762
 763         /**
 764          * @stable ICU 3.4
 765          */
 766         public static final int ETHIOPIC_SUPPLEMENT_ID = 134; /*[1380]*/
 767
 768         /**
 769          * @stable ICU 3.4
 770          */
 771         public static final int GEORGIAN_SUPPLEMENT_ID = 135; /*[2D00]*/
 772
 773         /**
 774          * @stable ICU 3.4
 775          */
 776         public static final int GLAGOLITIC_ID = 136; /*[2C00]*/
 777
 778         /**
 779          * @stable ICU 3.4
 780          */
 781         public static final int KHAROSHTHI_ID = 137; /*[10A00]*/
 782
 783         /**
 784          * @stable ICU 3.4
 785          */
 786         public static final int MODIFIER_TONE_LETTERS_ID = 138; /*[A700]*/
 787
 788         /**
 789          * @stable ICU 3.4
 790          */
 791         public static final int NEW_TAI_LUE_ID = 139; /*[1980]*/
 792
 793         /**
 794          * @stable ICU 3.4
 795          */
 796         public static final int OLD_PERSIAN_ID = 140; /*[103A0]*/
 797
 798         /**
 799          * @stable ICU 3.4
 800          */
 801         public static final int PHONETIC_EXTENSIONS_SUPPLEMENT_ID = 141; /*[1D80]*/
 802
 803         /**
 804          * @stable ICU 3.4
 805          */
 806         public static final int SUPPLEMENTAL_PUNCTUATION_ID = 142; /*[2E00]*/
 807
 808         /**
 809          * @stable ICU 3.4
 810          */
 811         public static final int SYLOTI_NAGRI_ID = 143; /*[A800]*/
 812
 813         /**
 814          * @stable ICU 3.4
 815          */
 816         public static final int TIFINAGH_ID = 144; /*[2D30]*/
 817
 818         /**
 819          * @stable ICU 3.4
 820          */
 821         public static final int VERTICAL_FORMS_ID = 145; /*[FE10]*/
 822
 823         /* New blocks in Unicode 5.0 */
 824
 825         /**
 826          * @stable ICU 3.6
 827          */
 828         public static final int NKO_ID = 146; /*[07C0]*/
 829         /**
 830          * @stable ICU 3.6
 831          */
 832         public static final int BALINESE_ID = 147; /*[1B00]*/
 833         /**
 834          * @stable ICU 3.6
 835          */
 836         public static final int LATIN_EXTENDED_C_ID = 148; /*[2C60]*/
 837         /**
 838          * @stable ICU 3.6
 839          */
 840         public static final int LATIN_EXTENDED_D_ID = 149; /*[A720]*/
 841         /**
 842          * @stable ICU 3.6
 843          */
 844         public static final int PHAGS_PA_ID = 150; /*[A840]*/
 845         /**
 846          * @stable ICU 3.6
 847          */
 848         public static final int PHOENICIAN_ID = 151; /*[10900]*/
 849         /**
 850          * @stable ICU 3.6
 851          */
 852         public static final int CUNEIFORM_ID = 152; /*[12000]*/
 853         /**
 854          * @stable ICU 3.6
 855          */
 856         public static final int CUNEIFORM_NUMBERS_AND_PUNCTUATION_ID = 153; /*[12400]*/
 857         /**
 858          * @stable ICU 3.6
 859          */
 860         public static final int COUNTING_ROD_NUMERALS_ID = 154; /*[1D360]*/
 861
 862         /**
 863          * @stable ICU 4.0
 864          */
 865         public static final int SUNDANESE_ID = 155; /* [1B80] */
 866
 867         /**
 868          * @stable ICU 4.0
 869          */
 870         public static final int LEPCHA_ID = 156; /* [1C00] */
 871
 872         /**
 873          * @stable ICU 4.0
 874          */
 875         public static final int OL_CHIKI_ID = 157; /* [1C50] */
 876
 877         /**
 878          * @stable ICU 4.0
 879          */
 880         public static final int CYRILLIC_EXTENDED_A_ID = 158; /* [2DE0] */
 881
 882         /**
 883          * @stable ICU 4.0
 884          */
 885         public static final int VAI_ID = 159; /* [A500] */
 886
 887         /**
 888          * @stable ICU 4.0
 889          */
 890         public static final int CYRILLIC_EXTENDED_B_ID = 160; /* [A640] */
 891
 892         /**
 893          * @stable ICU 4.0
 894          */
 895         public static final int SAURASHTRA_ID = 161; /* [A880] */
 896
 897         /**
 898          * @stable ICU 4.0
 899          */
 900         public static final int KAYAH_LI_ID = 162; /* [A900] */
 901
 902         /**
 903          * @stable ICU 4.0
 904          */
 905         public static final int REJANG_ID = 163; /* [A930] */
 906
 907         /**
 908          * @stable ICU 4.0
 909          */
 910         public static final int CHAM_ID = 164; /* [AA00] */
 911
 912         /**
 913          * @stable ICU 4.0
 914          */
 915         public static final int ANCIENT_SYMBOLS_ID = 165; /* [10190] */
 916
 917         /**
 918          * @stable ICU 4.0
 919          */
 920         public static final int PHAISTOS_DISC_ID = 166; /* [101D0] */
 921
 922         /**
 923          * @stable ICU 4.0
 924          */
 925         public static final int LYCIAN_ID = 167; /* [10280] */
 926
 927         /**
 928          * @stable ICU 4.0
 929          */
 930         public static final int CARIAN_ID = 168; /* [102A0] */
 931
 932         /**
 933          * @stable ICU 4.0
 934          */
 935         public static final int LYDIAN_ID = 169; /* [10920] */
 936
 937         /**
 938          * @stable ICU 4.0
 939          */
 940         public static final int MAHJONG_TILES_ID = 170; /* [1F000] */
 941
 942         /**
 943          * @stable ICU 4.0
 944          */
 945         public static final int DOMINO_TILES_ID = 171; /* [1F030] */
 946
 947         /* New blocks in Unicode 5.2 */
 948
 949         /** @stable ICU 4.4 */
 950         public static final int SAMARITAN_ID = 172; /*[0800]*/
 951         /** @stable ICU 4.4 */
 952         public static final int UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED_ID = 173; /*[18B0]*/
 953         /** @stable ICU 4.4 */
 954         public static final int TAI_THAM_ID = 174; /*[1A20]*/
 955         /** @stable ICU 4.4 */
 956         public static final int VEDIC_EXTENSIONS_ID = 175; /*[1CD0]*/
 957         /** @stable ICU 4.4 */
 958         public static final int LISU_ID = 176; /*[A4D0]*/
 959         /** @stable ICU 4.4 */
 960         public static final int BAMUM_ID = 177; /*[A6A0]*/
 961         /** @stable ICU 4.4 */
 962         public static final int COMMON_INDIC_NUMBER_FORMS_ID = 178; /*[A830]*/
 963         /** @stable ICU 4.4 */
 964         public static final int DEVANAGARI_EXTENDED_ID = 179; /*[A8E0]*/
 965         /** @stable ICU 4.4 */
 966         public static final int HANGUL_JAMO_EXTENDED_A_ID = 180; /*[A960]*/
 967         /** @stable ICU 4.4 */
 968         public static final int JAVANESE_ID = 181; /*[A980]*/
 969         /** @stable ICU 4.4 */
 970         public static final int MYANMAR_EXTENDED_A_ID = 182; /*[AA60]*/
 971         /** @stable ICU 4.4 */
 972         public static final int TAI_VIET_ID = 183; /*[AA80]*/
 973         /** @stable ICU 4.4 */
 974         public static final int MEETEI_MAYEK_ID = 184; /*[ABC0]*/
 975         /** @stable ICU 4.4 */
 976         public static final int HANGUL_JAMO_EXTENDED_B_ID = 185; /*[D7B0]*/
 977         /** @stable ICU 4.4 */
 978         public static final int IMPERIAL_ARAMAIC_ID = 186; /*[10840]*/
 979         /** @stable ICU 4.4 */
 980         public static final int OLD_SOUTH_ARABIAN_ID = 187; /*[10A60]*/
 981         /** @stable ICU 4.4 */
 982         public static final int AVESTAN_ID = 188; /*[10B00]*/
 983         /** @stable ICU 4.4 */
 984         public static final int INSCRIPTIONAL_PARTHIAN_ID = 189; /*[10B40]*/
 985         /** @stable ICU 4.4 */
 986         public static final int INSCRIPTIONAL_PAHLAVI_ID = 190; /*[10B60]*/
 987         /** @stable ICU 4.4 */
 988         public static final int OLD_TURKIC_ID = 191; /*[10C00]*/
 989         /** @stable ICU 4.4 */
 990         public static final int RUMI_NUMERAL_SYMBOLS_ID = 192; /*[10E60]*/
 991         /** @stable ICU 4.4 */
 992         public static final int KAITHI_ID = 193; /*[11080]*/
 993         /** @stable ICU 4.4 */
 994         public static final int EGYPTIAN_HIEROGLYPHS_ID = 194; /*[13000]*/
 995         /** @stable ICU 4.4 */
 996         public static final int ENCLOSED_ALPHANUMERIC_SUPPLEMENT_ID = 195; /*[1F100]*/
 997         /** @stable ICU 4.4 */
 998         public static final int ENCLOSED_IDEOGRAPHIC_SUPPLEMENT_ID = 196; /*[1F200]*/
 999         /** @stable ICU 4.4 */
1000         public static final int CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C_ID = 197; /*[2A700]*/
1001
1002         /* New blocks in Unicode 6.0 */
1003
1004         /** @stable ICU 4.6 */
1005         public static final int MANDAIC_ID = 198; /*[0840]*/
1006         /** @stable ICU 4.6 */
1007         public static final int BATAK_ID = 199; /*[1BC0]*/
1008         /** @stable ICU 4.6 */
1009         public static final int ETHIOPIC_EXTENDED_A_ID = 200; /*[AB00]*/
1010         /** @stable ICU 4.6 */
1011         public static final int BRAHMI_ID = 201; /*[11000]*/
1012         /** @stable ICU 4.6 */
1013         public static final int BAMUM_SUPPLEMENT_ID = 202; /*[16800]*/
1014         /** @stable ICU 4.6 */
1015         public static final int KANA_SUPPLEMENT_ID = 203; /*[1B000]*/
1016         /** @stable ICU 4.6 */
1017         public static final int PLAYING_CARDS_ID = 204; /*[1F0A0]*/
1018         /** @stable ICU 4.6 */
1019         public static final int MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS_ID = 205; /*[1F300]*/
1020         /** @stable ICU 4.6 */
1021         public static final int EMOTICONS_ID = 206; /*[1F600]*/
1022         /** @stable ICU 4.6 */
1023         public static final int TRANSPORT_AND_MAP_SYMBOLS_ID = 207; /*[1F680]*/
1024         /** @stable ICU 4.6 */
1025         public static final int ALCHEMICAL_SYMBOLS_ID = 208; /*[1F700]*/
1026         /** @stable ICU 4.6 */
1027         public static final int CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D_ID = 209; /*[2B740]*/
1028
1029         /** One more than the highest block ID declared above (209).
1030          * @stable ICU 2.4
1031          */
1032         public static final int COUNT = 210;
1033
1034         // block objects ----------------------------------------------------
1035
1036         /**
1037          * Array of UnicodeBlocks, for easy access in getInstance(int)
1038          */
1039         private final static UnicodeBlock BLOCKS_[] = new UnicodeBlock[COUNT];
1040
1041         /**
1042          * @stable ICU 2.6
1043          */
1044         public static final UnicodeBlock NO_BLOCK
1045             = new UnicodeBlock("NO_BLOCK", 0);
1046
1047         /**
1048          * @stable ICU 2.4
1049          */
1050         public static final UnicodeBlock BASIC_LATIN
1051             = new UnicodeBlock("BASIC_LATIN", BASIC_LATIN_ID);
1052         /**
1053          * @stable ICU 2.4
1054          */
1055         public static final UnicodeBlock LATIN_1_SUPPLEMENT
1056             = new UnicodeBlock("LATIN_1_SUPPLEMENT", LATIN_1_SUPPLEMENT_ID);
1057         /**
1058          * @stable ICU 2.4
1059          */
1060         public static final UnicodeBlock LATIN_EXTENDED_A
1061             = new UnicodeBlock("LATIN_EXTENDED_A", LATIN_EXTENDED_A_ID);
1062         /**
1063          * @stable ICU 2.4
1064          */
1065         public static final UnicodeBlock LATIN_EXTENDED_B
1066             = new UnicodeBlock("LATIN_EXTENDED_B", LATIN_EXTENDED_B_ID);
1067         /**
1068          * @stable ICU 2.4
1069          */
1070         public static final UnicodeBlock IPA_EXTENSIONS
1071             = new UnicodeBlock("IPA_EXTENSIONS", IPA_EXTENSIONS_ID);
1072         /**
1073          * @stable ICU 2.4
1074          */
1075         public static final UnicodeBlock SPACING_MODIFIER_LETTERS
1076             = new UnicodeBlock("SPACING_MODIFIER_LETTERS", SPACING_MODIFIER_LETTERS_ID);
1077         /**
1078          * @stable ICU 2.4
1079          */
1080         public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS
1081             = new UnicodeBlock("COMBINING_DIACRITICAL_MARKS", COMBINING_DIACRITICAL_MARKS_ID);
1082         /**
1083          * Unicode 3.2 renames this block to "Greek and Coptic".
1084          * @stable ICU 2.4
1085          */
1086         public static final UnicodeBlock GREEK
1087             = new UnicodeBlock("GREEK", GREEK_ID);
1088         /**
1089          * @stable ICU 2.4
1090          */
1091         public static final UnicodeBlock CYRILLIC
1092             = new UnicodeBlock("CYRILLIC", CYRILLIC_ID);
1093         /**
1094          * @stable ICU 2.4
1095          */
1096         public static final UnicodeBlock ARMENIAN
1097             = new UnicodeBlock("ARMENIAN", ARMENIAN_ID);
1098         /**
1099          * @stable ICU 2.4
1100          */
1101         public static final UnicodeBlock HEBREW
1102             = new UnicodeBlock("HEBREW", HEBREW_ID);
1103         /**
1104          * @stable ICU 2.4
1105          */
1106         public static final UnicodeBlock ARABIC
1107             = new UnicodeBlock("ARABIC", ARABIC_ID);
1108         /**
1109          * @stable ICU 2.4
1110          */
1111         public static final UnicodeBlock SYRIAC
1112             = new UnicodeBlock("SYRIAC", SYRIAC_ID);
1113         /**
1114          * @stable ICU 2.4
1115          */
1116         public static final UnicodeBlock THAANA
1117             = new UnicodeBlock("THAANA", THAANA_ID);
1118         /**
1119          * @stable ICU 2.4
1120          */
1121         public static final UnicodeBlock DEVANAGARI
1122             = new UnicodeBlock("DEVANAGARI", DEVANAGARI_ID);
1123         /**
1124          * @stable ICU 2.4
1125          */
1126         public static final UnicodeBlock BENGALI
1127             = new UnicodeBlock("BENGALI", BENGALI_ID);
1128         /**
1129          * @stable ICU 2.4
1130          */
1131         public static final UnicodeBlock GURMUKHI
1132             = new UnicodeBlock("GURMUKHI", GURMUKHI_ID);
1133         /**
1134          * @stable ICU 2.4
1135          */
1136         public static final UnicodeBlock GUJARATI
1137             = new UnicodeBlock("GUJARATI", GUJARATI_ID);
1138         /**
1139          * @stable ICU 2.4
1140          */
1141         public static final UnicodeBlock ORIYA
1142             = new UnicodeBlock("ORIYA", ORIYA_ID);
1143         /**
1144          * @stable ICU 2.4
1145          */
1146         public static final UnicodeBlock TAMIL
1147             = new UnicodeBlock("TAMIL", TAMIL_ID);
1148         /**
1149          * @stable ICU 2.4
1150          */
1151         public static final UnicodeBlock TELUGU
1152             = new UnicodeBlock("TELUGU", TELUGU_ID);
1153         /**
1154          * @stable ICU 2.4
1155          */
1156         public static final UnicodeBlock KANNADA
1157             = new UnicodeBlock("KANNADA", KANNADA_ID);
1158         /**
1159          * @stable ICU 2.4
1160          */
1161         public static final UnicodeBlock MALAYALAM
1162             = new UnicodeBlock("MALAYALAM", MALAYALAM_ID);
1163         /**
1164          * @stable ICU 2.4
1165          */
1166         public static final UnicodeBlock SINHALA
1167             = new UnicodeBlock("SINHALA", SINHALA_ID);
1168         /**
1169          * @stable ICU 2.4
1170          */
1171         public static final UnicodeBlock THAI
1172             = new UnicodeBlock("THAI", THAI_ID);
1173         /**
1174          * @stable ICU 2.4
1175          */
1176         public static final UnicodeBlock LAO
1177             = new UnicodeBlock("LAO", LAO_ID);
1178         /**
1179          * @stable ICU 2.4
1180          */
1181         public static final UnicodeBlock TIBETAN
1182             = new UnicodeBlock("TIBETAN", TIBETAN_ID);
1183         /**
1184          * @stable ICU 2.4
1185          */
1186         public static final UnicodeBlock MYANMAR
1187             = new UnicodeBlock("MYANMAR", MYANMAR_ID);
1188         /**
1189          * @stable ICU 2.4
1190          */
1191         public static final UnicodeBlock GEORGIAN
1192             = new UnicodeBlock("GEORGIAN", GEORGIAN_ID);
1193         /**
1194          * @stable ICU 2.4
1195          */
1196         public static final UnicodeBlock HANGUL_JAMO
1197             = new UnicodeBlock("HANGUL_JAMO", HANGUL_JAMO_ID);
1198         /**
1199          * @stable ICU 2.4
1200          */
1201         public static final UnicodeBlock ETHIOPIC
1202             = new UnicodeBlock("ETHIOPIC", ETHIOPIC_ID);
1203         /**
1204          * @stable ICU 2.4
1205          */
1206         public static final UnicodeBlock CHEROKEE
1207             = new UnicodeBlock("CHEROKEE", CHEROKEE_ID);
1208         /**
1209          * @stable ICU 2.4
1210          */
1211         public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS
1212             = new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS",
1213                                UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_ID);
1214         /**
1215          * @stable ICU 2.4
1216          */
1217         public static final UnicodeBlock OGHAM
1218             = new UnicodeBlock("OGHAM", OGHAM_ID);
1219         /**
1220          * @stable ICU 2.4
1221          */
1222         public static final UnicodeBlock RUNIC
1223             = new UnicodeBlock("RUNIC", RUNIC_ID);
1224         /**
1225          * @stable ICU 2.4
1226          */
1227         public static final UnicodeBlock KHMER
1228             = new UnicodeBlock("KHMER", KHMER_ID);
1229         /**
1230          * @stable ICU 2.4
1231          */
1232         public static final UnicodeBlock MONGOLIAN
1233             = new UnicodeBlock("MONGOLIAN", MONGOLIAN_ID);
1234         /**
1235          * @stable ICU 2.4
1236          */
1237         public static final UnicodeBlock LATIN_EXTENDED_ADDITIONAL
1238             = new UnicodeBlock("LATIN_EXTENDED_ADDITIONAL", LATIN_EXTENDED_ADDITIONAL_ID);
1239         /**
1240          * @stable ICU 2.4
1241          */
1242         public static final UnicodeBlock GREEK_EXTENDED
1243             = new UnicodeBlock("GREEK_EXTENDED", GREEK_EXTENDED_ID);
1244         /**
1245          * @stable ICU 2.4
1246          */
1247         public static final UnicodeBlock GENERAL_PUNCTUATION
1248             = new UnicodeBlock("GENERAL_PUNCTUATION", GENERAL_PUNCTUATION_ID);
1249         /**
1250          * @stable ICU 2.4
1251          */
1252         public static final UnicodeBlock SUPERSCRIPTS_AND_SUBSCRIPTS
1253             = new UnicodeBlock("SUPERSCRIPTS_AND_SUBSCRIPTS", SUPERSCRIPTS_AND_SUBSCRIPTS_ID);
1254         /**
1255          * @stable ICU 2.4
1256          */
1257         public static final UnicodeBlock CURRENCY_SYMBOLS
1258             = new UnicodeBlock("CURRENCY_SYMBOLS", CURRENCY_SYMBOLS_ID);
1259         /**
1260          * Unicode 3.2 renames this block to "Combining Diacritical Marks for
1261          * Symbols".
1262          * @stable ICU 2.4
1263          */
1264         public static final UnicodeBlock COMBINING_MARKS_FOR_SYMBOLS
1265             = new UnicodeBlock("COMBINING_MARKS_FOR_SYMBOLS", COMBINING_MARKS_FOR_SYMBOLS_ID);
1266         /**
1267          * @stable ICU 2.4
1268          */
1269         public static final UnicodeBlock LETTERLIKE_SYMBOLS
1270             = new UnicodeBlock("LETTERLIKE_SYMBOLS", LETTERLIKE_SYMBOLS_ID);
1271         /**
1272          * @stable ICU 2.4
1273          */
1274         public static final UnicodeBlock NUMBER_FORMS
1275             = new UnicodeBlock("NUMBER_FORMS", NUMBER_FORMS_ID);
1276         /**
1277          * @stable ICU 2.4
1278          */
1279         public static final UnicodeBlock ARROWS
1280             = new UnicodeBlock("ARROWS", ARROWS_ID);
1281         /**
1282          * @stable ICU 2.4
1283          */
1284         public static final UnicodeBlock MATHEMATICAL_OPERATORS
1285             = new UnicodeBlock("MATHEMATICAL_OPERATORS", MATHEMATICAL_OPERATORS_ID);
1286         /**
1287          * @stable ICU 2.4
1288          */
1289         public static final UnicodeBlock MISCELLANEOUS_TECHNICAL
1290             = new UnicodeBlock("MISCELLANEOUS_TECHNICAL", MISCELLANEOUS_TECHNICAL_ID);
1291         /**
1292          * @stable ICU 2.4
1293          */
1294         public static final UnicodeBlock CONTROL_PICTURES
1295             = new UnicodeBlock("CONTROL_PICTURES", CONTROL_PICTURES_ID);
1296         /**
1297          * @stable ICU 2.4
1298          */
1299         public static final UnicodeBlock OPTICAL_CHARACTER_RECOGNITION
1300             = new UnicodeBlock("OPTICAL_CHARACTER_RECOGNITION", OPTICAL_CHARACTER_RECOGNITION_ID);
1301         /**
1302          * @stable ICU 2.4
1303          */
1304         public static final UnicodeBlock ENCLOSED_ALPHANUMERICS
1305             = new UnicodeBlock("ENCLOSED_ALPHANUMERICS", ENCLOSED_ALPHANUMERICS_ID);
1306         /**
1307          * @stable ICU 2.4
1308          */
1309         public static final UnicodeBlock BOX_DRAWING
1310             = new UnicodeBlock("BOX_DRAWING", BOX_DRAWING_ID);
1311         /**
1312          * @stable ICU 2.4
1313          */
1314         public static final UnicodeBlock BLOCK_ELEMENTS
1315             = new UnicodeBlock("BLOCK_ELEMENTS", BLOCK_ELEMENTS_ID);
1316         /**
1317          * @stable ICU 2.4
1318          */
1319         public static final UnicodeBlock GEOMETRIC_SHAPES
1320             = new UnicodeBlock("GEOMETRIC_SHAPES", GEOMETRIC_SHAPES_ID);
1321         /**
1322          * @stable ICU 2.4
1323          */
1324         public static final UnicodeBlock MISCELLANEOUS_SYMBOLS
1325             = new UnicodeBlock("MISCELLANEOUS_SYMBOLS", MISCELLANEOUS_SYMBOLS_ID);
1326         /**
1327          * @stable ICU 2.4
1328          */
1329         public static final UnicodeBlock DINGBATS
1330             = new UnicodeBlock("DINGBATS", DINGBATS_ID);
1331         /**
1332          * @stable ICU 2.4
1333          */
1334         public static final UnicodeBlock BRAILLE_PATTERNS
1335             = new UnicodeBlock("BRAILLE_PATTERNS", BRAILLE_PATTERNS_ID);
1336         /**
1337          * @stable ICU 2.4
1338          */
1339         public static final UnicodeBlock CJK_RADICALS_SUPPLEMENT
1340             = new UnicodeBlock("CJK_RADICALS_SUPPLEMENT", CJK_RADICALS_SUPPLEMENT_ID);
1341         /**
1342          * @stable ICU 2.4
1343          */
1344         public static final UnicodeBlock KANGXI_RADICALS
1345             = new UnicodeBlock("KANGXI_RADICALS", KANGXI_RADICALS_ID);
1346         /**
1347          * @stable ICU 2.4
1348          */
1349         public static final UnicodeBlock IDEOGRAPHIC_DESCRIPTION_CHARACTERS
1350             = new UnicodeBlock("IDEOGRAPHIC_DESCRIPTION_CHARACTERS",
1351                                IDEOGRAPHIC_DESCRIPTION_CHARACTERS_ID);
1352         /**
1353          * @stable ICU 2.4
1354          */
1355         public static final UnicodeBlock CJK_SYMBOLS_AND_PUNCTUATION
1356             = new UnicodeBlock("CJK_SYMBOLS_AND_PUNCTUATION", CJK_SYMBOLS_AND_PUNCTUATION_ID);
1357         /**
1358          * @stable ICU 2.4
1359          */
1360         public static final UnicodeBlock HIRAGANA
1361             = new UnicodeBlock("HIRAGANA", HIRAGANA_ID);
1362         /**
1363          * @stable ICU 2.4
1364          */
1365         public static final UnicodeBlock KATAKANA
1366             = new UnicodeBlock("KATAKANA", KATAKANA_ID);
1367         /**
1368          * @stable ICU 2.4
1369          */
1370         public static final UnicodeBlock BOPOMOFO
1371             = new UnicodeBlock("BOPOMOFO", BOPOMOFO_ID);
1372         /**
1373          * @stable ICU 2.4
1374          */
1375         public static final UnicodeBlock HANGUL_COMPATIBILITY_JAMO
1376             = new UnicodeBlock("HANGUL_COMPATIBILITY_JAMO", HANGUL_COMPATIBILITY_JAMO_ID);
1377         /**
1378          * @stable ICU 2.4
1379          */
1380         public static final UnicodeBlock KANBUN
1381             = new UnicodeBlock("KANBUN", KANBUN_ID);
1382         /**
1383          * @stable ICU 2.4
1384          */
1385         public static final UnicodeBlock BOPOMOFO_EXTENDED
1386             = new UnicodeBlock("BOPOMOFO_EXTENDED", BOPOMOFO_EXTENDED_ID);
1387         /**
1388          * @stable ICU 2.4
1389          */
1390         public static final UnicodeBlock ENCLOSED_CJK_LETTERS_AND_MONTHS
1391             = new UnicodeBlock("ENCLOSED_CJK_LETTERS_AND_MONTHS",
1392                                ENCLOSED_CJK_LETTERS_AND_MONTHS_ID);
1393         /**
1394          * @stable ICU 2.4
1395          */
1396         public static final UnicodeBlock CJK_COMPATIBILITY
1397             = new UnicodeBlock("CJK_COMPATIBILITY", CJK_COMPATIBILITY_ID);
1398         /**
1399          * @stable ICU 2.4
1400          */
1401         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A
1402             = new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A",
1403                                CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A_ID);
1404         /**
1405          * @stable ICU 2.4
1406          */
1407         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS
1408             = new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS", CJK_UNIFIED_IDEOGRAPHS_ID);
1409         /**
1410          * @stable ICU 2.4
1411          */
1412         public static final UnicodeBlock YI_SYLLABLES
1413             = new UnicodeBlock("YI_SYLLABLES", YI_SYLLABLES_ID);
1414         /**
1415          * @stable ICU 2.4
1416          */
1417         public static final UnicodeBlock YI_RADICALS
1418             = new UnicodeBlock("YI_RADICALS", YI_RADICALS_ID);
1419         /**
1420          * @stable ICU 2.4
1421          */
1422         public static final UnicodeBlock HANGUL_SYLLABLES
1423             = new UnicodeBlock("HANGUL_SYLLABLES", HANGUL_SYLLABLES_ID);
1424         /**
1425          * @stable ICU 2.4
1426          */
1427         public static final UnicodeBlock HIGH_SURROGATES
1428             = new UnicodeBlock("HIGH_SURROGATES", HIGH_SURROGATES_ID);
1429         /**
1430          * @stable ICU 2.4
1431          */
1432         public static final UnicodeBlock HIGH_PRIVATE_USE_SURROGATES
1433             = new UnicodeBlock("HIGH_PRIVATE_USE_SURROGATES", HIGH_PRIVATE_USE_SURROGATES_ID);
1434         /**
1435          * @stable ICU 2.4
1436          */
1437         public static final UnicodeBlock LOW_SURROGATES
1438             = new UnicodeBlock("LOW_SURROGATES", LOW_SURROGATES_ID);
1439         /**
1440          * Same object as {@link #PRIVATE_USE}, which is declared as an alias of this field.
1441          * Until Unicode 3.1.1, the corresponding block name was "Private Use",
1442          * and multiple code point ranges had this block.
1443          * Unicode 3.2 renames the block for the BMP PUA to "Private Use Area"
1444          * and adds separate blocks for the supplementary PUAs.
1445          * @stable ICU 2.4
1446          */
1447         public static final UnicodeBlock PRIVATE_USE_AREA
1448             = new UnicodeBlock("PRIVATE_USE_AREA",  78); // NOTE(review): literal ID; sibling blocks pass a *_ID constant - confirm and align
1449         /**
1450          * Alias of {@link #PRIVATE_USE_AREA}: this field is assigned the very same object.
1451          * Until Unicode 3.1.1, the corresponding block name was "Private Use",
1452          * and multiple code point ranges had this block.
1453          * Unicode 3.2 renames the block for the BMP PUA to "Private Use Area"
1454          * and adds separate blocks for the supplementary PUAs.
1455          * @stable ICU 2.4
1456          */
1457         public static final UnicodeBlock PRIVATE_USE
1458             = PRIVATE_USE_AREA;
1459         /**
1460          * @stable ICU 2.4
1461          */
1462         public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS
1463             = new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS", CJK_COMPATIBILITY_IDEOGRAPHS_ID);
1464         /**
1465          * @stable ICU 2.4
1466          */
1467         public static final UnicodeBlock ALPHABETIC_PRESENTATION_FORMS
1468             = new UnicodeBlock("ALPHABETIC_PRESENTATION_FORMS", ALPHABETIC_PRESENTATION_FORMS_ID);
1469         /**
1470          * @stable ICU 2.4
1471          */
1472         public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_A
1473             = new UnicodeBlock("ARABIC_PRESENTATION_FORMS_A", ARABIC_PRESENTATION_FORMS_A_ID);
1474         /**
1475          * @stable ICU 2.4
1476          */
1477         public static final UnicodeBlock COMBINING_HALF_MARKS
1478             = new UnicodeBlock("COMBINING_HALF_MARKS", COMBINING_HALF_MARKS_ID);
1479         /**
1480          * @stable ICU 2.4
1481          */
1482         public static final UnicodeBlock CJK_COMPATIBILITY_FORMS
1483             = new UnicodeBlock("CJK_COMPATIBILITY_FORMS", CJK_COMPATIBILITY_FORMS_ID);
1484         /**
1485          * @stable ICU 2.4
1486          */
1487         public static final UnicodeBlock SMALL_FORM_VARIANTS
1488             = new UnicodeBlock("SMALL_FORM_VARIANTS", SMALL_FORM_VARIANTS_ID);
1489         /**
1490          * @stable ICU 2.4
1491          */
1492         public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_B
1493             = new UnicodeBlock("ARABIC_PRESENTATION_FORMS_B", ARABIC_PRESENTATION_FORMS_B_ID);
1494         /**
1495          * @stable ICU 2.4
1496          */
1497         public static final UnicodeBlock SPECIALS
1498             = new UnicodeBlock("SPECIALS", SPECIALS_ID);
1499         /**
1500          * @stable ICU 2.4
1501          */
1502         public static final UnicodeBlock HALFWIDTH_AND_FULLWIDTH_FORMS
1503             = new UnicodeBlock("HALFWIDTH_AND_FULLWIDTH_FORMS", HALFWIDTH_AND_FULLWIDTH_FORMS_ID);
1504         /**
1505          * @stable ICU 2.4
1506          */
1507         public static final UnicodeBlock OLD_ITALIC
1508             = new UnicodeBlock("OLD_ITALIC", OLD_ITALIC_ID);
1509         /**
1510          * @stable ICU 2.4
1511          */
1512         public static final UnicodeBlock GOTHIC
1513             = new UnicodeBlock("GOTHIC", GOTHIC_ID);
1514         /**
1515          * @stable ICU 2.4
1516          */
1517         public static final UnicodeBlock DESERET
1518             = new UnicodeBlock("DESERET", DESERET_ID);
1519         /**
1520          * @stable ICU 2.4
1521          */
1522         public static final UnicodeBlock BYZANTINE_MUSICAL_SYMBOLS
1523             = new UnicodeBlock("BYZANTINE_MUSICAL_SYMBOLS", BYZANTINE_MUSICAL_SYMBOLS_ID);
1524         /**
1525          * @stable ICU 2.4
1526          */
1527         public static final UnicodeBlock MUSICAL_SYMBOLS
1528             = new UnicodeBlock("MUSICAL_SYMBOLS", MUSICAL_SYMBOLS_ID);
1529         /**
1530          * @stable ICU 2.4
1531          */
1532         public static final UnicodeBlock MATHEMATICAL_ALPHANUMERIC_SYMBOLS
1533             = new UnicodeBlock("MATHEMATICAL_ALPHANUMERIC_SYMBOLS",
1534                                MATHEMATICAL_ALPHANUMERIC_SYMBOLS_ID);
1535         /**
1536          * @stable ICU 2.4
1537          */
1538         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B
1539             = new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B",
1540                                CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B_ID);
1541         /**
1542          * @stable ICU 2.4
1543          */
1544         public static final UnicodeBlock
1545             CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT
1546             = new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT",
1547                                CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT_ID);
1548         /**
1549          * @stable ICU 2.4
1550          */
1551         public static final UnicodeBlock TAGS
1552             = new UnicodeBlock("TAGS", TAGS_ID);
1553
1554         // New blocks in Unicode 3.2
1555
1556         /**
1557          * Unicode 4.0.1 renames the "Cyrillic Supplementary" block to "Cyrillic Supplement".
1558          * @stable ICU 2.4
1559          */
1560         public static final UnicodeBlock CYRILLIC_SUPPLEMENTARY
1561             = new UnicodeBlock("CYRILLIC_SUPPLEMENTARY", CYRILLIC_SUPPLEMENTARY_ID);
1562         /**
1563          * Unicode 4.0.1 renames the "Cyrillic Supplementary" block to "Cyrillic Supplement".
1564          * @stable ICU 3.0
1565          */
1566         public static final UnicodeBlock CYRILLIC_SUPPLEMENT
1567             = new UnicodeBlock("CYRILLIC_SUPPLEMENT", CYRILLIC_SUPPLEMENT_ID);
1568         /**
1569          * @stable ICU 2.4
1570          */
1571         public static final UnicodeBlock TAGALOG
1572             = new UnicodeBlock("TAGALOG", TAGALOG_ID);
1573         /**
1574          * @stable ICU 2.4
1575          */
1576         public static final UnicodeBlock HANUNOO
1577             = new UnicodeBlock("HANUNOO", HANUNOO_ID);
1578         /**
1579          * @stable ICU 2.4
1580          */
1581         public static final UnicodeBlock BUHID
1582             = new UnicodeBlock("BUHID", BUHID_ID);
1583         /**
1584          * @stable ICU 2.4
1585          */
1586         public static final UnicodeBlock TAGBANWA
1587             = new UnicodeBlock("TAGBANWA", TAGBANWA_ID);
1588         /**
1589          * @stable ICU 2.4
1590          */
1591         public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A
1592             = new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A",
1593                                MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A_ID);
1594         /**
1595          * @stable ICU 2.4
1596          */
1597         public static final UnicodeBlock SUPPLEMENTAL_ARROWS_A
1598             = new UnicodeBlock("SUPPLEMENTAL_ARROWS_A", SUPPLEMENTAL_ARROWS_A_ID);
        // Each constant below is built from its Java name and its numeric *_ID.
        // The private UnicodeBlock constructor stores the new instance in
        // BLOCKS_[id] (for non-negative ids), which is what getInstance(int)
        // later returns.

        /**
         * Unicode block SUPPLEMENTAL_ARROWS_B.
         * @stable ICU 2.4
         */
        public static final UnicodeBlock SUPPLEMENTAL_ARROWS_B
            = new UnicodeBlock("SUPPLEMENTAL_ARROWS_B", SUPPLEMENTAL_ARROWS_B_ID);
        /**
         * Unicode block MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B.
         * @stable ICU 2.4
         */
        public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B
            = new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B",
                               MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B_ID);
        /**
         * Unicode block SUPPLEMENTAL_MATHEMATICAL_OPERATORS.
         * @stable ICU 2.4
         */
        public static final UnicodeBlock SUPPLEMENTAL_MATHEMATICAL_OPERATORS
            = new UnicodeBlock("SUPPLEMENTAL_MATHEMATICAL_OPERATORS",
                               SUPPLEMENTAL_MATHEMATICAL_OPERATORS_ID);
        /**
         * Unicode block KATAKANA_PHONETIC_EXTENSIONS.
         * @stable ICU 2.4
         */
        public static final UnicodeBlock KATAKANA_PHONETIC_EXTENSIONS
            = new UnicodeBlock("KATAKANA_PHONETIC_EXTENSIONS", KATAKANA_PHONETIC_EXTENSIONS_ID);
        /**
         * Unicode block VARIATION_SELECTORS.
         * @stable ICU 2.4
         */
        public static final UnicodeBlock VARIATION_SELECTORS
            = new UnicodeBlock("VARIATION_SELECTORS", VARIATION_SELECTORS_ID);
        /**
         * Unicode block SUPPLEMENTARY_PRIVATE_USE_AREA_A.
         * @stable ICU 2.4
         */
        public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_A
            = new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_A",
                               SUPPLEMENTARY_PRIVATE_USE_AREA_A_ID);
        /**
         * Unicode block SUPPLEMENTARY_PRIVATE_USE_AREA_B.
         * @stable ICU 2.4
         */
        public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_B
            = new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_B",
                               SUPPLEMENTARY_PRIVATE_USE_AREA_B_ID);

        /**
         * Unicode block LIMBU.
         * @stable ICU 2.6
         */
        public static final UnicodeBlock LIMBU
            = new UnicodeBlock("LIMBU", LIMBU_ID);
        /**
         * Unicode block TAI_LE.
         * @stable ICU 2.6
         */
        public static final UnicodeBlock TAI_LE
            = new UnicodeBlock("TAI_LE", TAI_LE_ID);
        /**
         * Unicode block KHMER_SYMBOLS.
         * @stable ICU 2.6
         */
        public static final UnicodeBlock KHMER_SYMBOLS
            = new UnicodeBlock("KHMER_SYMBOLS", KHMER_SYMBOLS_ID);

        /**
         * Unicode block PHONETIC_EXTENSIONS.
         * @stable ICU 2.6
         */
        public static final UnicodeBlock PHONETIC_EXTENSIONS
            = new UnicodeBlock("PHONETIC_EXTENSIONS", PHONETIC_EXTENSIONS_ID);

        /**
         * Unicode block MISCELLANEOUS_SYMBOLS_AND_ARROWS.
         * @stable ICU 2.6
         */
        public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_ARROWS
            = new UnicodeBlock("MISCELLANEOUS_SYMBOLS_AND_ARROWS",
                               MISCELLANEOUS_SYMBOLS_AND_ARROWS_ID);
        /**
         * Unicode block YIJING_HEXAGRAM_SYMBOLS.
         * @stable ICU 2.6
         */
        public static final UnicodeBlock YIJING_HEXAGRAM_SYMBOLS
            = new UnicodeBlock("YIJING_HEXAGRAM_SYMBOLS", YIJING_HEXAGRAM_SYMBOLS_ID);
        /**
         * Unicode block LINEAR_B_SYLLABARY.
         * @stable ICU 2.6
         */
        public static final UnicodeBlock LINEAR_B_SYLLABARY
            = new UnicodeBlock("LINEAR_B_SYLLABARY", LINEAR_B_SYLLABARY_ID);
        /**
         * Unicode block LINEAR_B_IDEOGRAMS.
         * @stable ICU 2.6
         */
        public static final UnicodeBlock LINEAR_B_IDEOGRAMS
            = new UnicodeBlock("LINEAR_B_IDEOGRAMS", LINEAR_B_IDEOGRAMS_ID);
        /**
         * Unicode block AEGEAN_NUMBERS.
         * @stable ICU 2.6
         */
        public static final UnicodeBlock AEGEAN_NUMBERS
            = new UnicodeBlock("AEGEAN_NUMBERS", AEGEAN_NUMBERS_ID);
        /**
         * Unicode block UGARITIC.
         * @stable ICU 2.6
         */
        public static final UnicodeBlock UGARITIC
            = new UnicodeBlock("UGARITIC", UGARITIC_ID);
        /**
         * Unicode block SHAVIAN.
         * @stable ICU 2.6
         */
        public static final UnicodeBlock SHAVIAN
            = new UnicodeBlock("SHAVIAN", SHAVIAN_ID);
        /**
         * Unicode block OSMANYA.
         * @stable ICU 2.6
         */
        public static final UnicodeBlock OSMANYA
            = new UnicodeBlock("OSMANYA", OSMANYA_ID);
        /**
         * Unicode block CYPRIOT_SYLLABARY.
         * @stable ICU 2.6
         */
        public static final UnicodeBlock CYPRIOT_SYLLABARY
            = new UnicodeBlock("CYPRIOT_SYLLABARY", CYPRIOT_SYLLABARY_ID);
        /**
         * Unicode block TAI_XUAN_JING_SYMBOLS.
         * @stable ICU 2.6
         */
        public static final UnicodeBlock TAI_XUAN_JING_SYMBOLS
            = new UnicodeBlock("TAI_XUAN_JING_SYMBOLS", TAI_XUAN_JING_SYMBOLS_ID);

        /**
         * Unicode block VARIATION_SELECTORS_SUPPLEMENT.
         * @stable ICU 2.6
         */
        public static final UnicodeBlock VARIATION_SELECTORS_SUPPLEMENT
            = new UnicodeBlock("VARIATION_SELECTORS_SUPPLEMENT", VARIATION_SELECTORS_SUPPLEMENT_ID);
1718
        /* New blocks in Unicode 4.1 */
        // NOTE(review): the bracketed hex value trailing each declaration is
        // presumably the first code point of the block - confirm against the UCD.

        /**
         * Unicode block ANCIENT_GREEK_MUSICAL_NOTATION.
         * @stable ICU 3.4
         */
        public static final UnicodeBlock ANCIENT_GREEK_MUSICAL_NOTATION =
            new UnicodeBlock("ANCIENT_GREEK_MUSICAL_NOTATION",
                             ANCIENT_GREEK_MUSICAL_NOTATION_ID); /*[1D200]*/

        /**
         * Unicode block ANCIENT_GREEK_NUMBERS.
         * @stable ICU 3.4
         */
        public static final UnicodeBlock ANCIENT_GREEK_NUMBERS =
            new UnicodeBlock("ANCIENT_GREEK_NUMBERS", ANCIENT_GREEK_NUMBERS_ID); /*[10140]*/

        /**
         * Unicode block ARABIC_SUPPLEMENT.
         * @stable ICU 3.4
         */
        public static final UnicodeBlock ARABIC_SUPPLEMENT =
            new UnicodeBlock("ARABIC_SUPPLEMENT", ARABIC_SUPPLEMENT_ID); /*[0750]*/

        /**
         * Unicode block BUGINESE.
         * @stable ICU 3.4
         */
        public static final UnicodeBlock BUGINESE =
            new UnicodeBlock("BUGINESE", BUGINESE_ID); /*[1A00]*/

        /**
         * Unicode block CJK_STROKES.
         * @stable ICU 3.4
         */
        public static final UnicodeBlock CJK_STROKES =
            new UnicodeBlock("CJK_STROKES", CJK_STROKES_ID); /*[31C0]*/

        /**
         * Unicode block COMBINING_DIACRITICAL_MARKS_SUPPLEMENT.
         * @stable ICU 3.4
         */
        public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS_SUPPLEMENT =
            new UnicodeBlock("COMBINING_DIACRITICAL_MARKS_SUPPLEMENT",
                             COMBINING_DIACRITICAL_MARKS_SUPPLEMENT_ID); /*[1DC0]*/

        /**
         * Unicode block COPTIC.
         * @stable ICU 3.4
         */
        public static final UnicodeBlock COPTIC = new UnicodeBlock("COPTIC", COPTIC_ID); /*[2C80]*/

        /**
         * Unicode block ETHIOPIC_EXTENDED.
         * @stable ICU 3.4
         */
        public static final UnicodeBlock ETHIOPIC_EXTENDED =
            new UnicodeBlock("ETHIOPIC_EXTENDED", ETHIOPIC_EXTENDED_ID); /*[2D80]*/

        /**
         * Unicode block ETHIOPIC_SUPPLEMENT.
         * @stable ICU 3.4
         */
        public static final UnicodeBlock ETHIOPIC_SUPPLEMENT =
            new UnicodeBlock("ETHIOPIC_SUPPLEMENT", ETHIOPIC_SUPPLEMENT_ID); /*[1380]*/

        /**
         * Unicode block GEORGIAN_SUPPLEMENT.
         * @stable ICU 3.4
         */
        public static final UnicodeBlock GEORGIAN_SUPPLEMENT =
            new UnicodeBlock("GEORGIAN_SUPPLEMENT", GEORGIAN_SUPPLEMENT_ID); /*[2D00]*/

        /**
         * Unicode block GLAGOLITIC.
         * @stable ICU 3.4
         */
        public static final UnicodeBlock GLAGOLITIC =
            new UnicodeBlock("GLAGOLITIC", GLAGOLITIC_ID); /*[2C00]*/

        /**
         * Unicode block KHAROSHTHI.
         * @stable ICU 3.4
         */
        public static final UnicodeBlock KHAROSHTHI =
            new UnicodeBlock("KHAROSHTHI", KHAROSHTHI_ID); /*[10A00]*/

        /**
         * Unicode block MODIFIER_TONE_LETTERS.
         * @stable ICU 3.4
         */
        public static final UnicodeBlock MODIFIER_TONE_LETTERS =
            new UnicodeBlock("MODIFIER_TONE_LETTERS", MODIFIER_TONE_LETTERS_ID); /*[A700]*/

        /**
         * Unicode block NEW_TAI_LUE.
         * @stable ICU 3.4
         */
        public static final UnicodeBlock NEW_TAI_LUE =
            new UnicodeBlock("NEW_TAI_LUE", NEW_TAI_LUE_ID); /*[1980]*/

        /**
         * Unicode block OLD_PERSIAN.
         * @stable ICU 3.4
         */
        public static final UnicodeBlock OLD_PERSIAN =
            new UnicodeBlock("OLD_PERSIAN", OLD_PERSIAN_ID); /*[103A0]*/

        /**
         * Unicode block PHONETIC_EXTENSIONS_SUPPLEMENT.
         * @stable ICU 3.4
         */
        public static final UnicodeBlock PHONETIC_EXTENSIONS_SUPPLEMENT =
            new UnicodeBlock("PHONETIC_EXTENSIONS_SUPPLEMENT",
                             PHONETIC_EXTENSIONS_SUPPLEMENT_ID); /*[1D80]*/

        /**
         * Unicode block SUPPLEMENTAL_PUNCTUATION.
         * @stable ICU 3.4
         */
        public static final UnicodeBlock SUPPLEMENTAL_PUNCTUATION =
            new UnicodeBlock("SUPPLEMENTAL_PUNCTUATION", SUPPLEMENTAL_PUNCTUATION_ID); /*[2E00]*/

        /**
         * Unicode block SYLOTI_NAGRI.
         * @stable ICU 3.4
         */
        public static final UnicodeBlock SYLOTI_NAGRI =
            new UnicodeBlock("SYLOTI_NAGRI", SYLOTI_NAGRI_ID); /*[A800]*/

        /**
         * Unicode block TIFINAGH.
         * @stable ICU 3.4
         */
        public static final UnicodeBlock TIFINAGH =
            new UnicodeBlock("TIFINAGH", TIFINAGH_ID); /*[2D30]*/

        /**
         * Unicode block VERTICAL_FORMS.
         * @stable ICU 3.4
         */
        public static final UnicodeBlock VERTICAL_FORMS =
            new UnicodeBlock("VERTICAL_FORMS", VERTICAL_FORMS_ID); /*[FE10]*/
1842
        // Blocks first exposed in ICU 3.6.
        // NOTE(review): these look like the Unicode 5.0 additions, and the
        // bracketed hex values are presumably block start code points - confirm.

        /**
         * Unicode block NKO.
         * @stable ICU 3.6
         */
        public static final UnicodeBlock NKO = new UnicodeBlock("NKO", NKO_ID); /*[07C0]*/
        /**
         * Unicode block BALINESE.
         * @stable ICU 3.6
         */
        public static final UnicodeBlock BALINESE =
            new UnicodeBlock("BALINESE", BALINESE_ID); /*[1B00]*/
        /**
         * Unicode block LATIN_EXTENDED_C.
         * @stable ICU 3.6
         */
        public static final UnicodeBlock LATIN_EXTENDED_C =
            new UnicodeBlock("LATIN_EXTENDED_C", LATIN_EXTENDED_C_ID); /*[2C60]*/
        /**
         * Unicode block LATIN_EXTENDED_D.
         * @stable ICU 3.6
         */
        public static final UnicodeBlock LATIN_EXTENDED_D =
            new UnicodeBlock("LATIN_EXTENDED_D", LATIN_EXTENDED_D_ID); /*[A720]*/
        /**
         * Unicode block PHAGS_PA.
         * @stable ICU 3.6
         */
        public static final UnicodeBlock PHAGS_PA =
            new UnicodeBlock("PHAGS_PA", PHAGS_PA_ID); /*[A840]*/
        /**
         * Unicode block PHOENICIAN.
         * @stable ICU 3.6
         */
        public static final UnicodeBlock PHOENICIAN =
            new UnicodeBlock("PHOENICIAN", PHOENICIAN_ID); /*[10900]*/
        /**
         * Unicode block CUNEIFORM.
         * @stable ICU 3.6
         */
        public static final UnicodeBlock CUNEIFORM =
            new UnicodeBlock("CUNEIFORM", CUNEIFORM_ID); /*[12000]*/
        /**
         * Unicode block CUNEIFORM_NUMBERS_AND_PUNCTUATION.
         * @stable ICU 3.6
         */
        public static final UnicodeBlock CUNEIFORM_NUMBERS_AND_PUNCTUATION =
            new UnicodeBlock("CUNEIFORM_NUMBERS_AND_PUNCTUATION",
                             CUNEIFORM_NUMBERS_AND_PUNCTUATION_ID); /*[12400]*/
        /**
         * Unicode block COUNTING_ROD_NUMERALS.
         * @stable ICU 3.6
         */
        public static final UnicodeBlock COUNTING_ROD_NUMERALS =
            new UnicodeBlock("COUNTING_ROD_NUMERALS", COUNTING_ROD_NUMERALS_ID); /*[1D360]*/
1888
        // Blocks first exposed in ICU 4.0.
        // NOTE(review): these look like the Unicode 5.1 additions, and the
        // bracketed hex values are presumably block start code points - confirm.

        /**
         * Unicode block SUNDANESE.
         * @stable ICU 4.0
         */
        public static final UnicodeBlock SUNDANESE =
            new UnicodeBlock("SUNDANESE", SUNDANESE_ID); /* [1B80] */

        /**
         * Unicode block LEPCHA.
         * @stable ICU 4.0
         */
        public static final UnicodeBlock LEPCHA =
            new UnicodeBlock("LEPCHA", LEPCHA_ID); /* [1C00] */

        /**
         * Unicode block OL_CHIKI.
         * @stable ICU 4.0
         */
        public static final UnicodeBlock OL_CHIKI =
            new UnicodeBlock("OL_CHIKI", OL_CHIKI_ID); /* [1C50] */

        /**
         * Unicode block CYRILLIC_EXTENDED_A.
         * @stable ICU 4.0
         */
        public static final UnicodeBlock CYRILLIC_EXTENDED_A =
            new UnicodeBlock("CYRILLIC_EXTENDED_A", CYRILLIC_EXTENDED_A_ID); /* [2DE0] */

        /**
         * Unicode block VAI.
         * @stable ICU 4.0
         */
        public static final UnicodeBlock VAI = new UnicodeBlock("VAI", VAI_ID); /* [A500] */

        /**
         * Unicode block CYRILLIC_EXTENDED_B.
         * @stable ICU 4.0
         */
        public static final UnicodeBlock CYRILLIC_EXTENDED_B =
            new UnicodeBlock("CYRILLIC_EXTENDED_B", CYRILLIC_EXTENDED_B_ID); /* [A640] */

        /**
         * Unicode block SAURASHTRA.
         * @stable ICU 4.0
         */
        public static final UnicodeBlock SAURASHTRA =
            new UnicodeBlock("SAURASHTRA", SAURASHTRA_ID); /* [A880] */

        /**
         * Unicode block KAYAH_LI.
         * @stable ICU 4.0
         */
        public static final UnicodeBlock KAYAH_LI =
            new UnicodeBlock("KAYAH_LI", KAYAH_LI_ID); /* [A900] */

        /**
         * Unicode block REJANG.
         * @stable ICU 4.0
         */
        public static final UnicodeBlock REJANG =
            new UnicodeBlock("REJANG", REJANG_ID); /* [A930] */

        /**
         * Unicode block CHAM.
         * @stable ICU 4.0
         */
        public static final UnicodeBlock CHAM =
            new UnicodeBlock("CHAM", CHAM_ID); /* [AA00] */

        /**
         * Unicode block ANCIENT_SYMBOLS.
         * @stable ICU 4.0
         */
        public static final UnicodeBlock ANCIENT_SYMBOLS =
            new UnicodeBlock("ANCIENT_SYMBOLS", ANCIENT_SYMBOLS_ID); /* [10190] */

        /**
         * Unicode block PHAISTOS_DISC.
         * @stable ICU 4.0
         */
        public static final UnicodeBlock PHAISTOS_DISC =
            new UnicodeBlock("PHAISTOS_DISC", PHAISTOS_DISC_ID); /* [101D0] */

        /**
         * Unicode block LYCIAN.
         * @stable ICU 4.0
         */
        public static final UnicodeBlock LYCIAN =
            new UnicodeBlock("LYCIAN", LYCIAN_ID); /* [10280] */

        /**
         * Unicode block CARIAN.
         * @stable ICU 4.0
         */
        public static final UnicodeBlock CARIAN =
            new UnicodeBlock("CARIAN", CARIAN_ID); /* [102A0] */

        /**
         * Unicode block LYDIAN.
         * @stable ICU 4.0
         */
        public static final UnicodeBlock LYDIAN =
            new UnicodeBlock("LYDIAN", LYDIAN_ID); /* [10920] */

        /**
         * Unicode block MAHJONG_TILES.
         * @stable ICU 4.0
         */
        public static final UnicodeBlock MAHJONG_TILES =
            new UnicodeBlock("MAHJONG_TILES", MAHJONG_TILES_ID); /* [1F000] */

        /**
         * Unicode block DOMINO_TILES.
         * @stable ICU 4.0
         */
        public static final UnicodeBlock DOMINO_TILES =
            new UnicodeBlock("DOMINO_TILES", DOMINO_TILES_ID); /* [1F030] */
1989
        /* New blocks in Unicode 5.2 */
        // NOTE(review): the bracketed hex value trailing each declaration is
        // presumably the first code point of the block - confirm against the UCD.

        /**
         * Unicode block SAMARITAN.
         * @stable ICU 4.4
         */
        public static final UnicodeBlock SAMARITAN =
            new UnicodeBlock("SAMARITAN", SAMARITAN_ID); /*[0800]*/
        /**
         * Unicode block UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED.
         * @stable ICU 4.4
         */
        public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED =
            new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED",
                             UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED_ID); /*[18B0]*/
        /**
         * Unicode block TAI_THAM.
         * @stable ICU 4.4
         */
        public static final UnicodeBlock TAI_THAM =
            new UnicodeBlock("TAI_THAM", TAI_THAM_ID); /*[1A20]*/
        /**
         * Unicode block VEDIC_EXTENSIONS.
         * @stable ICU 4.4
         */
        public static final UnicodeBlock VEDIC_EXTENSIONS =
            new UnicodeBlock("VEDIC_EXTENSIONS", VEDIC_EXTENSIONS_ID); /*[1CD0]*/
        /**
         * Unicode block LISU.
         * @stable ICU 4.4
         */
        public static final UnicodeBlock LISU =
            new UnicodeBlock("LISU", LISU_ID); /*[A4D0]*/
        /**
         * Unicode block BAMUM.
         * @stable ICU 4.4
         */
        public static final UnicodeBlock BAMUM =
            new UnicodeBlock("BAMUM", BAMUM_ID); /*[A6A0]*/
        /**
         * Unicode block COMMON_INDIC_NUMBER_FORMS.
         * @stable ICU 4.4
         */
        public static final UnicodeBlock COMMON_INDIC_NUMBER_FORMS =
            new UnicodeBlock("COMMON_INDIC_NUMBER_FORMS", COMMON_INDIC_NUMBER_FORMS_ID); /*[A830]*/
        /**
         * Unicode block DEVANAGARI_EXTENDED.
         * @stable ICU 4.4
         */
        public static final UnicodeBlock DEVANAGARI_EXTENDED =
            new UnicodeBlock("DEVANAGARI_EXTENDED", DEVANAGARI_EXTENDED_ID); /*[A8E0]*/
        /**
         * Unicode block HANGUL_JAMO_EXTENDED_A.
         * @stable ICU 4.4
         */
        public static final UnicodeBlock HANGUL_JAMO_EXTENDED_A =
            new UnicodeBlock("HANGUL_JAMO_EXTENDED_A", HANGUL_JAMO_EXTENDED_A_ID); /*[A960]*/
        /**
         * Unicode block JAVANESE.
         * @stable ICU 4.4
         */
        public static final UnicodeBlock JAVANESE =
            new UnicodeBlock("JAVANESE", JAVANESE_ID); /*[A980]*/
        /**
         * Unicode block MYANMAR_EXTENDED_A.
         * @stable ICU 4.4
         */
        public static final UnicodeBlock MYANMAR_EXTENDED_A =
            new UnicodeBlock("MYANMAR_EXTENDED_A", MYANMAR_EXTENDED_A_ID); /*[AA60]*/
        /**
         * Unicode block TAI_VIET.
         * @stable ICU 4.4
         */
        public static final UnicodeBlock TAI_VIET =
            new UnicodeBlock("TAI_VIET", TAI_VIET_ID); /*[AA80]*/
        /**
         * Unicode block MEETEI_MAYEK.
         * @stable ICU 4.4
         */
        public static final UnicodeBlock MEETEI_MAYEK =
            new UnicodeBlock("MEETEI_MAYEK", MEETEI_MAYEK_ID); /*[ABC0]*/
        /**
         * Unicode block HANGUL_JAMO_EXTENDED_B.
         * @stable ICU 4.4
         */
        public static final UnicodeBlock HANGUL_JAMO_EXTENDED_B =
            new UnicodeBlock("HANGUL_JAMO_EXTENDED_B", HANGUL_JAMO_EXTENDED_B_ID); /*[D7B0]*/
        /**
         * Unicode block IMPERIAL_ARAMAIC.
         * @stable ICU 4.4
         */
        public static final UnicodeBlock IMPERIAL_ARAMAIC =
            new UnicodeBlock("IMPERIAL_ARAMAIC", IMPERIAL_ARAMAIC_ID); /*[10840]*/
        /**
         * Unicode block OLD_SOUTH_ARABIAN.
         * @stable ICU 4.4
         */
        public static final UnicodeBlock OLD_SOUTH_ARABIAN =
            new UnicodeBlock("OLD_SOUTH_ARABIAN", OLD_SOUTH_ARABIAN_ID); /*[10A60]*/
        /**
         * Unicode block AVESTAN.
         * @stable ICU 4.4
         */
        public static final UnicodeBlock AVESTAN =
            new UnicodeBlock("AVESTAN", AVESTAN_ID); /*[10B00]*/
        /**
         * Unicode block INSCRIPTIONAL_PARTHIAN.
         * @stable ICU 4.4
         */
        public static final UnicodeBlock INSCRIPTIONAL_PARTHIAN =
            new UnicodeBlock("INSCRIPTIONAL_PARTHIAN", INSCRIPTIONAL_PARTHIAN_ID); /*[10B40]*/
        /**
         * Unicode block INSCRIPTIONAL_PAHLAVI.
         * @stable ICU 4.4
         */
        public static final UnicodeBlock INSCRIPTIONAL_PAHLAVI =
            new UnicodeBlock("INSCRIPTIONAL_PAHLAVI", INSCRIPTIONAL_PAHLAVI_ID); /*[10B60]*/
        /**
         * Unicode block OLD_TURKIC.
         * @stable ICU 4.4
         */
        public static final UnicodeBlock OLD_TURKIC =
            new UnicodeBlock("OLD_TURKIC", OLD_TURKIC_ID); /*[10C00]*/
        /**
         * Unicode block RUMI_NUMERAL_SYMBOLS.
         * @stable ICU 4.4
         */
        public static final UnicodeBlock RUMI_NUMERAL_SYMBOLS =
            new UnicodeBlock("RUMI_NUMERAL_SYMBOLS", RUMI_NUMERAL_SYMBOLS_ID); /*[10E60]*/
        /**
         * Unicode block KAITHI.
         * @stable ICU 4.4
         */
        public static final UnicodeBlock KAITHI =
            new UnicodeBlock("KAITHI", KAITHI_ID); /*[11080]*/
        /**
         * Unicode block EGYPTIAN_HIEROGLYPHS.
         * @stable ICU 4.4
         */
        public static final UnicodeBlock EGYPTIAN_HIEROGLYPHS =
            new UnicodeBlock("EGYPTIAN_HIEROGLYPHS", EGYPTIAN_HIEROGLYPHS_ID); /*[13000]*/
        /**
         * Unicode block ENCLOSED_ALPHANUMERIC_SUPPLEMENT.
         * @stable ICU 4.4
         */
        public static final UnicodeBlock ENCLOSED_ALPHANUMERIC_SUPPLEMENT =
            new UnicodeBlock("ENCLOSED_ALPHANUMERIC_SUPPLEMENT",
                             ENCLOSED_ALPHANUMERIC_SUPPLEMENT_ID); /*[1F100]*/
        /**
         * Unicode block ENCLOSED_IDEOGRAPHIC_SUPPLEMENT.
         * @stable ICU 4.4
         */
        public static final UnicodeBlock ENCLOSED_IDEOGRAPHIC_SUPPLEMENT =
            new UnicodeBlock("ENCLOSED_IDEOGRAPHIC_SUPPLEMENT",
                             ENCLOSED_IDEOGRAPHIC_SUPPLEMENT_ID); /*[1F200]*/
        /**
         * Unicode block CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C.
         * @stable ICU 4.4
         */
        public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C =
            new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C",
                             CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C_ID); /*[2A700]*/
2074
        /* New blocks in Unicode 6.0 */
        // NOTE(review): the bracketed hex value trailing each declaration is
        // presumably the first code point of the block - confirm against the UCD.

        /**
         * Unicode block MANDAIC.
         * @stable ICU 4.6
         */
        public static final UnicodeBlock MANDAIC =
            new UnicodeBlock("MANDAIC", MANDAIC_ID); /*[0840]*/
        /**
         * Unicode block BATAK.
         * @stable ICU 4.6
         */
        public static final UnicodeBlock BATAK =
            new UnicodeBlock("BATAK", BATAK_ID); /*[1BC0]*/
        /**
         * Unicode block ETHIOPIC_EXTENDED_A.
         * @stable ICU 4.6
         */
        public static final UnicodeBlock ETHIOPIC_EXTENDED_A =
            new UnicodeBlock("ETHIOPIC_EXTENDED_A", ETHIOPIC_EXTENDED_A_ID); /*[AB00]*/
        /**
         * Unicode block BRAHMI.
         * @stable ICU 4.6
         */
        public static final UnicodeBlock BRAHMI =
            new UnicodeBlock("BRAHMI", BRAHMI_ID); /*[11000]*/
        /**
         * Unicode block BAMUM_SUPPLEMENT.
         * @stable ICU 4.6
         */
        public static final UnicodeBlock BAMUM_SUPPLEMENT =
            new UnicodeBlock("BAMUM_SUPPLEMENT", BAMUM_SUPPLEMENT_ID); /*[16800]*/
        /**
         * Unicode block KANA_SUPPLEMENT.
         * @stable ICU 4.6
         */
        public static final UnicodeBlock KANA_SUPPLEMENT =
            new UnicodeBlock("KANA_SUPPLEMENT", KANA_SUPPLEMENT_ID); /*[1B000]*/
        /**
         * Unicode block PLAYING_CARDS.
         * @stable ICU 4.6
         */
        public static final UnicodeBlock PLAYING_CARDS =
            new UnicodeBlock("PLAYING_CARDS", PLAYING_CARDS_ID); /*[1F0A0]*/
        /**
         * Unicode block MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS.
         * @stable ICU 4.6
         */
        public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS =
            new UnicodeBlock("MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS",
                             MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS_ID); /*[1F300]*/
        /**
         * Unicode block EMOTICONS.
         * @stable ICU 4.6
         */
        public static final UnicodeBlock EMOTICONS =
            new UnicodeBlock("EMOTICONS", EMOTICONS_ID); /*[1F600]*/
        /**
         * Unicode block TRANSPORT_AND_MAP_SYMBOLS.
         * @stable ICU 4.6
         */
        public static final UnicodeBlock TRANSPORT_AND_MAP_SYMBOLS =
            new UnicodeBlock("TRANSPORT_AND_MAP_SYMBOLS", TRANSPORT_AND_MAP_SYMBOLS_ID); /*[1F680]*/
        /**
         * Unicode block ALCHEMICAL_SYMBOLS.
         * @stable ICU 4.6
         */
        public static final UnicodeBlock ALCHEMICAL_SYMBOLS =
            new UnicodeBlock("ALCHEMICAL_SYMBOLS", ALCHEMICAL_SYMBOLS_ID); /*[1F700]*/
        /**
         * Unicode block CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D.
         * @stable ICU 4.6
         */
        public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D =
            new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D",
                             CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D_ID); /*[2B740]*/
2115
        /**
         * Placeholder block returned when no real block applies:
         * {@link #getInstance(int)} returns it for IDs outside the block table,
         * and {@link #of(int)} returns it for code points above MAX_VALUE.
         * @stable ICU 2.4
         */
        public static final UnicodeBlock INVALID_CODE
            = new UnicodeBlock("INVALID_CODE", INVALID_CODE_ID);
2121
2122         static {
2123             for (int blockId = 0; blockId < COUNT; ++blockId) {
2124                 if (BLOCKS_[blockId] == null) {
2125                     throw new java.lang.IllegalStateException(
2126                         "UnicodeBlock.BLOCKS_[" + blockId + "] not initialized");
2127                 }
2128             }
2129         }
2130
2131         // public methods --------------------------------------------------
2132
2133         /**
2134          * {@icu} Returns the only instance of the UnicodeBlock with the argument ID.
2135          * If no such ID exists, a INVALID_CODE UnicodeBlock will be returned.
2136          * @param id UnicodeBlock ID
2137          * @return the only instance of the UnicodeBlock with the argument ID
2138          *         if it exists, otherwise a INVALID_CODE UnicodeBlock will be
2139          *         returned.
2140          * @stable ICU 2.4
2141          */
2142         public static UnicodeBlock getInstance(int id)
2143         {
2144             if (id >= 0 && id < BLOCKS_.length) {
2145                 return BLOCKS_[id];
2146             }
2147             return INVALID_CODE;
2148         }
2149
2150         /**
2151          * Returns the Unicode allocation block that contains the code point,
2152          * or null if the code point is not a member of a defined block.
2153          * @param ch code point to be tested
2154          * @return the Unicode allocation block that contains the code point
2155          * @stable ICU 2.4
2156          */
2157         public static UnicodeBlock of(int ch)
2158         {
2159             if (ch > MAX_VALUE) {
2160                 return INVALID_CODE;
2161             }
2162
2163             return UnicodeBlock.getInstance(
2164                 UCharacterProperty.INSTANCE.getIntPropertyValue(ch, UProperty.BLOCK));
2165         }
2166
2167         /**
2168          * Cover the JDK 1.5 API.  Return the Unicode block with the
2169          * given name. {@icunote} Unlike JDK 1.5, this only matches
2170          * against the official UCD name and the Java block name
2171          * (ignoring case).
2172          * @param blockName the name of the block to match
2173          * @return the UnicodeBlock with that name
2174          * @throws IllegalArgumentException if the blockName could not be matched
2175          * @stable ICU 3.0
2176          */
2177         public static final UnicodeBlock forName(String blockName) {
2178             Map<String, UnicodeBlock> m = null;
2179             if (mref != null) {
2180                 m = mref.get();
2181             }
2182             if (m == null) {
2183                 m = new HashMap<String, UnicodeBlock>(BLOCKS_.length);
2184                 for (int i = 0; i < BLOCKS_.length; ++i) {
2185                     UnicodeBlock b = BLOCKS_[i];
2186                     String name = trimBlockName(
2187                         getPropertyValueName(UProperty.BLOCK, b.getID(),
2188                                              UProperty.NameChoice.LONG));
2189                     m.put(name, b);
2190                 }
2191                 mref = new SoftReference<Map<String, UnicodeBlock>>(m);
2192             }
2193             UnicodeBlock b = m.get(trimBlockName(blockName));
2194             if (b == null) {
2195                 throw new IllegalArgumentException();
2196             }
2197             return b;
2198         }
2199         private static SoftReference<Map<String, UnicodeBlock>> mref;
2200
2201         private static String trimBlockName(String name) {
2202             String upper = name.toUpperCase();
2203             StringBuilder result = new StringBuilder(upper.length());
2204             for (int i = 0; i < upper.length(); i++) {
2205                 char c = upper.charAt(i);
2206                 if (c != ' ' && c != '_' && c != '-') {
2207                     result.append(c);
2208                 }
2209             }
2210             return result.toString();
2211         }
2212
2213         /**
2214          * {icu} Returns the type ID of this Unicode block
2215          * @return integer type ID of this Unicode block
2216          * @stable ICU 2.4
2217          */
2218         public int getID()
2219         {
2220             return m_id_;
2221         }
2222
2223         // private data members ---------------------------------------------
2224
        /**
         * Identification code for this UnicodeBlock. Assigned by the private
         * constructor and returned by {@link #getID()}.
         */
        private int m_id_;
2229
2230         // private constructor ----------------------------------------------
2231
2232         /**
2233          * UnicodeBlock constructor
2234          * @param name name of this UnicodeBlock
2235          * @param id unique id of this UnicodeBlock
2236          * @exception NullPointerException if name is <code>null</code>
2237          */
2238         private UnicodeBlock(String name, int id)
2239         {
2240             super(name);
2241             m_id_ = id;
2242             if (id >= 0) {
2243                 BLOCKS_[id] = this;
2244             }
2245         }
2246     }
2247
2248     /**
2249      * East Asian Width constants.
2250      * @see UProperty#EAST_ASIAN_WIDTH
2251      * @see UCharacter#getIntPropertyValue
2252      * @stable ICU 2.4
2253      */
2254     public static interface EastAsianWidth
2255     {
2256         /**
2257          * @stable ICU 2.4
2258          */
2259         public static final int NEUTRAL = 0;
2260         /**
2261          * @stable ICU 2.4
2262          */
2263         public static final int AMBIGUOUS = 1;
2264         /**
2265          * @stable ICU 2.4
2266          */
2267         public static final int HALFWIDTH = 2;
2268         /**
2269          * @stable ICU 2.4
2270          */
2271         public static final int FULLWIDTH = 3;
2272         /**
2273          * @stable ICU 2.4
2274          */
2275         public static final int NARROW = 4;
2276         /**
2277          * @stable ICU 2.4
2278          */
2279         public static final int WIDE = 5;
2280         /**
2281          * @stable ICU 2.4
2282          */
2283         public static final int COUNT = 6;
2284     }
2285
2286     /**
2287      * Decomposition Type constants.
2288      * @see UProperty#DECOMPOSITION_TYPE
2289      * @stable ICU 2.4
2290      */
2291     public static interface DecompositionType
2292     {
2293         /**
2294          * @stable ICU 2.4
2295          */
2296         public static final int NONE = 0;
2297         /**
2298          * @stable ICU 2.4
2299          */
2300         public static final int CANONICAL = 1;
2301         /**
2302          * @stable ICU 2.4
2303          */
2304         public static final int COMPAT = 2;
2305         /**
2306          * @stable ICU 2.4
2307          */
2308         public static final int CIRCLE = 3;
2309         /**
2310          * @stable ICU 2.4
2311          */
2312         public static final int FINAL = 4;
2313         /**
2314          * @stable ICU 2.4
2315          */
2316         public static final int FONT = 5;
2317         /**
2318          * @stable ICU 2.4
2319          */
2320         public static final int FRACTION = 6;
2321         /**
2322          * @stable ICU 2.4
2323          */
2324         public static final int INITIAL = 7;
2325         /**
2326          * @stable ICU 2.4
2327          */
2328         public static final int ISOLATED = 8;
2329         /**
2330          * @stable ICU 2.4
2331          */
2332         public static final int MEDIAL = 9;
2333         /**
2334          * @stable ICU 2.4
2335          */
2336         public static final int NARROW = 10;
2337         /**
2338          * @stable ICU 2.4
2339          */
2340         public static final int NOBREAK = 11;
2341         /**
2342          * @stable ICU 2.4
2343          */
2344         public static final int SMALL = 12;
2345         /**
2346          * @stable ICU 2.4
2347          */
2348         public static final int SQUARE = 13;
2349         /**
2350          * @stable ICU 2.4
2351          */
2352         public static final int SUB = 14;
2353         /**
2354          * @stable ICU 2.4
2355          */
2356         public static final int SUPER = 15;
2357         /**
2358          * @stable ICU 2.4
2359          */
2360         public static final int VERTICAL = 16;
2361         /**
2362          * @stable ICU 2.4
2363          */
2364         public static final int WIDE = 17;
2365         /**
2366          * @stable ICU 2.4
2367          */
2368         public static final int COUNT = 18;
2369     }
2370
2371     /**
2372      * Joining Type constants.
2373      * @see UProperty#JOINING_TYPE
2374      * @stable ICU 2.4
2375      */
2376     public static interface JoiningType
2377     {
2378         /**
2379          * @stable ICU 2.4
2380          */
2381         public static final int NON_JOINING = 0;
2382         /**
2383          * @stable ICU 2.4
2384          */
2385         public static final int JOIN_CAUSING = 1;
2386         /**
2387          * @stable ICU 2.4
2388          */
2389         public static final int DUAL_JOINING = 2;
2390         /**
2391          * @stable ICU 2.4
2392          */
2393         public static final int LEFT_JOINING = 3;
2394         /**
2395          * @stable ICU 2.4
2396          */
2397         public static final int RIGHT_JOINING = 4;
2398         /**
2399          * @stable ICU 2.4
2400          */
2401         public static final int TRANSPARENT = 5;
2402         /**
2403          * @stable ICU 2.4
2404          */
2405         public static final int COUNT = 6;
2406     }
2407
2408     /**
2409      * Joining Group constants.
2410      * @see UProperty#JOINING_GROUP
2411      * @stable ICU 2.4
2412      */
2413     public static interface JoiningGroup
2414     {
2415         /**
2416          * @stable ICU 2.4
2417          */
2418         public static final int NO_JOINING_GROUP = 0;
2419         /**
2420          * @stable ICU 2.4
2421          */
2422         public static final int AIN = 1;
2423         /**
2424          * @stable ICU 2.4
2425          */
2426         public static final int ALAPH = 2;
2427         /**
2428          * @stable ICU 2.4
2429          */
2430         public static final int ALEF = 3;
2431         /**
2432          * @stable ICU 2.4
2433          */
2434         public static final int BEH = 4;
2435         /**
2436          * @stable ICU 2.4
2437          */
2438         public static final int BETH = 5;
2439         /**
2440          * @stable ICU 2.4
2441          */
2442         public static final int DAL = 6;
2443         /**
2444          * @stable ICU 2.4
2445          */
2446         public static final int DALATH_RISH = 7;
2447         /**
2448          * @stable ICU 2.4
2449          */
2450         public static final int E = 8;
2451         /**
2452          * @stable ICU 2.4
2453          */
2454         public static final int FEH = 9;
2455         /**
2456          * @stable ICU 2.4
2457          */
2458         public static final int FINAL_SEMKATH = 10;
2459         /**
2460          * @stable ICU 2.4
2461          */
2462         public static final int GAF = 11;
2463         /**
2464          * @stable ICU 2.4
2465          */
2466         public static final int GAMAL = 12;
2467         /**
2468          * @stable ICU 2.4
2469          */
2470         public static final int HAH = 13;
2471         /** @stable ICU 4.6 */
2472         public static final int TEH_MARBUTA_GOAL = 14;
2473         /**
2474          * @stable ICU 2.4
2475          */
2476         public static final int HAMZA_ON_HEH_GOAL = TEH_MARBUTA_GOAL;
2477         /**
2478          * @stable ICU 2.4
2479          */
2480         public static final int HE = 15;
2481         /**
2482          * @stable ICU 2.4
2483          */
2484         public static final int HEH = 16;
2485         /**
2486          * @stable ICU 2.4
2487          */
2488         public static final int HEH_GOAL = 17;
2489         /**
2490          * @stable ICU 2.4
2491          */
2492         public static final int HETH = 18;
2493         /**
2494          * @stable ICU 2.4
2495          */
2496         public static final int KAF = 19;
2497         /**
2498          * @stable ICU 2.4
2499          */
2500         public static final int KAPH = 20;
2501         /**
2502          * @stable ICU 2.4
2503          */
2504         public static final int KNOTTED_HEH = 21;
2505         /**
2506          * @stable ICU 2.4
2507          */
2508         public static final int LAM = 22;
2509         /**
2510          * @stable ICU 2.4
2511          */
2512         public static final int LAMADH = 23;
2513         /**
2514          * @stable ICU 2.4
2515          */
2516         public static final int MEEM = 24;
2517         /**
2518          * @stable ICU 2.4
2519          */
2520         public static final int MIM = 25;
2521         /**
2522          * @stable ICU 2.4
2523          */
2524         public static final int NOON = 26;
2525         /**
2526          * @stable ICU 2.4
2527          */
2528         public static final int NUN = 27;
2529         /**
2530          * @stable ICU 2.4
2531          */
2532         public static final int PE = 28;
2533         /**
2534          * @stable ICU 2.4
2535          */
2536         public static final int QAF = 29;
2537         /**
2538          * @stable ICU 2.4
2539          */
2540         public static final int QAPH = 30;
2541         /**
2542          * @stable ICU 2.4
2543          */
2544         public static final int REH = 31;
2545         /**
2546          * @stable ICU 2.4
2547          */
2548         public static final int REVERSED_PE = 32;
2549         /**
2550          * @stable ICU 2.4
2551          */
2552         public static final int SAD = 33;
2553         /**
2554          * @stable ICU 2.4
2555          */
2556         public static final int SADHE = 34;
2557         /**
2558          * @stable ICU 2.4
2559          */
2560         public static final int SEEN = 35;
2561         /**
2562          * @stable ICU 2.4
2563          */
2564         public static final int SEMKATH = 36;
2565         /**
2566          * @stable ICU 2.4
2567          */
2568         public static final int SHIN = 37;
2569         /**
2570          * @stable ICU 2.4
2571          */
2572         public static final int SWASH_KAF = 38;
2573         /**
2574          * @stable ICU 2.4
2575          */
2576         public static final int SYRIAC_WAW = 39;
2577         /**
2578          * @stable ICU 2.4
2579          */
2580         public static final int TAH = 40;
2581         /**
2582          * @stable ICU 2.4
2583          */
2584         public static final int TAW = 41;
2585         /**
2586          * @stable ICU 2.4
2587          */
2588         public static final int TEH_MARBUTA = 42;
2589         /**
2590          * @stable ICU 2.4
2591          */
2592         public static final int TETH = 43;
2593         /**
2594          * @stable ICU 2.4
2595          */
2596         public static final int WAW = 44;
2597         /**
2598          * @stable ICU 2.4
2599          */
2600         public static final int YEH = 45;
2601         /**
2602          * @stable ICU 2.4
2603          */
2604         public static final int YEH_BARREE = 46;
2605         /**
2606          * @stable ICU 2.4
2607          */
2608         public static final int YEH_WITH_TAIL = 47;
2609         /**
2610          * @stable ICU 2.4
2611          */
2612         public static final int YUDH = 48;
2613         /**
2614          * @stable ICU 2.4
2615          */
2616         public static final int YUDH_HE = 49;
2617         /**
2618          * @stable ICU 2.4
2619          */
2620         public static final int ZAIN = 50;
2621         /**
2622          * @stable ICU 2.6
2623          */
2624         public static final int FE = 51;
2625         /**
2626          * @stable ICU 2.6
2627          */
2628         public static final int KHAPH = 52;
2629         /**
2630          * @stable ICU 2.6
2631          */
2632         public static final int ZHAIN = 53;
2633         /**
2634          * @stable ICU 4.0
2635          */
2636         public static final int BURUSHASKI_YEH_BARREE = 54;
2637         /** @stable ICU 4.4 */
2638         public static final int FARSI_YEH = 55;
2639         /** @stable ICU 4.4 */
2640         public static final int NYA = 56;
2641         /**
2642          * @stable ICU 2.4
2643          */
2644         public static final int COUNT = 57;
2645     }
2646
2647     /**
2648      * Grapheme Cluster Break constants.
2649      * @see UProperty#GRAPHEME_CLUSTER_BREAK
2650      * @stable ICU 3.4
2651      */
2652     public static interface GraphemeClusterBreak {
2653         /**
2654          * @stable ICU 3.4
2655          */
2656         public static final int OTHER = 0;
2657         /**
2658          * @stable ICU 3.4
2659          */
2660         public static final int CONTROL = 1;
2661         /**
2662          * @stable ICU 3.4
2663          */
2664         public static final int CR = 2;
2665         /**
2666          * @stable ICU 3.4
2667          */
2668         public static final int EXTEND = 3;
2669         /**
2670          * @stable ICU 3.4
2671          */
2672         public static final int L = 4;
2673         /**
2674          * @stable ICU 3.4
2675          */
2676         public static final int LF = 5;
2677         /**
2678          * @stable ICU 3.4
2679          */
2680         public static final int LV = 6;
2681         /**
2682          * @stable ICU 3.4
2683          */
2684         public static final int LVT = 7;
2685         /**
2686          * @stable ICU 3.4
2687          */
2688         public static final int T = 8;
2689         /**
2690          * @stable ICU 3.4
2691          */
2692         public static final int V = 9;
2693         /**
2694          * @stable ICU 4.0
2695          */
2696         public static final int SPACING_MARK = 10;
2697         /**
2698          * @stable ICU 4.0
2699          */
2700         public static final int PREPEND = 11;
2701         /**
2702          * @stable ICU 3.4
2703          */
2704         public static final int COUNT = 12;
2705     }
2706
2707     /**
2708      * Word Break constants.
2709      * @see UProperty#WORD_BREAK
2710      * @stable ICU 3.4
2711      */
2712     public static interface WordBreak {
2713         /**
2714          * @stable ICU 3.8
2715          */
2716         public static final int OTHER = 0;
2717         /**
2718          * @stable ICU 3.8
2719          */
2720         public static final int ALETTER = 1;
2721         /**
2722          * @stable ICU 3.8
2723          */
2724         public static final int FORMAT = 2;
2725         /**
2726          * @stable ICU 3.8
2727          */
2728         public static final int KATAKANA = 3;
2729         /**
2730          * @stable ICU 3.8
2731          */
2732         public static final int MIDLETTER = 4;
2733         /**
2734          * @stable ICU 3.8
2735          */
2736         public static final int MIDNUM = 5;
2737         /**
2738          * @stable ICU 3.8
2739          */
2740         public static final int NUMERIC = 6;
2741         /**
2742          * @stable ICU 3.8
2743          */
2744         public static final int EXTENDNUMLET = 7;
2745         /**
2746          * @stable ICU 4.0
2747          */
2748         public static final int CR = 8;
2749         /**
2750          * @stable ICU 4.0
2751          */
2752         public static final int EXTEND = 9;
2753         /**
2754          * @stable ICU 4.0
2755          */
2756         public static final int LF = 10;
2757         /**
2758          * @stable ICU 4.0
2759          */
2760         public static final int MIDNUMLET = 11;
2761         /**
2762          * @stable ICU 4.0
2763          */
2764         public static final int NEWLINE = 12;
2765         /**
2766          * @stable ICU 4.0
2767          */
2768         public static final int COUNT = 13;
2769     }
2770
2771     /**
2772      * Sentence Break constants.
2773      * @see UProperty#SENTENCE_BREAK
2774      * @stable ICU 3.4
2775      */
2776     public static interface SentenceBreak {
2777         /**
2778          * @stable ICU 3.8
2779          */
2780         public static final int OTHER = 0;
2781         /**
2782          * @stable ICU 3.8
2783          */
2784         public static final int ATERM = 1;
2785         /**
2786          * @stable ICU 3.8
2787          */
2788         public static final int CLOSE = 2;
2789         /**
2790          * @stable ICU 3.8
2791          */
2792         public static final int FORMAT = 3;
2793         /**
2794          * @stable ICU 3.8
2795          */
2796         public static final int LOWER = 4;
2797         /**
2798          * @stable ICU 3.8
2799          */
2800         public static final int NUMERIC = 5;
2801         /**
2802          * @stable ICU 3.8
2803          */
2804         public static final int OLETTER = 6;
2805         /**
2806          * @stable ICU 3.8
2807          */
2808         public static final int SEP = 7;
2809         /**
2810          * @stable ICU 3.8
2811          */
2812         public static final int SP = 8;
2813         /**
2814          * @stable ICU 3.8
2815          */
2816         public static final int STERM = 9;
2817         /**
2818          * @stable ICU 3.8
2819          */
2820         public static final int UPPER = 10;
2821         /**
2822          * @stable ICU 4.0
2823          */
2824         public static final int CR = 11;
2825         /**
2826          * @stable ICU 4.0
2827          */
2828         public static final int EXTEND = 12;
2829         /**
2830          * @stable ICU 4.0
2831          */
2832         public static final int LF = 13;
2833         /**
2834          * @stable ICU 4.0
2835          */
2836         public static final int SCONTINUE = 14;
2837         /**
2838          * @stable ICU 4.0
2839          */
2840         public static final int COUNT = 15;
2841     }
2842
2843     /**
2844      * Line Break constants.
2845      * @see UProperty#LINE_BREAK
2846      * @stable ICU 2.4
2847      */
2848     public static interface LineBreak
2849     {
2850         /**
2851          * @stable ICU 2.4
2852          */
2853         public static final int UNKNOWN = 0;
2854         /**
2855          * @stable ICU 2.4
2856          */
2857         public static final int AMBIGUOUS = 1;
2858         /**
2859          * @stable ICU 2.4
2860          */
2861         public static final int ALPHABETIC = 2;
2862         /**
2863          * @stable ICU 2.4
2864          */
2865         public static final int BREAK_BOTH = 3;
2866         /**
2867          * @stable ICU 2.4
2868          */
2869         public static final int BREAK_AFTER = 4;
2870         /**
2871          * @stable ICU 2.4
2872          */
2873         public static final int BREAK_BEFORE = 5;
2874         /**
2875          * @stable ICU 2.4
2876          */
2877         public static final int MANDATORY_BREAK = 6;
2878         /**
2879          * @stable ICU 2.4
2880          */
2881         public static final int CONTINGENT_BREAK = 7;
2882         /**
2883          * @stable ICU 2.4
2884          */
2885         public static final int CLOSE_PUNCTUATION = 8;
2886         /**
2887          * @stable ICU 2.4
2888          */
2889         public static final int COMBINING_MARK = 9;
2890         /**
2891          * @stable ICU 2.4
2892          */
2893         public static final int CARRIAGE_RETURN = 10;
2894         /**
2895          * @stable ICU 2.4
2896          */
2897         public static final int EXCLAMATION = 11;
2898         /**
2899          * @stable ICU 2.4
2900          */
2901         public static final int GLUE = 12;
2902         /**
2903          * @stable ICU 2.4
2904          */
2905         public static final int HYPHEN = 13;
2906         /**
2907          * @stable ICU 2.4
2908          */
2909         public static final int IDEOGRAPHIC = 14;
2910         /**
2911          * @see #INSEPARABLE
2912          * @stable ICU 2.4
2913          */
2914         public static final int INSEPERABLE = 15;
2915         /**
2916          * Renamed from the misspelled "inseperable" in Unicode 4.0.1.
2917          * @stable ICU 3.0
2918          */
2919         public static final int INSEPARABLE = 15;
2920         /**
2921          * @stable ICU 2.4
2922          */
2923         public static final int INFIX_NUMERIC = 16;
2924         /**
2925          * @stable ICU 2.4
2926          */
2927         public static final int LINE_FEED = 17;
2928         /**
2929          * @stable ICU 2.4
2930          */
2931         public static final int NONSTARTER = 18;
2932         /**
2933          * @stable ICU 2.4
2934          */
2935         public static final int NUMERIC = 19;
2936         /**
2937          * @stable ICU 2.4
2938          */
2939         public static final int OPEN_PUNCTUATION = 20;
2940         /**
2941          * @stable ICU 2.4
2942          */
2943         public static final int POSTFIX_NUMERIC = 21;
2944         /**
2945          * @stable ICU 2.4
2946          */
2947         public static final int PREFIX_NUMERIC = 22;
2948         /**
2949          * @stable ICU 2.4
2950          */
2951         public static final int QUOTATION = 23;
2952         /**
2953          * @stable ICU 2.4
2954          */
2955         public static final int COMPLEX_CONTEXT = 24;
2956         /**
2957          * @stable ICU 2.4
2958          */
2959         public static final int SURROGATE = 25;
2960         /**
2961          * @stable ICU 2.4
2962          */
2963         public static final int SPACE = 26;
2964         /**
2965          * @stable ICU 2.4
2966          */
2967         public static final int BREAK_SYMBOLS = 27;
2968         /**
2969          * @stable ICU 2.4
2970          */
2971         public static final int ZWSPACE = 28;
2972
2973         /**
2974          * @stable ICU 2.6
2975          */
2976         public static final int NEXT_LINE = 29;       /*[NL]*/
2977
2978         /* from here on: new in Unicode 4/ICU 2.6 */
2979
2980         /**
2981          * @stable ICU 2.6
2982          */
2983         public static final int  WORD_JOINER = 30;      /*[WJ]*/
2984
2985         /* from here on: new in Unicode 4.1/ICU 3.4 */
2986
2987         /**
2988          * @stable ICU 3.4
2989          */
2990         public static final int  H2 = 31;
2991         /**
2992          * @stable ICU 3.4
2993          */
2994         public static final int  H3 = 32;
2995         /**
2996          * @stable ICU 3.4
2997          */
2998         public static final int  JL = 33;
2999         /**
3000          * @stable ICU 3.4
3001          */
3002         public static final int  JT = 34;
3003         /**
3004          * @stable ICU 3.4
3005          */
3006         public static final int  JV = 35;
3007         /** @stable ICU 4.4 */
3008         public static final int  CLOSE_PARENTHESIS = 36; /*[CP]*/
3009
3010         /* new in Unicode 5.2/ICU 4.4 */
3011
3012         /**
3013          * @stable ICU 2.4
3014          */
3015         public static final int COUNT = 37;
3016     }
3017
3018     /**
3019      * Numeric Type constants.
3020      * @see UProperty#NUMERIC_TYPE
3021      * @stable ICU 2.4
3022      */
3023     public static interface NumericType
3024     {
3025         /**
3026          * @stable ICU 2.4
3027          */
3028         public static final int NONE = 0;
3029         /**
3030          * @stable ICU 2.4
3031          */
3032         public static final int DECIMAL = 1;
3033         /**
3034          * @stable ICU 2.4
3035          */
3036         public static final int DIGIT = 2;
3037         /**
3038          * @stable ICU 2.4
3039          */
3040         public static final int NUMERIC = 3;
3041         /**
3042          * @stable ICU 2.4
3043          */
3044         public static final int COUNT = 4;
3045     }
3046
3047     /**
3048      * Hangul Syllable Type constants.
3049      *
3050      * @see UProperty#HANGUL_SYLLABLE_TYPE
3051      * @stable ICU 2.6
3052      */
3053     public static interface HangulSyllableType
3054     {
3055         /**
3056          * @stable ICU 2.6
3057          */
3058         public static final int NOT_APPLICABLE      = 0;   /*[NA]*/ /*See note !!*/
3059         /**
3060          * @stable ICU 2.6
3061          */
3062         public static final int LEADING_JAMO        = 1;   /*[L]*/
3063         /**
3064          * @stable ICU 2.6
3065          */
3066         public static final int VOWEL_JAMO          = 2;   /*[V]*/
3067         /**
3068          * @stable ICU 2.6
3069          */
3070         public static final int TRAILING_JAMO       = 3;   /*[T]*/
3071         /**
3072          * @stable ICU 2.6
3073          */
3074         public static final int LV_SYLLABLE         = 4;   /*[LV]*/
3075         /**
3076          * @stable ICU 2.6
3077          */
3078         public static final int LVT_SYLLABLE        = 5;   /*[LVT]*/
3079         /**
3080          * @stable ICU 2.6
3081          */
3082         public static final int COUNT               = 6;
3083     }
3084
3085     // public data members -----------------------------------------------
3086
3087     /**
3088      * The lowest Unicode code point value.
3089      * @stable ICU 2.1
3090      */
3091     public static final int MIN_VALUE = UTF16.CODEPOINT_MIN_VALUE;
3092
3093     /**
3094      * The highest Unicode code point value (scalar value) according to the
3095      * Unicode Standard.
3096      * This is a 21-bit value (21 bits, rounded up).<br>
3097      * Up-to-date Unicode implementation of java.lang.Character.MAX_VALUE
3098      * @stable ICU 2.1
3099      */
3100     public static final int MAX_VALUE = UTF16.CODEPOINT_MAX_VALUE;
3101
3102     /**
3103      * The minimum value for Supplementary code points
3104      * @stable ICU 2.1
3105      */
3106     public static final int SUPPLEMENTARY_MIN_VALUE =
3107         UTF16.SUPPLEMENTARY_MIN_VALUE;
3108
3109     /**
3110      * Unicode value used when translating into Unicode encoding form and there
3111      * is no existing character.
3112      * @stable ICU 2.1
3113      */
3114     public static final int REPLACEMENT_CHAR = '\uFFFD';
3115
3116     /**
3117      * Special value that is returned by getUnicodeNumericValue(int) when no
3118      * numeric value is defined for a code point.
3119      * @stable ICU 2.4
3120      * @see #getUnicodeNumericValue
3121      */
3122     public static final double NO_NUMERIC_VALUE = -123456789;
3123
3124     /**
3125      * Compatibility constant for Java Character's MIN_RADIX.
3126      * @stable ICU 3.4
3127      */
3128     public static final int MIN_RADIX = java.lang.Character.MIN_RADIX;
3129
3130     /**
3131      * Compatibility constant for Java Character's MAX_RADIX.
3132      * @stable ICU 3.4
3133      */
3134     public static final int MAX_RADIX = java.lang.Character.MAX_RADIX;
3135
3136     /**
3137      * Do not lowercase non-initial parts of words when titlecasing.
3138      * Option bit for titlecasing APIs that take an options bit set.
3139      *
3140      * By default, titlecasing will titlecase the first cased character
3141      * of a word and lowercase all other characters.
3142      * With this option, the other characters will not be modified.
3143      *
3144      * @see #toTitleCase
3145      * @stable ICU 3.8
3146      */
3147     public static final int TITLECASE_NO_LOWERCASE = 0x100;
3148
3149     /**
3150      * Do not adjust the titlecasing indexes from BreakIterator::next() indexes;
3151      * titlecase exactly the characters at breaks from the iterator.
3152      * Option bit for titlecasing APIs that take an options bit set.
3153      *
3154      * By default, titlecasing will take each break iterator index,
3155      * adjust it by looking for the next cased character, and titlecase that one.
3156      * Other characters are lowercased.
3157      *
3158      * This follows Unicode 4 & 5 section 3.13 Default Case Operations:
3159      *
3160      * R3  toTitlecase(X): Find the word boundaries based on Unicode Standard Annex
3161      * #29, "Text Boundaries." Between each pair of word boundaries, find the first
3162      * cased character F. If F exists, map F to default_title(F); then map each
3163      * subsequent character C to default_lower(C).
3164      *
3165      * @see #toTitleCase
3166      * @see #TITLECASE_NO_LOWERCASE
3167      * @stable ICU 3.8
3168      */
3169     public static final int TITLECASE_NO_BREAK_ADJUSTMENT = 0x200;
3170
3171     // public methods ----------------------------------------------------
3172
3173     /**
3174      * Returnss the numeric value of a decimal digit code point.
3175      * <br>This method observes the semantics of
3176      * <code>java.lang.Character.digit()</code>.  Note that this
3177      * will return positive values for code points for which isDigit
3178      * returns false, just like java.lang.Character.
3179      * <br><em>Semantic Change:</em> In release 1.3.1 and
3180      * prior, this did not treat the European letters as having a
3181      * digit value, and also treated numeric letters and other numbers as
3182      * digits.
3183      * This has been changed to conform to the java semantics.
3184      * <br>A code point is a valid digit if and only if:
3185      * <ul>
3186      *   <li>ch is a decimal digit or one of the european letters, and
3187      *   <li>the value of ch is less than the specified radix.
3188      * </ul>
3189      * @param ch the code point to query
3190      * @param radix the radix
3191      * @return the numeric value represented by the code point in the
3192      * specified radix, or -1 if the code point is not a decimal digit
3193      * or if its value is too large for the radix
3194      * @stable ICU 2.1
3195      */
3196     public static int digit(int ch, int radix)
3197     {
3198         if (2 <= radix && radix <= 36) {
3199             int value = digit(ch);
3200             if (value < 0) {
3201                 // ch is not a decimal digit, try latin letters
3202                 value = UCharacterProperty.getEuropeanDigit(ch);
3203             }
3204             return (value < radix) ? value : -1;
3205         } else {
3206             return -1;  // invalid radix
3207         }
3208     }
3209
3210     /**
3211      * Returnss the numeric value of a decimal digit code point.
3212      * <br>This is a convenience overload of <code>digit(int, int)</code>
3213      * that provides a decimal radix.
3214      * <br><em>Semantic Change:</em> In release 1.3.1 and prior, this
3215      * treated numeric letters and other numbers as digits.  This has
3216      * been changed to conform to the java semantics.
3217      * @param ch the code point to query
3218      * @return the numeric value represented by the code point,
3219      * or -1 if the code point is not a decimal digit or if its
3220      * value is too large for a decimal radix
3221      * @stable ICU 2.1
3222      */
3223     public static int digit(int ch)
3224     {
3225         return UCharacterProperty.INSTANCE.digit(ch);
3226     }
3227
3228     /**
3229      * Returns the numeric value of the code point as a nonnegative
3230      * integer.
3231      * <br>If the code point does not have a numeric value, then -1 is returned.
3232      * <br>
3233      * If the code point has a numeric value that cannot be represented as a
3234      * nonnegative integer (for example, a fractional value), then -2 is
3235      * returned.
3236      * @param ch the code point to query
3237      * @return the numeric value of the code point, or -1 if it has no numeric
3238      * value, or -2 if it has a numeric value that cannot be represented as a
3239      * nonnegative integer
3240      * @stable ICU 2.1
3241      */
3242     public static int getNumericValue(int ch)
3243     {
3244         return UCharacterProperty.INSTANCE.getNumericValue(ch);
3245     }
3246
3247     /**
3248      * {@icu} Returns the numeric value for a Unicode code point as defined in the
3249      * Unicode Character Database.</p>
3250      * <p>A "double" return type is necessary because some numeric values are
3251      * fractions, negative, or too large for int.</p>
3252      * <p>For characters without any numeric values in the Unicode Character
3253      * Database, this function will return NO_NUMERIC_VALUE.</p>
3254      * <p><em>API Change:</em> In release 2.2 and prior, this API has a
3255      * return type int and returns -1 when the argument ch does not have a
3256      * corresponding numeric value. This has been changed to synch with ICU4C
3257      * </p>
3258      * This corresponds to the ICU4C function u_getNumericValue.
3259      * @param ch Code point to get the numeric value for.
3260      * @return numeric value of ch, or NO_NUMERIC_VALUE if none is defined.
3261      * @stable ICU 2.4
3262      */
3263     public static double getUnicodeNumericValue(int ch)
3264     {
3265         return UCharacterProperty.INSTANCE.getUnicodeNumericValue(ch);
3266     }
3267
3268     /**
3269      * Compatibility override of Java deprecated method.  This
3270      * method will always remain deprecated.
3271      * Same as java.lang.Character.isSpace().
3272      * @param ch the code point
3273      * @return true if the code point is a space character as
3274      * defined by java.lang.Character.isSpace.
3275      * @deprecated ICU 3.4 (Java)
3276      */
3277     public static boolean isSpace(int ch) {
3278         return ch <= 0x20 &&
3279             (ch == 0x20 || ch == 0x09 || ch == 0x0a || ch == 0x0c || ch == 0x0d);
3280     }
3281
3282     /**
3283      * Returns a value indicating a code point's Unicode category.
3284      * Up-to-date Unicode implementation of java.lang.Character.getType()
3285      * except for the above mentioned code points that had their category
3286      * changed.<br>
3287      * Return results are constants from the interface
3288      * <a href=UCharacterCategory.html>UCharacterCategory</a><br>
3289      * <em>NOTE:</em> the UCharacterCategory values are <em>not</em> compatible with
3290      * those returned by java.lang.Character.getType.  UCharacterCategory values
3291      * match the ones used in ICU4C, while java.lang.Character type
3292      * values, though similar, skip the value 17.</p>
3293      * @param ch code point whose type is to be determined
3294      * @return category which is a value of UCharacterCategory
3295      * @stable ICU 2.1
3296      */
3297     public static int getType(int ch)
3298     {
3299         return UCharacterProperty.INSTANCE.getType(ch);
3300     }
3301
3302     /**
3303      * Determines if a code point has a defined meaning in the up-to-date
3304      * Unicode standard.
3305      * E.g. supplementary code points though allocated space are not defined in
3306      * Unicode yet.<br>
3307      * Up-to-date Unicode implementation of java.lang.Character.isDefined()
3308      * @param ch code point to be determined if it is defined in the most
3309      *        current version of Unicode
3310      * @return true if this code point is defined in unicode
3311      * @stable ICU 2.1
3312      */
3313     public static boolean isDefined(int ch)
3314     {
3315         return getType(ch) != 0;
3316     }
3317
3318     /**
3319      * Determines if a code point is a Java digit.
3320      * <br>This method observes the semantics of
3321      * <code>java.lang.Character.isDigit()</code>. It returns true for decimal
3322      * digits only.
3323      * <br><em>Semantic Change:</em> In release 1.3.1 and prior, this treated
3324      * numeric letters and other numbers as digits.
3325      * This has been changed to conform to the java semantics.
3326      * @param ch code point to query
3327      * @return true if this code point is a digit
3328      * @stable ICU 2.1
3329      */
3330     public static boolean isDigit(int ch)
3331     {
3332         return getType(ch) == UCharacterCategory.DECIMAL_DIGIT_NUMBER;
3333     }
3334
3335     /**
3336      * Determines if the specified code point is an ISO control character.
3337      * A code point is considered to be an ISO control character if it is in
3338      * the range &#92u0000 through &#92u001F or in the range &#92u007F through
3339      * &#92u009F.<br>
3340      * Up-to-date Unicode implementation of java.lang.Character.isISOControl()
3341      * @param ch code point to determine if it is an ISO control character
3342      * @return true if code point is a ISO control character
3343      * @stable ICU 2.1
3344      */
3345     public static boolean isISOControl(int ch)
3346     {
3347         return ch >= 0 && ch <= APPLICATION_PROGRAM_COMMAND_ &&
3348             ((ch <= UNIT_SEPARATOR_) || (ch >= DELETE_));
3349     }
3350
3351     /**
3352      * Determines if the specified code point is a letter.
3353      * Up-to-date Unicode implementation of java.lang.Character.isLetter()
3354      * @param ch code point to determine if it is a letter
3355      * @return true if code point is a letter
3356      * @stable ICU 2.1
3357      */
3358     public static boolean isLetter(int ch)
3359     {
3360         // if props == 0, it will just fall through and return false
3361         return ((1 << getType(ch))
3362         & ((1 << UCharacterCategory.UPPERCASE_LETTER)
3363            | (1 << UCharacterCategory.LOWERCASE_LETTER)
3364            | (1 << UCharacterCategory.TITLECASE_LETTER)
3365            | (1 << UCharacterCategory.MODIFIER_LETTER)
3366            | (1 << UCharacterCategory.OTHER_LETTER))) != 0;
3367     }
3368
3369     /**
3370      * Determines if the specified code point is a letter or digit.
3371      * {@icunote} This method, unlike java.lang.Character does not regard the ascii
3372      * characters 'A' - 'Z' and 'a' - 'z' as digits.
3373      * @param ch code point to determine if it is a letter or a digit
3374      * @return true if code point is a letter or a digit
3375      * @stable ICU 2.1
3376      */
3377     public static boolean isLetterOrDigit(int ch)
3378     {
3379         return ((1 << getType(ch))
3380         & ((1 << UCharacterCategory.UPPERCASE_LETTER)
3381            | (1 << UCharacterCategory.LOWERCASE_LETTER)
3382            | (1 << UCharacterCategory.TITLECASE_LETTER)
3383            | (1 << UCharacterCategory.MODIFIER_LETTER)
3384            | (1 << UCharacterCategory.OTHER_LETTER)
3385            | (1 << UCharacterCategory.DECIMAL_DIGIT_NUMBER))) != 0;
3386     }
3387
3388     /**
3389      * Compatibility override of Java deprecated method.  This
3390      * method will always remain deprecated.  Delegates to
3391      * java.lang.Character.isJavaIdentifierStart.
3392      * @param cp the code point
3393      * @return true if the code point can start a java identifier.
3394      * @deprecated ICU 3.4 (Java)
3395      */
3396     public static boolean isJavaLetter(int cp) {
3397         return isJavaIdentifierStart(cp);
3398     }
3399
3400     /**
3401      * Compatibility override of Java deprecated method.  This
3402      * method will always remain deprecated.  Delegates to
3403      * java.lang.Character.isJavaIdentifierPart.
3404      * @param cp the code point
3405      * @return true if the code point can continue a java identifier.
3406      * @deprecated ICU 3.4 (Java)
3407      */
3408     public static boolean isJavaLetterOrDigit(int cp) {
3409         return isJavaIdentifierPart(cp);
3410     }
3411
3412     /**
3413      * Compatibility override of Java method, delegates to
3414      * java.lang.Character.isJavaIdentifierStart.
3415      * @param cp the code point
3416      * @return true if the code point can start a java identifier.
3417      * @stable ICU 3.4
3418      */
3419     public static boolean isJavaIdentifierStart(int cp) {
3420         // note, downcast to char for jdk 1.4 compatibility
3421         return java.lang.Character.isJavaIdentifierStart((char)cp);
3422     }
3423
3424     /**
3425      * Compatibility override of Java method, delegates to
3426      * java.lang.Character.isJavaIdentifierPart.
3427      * @param cp the code point
3428      * @return true if the code point can continue a java identifier.
3429      * @stable ICU 3.4
3430      */
3431     public static boolean isJavaIdentifierPart(int cp) {
3432         // note, downcast to char for jdk 1.4 compatibility
3433         return java.lang.Character.isJavaIdentifierPart((char)cp);
3434     }
3435
3436     /**
3437      * Determines if the specified code point is a lowercase character.
3438      * UnicodeData only contains case mappings for code points where they are
3439      * one-to-one mappings; it also omits information about context-sensitive
3440      * case mappings.<br> For more information about Unicode case mapping
3441      * please refer to the
3442      * <a href=http://www.unicode.org/unicode/reports/tr21/>Technical report
3443      * #21</a>.<br>
3444      * Up-to-date Unicode implementation of java.lang.Character.isLowerCase()
3445      * @param ch code point to determine if it is in lowercase
3446      * @return true if code point is a lowercase character
3447      * @stable ICU 2.1
3448      */
3449     public static boolean isLowerCase(int ch)
3450     {
3451         // if props == 0, it will just fall through and return false
3452         return getType(ch) == UCharacterCategory.LOWERCASE_LETTER;
3453     }
3454
3455     /**
3456      * Determines if the specified code point is a white space character.
3457      * A code point is considered to be an whitespace character if and only
3458      * if it satisfies one of the following criteria:
3459      * <ul>
3460      * <li> It is a Unicode Separator character (categories "Z" = "Zs" or "Zl" or "Zp"), but is not
3461      *      also a non-breaking space (&#92u00A0 or &#92u2007 or &#92u202F).
3462      * <li> It is &#92u0009, HORIZONTAL TABULATION.
3463      * <li> It is &#92u000A, LINE FEED.
3464      * <li> It is &#92u000B, VERTICAL TABULATION.
3465      * <li> It is &#92u000C, FORM FEED.
3466      * <li> It is &#92u000D, CARRIAGE RETURN.
3467      * <li> It is &#92u001C, FILE SEPARATOR.
3468      * <li> It is &#92u001D, GROUP SEPARATOR.
3469      * <li> It is &#92u001E, RECORD SEPARATOR.
3470      * <li> It is &#92u001F, UNIT SEPARATOR.
3471      * </ul>
3472      *
3473      * This API tries to sync with the semantics of Java's
3474      * java.lang.Character.isWhitespace(), but it may not return
3475      * the exact same results because of the Unicode version
3476      * difference.
3477      * <p>Note: Unicode 4.0.1 changed U+200B ZERO WIDTH SPACE from a Space Separator (Zs)
3478      * to a Format Control (Cf). Since then, isWhitespace(0x200b) returns false.
3479      * See http://www.unicode.org/versions/Unicode4.0.1/
3480      * @param ch code point to determine if it is a white space
3481      * @return true if the specified code point is a white space character
3482      * @stable ICU 2.1
3483      */
3484     public static boolean isWhitespace(int ch)
3485     {
3486         // exclude no-break spaces
3487         // if props == 0, it will just fall through and return false
3488         return ((1 << getType(ch)) &
3489                 ((1 << UCharacterCategory.SPACE_SEPARATOR)
3490                  | (1 << UCharacterCategory.LINE_SEPARATOR)
3491                  | (1 << UCharacterCategory.PARAGRAPH_SEPARATOR))) != 0
3492         && (ch != NO_BREAK_SPACE_) && (ch != FIGURE_SPACE_) && (ch != NARROW_NO_BREAK_SPACE_)
3493         // TAB VT LF FF CR FS GS RS US NL are all control characters
3494         // that are white spaces.
3495         || (ch >= 0x9 && ch <= 0xd) || (ch >= 0x1c && ch <= 0x1f);
3496     }
3497
3498     /**
3499      * Determines if the specified code point is a Unicode specified space
3500      * character, i.e. if code point is in the category Zs, Zl and Zp.
3501      * Up-to-date Unicode implementation of java.lang.Character.isSpaceChar().
3502      * @param ch code point to determine if it is a space
3503      * @return true if the specified code point is a space character
3504      * @stable ICU 2.1
3505      */
3506     public static boolean isSpaceChar(int ch)
3507     {
3508         // if props == 0, it will just fall through and return false
3509         return ((1 << getType(ch)) & ((1 << UCharacterCategory.SPACE_SEPARATOR)
3510                       | (1 << UCharacterCategory.LINE_SEPARATOR)
3511                       | (1 << UCharacterCategory.PARAGRAPH_SEPARATOR)))
3512         != 0;
3513     }
3514
3515     /**
3516      * Determines if the specified code point is a titlecase character.
3517      * UnicodeData only contains case mappings for code points where they are
3518      * one-to-one mappings; it also omits information about context-sensitive
3519      * case mappings.<br>
3520      * For more information about Unicode case mapping please refer to the
3521      * <a href=http://www.unicode.org/unicode/reports/tr21/>
3522      * Technical report #21</a>.<br>
3523      * Up-to-date Unicode implementation of java.lang.Character.isTitleCase().
3524      * @param ch code point to determine if it is in title case
3525      * @return true if the specified code point is a titlecase character
3526      * @stable ICU 2.1
3527      */
3528     public static boolean isTitleCase(int ch)
3529     {
3530         // if props == 0, it will just fall through and return false
3531         return getType(ch) == UCharacterCategory.TITLECASE_LETTER;
3532     }
3533
3534     /**
3535      * Determines if the specified code point may be any part of a Unicode
3536      * identifier other than the starting character.
3537      * A code point may be part of a Unicode identifier if and only if it is
3538      * one of the following:
3539      * <ul>
3540      * <li> Lu Uppercase letter
3541      * <li> Ll Lowercase letter
3542      * <li> Lt Titlecase letter
3543      * <li> Lm Modifier letter
3544      * <li> Lo Other letter
3545      * <li> Nl Letter number
3546      * <li> Pc Connecting punctuation character
3547      * <li> Nd decimal number
3548      * <li> Mc Spacing combining mark
3549      * <li> Mn Non-spacing mark
3550      * <li> Cf formatting code
3551      * </ul>
3552      * Up-to-date Unicode implementation of
3553      * java.lang.Character.isUnicodeIdentifierPart().<br>
3554      * See <a href=http://www.unicode.org/unicode/reports/tr8/>UTR #8</a>.
3555      * @param ch code point to determine if is can be part of a Unicode
3556      *        identifier
3557      * @return true if code point is any character belonging a unicode
3558      *         identifier suffix after the first character
3559      * @stable ICU 2.1
3560      */
3561     public static boolean isUnicodeIdentifierPart(int ch)
3562     {
3563         // if props == 0, it will just fall through and return false
3564         // cat == format
3565         return ((1 << getType(ch))
3566         & ((1 << UCharacterCategory.UPPERCASE_LETTER)
3567            | (1 << UCharacterCategory.LOWERCASE_LETTER)
3568            | (1 << UCharacterCategory.TITLECASE_LETTER)
3569            | (1 << UCharacterCategory.MODIFIER_LETTER)
3570            | (1 << UCharacterCategory.OTHER_LETTER)
3571            | (1 << UCharacterCategory.LETTER_NUMBER)
3572            | (1 << UCharacterCategory.CONNECTOR_PUNCTUATION)
3573            | (1 << UCharacterCategory.DECIMAL_DIGIT_NUMBER)
3574            | (1 << UCharacterCategory.COMBINING_SPACING_MARK)
3575            | (1 << UCharacterCategory.NON_SPACING_MARK))) != 0
3576         || isIdentifierIgnorable(ch);
3577     }
3578
3579     /**
3580      * Determines if the specified code point is permissible as the first
3581      * character in a Unicode identifier.
3582      * A code point may start a Unicode identifier if it is of type either
3583      * <ul>
3584      * <li> Lu Uppercase letter
3585      * <li> Ll Lowercase letter
3586      * <li> Lt Titlecase letter
3587      * <li> Lm Modifier letter
3588      * <li> Lo Other letter
3589      * <li> Nl Letter number
3590      * </ul>
3591      * Up-to-date Unicode implementation of
3592      * java.lang.Character.isUnicodeIdentifierStart().<br>
3593      * See <a href=http://www.unicode.org/unicode/reports/tr8/>UTR #8</a>.
3594      * @param ch code point to determine if it can start a Unicode identifier
3595      * @return true if code point is the first character belonging a unicode
3596      *              identifier
3597      * @stable ICU 2.1
3598      */
3599     public static boolean isUnicodeIdentifierStart(int ch)
3600     {
3601         /*int cat = getType(ch);*/
3602         // if props == 0, it will just fall through and return false
3603         return ((1 << getType(ch))
3604         & ((1 << UCharacterCategory.UPPERCASE_LETTER)
3605            | (1 << UCharacterCategory.LOWERCASE_LETTER)
3606            | (1 << UCharacterCategory.TITLECASE_LETTER)
3607            | (1 << UCharacterCategory.MODIFIER_LETTER)
3608            | (1 << UCharacterCategory.OTHER_LETTER)
3609            | (1 << UCharacterCategory.LETTER_NUMBER))) != 0;
3610     }
3611
3612     /**
3613      * Determines if the specified code point should be regarded as an
3614      * ignorable character in a Java identifier.
3615      * A character is Java-identifier-ignorable if it has the general category
3616      * Cf Formatting Control, or it is a non-Java-whitespace ISO control:
3617      * U+0000..U+0008, U+000E..U+001B, U+007F..U+009F.<br>
3618      * Up-to-date Unicode implementation of
3619      * java.lang.Character.isIdentifierIgnorable().<br>
3620      * See <a href=http://www.unicode.org/unicode/reports/tr8/>UTR #8</a>.
3621      * <p>Note that Unicode just recommends to ignore Cf (format controls).
3622      * @param ch code point to be determined if it can be ignored in a Unicode
3623      *        identifier.
3624      * @return true if the code point is ignorable
3625      * @stable ICU 2.1
3626      */
3627     public static boolean isIdentifierIgnorable(int ch)
3628     {
3629         // see java.lang.Character.isIdentifierIgnorable() on range of
3630         // ignorable characters.
3631         if (ch <= 0x9f) {
3632             return isISOControl(ch)
3633                 && !((ch >= 0x9 && ch <= 0xd)
3634                      || (ch >= 0x1c && ch <= 0x1f));
3635         }
3636         return getType(ch) == UCharacterCategory.FORMAT;
3637     }
3638
3639     /**
3640      * Determines if the specified code point is an uppercase character.
3641      * UnicodeData only contains case mappings for code point where they are
3642      * one-to-one mappings; it also omits information about context-sensitive
3643      * case mappings.<br>
3644      * For language specific case conversion behavior, use
3645      * toUpperCase(locale, str). <br>
3646      * For example, the case conversion for dot-less i and dotted I in Turkish,
3647      * or for final sigma in Greek.
3648      * For more information about Unicode case mapping please refer to the
3649      * <a href=http://www.unicode.org/unicode/reports/tr21/>
3650      * Technical report #21</a>.<br>
3651      * Up-to-date Unicode implementation of java.lang.Character.isUpperCase().
3652      * @param ch code point to determine if it is in uppercase
3653      * @return true if the code point is an uppercase character
3654      * @stable ICU 2.1
3655      */
3656     public static boolean isUpperCase(int ch)
3657     {
3658         // if props == 0, it will just fall through and return false
3659         return getType(ch) == UCharacterCategory.UPPERCASE_LETTER;
3660     }
3661
3662     /**
3663      * The given code point is mapped to its lowercase equivalent; if the code
3664      * point has no lowercase equivalent, the code point itself is returned.
3665      * Up-to-date Unicode implementation of java.lang.Character.toLowerCase()
3666      *
3667      * <p>This function only returns the simple, single-code point case mapping.
3668      * Full case mappings should be used whenever possible because they produce
3669      * better results by working on whole strings.
3670      * They take into account the string context and the language and can map
3671      * to a result string with a different length as appropriate.
3672      * Full case mappings are applied by the case mapping functions
3673      * that take String parameters rather than code points (int).
3674      * See also the User Guide chapter on C/POSIX migration:
3675      * http://www.icu-project.org/userguide/posix.html#case_mappings
3676      *
3677      * @param ch code point whose lowercase equivalent is to be retrieved
3678      * @return the lowercase equivalent code point
3679      * @stable ICU 2.1
3680      */
3681     public static int toLowerCase(int ch) {
3682         return UCaseProps.INSTANCE.tolower(ch);
3683     }
3684
3685     /**
3686      * Converts argument code point and returns a String object representing
3687      * the code point's value in UTF16 format.
3688      * The result is a string whose length is 1 for non-supplementary code
3689      * points, 2 otherwise.<br>
3690      * com.ibm.ibm.icu.UTF16 can be used to parse Strings generated by this
3691      * function.<br>
3692      * Up-to-date Unicode implementation of java.lang.Character.toString()
3693      * @param ch code point
3694      * @return string representation of the code point, null if code point is not
3695      *         defined in unicode
3696      * @stable ICU 2.1
3697      */
3698     public static String toString(int ch)
3699     {
3700         if (ch < MIN_VALUE || ch > MAX_VALUE) {
3701             return null;
3702         }
3703
3704         if (ch < SUPPLEMENTARY_MIN_VALUE) {
3705             return String.valueOf((char)ch);
3706         }
3707
3708         StringBuilder result = new StringBuilder();
3709         result.append(UTF16.getLeadSurrogate(ch));
3710         result.append(UTF16.getTrailSurrogate(ch));
3711         return result.toString();
3712     }
3713
3714     /**
3715      * Converts the code point argument to titlecase.
3716      * If no titlecase is available, the uppercase is returned. If no uppercase
3717      * is available, the code point itself is returned.
3718      * Up-to-date Unicode implementation of java.lang.Character.toTitleCase()
3719      *
3720      * <p>This function only returns the simple, single-code point case mapping.
3721      * Full case mappings should be used whenever possible because they produce
3722      * better results by working on whole strings.
3723      * They take into account the string context and the language and can map
3724      * to a result string with a different length as appropriate.
3725      * Full case mappings are applied by the case mapping functions
3726      * that take String parameters rather than code points (int).
3727      * See also the User Guide chapter on C/POSIX migration:
3728      * http://www.icu-project.org/userguide/posix.html#case_mappings
3729      *
3730      * @param ch code point  whose title case is to be retrieved
3731      * @return titlecase code point
3732      * @stable ICU 2.1
3733      */
3734     public static int toTitleCase(int ch) {
3735         return UCaseProps.INSTANCE.totitle(ch);
3736     }
3737
3738     /**
3739      * Converts the character argument to uppercase.
3740      * If no uppercase is available, the character itself is returned.
3741      * Up-to-date Unicode implementation of java.lang.Character.toUpperCase()
3742      *
3743      * <p>This function only returns the simple, single-code point case mapping.
3744      * Full case mappings should be used whenever possible because they produce
3745      * better results by working on whole strings.
3746      * They take into account the string context and the language and can map
3747      * to a result string with a different length as appropriate.
3748      * Full case mappings are applied by the case mapping functions
3749      * that take String parameters rather than code points (int).
3750      * See also the User Guide chapter on C/POSIX migration:
3751      * http://www.icu-project.org/userguide/posix.html#case_mappings
3752      *
3753      * @param ch code point whose uppercase is to be retrieved
3754      * @return uppercase code point
3755      * @stable ICU 2.1
3756      */
3757     public static int toUpperCase(int ch) {
3758         return UCaseProps.INSTANCE.toupper(ch);
3759     }
3760
3761     // extra methods not in java.lang.Character --------------------------
3762
3763     /**
3764      * {@icu} Determines if the code point is a supplementary character.
3765      * A code point is a supplementary character if and only if it is greater
3766      * than <a href=#SUPPLEMENTARY_MIN_VALUE>SUPPLEMENTARY_MIN_VALUE</a>
3767      * @param ch code point to be determined if it is in the supplementary
3768      *        plane
3769      * @return true if code point is a supplementary character
3770      * @stable ICU 2.1
3771      */
3772     public static boolean isSupplementary(int ch)
3773     {
3774         return ch >= UCharacter.SUPPLEMENTARY_MIN_VALUE &&
3775             ch <= UCharacter.MAX_VALUE;
3776     }
3777
3778     /**
3779      * {@icu} Determines if the code point is in the BMP plane.
3780      * @param ch code point to be determined if it is not a supplementary
3781      *        character
3782      * @return true if code point is not a supplementary character
3783      * @stable ICU 2.1
3784      */
3785     public static boolean isBMP(int ch)
3786     {
3787         return (ch >= 0 && ch <= LAST_CHAR_MASK_);
3788     }
3789
3790     /**
3791      * {@icu} Determines whether the specified code point is a printable character
3792      * according to the Unicode standard.
3793      * @param ch code point to be determined if it is printable
3794      * @return true if the code point is a printable character
3795      * @stable ICU 2.1
3796      */
3797     public static boolean isPrintable(int ch)
3798     {
3799         int cat = getType(ch);
3800         // if props == 0, it will just fall through and return false
3801         return (cat != UCharacterCategory.UNASSIGNED &&
3802         cat != UCharacterCategory.CONTROL &&
3803         cat != UCharacterCategory.FORMAT &&
3804         cat != UCharacterCategory.PRIVATE_USE &&
3805         cat != UCharacterCategory.SURROGATE &&
3806         cat != UCharacterCategory.GENERAL_OTHER_TYPES);
3807     }
3808
3809     /**
3810      * {@icu} Determines whether the specified code point is of base form.
3811      * A code point of base form does not graphically combine with preceding
3812      * characters, and is neither a control nor a format character.
3813      * @param ch code point to be determined if it is of base form
3814      * @return true if the code point is of base form
3815      * @stable ICU 2.1
3816      */
3817     public static boolean isBaseForm(int ch)
3818     {
3819         int cat = getType(ch);
3820         // if props == 0, it will just fall through and return false
3821         return cat == UCharacterCategory.DECIMAL_DIGIT_NUMBER ||
3822             cat == UCharacterCategory.OTHER_NUMBER ||
3823             cat == UCharacterCategory.LETTER_NUMBER ||
3824             cat == UCharacterCategory.UPPERCASE_LETTER ||
3825             cat == UCharacterCategory.LOWERCASE_LETTER ||
3826             cat == UCharacterCategory.TITLECASE_LETTER ||
3827             cat == UCharacterCategory.MODIFIER_LETTER ||
3828             cat == UCharacterCategory.OTHER_LETTER ||
3829             cat == UCharacterCategory.NON_SPACING_MARK ||
3830             cat == UCharacterCategory.ENCLOSING_MARK ||
3831             cat == UCharacterCategory.COMBINING_SPACING_MARK;
3832     }
3833
3834     /**
3835      * {@icu} Returns the Bidirection property of a code point.
3836      * For example, 0x0041 (letter A) has the LEFT_TO_RIGHT directional
3837      * property.<br>
3838      * Result returned belongs to the interface
3839      * <a href=UCharacterDirection.html>UCharacterDirection</a>
3840      * @param ch the code point to be determined its direction
3841      * @return direction constant from UCharacterDirection.
3842      * @stable ICU 2.1
3843      */
3844     public static int getDirection(int ch)
3845     {
3846         return UBiDiProps.INSTANCE.getClass(ch);
3847     }
3848
3849     /**
3850      * Determines whether the code point has the "mirrored" property.
3851      * This property is set for characters that are commonly used in
3852      * Right-To-Left contexts and need to be displayed with a "mirrored"
3853      * glyph.
3854      * @param ch code point whose mirror is to be determined
3855      * @return true if the code point has the "mirrored" property
3856      * @stable ICU 2.1
3857      */
3858     public static boolean isMirrored(int ch)
3859     {
3860         return UBiDiProps.INSTANCE.isMirrored(ch);
3861     }
3862
3863     /**
3864      * {@icu} Maps the specified code point to a "mirror-image" code point.
3865      * For code points with the "mirrored" property, implementations sometimes
3866      * need a "poor man's" mapping to another code point such that the default
3867      * glyph may serve as the mirror-image of the default glyph of the
3868      * specified code point.<br>
3869      * This is useful for text conversion to and from codepages with visual
3870      * order, and for displays without glyph selection capabilities.
3871      * @param ch code point whose mirror is to be retrieved
3872      * @return another code point that may serve as a mirror-image substitute,
3873      *         or ch itself if there is no such mapping or ch does not have the
3874      *         "mirrored" property
3875      * @stable ICU 2.1
3876      */
3877     public static int getMirror(int ch)
3878     {
3879         return UBiDiProps.INSTANCE.getMirror(ch);
3880     }
3881
3882     /**
3883      * {@icu} Returns the combining class of the argument codepoint
3884      * @param ch code point whose combining is to be retrieved
3885      * @return the combining class of the codepoint
3886      * @stable ICU 2.1
3887      */
3888     public static int getCombiningClass(int ch)
3889     {
3890         if (ch < MIN_VALUE || ch > MAX_VALUE) {
3891             throw new IllegalArgumentException("Codepoint out of bounds");
3892         }
3893         Normalizer2Impl impl = Norm2AllModes.getNFCInstance().impl;
3894         return impl.getCC(impl.getNorm16(ch));
3895     }
3896
3897     /**
3898      * {@icu} A code point is illegal if and only if
3899      * <ul>
3900      * <li> Out of bounds, less than 0 or greater than UCharacter.MAX_VALUE
3901      * <li> A surrogate value, 0xD800 to 0xDFFF
3902      * <li> Not-a-character, having the form 0x xxFFFF or 0x xxFFFE
3903      * </ul>
3904      * Note: legal does not mean that it is assigned in this version of Unicode.
3905      * @param ch code point to determine if it is a legal code point by itself
3906      * @return true if and only if legal.
3907      * @stable ICU 2.1
3908      */
3909     public static boolean isLegal(int ch)
3910     {
3911         if (ch < MIN_VALUE) {
3912             return false;
3913         }
3914         if (ch < UTF16.SURROGATE_MIN_VALUE) {
3915             return true;
3916         }
3917         if (ch <= UTF16.SURROGATE_MAX_VALUE) {
3918             return false;
3919         }
3920         if (UCharacterUtility.isNonCharacter(ch)) {
3921             return false;
3922         }
3923         return (ch <= MAX_VALUE);
3924     }
3925
3926     /**
3927      * {@icu} A string is legal iff all its code points are legal.
3928      * A code point is illegal if and only if
3929      * <ul>
3930      * <li> Out of bounds, less than 0 or greater than UCharacter.MAX_VALUE
3931      * <li> A surrogate value, 0xD800 to 0xDFFF
3932      * <li> Not-a-character, having the form 0x xxFFFF or 0x xxFFFE
3933      * </ul>
3934      * Note: legal does not mean that it is assigned in this version of Unicode.
3935      * @param str containing code points to examin
3936      * @return true if and only if legal.
3937      * @stable ICU 2.1
3938      */
3939     public static boolean isLegal(String str)
3940     {
3941         int size = str.length();
3942         int codepoint;
3943         for (int i = 0; i < size; i ++)
3944         {
3945         codepoint = UTF16.charAt(str, i);
3946         if (!isLegal(codepoint)) {
3947             return false;
3948         }
3949         if (isSupplementary(codepoint)) {
3950             i ++;
3951         }
3952         }
3953         return true;
3954     }
3955
3956     /**
3957      * {@icu} Returns the version of Unicode data used.
3958      * @return the unicode version number used
3959      * @stable ICU 2.1
3960      */
3961     public static VersionInfo getUnicodeVersion()
3962     {
3963         return UCharacterProperty.INSTANCE.m_unicodeVersion_;
3964     }
3965
3966     /**
3967      * {@icu} Returns the most current Unicode name of the argument code point, or
3968      * null if the character is unassigned or outside the range
3969      * UCharacter.MIN_VALUE and UCharacter.MAX_VALUE or does not have a name.
3970      * <br>
3971      * Note calling any methods related to code point names, e.g. get*Name*()
3972      * incurs a one-time initialisation cost to construct the name tables.
3973      * @param ch the code point for which to get the name
3974      * @return most current Unicode name
3975      * @stable ICU 2.1
3976      */
3977     public static String getName(int ch)
3978     {
3979         return UCharacterName.INSTANCE.getName(ch, UCharacterNameChoice.UNICODE_CHAR_NAME);
3980     }
3981
3982     /**
3983      * {@icu} Returns the names for each of the characters in a string
3984      * @param s string to format
3985      * @param separator string to go between names
3986      * @return string of names
3987      * @stable ICU 3.8
3988      */
3989     public static String getName(String s, String separator) {
3990         if (s.length() == 1) { // handle common case
3991             return getName(s.charAt(0));
3992         }
3993         int cp;
3994         StringBuilder sb = new StringBuilder();
3995         for (int i = 0; i < s.length(); i += UTF16.getCharCount(cp)) {
3996             cp = UTF16.charAt(s,i);
3997             if (i != 0) sb.append(separator);
3998             sb.append(UCharacter.getName(cp));
3999         }
4000         return sb.toString();
4001     }
4002
4003     /**
4004      * {@icu} Returns the earlier version 1.0 Unicode name of the argument code
4005      * point, or null if the character is unassigned or outside the range
4006      * UCharacter.MIN_VALUE and UCharacter.MAX_VALUE or does not have a name.
4007      * <br>
4008      * Note calling any methods related to code point names, e.g. get*Name*()
4009      * incurs a one-time initialisation cost to construct the name tables.
4010      * @param ch the code point for which to get the name
4011      * @return version 1.0 Unicode name
4012      * @stable ICU 2.1
4013      */
4014     public static String getName1_0(int ch)
4015     {
4016         return UCharacterName.INSTANCE.getName(ch,
4017                              UCharacterNameChoice.UNICODE_10_CHAR_NAME);
4018     }
4019
4020     /**
4021      * {@icu} Returns a name for a valid codepoint. Unlike, getName(int) and
4022      * getName1_0(int), this method will return a name even for codepoints that
4023      * are not assigned a name in UnicodeData.txt.
4024      * </p>
4025      * The names are returned in the following order.
4026      * <ul>
4027      * <li> Most current Unicode name if there is any
4028      * <li> Unicode 1.0 name if there is any
4029      * <li> Extended name in the form of
4030      *      "<codepoint_type-codepoint_hex_digits>". E.g. <noncharacter-fffe>
4031      * </ul>
4032      * Note calling any methods related to code point names, e.g. get*Name*()
4033      * incurs a one-time initialisation cost to construct the name tables.
4034      * @param ch the code point for which to get the name
4035      * @return a name for the argument codepoint
4036      * @stable ICU 2.6
4037      */
4038     public static String getExtendedName(int ch) {
4039         return UCharacterName.INSTANCE.getName(ch, UCharacterNameChoice.EXTENDED_CHAR_NAME);
4040     }
4041
4042     /**
4043      * {@icu} Returns the corrected name from NameAliases.txt if there is one.
4044      * Returns null if the character is unassigned or outside the range
4045      * UCharacter.MIN_VALUE and UCharacter.MAX_VALUE or does not have a name.
4046      * <br>
4047      * Note calling any methods related to code point names, e.g. get*Name*()
4048      * incurs a one-time initialisation cost to construct the name tables.
4049      * @param ch the code point for which to get the name alias
4050      * @return Unicode name alias, or null
4051      * @stable ICU 4.4
4052      */
4053     public static String getNameAlias(int ch)
4054     {
4055         return UCharacterName.INSTANCE.getName(ch, UCharacterNameChoice.CHAR_NAME_ALIAS);
4056     }
4057
4058     /**
4059      * {@icu} Returns the ISO 10646 comment for a character.
4060      * The ISO 10646 comment is an informative field in the Unicode Character
4061      * Database (UnicodeData.txt field 11) and is from the ISO 10646 names list.
4062      *
4063      * Note: Unicode 5.2 removes all ISO comment data, resulting in empty strings
4064      * returned for all characters.
4065      *
4066      * @param ch The code point for which to get the ISO comment.
4067      *           It must be the case that {@code 0 <= ch <= 0x10ffff}.
4068      * @return The ISO comment, or null if there is no comment for this
4069      *         character.
4070      * @stable ICU 2.4
4071      */
4072     public static String getISOComment(int ch)
4073     {
4074         if (ch < UCharacter.MIN_VALUE || ch > UCharacter.MAX_VALUE) {
4075             return null;
4076         }
4077
4078         String result = UCharacterName.INSTANCE.getGroupName(ch,
4079                                            UCharacterNameChoice.ISO_COMMENT_);
4080         return result;
4081     }
4082
4083     /**
4084      * {@icu} <p>Finds a Unicode code point by its most current Unicode name and
4085      * return its code point value. All Unicode names are in uppercase.</p>
4086      * Note calling any methods related to code point names, e.g. get*Name*()
4087      * incurs a one-time initialisation cost to construct the name tables.
4088      * @param name most current Unicode character name whose code point is to
4089      *        be returned
4090      * @return code point or -1 if name is not found
4091      * @stable ICU 2.1
4092      */
4093     public static int getCharFromName(String name){
4094         return UCharacterName.INSTANCE.getCharFromName(
4095                      UCharacterNameChoice.UNICODE_CHAR_NAME, name);
4096     }
4097
4098     /**
4099      * {@icu} <p>Find a Unicode character by its version 1.0 Unicode name and return
4100      * its code point value. All Unicode names are in uppercase.</p>
4101      * Note calling any methods related to code point names, e.g. get*Name*()
4102      * incurs a one-time initialisation cost to construct the name tables.
4103      * @param name Unicode 1.0 code point name whose code point is to
4104      *             returned
4105      * @return code point or -1 if name is not found
4106      * @stable ICU 2.1
4107      */
4108     public static int getCharFromName1_0(String name){
4109         return UCharacterName.INSTANCE.getCharFromName(
4110                      UCharacterNameChoice.UNICODE_10_CHAR_NAME, name);
4111     }
4112
4113     /**
4114      * {@icu} <p>Find a Unicode character by either its name and return its code
4115      * point value. All Unicode names are in uppercase.
4116      * Extended names are all lowercase except for numbers and are contained
4117      * within angle brackets.</p>
4118      * The names are searched in the following order
4119      * <ul>
4120      * <li> Most current Unicode name if there is any
4121      * <li> Unicode 1.0 name if there is any
4122      * <li> Extended name in the form of
4123      *      "<codepoint_type-codepoint_hex_digits>". E.g. <noncharacter-FFFE>
4124      * </ul>
4125      * Note calling any methods related to code point names, e.g. get*Name*()
4126      * incurs a one-time initialisation cost to construct the name tables.
4127      * @param name codepoint name
4128      * @return code point associated with the name or -1 if the name is not
4129      *         found.
4130      * @stable ICU 2.6
4131      */
4132     public static int getCharFromExtendedName(String name){
4133         return UCharacterName.INSTANCE.getCharFromName(
4134                      UCharacterNameChoice.EXTENDED_CHAR_NAME, name);
4135     }
4136
4137     /**
4138      * {@icu} <p>Find a Unicode character by its corrected name alias and return
4139      * its code point value. All Unicode names are in uppercase.</p>
4140      * Note calling any methods related to code point names, e.g. get*Name*()
4141      * incurs a one-time initialisation cost to construct the name tables.
4142      * @param name Unicode name alias whose code point is to be returned
4143      * @return code point or -1 if name is not found
4144      * @stable ICU 4.4
4145      */
4146     public static int getCharFromNameAlias(String name){
4147         return UCharacterName.INSTANCE.getCharFromName(UCharacterNameChoice.CHAR_NAME_ALIAS, name);
4148     }
4149
4150     /**
4151      * {@icu} Return the Unicode name for a given property, as given in the
4152      * Unicode database file PropertyAliases.txt.  Most properties
4153      * have more than one name.  The nameChoice determines which one
4154      * is returned.
4155      *
4156      * In addition, this function maps the property
4157      * UProperty.GENERAL_CATEGORY_MASK to the synthetic names "gcm" /
4158      * "General_Category_Mask".  These names are not in
4159      * PropertyAliases.txt.
4160      *
4161      * @param property UProperty selector.
4162      *
4163      * @param nameChoice UProperty.NameChoice selector for which name
4164      * to get.  All properties have a long name.  Most have a short
4165      * name, but some do not.  Unicode allows for additional names; if
4166      * present these will be returned by UProperty.NameChoice.LONG + i,
4167      * where i=1, 2,...
4168      *
4169      * @return a name, or null if Unicode explicitly defines no name
4170      * ("n/a") for a given property/nameChoice.  If a given nameChoice
4171      * throws an exception, then all larger values of nameChoice will
4172      * throw an exception.  If null is returned for a given
4173      * nameChoice, then other nameChoice values may return non-null
4174      * results.
4175      *
4176      * @exception IllegalArgumentException thrown if property or
4177      * nameChoice are invalid.
4178      *
4179      * @see UProperty
4180      * @see UProperty.NameChoice
4181      * @stable ICU 2.4
4182      */
4183     public static String getPropertyName(int property,
4184                                          int nameChoice) {
4185         return UPropertyAliases.INSTANCE.getPropertyName(property, nameChoice);
4186     }
4187
4188     /**
4189      * {@icu} Return the UProperty selector for a given property name, as
4190      * specified in the Unicode database file PropertyAliases.txt.
4191      * Short, long, and any other variants are recognized.
4192      *
4193      * In addition, this function maps the synthetic names "gcm" /
4194      * "General_Category_Mask" to the property
4195      * UProperty.GENERAL_CATEGORY_MASK.  These names are not in
4196      * PropertyAliases.txt.
4197      *
4198      * @param propertyAlias the property name to be matched.  The name
4199      * is compared using "loose matching" as described in
4200      * PropertyAliases.txt.
4201      *
4202      * @return a UProperty enum.
4203      *
4204      * @exception IllegalArgumentException thrown if propertyAlias
4205      * is not recognized.
4206      *
4207      * @see UProperty
4208      * @stable ICU 2.4
4209      */
4210     public static int getPropertyEnum(CharSequence propertyAlias) {
4211         int propEnum = UPropertyAliases.INSTANCE.getPropertyEnum(propertyAlias);
4212         if (propEnum == UProperty.UNDEFINED) {
4213             throw new IllegalIcuArgumentException("Invalid name: " + propertyAlias);
4214         }
4215         return propEnum;
4216     }
4217
4218     /**
4219      * {@icu} Return the Unicode name for a given property value, as given in
4220      * the Unicode database file PropertyValueAliases.txt.  Most
4221      * values have more than one name.  The nameChoice determines
4222      * which one is returned.
4223      *
4224      * Note: Some of the names in PropertyValueAliases.txt can only be
4225      * retrieved using UProperty.GENERAL_CATEGORY_MASK, not
4226      * UProperty.GENERAL_CATEGORY.  These include: "C" / "Other", "L" /
4227      * "Letter", "LC" / "Cased_Letter", "M" / "Mark", "N" / "Number", "P"
4228      * / "Punctuation", "S" / "Symbol", and "Z" / "Separator".
4229      *
4230      * @param property UProperty selector constant.
4231      * UProperty.INT_START &lt;= property &lt; UProperty.INT_LIMIT or
4232      * UProperty.BINARY_START &lt;= property &lt; UProperty.BINARY_LIMIT or
4233      * UProperty.MASK_START &lt; = property &lt; UProperty.MASK_LIMIT.
4234      * If out of range, null is returned.
4235      *
4236      * @param value selector for a value for the given property.  In
4237      * general, valid values range from 0 up to some maximum.  There
4238      * are a few exceptions: (1.) UProperty.BLOCK values begin at the
4239      * non-zero value BASIC_LATIN.getID().  (2.)
4240      * UProperty.CANONICAL_COMBINING_CLASS values are not contiguous
4241      * and range from 0..240.  (3.)  UProperty.GENERAL_CATEGORY_MASK values
4242      * are mask values produced by left-shifting 1 by
4243      * UCharacter.getType().  This allows grouped categories such as
4244      * [:L:] to be represented.  Mask values are non-contiguous.
4245      *
4246      * @param nameChoice UProperty.NameChoice selector for which name
4247      * to get.  All values have a long name.  Most have a short name,
4248      * but some do not.  Unicode allows for additional names; if
4249      * present these will be returned by UProperty.NameChoice.LONG + i,
4250      * where i=1, 2,...
4251      *
4252      * @return a name, or null if Unicode explicitly defines no name
4253      * ("n/a") for a given property/value/nameChoice.  If a given
4254      * nameChoice throws an exception, then all larger values of
4255      * nameChoice will throw an exception.  If null is returned for a
4256      * given nameChoice, then other nameChoice values may return
4257      * non-null results.
4258      *
4259      * @exception IllegalArgumentException thrown if property, value,
4260      * or nameChoice are invalid.
4261      *
4262      * @see UProperty
4263      * @see UProperty.NameChoice
4264      * @stable ICU 2.4
4265      */
4266     public static String getPropertyValueName(int property,
4267                                               int value,
4268                                               int nameChoice)
4269     {
4270         if ((property == UProperty.CANONICAL_COMBINING_CLASS
4271              || property == UProperty.LEAD_CANONICAL_COMBINING_CLASS
4272              || property == UProperty.TRAIL_CANONICAL_COMBINING_CLASS)
4273             && value >= UCharacter.getIntPropertyMinValue(
4274                               UProperty.CANONICAL_COMBINING_CLASS)
4275             && value <= UCharacter.getIntPropertyMaxValue(
4276                               UProperty.CANONICAL_COMBINING_CLASS)
4277             && nameChoice >= 0 && nameChoice < UProperty.NameChoice.COUNT) {
4278             // this is hard coded for the valid cc
4279             // because PropertyValueAliases.txt does not contain all of them
4280             try {
4281                 return UPropertyAliases.INSTANCE.getPropertyValueName(property, value,
4282                                                     nameChoice);
4283             }
4284             catch (IllegalArgumentException e) {
4285                 return null;
4286             }
4287         }
4288         return UPropertyAliases.INSTANCE.getPropertyValueName(property, value, nameChoice);
4289     }
4290
4291     /**
4292      * {@icu} Return the property value integer for a given value name, as
4293      * specified in the Unicode database file PropertyValueAliases.txt.
4294      * Short, long, and any other variants are recognized.
4295      *
4296      * Note: Some of the names in PropertyValueAliases.txt will only be
4297      * recognized with UProperty.GENERAL_CATEGORY_MASK, not
4298      * UProperty.GENERAL_CATEGORY.  These include: "C" / "Other", "L" /
4299      * "Letter", "LC" / "Cased_Letter", "M" / "Mark", "N" / "Number", "P"
4300      * / "Punctuation", "S" / "Symbol", and "Z" / "Separator".
4301      *
4302      * @param property UProperty selector constant.
4303      * UProperty.INT_START &lt;= property &lt; UProperty.INT_LIMIT or
4304      * UProperty.BINARY_START &lt;= property &lt; UProperty.BINARY_LIMIT or
4305      * UProperty.MASK_START &lt; = property &lt; UProperty.MASK_LIMIT.
4306      * Only these properties can be enumerated.
4307      *
4308      * @param valueAlias the value name to be matched.  The name is
4309      * compared using "loose matching" as described in
4310      * PropertyValueAliases.txt.
4311      *
4312      * @return a value integer.  Note: UProperty.GENERAL_CATEGORY
4313      * values are mask values produced by left-shifting 1 by
4314      * UCharacter.getType().  This allows grouped categories such as
4315      * [:L:] to be represented.
4316      *
4317      * @see UProperty
4318      * @throws IllegalArgumentException if property is not a valid UProperty
4319      *         selector or valueAlias is not a value of this property
4320      * @stable ICU 2.4
4321      */
4322     public static int getPropertyValueEnum(int property, CharSequence valueAlias) {
4323         int propEnum = UPropertyAliases.INSTANCE.getPropertyValueEnum(property, valueAlias);
4324         if (propEnum == UProperty.UNDEFINED) {
4325             throw new IllegalIcuArgumentException("Invalid name: " + valueAlias);
4326         }
4327         return propEnum;
4328     }
4329
4330     /**
4331      * {@icu} Returns a code point corresponding to the two UTF16 characters.
4332      * @param lead the lead char
4333      * @param trail the trail char
4334      * @return code point if surrogate characters are valid.
4335      * @exception IllegalArgumentException thrown when argument characters do
4336      *            not form a valid codepoint
4337      * @stable ICU 2.1
4338      */
4339     public static int getCodePoint(char lead, char trail)
4340     {
4341         if (UTF16.isLeadSurrogate(lead) && UTF16.isTrailSurrogate(trail)) {
4342             return UCharacterProperty.getRawSupplementary(lead, trail);
4343         }
4344         throw new IllegalArgumentException("Illegal surrogate characters");
4345     }
4346
4347     /**
4348      * {@icu} Returns the code point corresponding to the UTF16 character.
4349      * @param char16 the UTF16 character
4350      * @return code point if argument is a valid character.
4351      * @exception IllegalArgumentException thrown when char16 is not a valid
4352      *            codepoint
4353      * @stable ICU 2.1
4354      */
4355     public static int getCodePoint(char char16)
4356     {
4357         if (UCharacter.isLegal(char16)) {
4358             return char16;
4359         }
4360         throw new IllegalArgumentException("Illegal codepoint");
4361     }
4362
4363     /**
4364      * Implementation of UCaseProps.ContextIterator, iterates over a String.
4365      * See ustrcase.c/utf16_caseContextIterator().
4366      */
4367     private static class StringContextIterator implements UCaseProps.ContextIterator {
4368         /**
4369          * Constructor.
4370          * @param s String to iterate over.
4371          */
4372         StringContextIterator(String s) {
4373             this.s=s;
4374             limit=s.length();
4375             cpStart=cpLimit=index=0;
4376             dir=0;
4377         }
4378
4379         /**
4380          * Set the iteration limit for nextCaseMapCP() to an index within the string.
4381          * If the limit parameter is negative or past the string, then the
4382          * string length is restored as the iteration limit.
4383          *
4384          * This limit does not affect the next() function which always
4385          * iterates to the very end of the string.
4386          *
4387          * @param lim The iteration limit.
4388          */
4389         public void setLimit(int lim) {
4390             if(0<=lim && lim<=s.length()) {
4391                 limit=lim;
4392             } else {
4393                 limit=s.length();
4394             }
4395         }
4396
4397         /**
4398          * Move to the iteration limit without fetching code points up to there.
4399          */
4400         public void moveToLimit() {
4401             cpStart=cpLimit=limit;
4402         }
4403
4404         /**
4405          * Iterate forward through the string to fetch the next code point
4406          * to be case-mapped, and set the context indexes for it.
4407          * Performance optimization, to save on function calls and redundant
4408          * tests. Combines UTF16.charAt(), UTF16.getCharCount(), and setIndex().
4409          *
4410          * When the iteration limit is reached (and -1 is returned),
4411          * getCPStart() will be at the iteration limit.
4412          *
4413          * Iteration with next() does not affect the position for nextCaseMapCP().
4414          *
4415          * @return The next code point to be case-mapped, or <0 when the iteration is done.
4416          */
4417         public int nextCaseMapCP() {
4418             cpStart=cpLimit;
4419             if(cpLimit<limit) {
4420                 int c=s.charAt(cpLimit++);
4421                 if(UTF16.LEAD_SURROGATE_MIN_VALUE<=c || c<=UTF16.TRAIL_SURROGATE_MAX_VALUE) {
4422                     char c2;
4423                     if( c<=UTF16.LEAD_SURROGATE_MAX_VALUE && cpLimit<limit &&
4424                         UTF16.TRAIL_SURROGATE_MIN_VALUE<=(c2=s.charAt(cpLimit)) &&
4425                         c2<=UTF16.TRAIL_SURROGATE_MAX_VALUE
4426                     ) {
4427                         // supplementary code point
4428                         ++cpLimit;
4429                         c=UCharacterProperty.getRawSupplementary((char)c, c2);
4430                     // else unpaired surrogate code point
4431                     }
4432                 // else BMP code point
4433                 }
4434                 return c;
4435             } else {
4436                 return -1;
4437             }
4438         }
4439
4440         /**
4441          * Returns the start of the code point that was last returned
4442          * by nextCaseMapCP().
4443          */
4444         public int getCPStart() {
4445             return cpStart;
4446         }
4447
4448         /**
4449          * Returns the limit of the code point that was last returned
4450          * by nextCaseMapCP().
4451          */
4452         public int getCPLimit() {
4453             return cpLimit;
4454         }
4455
4456         // implement UCaseProps.ContextIterator
4457         // The following code is not used anywhere in this private class
4458         public void reset(int direction) {
4459             if(direction>0) {
4460                 /* reset for forward iteration */
4461                 dir=1;
4462                 index=cpLimit;
4463             } else if(direction<0) {
4464                 /* reset for backward iteration */
4465                 dir=-1;
4466                 index=cpStart;
4467             } else {
4468                 // not a valid direction
4469                 dir=0;
4470                 index=0;
4471             }
4472         }
4473
4474         public int next() {
4475             int c;
4476
4477             if(dir>0 && index<s.length()) {
4478                 c=UTF16.charAt(s, index);
4479                 index+=UTF16.getCharCount(c);
4480                 return c;
4481             } else if(dir<0 && index>0) {
4482                 c=UTF16.charAt(s, index-1);
4483                 index-=UTF16.getCharCount(c);
4484                 return c;
4485             }
4486             return -1;
4487         }
4488
4489         // variables
4490         protected String s;
4491         protected int index, limit, cpStart, cpLimit;
4492         protected int dir; // 0=initial state  >0=forward  <0=backward
4493     }
4494
4495     /**
4496      * Returns the uppercase version of the argument string.
4497      * Casing is dependent on the default locale and context-sensitive.
4498      * @param str source string to be performed on
4499      * @return uppercase version of the argument string
4500      * @stable ICU 2.1
4501      */
4502     public static String toUpperCase(String str)
4503     {
4504         return toUpperCase(ULocale.getDefault(), str);
4505     }
4506
4507     /**
4508      * Returns the lowercase version of the argument string.
4509      * Casing is dependent on the default locale and context-sensitive
4510      * @param str source string to be performed on
4511      * @return lowercase version of the argument string
4512      * @stable ICU 2.1
4513      */
4514     public static String toLowerCase(String str)
4515     {
4516         return toLowerCase(ULocale.getDefault(), str);
4517     }
4518
4519     /**
4520      * <p>Returns the titlecase version of the argument string.</p>
4521      * <p>Position for titlecasing is determined by the argument break
4522      * iterator, hence the user can customize his break iterator for
4523      * a specialized titlecasing. In this case only the forward iteration
4524      * needs to be implemented.
4525      * If the break iterator passed in is null, the default Unicode algorithm
4526      * will be used to determine the titlecase positions.
4527      * </p>
4528      * <p>Only positions returned by the break iterator will be title cased,
4529      * character in between the positions will all be in lower case.</p>
4530      * <p>Casing is dependent on the default locale and context-sensitive</p>
4531      * @param str source string to be performed on
4532      * @param breakiter break iterator to determine the positions in which
4533      *        the character should be title cased.
4534      * @return lowercase version of the argument string
4535      * @stable ICU 2.6
4536      */
4537     public static String toTitleCase(String str, BreakIterator breakiter)
4538     {
4539         return toTitleCase(ULocale.getDefault(), str, breakiter);
4540     }
4541
4542     /**
4543      * Returns the uppercase version of the argument string.
4544      * Casing is dependent on the argument locale and context-sensitive.
4545      * @param locale which string is to be converted in
4546      * @param str source string to be performed on
4547      * @return uppercase version of the argument string
4548      * @stable ICU 2.1
4549      */
4550     public static String toUpperCase(Locale locale, String str)
4551     {
4552         return toUpperCase(ULocale.forLocale(locale), str);
4553     }
4554
4555     /**
4556      * Returns the uppercase version of the argument string.
4557      * Casing is dependent on the argument locale and context-sensitive.
4558      * @param locale which string is to be converted in
4559      * @param str source string to be performed on
4560      * @return uppercase version of the argument string
4561      * @stable ICU 3.2
4562      */
4563     public static String toUpperCase(ULocale locale, String str) {
4564         StringContextIterator iter = new StringContextIterator(str);
4565         StringBuilder result = new StringBuilder(str.length());
4566         int[] locCache = new int[1];
4567         int c;
4568
4569         if (locale == null) {
4570             locale = ULocale.getDefault();
4571         }
4572         locCache[0]=0;
4573
4574         while((c=iter.nextCaseMapCP())>=0) {
4575             c = UCaseProps.INSTANCE.toFullUpper(c, iter, result, locale, locCache);
4576
4577             /* decode the result */
4578             if(c<0) {
4579                 /* (not) original code point */
4580                 c=~c;
4581             } else if(c<=UCaseProps.MAX_STRING_LENGTH) {
4582                 /* mapping already appended to result */
4583                 continue;
4584             /* } else { append single-code point mapping */
4585             }
4586             result.appendCodePoint(c);
4587         }
4588         return result.toString();
4589     }
4590
4591     /**
4592      * Returns the lowercase version of the argument string.
4593      * Casing is dependent on the argument locale and context-sensitive
4594      * @param locale which string is to be converted in
4595      * @param str source string to be performed on
4596      * @return lowercase version of the argument string
4597      * @stable ICU 2.1
4598      */
4599     public static String toLowerCase(Locale locale, String str)
4600     {
4601         return toLowerCase(ULocale.forLocale(locale), str);
4602     }
4603
4604     /**
4605      * Returns the lowercase version of the argument string.
4606      * Casing is dependent on the argument locale and context-sensitive
4607      * @param locale which string is to be converted in
4608      * @param str source string to be performed on
4609      * @return lowercase version of the argument string
4610      * @stable ICU 3.2
4611      */
4612     public static String toLowerCase(ULocale locale, String str) {
4613         StringContextIterator iter = new StringContextIterator(str);
4614         StringBuilder result = new StringBuilder(str.length());
4615         int[] locCache = new int[1];
4616         int c;
4617
4618         if (locale == null) {
4619             locale = ULocale.getDefault();
4620         }
4621         locCache[0]=0;
4622
4623         while((c=iter.nextCaseMapCP())>=0) {
4624             c = UCaseProps.INSTANCE.toFullLower(c, iter, result, locale, locCache);
4625
4626             /* decode the result */
4627             if(c<0) {
4628                 /* (not) original code point */
4629                 c=~c;
4630             } else if(c<=UCaseProps.MAX_STRING_LENGTH) {
4631                 /* mapping already appended to result */
4632                 continue;
4633             /* } else { append single-code point mapping */
4634             }
4635             result.appendCodePoint(c);
4636         }
4637         return result.toString();
4638     }
4639
4640     /**
4641      * <p>Returns the titlecase version of the argument string.</p>
4642      * <p>Position for titlecasing is determined by the argument break
4643      * iterator, hence the user can customize his break iterator for
4644      * a specialized titlecasing. In this case only the forward iteration
4645      * needs to be implemented.
4646      * If the break iterator passed in is null, the default Unicode algorithm
4647      * will be used to determine the titlecase positions.
4648      * </p>
4649      * <p>Only positions returned by the break iterator will be title cased,
4650      * character in between the positions will all be in lower case.</p>
4651      * <p>Casing is dependent on the argument locale and context-sensitive</p>
4652      * @param locale which string is to be converted in
4653      * @param str source string to be performed on
4654      * @param breakiter break iterator to determine the positions in which
4655      *        the character should be title cased.
4656      * @return lowercase version of the argument string
4657      * @stable ICU 2.6
4658      */
4659     public static String toTitleCase(Locale locale, String str,
4660                                      BreakIterator breakiter)
4661     {
4662         return toTitleCase(ULocale.forLocale(locale), str, breakiter);
4663     }
4664
4665     /**
4666      * <p>Returns the titlecase version of the argument string.</p>
4667      * <p>Position for titlecasing is determined by the argument break
4668      * iterator, hence the user can customize his break iterator for
4669      * a specialized titlecasing. In this case only the forward iteration
4670      * needs to be implemented.
4671      * If the break iterator passed in is null, the default Unicode algorithm
4672      * will be used to determine the titlecase positions.
4673      * </p>
4674      * <p>Only positions returned by the break iterator will be title cased,
4675      * character in between the positions will all be in lower case.</p>
4676      * <p>Casing is dependent on the argument locale and context-sensitive</p>
4677      * @param locale which string is to be converted in
4678      * @param str source string to be performed on
4679      * @param titleIter break iterator to determine the positions in which
4680      *        the character should be title cased.
4681      * @return lowercase version of the argument string
4682      * @stable ICU 3.2
4683      */
4684     public static String toTitleCase(ULocale locale, String str,
4685                                      BreakIterator titleIter) {
4686         return toTitleCase(locale, str, titleIter, 0);
4687     }
4688
4689     /**
4690      * <p>Returns the titlecase version of the argument string.</p>
4691      * <p>Position for titlecasing is determined by the argument break
4692      * iterator, hence the user can customize his break iterator for
4693      * a specialized titlecasing. In this case only the forward iteration
4694      * needs to be implemented.
4695      * If the break iterator passed in is null, the default Unicode algorithm
4696      * will be used to determine the titlecase positions.
4697      * </p>
4698      * <p>Only positions returned by the break iterator will be title cased,
4699      * character in between the positions will all be in lower case.</p>
4700      * <p>Casing is dependent on the argument locale and context-sensitive</p>
4701      * @param locale which string is to be converted in
4702      * @param str source string to be performed on
4703      * @param titleIter break iterator to determine the positions in which
4704      *        the character should be title cased.
4705      * @param options bit set to modify the titlecasing operation
4706      * @return lowercase version of the argument string
4707      * @stable ICU 3.8
4708      * @see #TITLECASE_NO_LOWERCASE
4709      * @see #TITLECASE_NO_BREAK_ADJUSTMENT
4710      */
4711     public static String toTitleCase(ULocale locale, String str,
4712                                      BreakIterator titleIter,
4713                                      int options) {
4714         StringContextIterator iter = new StringContextIterator(str);
4715         StringBuilder result = new StringBuilder(str.length());
4716         int[] locCache = new int[1];
4717         int c, nc, srcLength = str.length();
4718
4719         if (locale == null) {
4720             locale = ULocale.getDefault();
4721         }
4722         locCache[0]=0;
4723
4724         if(titleIter == null) {
4725             titleIter = BreakIterator.getWordInstance(locale);
4726         }
4727         titleIter.setText(str);
4728
4729         int prev, titleStart, index;
4730         boolean isFirstIndex;
4731         boolean isDutch = locale.getLanguage().equals("nl");
4732         boolean FirstIJ = true;
4733
4734         /* set up local variables */
4735         prev=0;
4736         isFirstIndex=true;
4737
4738         /* titlecasing loop */
4739         while(prev<srcLength) {
4740             /* find next index where to titlecase */
4741             if(isFirstIndex) {
4742                 isFirstIndex=false;
4743                 index=titleIter.first();
4744             } else {
4745                 index=titleIter.next();
4746             }
4747             if(index==BreakIterator.DONE || index>srcLength) {
4748                 index=srcLength;
4749             }
4750
4751             /*
4752              * Unicode 4 & 5 section 3.13 Default Case Operations:
4753              *
4754              * R3  toTitlecase(X): Find the word boundaries based on Unicode Standard Annex
4755              * #29, "Text Boundaries." Between each pair of word boundaries, find the first
4756              * cased character F. If F exists, map F to default_title(F); then map each
4757              * subsequent character C to default_lower(C).
4758              *
4759              * In this implementation, segment [prev..index[ into 3 parts:
4760              * a) uncased characters (copy as-is) [prev..titleStart[
4761              * b) first case letter (titlecase)         [titleStart..titleLimit[
4762              * c) subsequent characters (lowercase)                 [titleLimit..index[
4763              */
4764             if(prev<index) {
4765                 /* find and copy uncased characters [prev..titleStart[ */
4766                 iter.setLimit(index);
4767                 c=iter.nextCaseMapCP();
4768                 if((options&TITLECASE_NO_BREAK_ADJUSTMENT)==0
4769                    && UCaseProps.NONE==UCaseProps.INSTANCE.getType(c)) {
4770                     while((c=iter.nextCaseMapCP())>=0
4771                           && UCaseProps.NONE==UCaseProps.INSTANCE.getType(c)) {}
4772                     titleStart=iter.getCPStart();
4773                     if(prev<titleStart) {
4774                         result.append(str, prev, titleStart);
4775                     }
4776                 } else {
4777                     titleStart=prev;
4778                 }
4779
4780                 if(titleStart<index) {
4781                     FirstIJ = true;
4782                     /* titlecase c which is from titleStart */
4783                     c = UCaseProps.INSTANCE.toFullTitle(c, iter, result, locale, locCache);
4784
4785                     /* decode the result and lowercase up to index */
4786                     for(;;) {
4787                         if(c<0) {
4788                             /* (not) original code point */
4789                             c=~c;
4790                             result.appendCodePoint(c);
4791                         } else if(c<=UCaseProps.MAX_STRING_LENGTH) {
4792                             /* mapping already appended to result */
4793                         } else {
4794                             /* append single-code point mapping */
4795                             result.appendCodePoint(c);
4796                         }
4797
4798                         if((options&TITLECASE_NO_LOWERCASE)!=0) {
4799                             /* Optionally just copy the rest of the word unchanged. */
4800
4801                             int titleLimit=iter.getCPLimit();
4802                             if(titleLimit<index) {
4803                               // TODO: With Java 5, this would want to be
4804                               // result.append(str, titleLimit, index);
4805                                 String appendStr = str.substring(titleLimit,index);
4806                                 /* Special Case - Dutch IJ Titlecasing */
4807                                 if ( isDutch && c == 0x0049 && appendStr.startsWith("j")) {
4808                                    appendStr = "J" + appendStr.substring(1);
4809                                 }
4810                                 result.append(appendStr);
4811                             }
4812                             iter.moveToLimit();
4813                             break;
4814                         } else if((nc=iter.nextCaseMapCP())>=0) {
4815                             if (isDutch && (nc == 0x004A ||  nc == 0x006A)
4816                                 && (c == 0x0049) && (FirstIJ == true)) {
4817                                 c = 0x004A; /* J */
4818                                 FirstIJ = false;
4819                             } else {
4820                                 /* Normal operation: Lowercase the rest of the word. */
4821                                 c = UCaseProps.INSTANCE.toFullLower(nc, iter, result, locale,
4822                                                                     locCache);
4823                             }
4824                         } else {
4825                             break;
4826                         }
4827                     }
4828                 }
4829             }
4830
4831             prev=index;
4832         }
4833         return result.toString();
4834     }
4835
4836     /**
4837      * {@icu} The given character is mapped to its case folding equivalent according
4838      * to UnicodeData.txt and CaseFolding.txt; if the character has no case
4839      * folding equivalent, the character itself is returned.
4840      *
4841      * <p>This function only returns the simple, single-code point case mapping.
4842      * Full case mappings should be used whenever possible because they produce
4843      * better results by working on whole strings.
4844      * They can map to a result string with a different length as appropriate.
4845      * Full case mappings are applied by the case mapping functions
4846      * that take String parameters rather than code points (int).
4847      * See also the User Guide chapter on C/POSIX migration:
4848      * http://www.icu-project.org/userguide/posix.html#case_mappings
4849      *
4850      * @param ch             the character to be converted
4851      * @param defaultmapping Indicates if all mappings defined in
4852      *                       CaseFolding.txt is to be used, otherwise the
4853      *                       mappings for dotted I  and dotless i marked with
4854      *                       'I' in CaseFolding.txt will be skipped.
4855      * @return               the case folding equivalent of the character, if
4856      *                       any; otherwise the character itself.
4857      * @see                  #foldCase(String, boolean)
4858      * @stable ICU 2.1
4859      */
4860     public static int foldCase(int ch, boolean defaultmapping) {
4861         return foldCase(ch, defaultmapping ? FOLD_CASE_DEFAULT : FOLD_CASE_EXCLUDE_SPECIAL_I);
4862     }
4863
4864     /**
4865      * {@icu} The given string is mapped to its case folding equivalent according to
4866      * UnicodeData.txt and CaseFolding.txt; if any character has no case
4867      * folding equivalent, the character itself is returned.
4868      * "Full", multiple-code point case folding mappings are returned here.
4869      * For "simple" single-code point mappings use the API
4870      * foldCase(int ch, boolean defaultmapping).
4871      * @param str            the String to be converted
4872      * @param defaultmapping Indicates if all mappings defined in
4873      *                       CaseFolding.txt is to be used, otherwise the
4874      *                       mappings for dotted I and dotless i marked with
4875      *                       'I' in CaseFolding.txt will be skipped.
     * @return               the case folding equivalent of the string, if
     *                       any; otherwise the string itself.
4878      * @see                  #foldCase(int, boolean)
4879      * @stable ICU 2.1
4880      */
4881     public static String foldCase(String str, boolean defaultmapping) {
4882         return foldCase(str, defaultmapping ? FOLD_CASE_DEFAULT : FOLD_CASE_EXCLUDE_SPECIAL_I);
4883     }
4884
    /**
     * {@icu} Option value for case folding: use default mappings defined in
     * CaseFolding.txt.
     * <p>Pass this as the <code>options</code> argument of
     * foldCase(int, int) or foldCase(String, int). The boolean overloads of
     * foldCase use this value when <code>defaultmapping</code> is true.
     * @stable ICU 2.6
     */
    public static final int FOLD_CASE_DEFAULT    =      0x0000;
    /**
     * {@icu} Option value for case folding: exclude the mappings for dotted I
     * and dotless i marked with 'I' in CaseFolding.txt.
     * <p>Pass this as the <code>options</code> argument of
     * foldCase(int, int) or foldCase(String, int). The boolean overloads of
     * foldCase use this value when <code>defaultmapping</code> is false.
     * @stable ICU 2.6
     */
    public static final int FOLD_CASE_EXCLUDE_SPECIAL_I = 0x0001;
4897
4898     /**
4899      * {@icu} The given character is mapped to its case folding equivalent according
4900      * to UnicodeData.txt and CaseFolding.txt; if the character has no case
4901      * folding equivalent, the character itself is returned.
4902      *
4903      * <p>This function only returns the simple, single-code point case mapping.
4904      * Full case mappings should be used whenever possible because they produce
4905      * better results by working on whole strings.
4906      * They can map to a result string with a different length as appropriate.
4907      * Full case mappings are applied by the case mapping functions
4908      * that take String parameters rather than code points (int).
4909      * See also the User Guide chapter on C/POSIX migration:
4910      * http://www.icu-project.org/userguide/posix.html#case_mappings
4911      *
4912      * @param ch the character to be converted
4913      * @param options A bit set for special processing. Currently the recognised options
4914      * are FOLD_CASE_EXCLUDE_SPECIAL_I and FOLD_CASE_DEFAULT
4915      * @return the case folding equivalent of the character, if any; otherwise the
4916      * character itself.
4917      * @see #foldCase(String, boolean)
4918      * @stable ICU 2.6
4919      */
4920     public static int foldCase(int ch, int options) {
4921         return UCaseProps.INSTANCE.fold(ch, options);
4922     }
4923
4924     /**
4925      * {@icu} The given string is mapped to its case folding equivalent according to
4926      * UnicodeData.txt and CaseFolding.txt; if any character has no case
4927      * folding equivalent, the character itself is returned.
4928      * "Full", multiple-code point case folding mappings are returned here.
4929      * For "simple" single-code point mappings use the API
4930      * foldCase(int ch, boolean defaultmapping).
4931      * @param str the String to be converted
4932      * @param options A bit set for special processing. Currently the recognised options
4933      *                are FOLD_CASE_EXCLUDE_SPECIAL_I and FOLD_CASE_DEFAULT
     * @return the case folding equivalent of the string, if any; otherwise the
     *         string itself.
4936      * @see #foldCase(int, boolean)
4937      * @stable ICU 2.6
4938      */
4939     public static final String foldCase(String str, int options) {
4940         StringBuilder result = new StringBuilder(str.length());
4941         int c, i, length;
4942
4943         length = str.length();
4944         for(i=0; i<length;) {
4945             c=UTF16.charAt(str, i);
4946             i+=UTF16.getCharCount(c);
4947             c = UCaseProps.INSTANCE.toFullFolding(c, result, options);
4948
4949             /* decode the result */
4950             if(c<0) {
4951                 /* (not) original code point */
4952                 c=~c;
4953             } else if(c<=UCaseProps.MAX_STRING_LENGTH) {
4954                 /* mapping already appended to result */
4955                 continue;
4956             /* } else { append single-code point mapping */
4957             }
4958             result.appendCodePoint(c);
4959         }
4960         return result.toString();
4961     }
4962
4963     /**
4964      * {@icu} Return numeric value of Han code points.
4965      * <br> This returns the value of Han 'numeric' code points,
4966      * including those for zero, ten, hundred, thousand, ten thousand,
4967      * and hundred million.
4968      * This includes both the standard and 'checkwriting'
4969      * characters, the 'big circle' zero character, and the standard
4970      * zero character.
4971      * @param ch code point to query
4972      * @return value if it is a Han 'numeric character,' otherwise return -1.
4973      * @stable ICU 2.4
4974      */
4975     public static int getHanNumericValue(int ch)
4976     {
4977         // TODO: Are these all covered by Unicode numeric value data?
4978         switch(ch)
4979         {
4980         case IDEOGRAPHIC_NUMBER_ZERO_ :
4981         case CJK_IDEOGRAPH_COMPLEX_ZERO_ :
4982         return 0; // Han Zero
4983         case CJK_IDEOGRAPH_FIRST_ :
4984         case CJK_IDEOGRAPH_COMPLEX_ONE_ :
4985         return 1; // Han One
4986         case CJK_IDEOGRAPH_SECOND_ :
4987         case CJK_IDEOGRAPH_COMPLEX_TWO_ :
4988         return 2; // Han Two
4989         case CJK_IDEOGRAPH_THIRD_ :
4990         case CJK_IDEOGRAPH_COMPLEX_THREE_ :
4991         return 3; // Han Three
4992         case CJK_IDEOGRAPH_FOURTH_ :
4993         case CJK_IDEOGRAPH_COMPLEX_FOUR_ :
4994         return 4; // Han Four
4995         case CJK_IDEOGRAPH_FIFTH_ :
4996         case CJK_IDEOGRAPH_COMPLEX_FIVE_ :
4997         return 5; // Han Five
4998         case CJK_IDEOGRAPH_SIXTH_ :
4999         case CJK_IDEOGRAPH_COMPLEX_SIX_ :
5000         return 6; // Han Six
5001         case CJK_IDEOGRAPH_SEVENTH_ :
5002         case CJK_IDEOGRAPH_COMPLEX_SEVEN_ :
5003         return 7; // Han Seven
5004         case CJK_IDEOGRAPH_EIGHTH_ :
5005         case CJK_IDEOGRAPH_COMPLEX_EIGHT_ :
5006         return 8; // Han Eight
5007         case CJK_IDEOGRAPH_NINETH_ :
5008         case CJK_IDEOGRAPH_COMPLEX_NINE_ :
5009         return 9; // Han Nine
5010         case CJK_IDEOGRAPH_TEN_ :
5011         case CJK_IDEOGRAPH_COMPLEX_TEN_ :
5012         return 10;
5013         case CJK_IDEOGRAPH_HUNDRED_ :
5014         case CJK_IDEOGRAPH_COMPLEX_HUNDRED_ :
5015         return 100;
5016         case CJK_IDEOGRAPH_THOUSAND_ :
5017         case CJK_IDEOGRAPH_COMPLEX_THOUSAND_ :
5018         return 1000;
5019         case CJK_IDEOGRAPH_TEN_THOUSAND_ :
5020         return 10000;
5021         case CJK_IDEOGRAPH_HUNDRED_MILLION_ :
5022         return 100000000;
5023         }
5024         return -1; // no value
5025     }
5026
5027     /**
5028      * {@icu} <p>Returns an iterator for character types, iterating over codepoints.</p>
5029      * Example of use:<br>
5030      * <pre>
5031      * RangeValueIterator iterator = UCharacter.getTypeIterator();
5032      * RangeValueIterator.Element element = new RangeValueIterator.Element();
5033      * while (iterator.next(element)) {
5034      *     System.out.println("Codepoint \\u" +
5035      *                        Integer.toHexString(element.start) +
5036      *                        " to codepoint \\u" +
5037      *                        Integer.toHexString(element.limit - 1) +
5038      *                        " has the character type " +
5039      *                        element.value);
5040      * }
5041      * </pre>
5042      * @return an iterator
5043      * @stable ICU 2.6
5044      */
5045     public static RangeValueIterator getTypeIterator()
5046     {
5047         return new UCharacterTypeIterator();
5048     }
5049
5050     private static final class UCharacterTypeIterator implements RangeValueIterator {
5051         UCharacterTypeIterator() {
5052             reset();
5053         }
5054
5055         // implements RangeValueIterator
5056         public boolean next(Element element) {
5057             if(trieIterator.hasNext() && !(range=trieIterator.next()).leadSurrogate) {
5058                 element.start=range.startCodePoint;
5059                 element.limit=range.endCodePoint+1;
5060                 element.value=range.value;
5061                 return true;
5062             } else {
5063                 return false;
5064             }
5065         }
5066
5067         // implements RangeValueIterator
5068         public void reset() {
5069             trieIterator=UCharacterProperty.INSTANCE.m_trie_.iterator(MASK_TYPE);
5070         }
5071
5072         private Iterator<Trie2.Range> trieIterator;
5073         private Trie2.Range range;
5074
5075         private static final class MaskType implements Trie2.ValueMapper {
5076             // Extracts the general category ("character type") from the trie value.
5077             public int map(int value) {
5078                 return value & UCharacterProperty.TYPE_MASK;
5079             }
5080         }
5081         private static final MaskType MASK_TYPE=new MaskType();
5082     }
5083
5084     /**
5085      * {@icu} <p>Returns an iterator for character names, iterating over codepoints.</p>
     * <p>This API only gets the iterator for the modern, most up-to-date
     * Unicode names. For older 1.0 Unicode names use getName1_0Iterator() or
     * for extended names use getExtendedNameIterator().</p>
5089      * Example of use:<br>
5090      * <pre>
5091      * ValueIterator iterator = UCharacter.getNameIterator();
5092      * ValueIterator.Element element = new ValueIterator.Element();
5093      * while (iterator.next(element)) {
5094      *     System.out.println("Codepoint \\u" +
5095      *                        Integer.toHexString(element.codepoint) +
5096      *                        " has the name " + (String)element.value);
5097      * }
5098      * </pre>
5099      * <p>The maximal range which the name iterator iterates is from
5100      * UCharacter.MIN_VALUE to UCharacter.MAX_VALUE.</p>
5101      * @return an iterator
5102      * @stable ICU 2.6
5103      */
5104     public static ValueIterator getNameIterator(){
5105         return new UCharacterNameIterator(UCharacterName.INSTANCE,
5106                       UCharacterNameChoice.UNICODE_CHAR_NAME);
5107     }
5108
5109     /**
5110      * {@icu} <p>Returns an iterator for character names, iterating over codepoints.</p>
5111      * <p>This API only gets the iterator for the older 1.0 Unicode names.
5112      * For modern, most up-to-date Unicode names use getNameIterator() or
5113      * for extended names use getExtendedNameIterator().</p>
5114      * Example of use:<br>
5115      * <pre>
     * ValueIterator iterator = UCharacter.getName1_0Iterator();
5117      * ValueIterator.Element element = new ValueIterator.Element();
5118      * while (iterator.next(element)) {
5119      *     System.out.println("Codepoint \\u" +
5120      *                        Integer.toHexString(element.codepoint) +
5121      *                        " has the name " + (String)element.value);
5122      * }
5123      * </pre>
     * <p>The maximal range which the name iterator iterates is from
     * UCharacter.MIN_VALUE to UCharacter.MAX_VALUE.</p>
5125      * @return an iterator
5126      * @stable ICU 2.6
5127      */
5128     public static ValueIterator getName1_0Iterator(){
5129         return new UCharacterNameIterator(UCharacterName.INSTANCE,
5130                       UCharacterNameChoice.UNICODE_10_CHAR_NAME);
5131     }
5132
5133     /**
5134      * {@icu} <p>Returns an iterator for character names, iterating over codepoints.</p>
5135      * <p>This API only gets the iterator for the extended names.
5136      * For modern, most up-to-date Unicode names use getNameIterator() or
     * for older 1.0 Unicode names use getName1_0Iterator().</p>
5138      * Example of use:<br>
5139      * <pre>
5140      * ValueIterator iterator = UCharacter.getExtendedNameIterator();
5141      * ValueIterator.Element element = new ValueIterator.Element();
5142      * while (iterator.next(element)) {
5143      *     System.out.println("Codepoint \\u" +
5144      *                        Integer.toHexString(element.codepoint) +
5145      *                        " has the name " + (String)element.value);
5146      * }
5147      * </pre>
     * <p>The maximal range which the name iterator iterates is from
     * UCharacter.MIN_VALUE to UCharacter.MAX_VALUE.</p>
5149      * @return an iterator
5150      * @stable ICU 2.6
5151      */
5152     public static ValueIterator getExtendedNameIterator(){
5153         return new UCharacterNameIterator(UCharacterName.INSTANCE,
5154                       UCharacterNameChoice.EXTENDED_CHAR_NAME);
5155     }
5156
5157     /**
     * {@icu} <p>Returns the "age" of the code point.</p>
5159      * <p>The "age" is the Unicode version when the code point was first
5160      * designated (as a non-character or for Private Use) or assigned a
5161      * character.
5162      * <p>This can be useful to avoid emitting code points to receiving
5163      * processes that do not accept newer characters.</p>
5164      * <p>The data is from the UCD file DerivedAge.txt.</p>
5165      * @param ch The code point.
5166      * @return the Unicode version number
5167      * @stable ICU 2.6
5168      */
5169     public static VersionInfo getAge(int ch)
5170     {
5171         if (ch < MIN_VALUE || ch > MAX_VALUE) {
5172         throw new IllegalArgumentException("Codepoint out of bounds");
5173         }
5174         return UCharacterProperty.INSTANCE.getAge(ch);
5175     }
5176
5177     /**
5178      * {@icu} <p>Check a binary Unicode property for a code point.</p>
5179      * <p>Unicode, especially in version 3.2, defines many more properties
5180      * than the original set in UnicodeData.txt.</p>
5181      * <p>This API is intended to reflect Unicode properties as defined in
5182      * the Unicode Character Database (UCD) and Unicode Technical Reports
5183      * (UTR).</p>
5184      * <p>For details about the properties see
5185      * <a href=http://www.unicode.org/>http://www.unicode.org/</a>.</p>
5186      * <p>For names of Unicode properties see the UCD file
5187      * PropertyAliases.txt.</p>
5188      * <p>This API does not check the validity of the codepoint.</p>
5189      * <p>Important: If ICU is built with UCD files from Unicode versions
5190      * below 3.2, then properties marked with "new" are not or
5191      * not fully available.</p>
5192      * @param ch code point to test.
5193      * @param property selector constant from com.ibm.icu.lang.UProperty,
5194      *        identifies which binary property to check.
5195      * @return true or false according to the binary Unicode property value
5196      *         for ch. Also false if property is out of bounds or if the
5197      *         Unicode version does not have data for the property at all, or
5198      *         not for this code point.
5199      * @see com.ibm.icu.lang.UProperty
5200      * @stable ICU 2.6
5201      */
5202     public static boolean hasBinaryProperty(int ch, int property)
5203     {
5204         return UCharacterProperty.INSTANCE.hasBinaryProperty(ch, property);
5205     }
5206
5207     /**
5208      * {@icu} <p>Check if a code point has the Alphabetic Unicode property.</p>
5209      * <p>Same as UCharacter.hasBinaryProperty(ch, UProperty.ALPHABETIC).</p>
5210      * <p>Different from UCharacter.isLetter(ch)!</p>
5211      * @stable ICU 2.6
5212      * @param ch codepoint to be tested
5213      */
5214     public static boolean isUAlphabetic(int ch)
5215     {
5216     return hasBinaryProperty(ch, UProperty.ALPHABETIC);
5217     }
5218
5219     /**
5220      * {@icu} <p>Check if a code point has the Lowercase Unicode property.</p>
5221      * <p>Same as UCharacter.hasBinaryProperty(ch, UProperty.LOWERCASE).</p>
5222      * <p>This is different from UCharacter.isLowerCase(ch)!</p>
5223      * @param ch codepoint to be tested
5224      * @stable ICU 2.6
5225      */
5226     public static boolean isULowercase(int ch)
5227     {
5228     return hasBinaryProperty(ch, UProperty.LOWERCASE);
5229     }
5230
5231     /**
5232      * {@icu} <p>Check if a code point has the Uppercase Unicode property.</p>
5233      * <p>Same as UCharacter.hasBinaryProperty(ch, UProperty.UPPERCASE).</p>
5234      * <p>This is different from UCharacter.isUpperCase(ch)!</p>
5235      * @param ch codepoint to be tested
5236      * @stable ICU 2.6
5237      */
5238     public static boolean isUUppercase(int ch)
5239     {
5240     return hasBinaryProperty(ch, UProperty.UPPERCASE);
5241     }
5242
5243     /**
5244      * {@icu} <p>Check if a code point has the White_Space Unicode property.</p>
5245      * <p>Same as UCharacter.hasBinaryProperty(ch, UProperty.WHITE_SPACE).</p>
5246      * <p>This is different from both UCharacter.isSpace(ch) and
5247      * UCharacter.isWhitespace(ch)!</p>
5248      * @param ch codepoint to be tested
5249      * @stable ICU 2.6
5250      */
5251     public static boolean isUWhiteSpace(int ch)
5252     {
5253     return hasBinaryProperty(ch, UProperty.WHITE_SPACE);
5254     }
5255
5256     /**
5257      * {@icu} <p>Returns the property value for an Unicode property type of a code point.
5258      * Also returns binary and mask property values.</p>
5259      * <p>Unicode, especially in version 3.2, defines many more properties than
5260      * the original set in UnicodeData.txt.</p>
5261      * <p>The properties APIs are intended to reflect Unicode properties as
5262      * defined in the Unicode Character Database (UCD) and Unicode Technical
5263      * Reports (UTR). For details about the properties see
5264      * http://www.unicode.org/.</p>
5265      * <p>For names of Unicode properties see the UCD file PropertyAliases.txt.
5266      * </p>
5267      * <pre>
5268      * Sample usage:
5269      * int ea = UCharacter.getIntPropertyValue(c, UProperty.EAST_ASIAN_WIDTH);
5270      * int ideo = UCharacter.getIntPropertyValue(c, UProperty.IDEOGRAPHIC);
5271      * boolean b = (ideo == 1) ? true : false;
5272      * </pre>
5273      * @param ch code point to test.
5274      * @param type UProperty selector constant, identifies which binary
5275      *        property to check. Must be
5276      *        UProperty.BINARY_START &lt;= type &lt; UProperty.BINARY_LIMIT or
5277      *        UProperty.INT_START &lt;= type &lt; UProperty.INT_LIMIT or
5278      *        UProperty.MASK_START &lt;= type &lt; UProperty.MASK_LIMIT.
5279      * @return numeric value that is directly the property value or,
5280      *         for enumerated properties, corresponds to the numeric value of
5281      *         the enumerated constant of the respective property value
5282      *         enumeration type (cast to enum type if necessary).
5283      *         Returns 0 or 1 (for false / true) for binary Unicode properties.
5284      *         Returns a bit-mask for mask properties.
5285      *         Returns 0 if 'type' is out of bounds or if the Unicode version
5286      *         does not have data for the property at all, or not for this code
5287      *         point.
5288      * @see UProperty
5289      * @see #hasBinaryProperty
5290      * @see #getIntPropertyMinValue
5291      * @see #getIntPropertyMaxValue
5292      * @see #getUnicodeVersion
5293      * @stable ICU 2.4
5294      */
5295     public static int getIntPropertyValue(int ch, int type)
5296     {
5297         return UCharacterProperty.INSTANCE.getIntPropertyValue(ch, type);
5298     }
5299     /**
5300      * {@icu} Returns a string version of the property value.
5301      * @param propertyEnum The property enum value.
5302      * @param codepoint The codepoint value.
5303      * @param nameChoice The choice of the name.
5304      * @return value as string
5305      * @internal
5306      * @deprecated This API is ICU internal only.
5307      */
5308     ///CLOVER:OFF
5309     public static String getStringPropertyValue(int propertyEnum, int codepoint, int nameChoice) {
5310         if ((propertyEnum >= UProperty.BINARY_START && propertyEnum < UProperty.BINARY_LIMIT) ||
5311                 (propertyEnum >= UProperty.INT_START && propertyEnum < UProperty.INT_LIMIT)) {
5312             return getPropertyValueName(propertyEnum, getIntPropertyValue(codepoint, propertyEnum),
5313                                         nameChoice);
5314         }
5315         if (propertyEnum == UProperty.NUMERIC_VALUE) {
5316                 return String.valueOf(getUnicodeNumericValue(codepoint));
5317         }
5318         // otherwise must be string property
5319         switch (propertyEnum) {
5320         case UProperty.AGE: return getAge(codepoint).toString();
5321         case UProperty.ISO_COMMENT: return getISOComment(codepoint);
5322         case UProperty.BIDI_MIRRORING_GLYPH: return UTF16.valueOf(getMirror(codepoint));
5323         case UProperty.CASE_FOLDING: return foldCase(UTF16.valueOf(codepoint), true);
5324         case UProperty.LOWERCASE_MAPPING: return toLowerCase(UTF16.valueOf(codepoint));
5325         case UProperty.NAME: return getName(codepoint);
5326         case UProperty.SIMPLE_CASE_FOLDING: return UTF16.valueOf(foldCase(codepoint,true));
5327         case UProperty.SIMPLE_LOWERCASE_MAPPING: return UTF16.valueOf(toLowerCase(codepoint));
5328         case UProperty.SIMPLE_TITLECASE_MAPPING: return UTF16.valueOf(toTitleCase(codepoint));
5329         case UProperty.SIMPLE_UPPERCASE_MAPPING: return UTF16.valueOf(toUpperCase(codepoint));
5330         case UProperty.TITLECASE_MAPPING: return toTitleCase(UTF16.valueOf(codepoint),null);
5331         case UProperty.UNICODE_1_NAME: return getName1_0(codepoint);
5332         case UProperty.UPPERCASE_MAPPING: return toUpperCase(UTF16.valueOf(codepoint));
5333         }
5334         throw new IllegalArgumentException("Illegal Property Enum");
5335     }
5336     ///CLOVER:ON
5337
5338     /**
5339      * {@icu} Returns the minimum value for an integer/binary Unicode property type.
5340      * Can be used together with UCharacter.getIntPropertyMaxValue(int)
5341      * to allocate arrays of com.ibm.icu.text.UnicodeSet or similar.
5342      * @param type UProperty selector constant, identifies which binary
5343      *        property to check. Must be
5344      *        UProperty.BINARY_START &lt;= type &lt; UProperty.BINARY_LIMIT or
5345      *        UProperty.INT_START &lt;= type &lt; UProperty.INT_LIMIT.
5346      * @return Minimum value returned by UCharacter.getIntPropertyValue(int)
5347      *         for a Unicode property. 0 if the property
5348      *         selector 'type' is out of range.
5349      * @see UProperty
5350      * @see #hasBinaryProperty
5351      * @see #getUnicodeVersion
5352      * @see #getIntPropertyMaxValue
5353      * @see #getIntPropertyValue
5354      * @stable ICU 2.4
5355      */
5356     public static int getIntPropertyMinValue(int type){
5357
5358         return 0; // undefined; and: all other properties have a minimum value of 0
5359     }
5360
5361
5362     /**
5363      * {@icu} Returns the maximum value for an integer/binary Unicode property.
5364      * Can be used together with UCharacter.getIntPropertyMinValue(int)
5365      * to allocate arrays of com.ibm.icu.text.UnicodeSet or similar.
5366      * Examples for min/max values (for Unicode 3.2):
5367      * <ul>
5368      * <li> UProperty.BIDI_CLASS:    0/18
5369      * (UCharacterDirection.LEFT_TO_RIGHT/UCharacterDirection.BOUNDARY_NEUTRAL)
5370      * <li> UProperty.SCRIPT:        0/45 (UScript.COMMON/UScript.TAGBANWA)
5371      * <li> UProperty.IDEOGRAPHIC:   0/1  (false/true)
5372      * </ul>
5373      * For undefined UProperty constant values, min/max values will be 0/-1.
5374      * @param type UProperty selector constant, identifies which binary
5375      *        property to check. Must be
5376      *        UProperty.BINARY_START &lt;= type &lt; UProperty.BINARY_LIMIT or
5377      *        UProperty.INT_START &lt;= type &lt; UProperty.INT_LIMIT.
     * @return Maximum value returned by UCharacter.getIntPropertyValue(int, int)
     *         for a Unicode property. &lt;= 0 if the property selector 'type'
     *         is out of range.
5380      * @see UProperty
5381      * @see #hasBinaryProperty
5382      * @see #getUnicodeVersion
     * @see #getIntPropertyMinValue
5384      * @see #getIntPropertyValue
5385      * @stable ICU 2.4
5386      */
5387     public static int getIntPropertyMaxValue(int type)
5388     {
5389         return UCharacterProperty.INSTANCE.getIntPropertyMaxValue(type);
5390     }
5391
5392     /**
5393      * Provide the java.lang.Character forDigit API, for convenience.
5394      * @stable ICU 3.0
5395      */
5396     public static char forDigit(int digit, int radix) {
5397         return java.lang.Character.forDigit(digit, radix);
5398     }
5399
5400     // JDK 1.5 API coverage
5401
    /**
     * Cover the JDK 1.5 API, for convenience.
     * Lower bound of the high (lead) surrogate range; same value as
     * UTF16.LEAD_SURROGATE_MIN_VALUE.
     * @see UTF16#LEAD_SURROGATE_MIN_VALUE
     * @stable ICU 3.0
     */
    public static final char MIN_HIGH_SURROGATE = UTF16.LEAD_SURROGATE_MIN_VALUE;

    /**
     * Cover the JDK 1.5 API, for convenience.
     * Upper bound (inclusive) of the high (lead) surrogate range; same value as
     * UTF16.LEAD_SURROGATE_MAX_VALUE.
     * @see UTF16#LEAD_SURROGATE_MAX_VALUE
     * @stable ICU 3.0
     */
    public static final char MAX_HIGH_SURROGATE = UTF16.LEAD_SURROGATE_MAX_VALUE;

    /**
     * Cover the JDK 1.5 API, for convenience.
     * Lower bound of the low (trail) surrogate range; same value as
     * UTF16.TRAIL_SURROGATE_MIN_VALUE.
     * @see UTF16#TRAIL_SURROGATE_MIN_VALUE
     * @stable ICU 3.0
     */
    public static final char MIN_LOW_SURROGATE = UTF16.TRAIL_SURROGATE_MIN_VALUE;

    /**
     * Cover the JDK 1.5 API, for convenience.
     * Upper bound (inclusive) of the low (trail) surrogate range; same value as
     * UTF16.TRAIL_SURROGATE_MAX_VALUE.
     * @see UTF16#TRAIL_SURROGATE_MAX_VALUE
     * @stable ICU 3.0
     */
    public static final char MAX_LOW_SURROGATE = UTF16.TRAIL_SURROGATE_MAX_VALUE;

    /**
     * Cover the JDK 1.5 API, for convenience.
     * Same value as UTF16.SURROGATE_MIN_VALUE.
     * @see UTF16#SURROGATE_MIN_VALUE
     * @stable ICU 3.0
     */
    public static final char MIN_SURROGATE = UTF16.SURROGATE_MIN_VALUE;

    /**
     * Cover the JDK 1.5 API, for convenience.
     * Same value as UTF16.SURROGATE_MAX_VALUE.
     * @see UTF16#SURROGATE_MAX_VALUE
     * @stable ICU 3.0
     */
    public static final char MAX_SURROGATE = UTF16.SURROGATE_MAX_VALUE;

    /**
     * Cover the JDK 1.5 API, for convenience.
     * Same value as UTF16.SUPPLEMENTARY_MIN_VALUE, the lower bound tested by
     * isSupplementaryCodePoint(int).
     * @see UTF16#SUPPLEMENTARY_MIN_VALUE
     * @stable ICU 3.0
     */
    public static final int  MIN_SUPPLEMENTARY_CODE_POINT = UTF16.SUPPLEMENTARY_MIN_VALUE;

    /**
     * Cover the JDK 1.5 API, for convenience.
     * Same value as UTF16.CODEPOINT_MAX_VALUE, the inclusive upper bound tested
     * by isValidCodePoint(int).
     * @see UTF16#CODEPOINT_MAX_VALUE
     * @stable ICU 3.0
     */
    public static final int  MAX_CODE_POINT = UTF16.CODEPOINT_MAX_VALUE;

    /**
     * Cover the JDK 1.5 API, for convenience.
     * Same value as UTF16.CODEPOINT_MIN_VALUE.
     * @see UTF16#CODEPOINT_MIN_VALUE
     * @stable ICU 3.0
     */
    public static final int  MIN_CODE_POINT = UTF16.CODEPOINT_MIN_VALUE;
5464
5465     /**
5466      * Cover the JDK 1.5 API, for convenience.
5467      * @param cp the code point to check
5468      * @return true if cp is a valid code point
5469      * @stable ICU 3.0
5470      */
5471     public static final boolean isValidCodePoint(int cp) {
5472         return cp >= 0 && cp <= MAX_CODE_POINT;
5473     }
5474
5475     /**
5476      * Cover the JDK 1.5 API, for convenience.
5477      * @param cp the code point to check
5478      * @return true if cp is a supplementary code point
5479      * @stable ICU 3.0
5480      */
5481     public static final boolean isSupplementaryCodePoint(int cp) {
5482         return cp >= UTF16.SUPPLEMENTARY_MIN_VALUE
5483             && cp <= UTF16.CODEPOINT_MAX_VALUE;
5484     }
5485
5486     /**
5487      * Cover the JDK 1.5 API, for convenience.
5488      * @param ch the char to check
5489      * @return true if ch is a high (lead) surrogate
5490      * @stable ICU 3.0
5491      */
5492     public static boolean isHighSurrogate(char ch) {
5493         return ch >= MIN_HIGH_SURROGATE && ch <= MAX_HIGH_SURROGATE;
5494     }
5495
5496     /**
5497      * Cover the JDK 1.5 API, for convenience.
5498      * @param ch the char to check
5499      * @return true if ch is a low (trail) surrogate
5500      * @stable ICU 3.0
5501      */
5502     public static boolean isLowSurrogate(char ch) {
5503         return ch >= MIN_LOW_SURROGATE && ch <= MAX_LOW_SURROGATE;
5504     }
5505
5506     /**
5507      * Cover the JDK 1.5 API, for convenience.  Return true if the chars
5508      * form a valid surrogate pair.
5509      * @param high the high (lead) char
5510      * @param low the low (trail) char
5511      * @return true if high, low form a surrogate pair
5512      * @stable ICU 3.0
5513      */
5514     public static final boolean isSurrogatePair(char high, char low) {
5515         return isHighSurrogate(high) && isLowSurrogate(low);
5516     }
5517
5518     /**
5519      * Cover the JDK 1.5 API, for convenience.  Return the number of chars needed
5520      * to represent the code point.  This does not check the
5521      * code point for validity.
5522      * @param cp the code point to check
5523      * @return the number of chars needed to represent the code point
5524      * @see UTF16#getCharCount
5525      * @stable ICU 3.0
5526      */
5527     public static int charCount(int cp) {
5528         return UTF16.getCharCount(cp);
5529     }
5530
5531     /**
5532      * Cover the JDK 1.5 API, for convenience.  Return the code point represented by
5533      * the characters.  This does not check the surrogate pair for validity.
5534      * @param high the high (lead) surrogate
5535      * @param low the low (trail) surrogate
5536      * @return the code point formed by the surrogate pair
5537      * @stable ICU 3.0
5538      */
5539     public static final int toCodePoint(char high, char low) {
5540         return UCharacterProperty.getRawSupplementary(high, low);
5541     }
5542
5543     /**
5544      * Cover the JDK 1.5 API, for convenience.  Return the code point at index.
5545      * <br/><b>Note</b>: the semantics of this API is different from the related UTF16
5546      * API.  This examines only the characters at index and index+1.
5547      * @param seq the characters to check
5548      * @param index the index of the first or only char forming the code point
5549      * @return the code point at the index
5550      * @stable ICU 3.0
5551      */
5552     public static final int codePointAt(CharSequence seq, int index) {
5553         char c1 = seq.charAt(index++);
5554         if (isHighSurrogate(c1)) {
5555             if (index < seq.length()) {
5556                 char c2 = seq.charAt(index);
5557                 if (isLowSurrogate(c2)) {
5558                     return toCodePoint(c1, c2);
5559                 }
5560             }
5561         }
5562         return c1;
5563     }
5564
5565 //#if defined(ECLIPSE)
5566 //##    public static final int codePointAt(String seq, int index) {
5567 //##        return codePointAt((CharSequence)seq, index);
5568 //##    }
5569 //#endif
5570
5571     /**
5572      * Cover the JDK 1.5 API, for convenience.  Return the code point at index.
5573      * <br/><b>Note</b>: the semantics of this API is different from the related UTF16
5574      * API.  This examines only the characters at index and index+1.
5575      * @param text the characters to check
5576      * @param index the index of the first or only char forming the code point
5577      * @return the code point at the index
5578      * @stable ICU 3.0
5579      */
5580     public static final int codePointAt(char[] text, int index) {
5581         char c1 = text[index++];
5582         if (isHighSurrogate(c1)) {
5583             if (index < text.length) {
5584                 char c2 = text[index];
5585                 if (isLowSurrogate(c2)) {
5586                     return toCodePoint(c1, c2);
5587                 }
5588             }
5589         }
5590         return c1;
5591     }
5592
5593     /**
5594      * Cover the JDK 1.5 API, for convenience.  Return the code point at index.
5595      * <br/><b>Note</b>: the semantics of this API is different from the related UTF16
5596      * API.  This examines only the characters at index and index+1.
5597      * @param text the characters to check
5598      * @param index the index of the first or only char forming the code point
5599      * @param limit the limit of the valid text
5600      * @return the code point at the index
5601      * @stable ICU 3.0
5602      */
5603     public static final int codePointAt(char[] text, int index, int limit) {
5604         if (index >= limit || limit > text.length) {
5605             throw new IndexOutOfBoundsException();
5606         }
5607         char c1 = text[index++];
5608         if (isHighSurrogate(c1)) {
5609             if (index < limit) {
5610                 char c2 = text[index];
5611                 if (isLowSurrogate(c2)) {
5612                     return toCodePoint(c1, c2);
5613                 }
5614             }
5615         }
5616         return c1;
5617     }
5618
5619     /**
5620      * Cover the JDK 1.5 API, for convenience.  Return the code point before index.
5621      * <br/><b>Note</b>: the semantics of this API is different from the related UTF16
5622      * API.  This examines only the characters at index-1 and index-2.
5623      * @param seq the characters to check
5624      * @param index the index after the last or only char forming the code point
5625      * @return the code point before the index
5626      * @stable ICU 3.0
5627      */
5628     public static final int codePointBefore(CharSequence seq, int index) {
5629         char c2 = seq.charAt(--index);
5630         if (isLowSurrogate(c2)) {
5631             if (index > 0) {
5632                 char c1 = seq.charAt(--index);
5633                 if (isHighSurrogate(c1)) {
5634                     return toCodePoint(c1, c2);
5635                 }
5636             }
5637         }
5638         return c2;
5639     }
5640
5641 //#if defined(ECLIPSE)
5642 //##    public static final int codePointBefore(String seq, int index) {
5643 //##        return codePointBefore((CharSequence)seq, index);
5644 //##    }
5645 //#endif
5646
5647     /**
5648      * Cover the JDK 1.5 API, for convenience.  Return the code point before index.
5649      * <br/><b>Note</b>: the semantics of this API is different from the related UTF16
5650      * API.  This examines only the characters at index-1 and index-2.
5651      * @param text the characters to check
5652      * @param index the index after the last or only char forming the code point
5653      * @return the code point before the index
5654      * @stable ICU 3.0
5655      */
5656     public static final int codePointBefore(char[] text, int index) {
5657         char c2 = text[--index];
5658         if (isLowSurrogate(c2)) {
5659             if (index > 0) {
5660                 char c1 = text[--index];
5661                 if (isHighSurrogate(c1)) {
5662                     return toCodePoint(c1, c2);
5663                 }
5664             }
5665         }
5666         return c2;
5667     }
5668
5669     /**
5670      * Cover the JDK 1.5 API, for convenience.  Return the code point before index.
5671      * <br/><b>Note</b>: the semantics of this API is different from the related UTF16
5672      * API.  This examines only the characters at index-1 and index-2.
5673      * @param text the characters to check
5674      * @param index the index after the last or only char forming the code point
5675      * @param limit the start of the valid text
5676      * @return the code point before the index
5677      * @stable ICU 3.0
5678      */
5679     public static final int codePointBefore(char[] text, int index, int limit) {
5680         if (index <= limit || limit < 0) {
5681             throw new IndexOutOfBoundsException();
5682         }
5683         char c2 = text[--index];
5684         if (isLowSurrogate(c2)) {
5685             if (index > limit) {
5686                 char c1 = text[--index];
5687                 if (isHighSurrogate(c1)) {
5688                     return toCodePoint(c1, c2);
5689                 }
5690             }
5691         }
5692         return c2;
5693     }
5694
5695     /**
5696      * Cover the JDK 1.5 API, for convenience.  Writes the chars representing the
5697      * code point into the destination at the given index.
5698      * @param cp the code point to convert
5699      * @param dst the destination array into which to put the char(s) representing the code point
5700      * @param dstIndex the index at which to put the first (or only) char
5701      * @return the count of the number of chars written (1 or 2)
5702      * @throws IllegalArgumentException if cp is not a valid code point
5703      * @stable ICU 3.0
5704      */
5705     public static final int toChars(int cp, char[] dst, int dstIndex) {
5706         if (cp >= 0) {
5707             if (cp < MIN_SUPPLEMENTARY_CODE_POINT) {
5708                 dst[dstIndex] = (char)cp;
5709                 return 1;
5710             }
5711             if (cp <= MAX_CODE_POINT) {
5712                 dst[dstIndex] = UTF16.getLeadSurrogate(cp);
5713                 dst[dstIndex+1] = UTF16.getTrailSurrogate(cp);
5714                 return 2;
5715             }
5716         }
5717         throw new IllegalArgumentException();
5718     }
5719
5720     /**
5721      * Cover the JDK 1.5 API, for convenience.  Returns a char array
5722      * representing the code point.
5723      * @param cp the code point to convert
5724      * @return an array containing the char(s) representing the code point
5725      * @throws IllegalArgumentException if cp is not a valid code point
5726      * @stable ICU 3.0
5727      */
5728     public static final char[] toChars(int cp) {
5729         if (cp >= 0) {
5730             if (cp < MIN_SUPPLEMENTARY_CODE_POINT) {
5731                 return new char[] { (char)cp };
5732             }
5733             if (cp <= MAX_CODE_POINT) {
5734                 return new char[] {
5735                     UTF16.getLeadSurrogate(cp),
5736                     UTF16.getTrailSurrogate(cp)
5737                 };
5738             }
5739         }
5740         throw new IllegalArgumentException();
5741     }
5742
5743     /**
5744      * Cover the JDK API, for convenience.  Return a byte representing the directionality of
5745      * the character.
5746      *
5747      * {@icunote} Unlike the JDK, this returns DIRECTIONALITY_LEFT_TO_RIGHT for undefined
5748      * or out-of-bounds characters.
5749      *
5750      * {@icunote} The return value must be tested using the constants defined in {@link
5751      * UCharacterDirection} and its interface {@link
5752      * UCharacterEnums.ECharacterDirection} since the values are different from the ones
5753      * defined by <code>java.lang.Character</code>.
5754      * @param cp the code point to check
5755      * @return the directionality of the code point
5756      * @see #getDirection
5757      * @stable ICU 3.0
5758      */
5759     public static byte getDirectionality(int cp)
5760     {
5761         return (byte)getDirection(cp);
5762     }
5763
5764     /**
5765      * Cover the JDK API, for convenience.  Count the number of code points in the range of text.
5766      * @param text the characters to check
5767      * @param start the start of the range
5768      * @param limit the limit of the range
5769      * @return the number of code points in the range
5770      * @stable ICU 3.0
5771      */
5772     public static int codePointCount(CharSequence text, int start, int limit) {
5773         if (start < 0 || limit < start || limit > text.length()) {
5774             throw new IndexOutOfBoundsException("start (" + start +
5775                 ") or limit (" + limit +
5776                 ") invalid or out of range 0, " + text.length());
5777         }
5778
5779         int len = limit - start;
5780         while (limit > start) {
5781             char ch = text.charAt(--limit);
5782             while (ch >= MIN_LOW_SURROGATE && ch <= MAX_LOW_SURROGATE && limit > start) {
5783                 ch = text.charAt(--limit);
5784                 if (ch >= MIN_HIGH_SURROGATE && ch <= MAX_HIGH_SURROGATE) {
5785                     --len;
5786                     break;
5787                 }
5788             }
5789         }
5790         return len;
5791     }
5792
5793 //#if defined(ECLIPSE)
5794 //##    public static int codePointCount(String text, int start, int limit) {
5795 //##        return codePointCount((CharSequence)text, start, limit);
5796 //##    }
5797 //#endif
5798
5799     /**
5800      * Cover the JDK API, for convenience.  Count the number of code points in the range of text.
5801      * @param text the characters to check
5802      * @param start the start of the range
5803      * @param limit the limit of the range
5804      * @return the number of code points in the range
5805      * @stable ICU 3.0
5806      */
5807     public static int codePointCount(char[] text, int start, int limit) {
5808         if (start < 0 || limit < start || limit > text.length) {
5809             throw new IndexOutOfBoundsException("start (" + start +
5810                                                 ") or limit (" + limit +
5811                                                 ") invalid or out of range 0, " + text.length);
5812         }
5813
5814         int len = limit - start;
5815         while (limit > start) {
5816             char ch = text[--limit];
5817             while (ch >= MIN_LOW_SURROGATE && ch <= MAX_LOW_SURROGATE && limit > start) {
5818                 ch = text[--limit];
5819                 if (ch >= MIN_HIGH_SURROGATE && ch <= MAX_HIGH_SURROGATE) {
5820                     --len;
5821                     break;
5822                 }
5823             }
5824         }
5825         return len;
5826     }
5827
    /**
     * Cover the JDK API, for convenience.  Adjust the char index by a code point offset.
     * A negative offset moves the index backward, a non-negative offset moves it
     * forward.  A high surrogate immediately followed by a low surrogate is stepped
     * over as a single code point; any other char counts as one code point.
     * @param text the characters to check
     * @param index the index to adjust
     * @param codePointOffset the number of code points by which to offset the index
     * @return the adjusted index
     * @throws IndexOutOfBoundsException if index is negative or greater than the
     *         length of text; stepping past either end of the text also fails
     *         with whatever exception <code>text.charAt</code> throws
     * @stable ICU 3.0
     */
    public static int offsetByCodePoints(CharSequence text, int index, int codePointOffset) {
        if (index < 0 || index > text.length()) {
            throw new IndexOutOfBoundsException("index ( " + index +
                                                ") out of range 0, " + text.length());
        }

        if (codePointOffset < 0) {
            // Backward: each outer iteration consumes one code point.
            while (++codePointOffset <= 0) {
                char ch = text.charAt(--index);
                // A low surrogate may pair with the char before it; look back.
                while (ch >= MIN_LOW_SURROGATE && ch <= MAX_LOW_SURROGATE && index > 0) {
                    ch = text.charAt(--index);
                    if (ch < MIN_HIGH_SURROGATE || ch > MAX_HIGH_SURROGATE) {
                        // Not a pair: the char just read is a further code point.
                        // If the offset is already used up, undo that read.
                        if (++codePointOffset > 0) {
                            return index+1;
                        }
                    }
                }
            }
        } else {
            int limit = text.length();
            // Forward: each outer iteration consumes one code point.
            while (--codePointOffset >= 0) {
                char ch = text.charAt(index++);
                // A high surrogate may pair with the char after it; look ahead.
                while (ch >= MIN_HIGH_SURROGATE && ch <= MAX_HIGH_SURROGATE && index < limit) {
                    ch = text.charAt(index++);
                    if (ch < MIN_LOW_SURROGATE || ch > MAX_LOW_SURROGATE) {
                        // Not a pair: the char just read is a further code point.
                        // If the offset is already used up, undo that read.
                        if (--codePointOffset < 0) {
                            return index-1;
                        }
                    }
                }
            }
        }

        return index;
    }
5871
5872 //#if defined(ECLIPSE)
5873 //##    public static int offsetByCodePoints(String text, int index, int codePointOffset) {
5874 //##        return offsetByCodePoints((CharSequence)text, index, codePointOffset);
5875 //##    }
5876 //#endif
5877
    /**
     * Cover the JDK API, for convenience.  Adjust the char index by a code point offset.
     * Only the chars in the range [start, start + count) are considered.  A negative
     * offset moves the index backward, a non-negative offset moves it forward.  A
     * high surrogate immediately followed by a low surrogate is stepped over as a
     * single code point; any other char counts as one code point.
     * @param text the characters to check
     * @param start the start of the range to check
     * @param count the length of the range to check
     * @param index the index to adjust
     * @param codePointOffset the number of code points by which to offset the index
     * @return the adjusted index
     * @throws IndexOutOfBoundsException if the range does not lie within the array,
     *         if index lies outside the range, or if the offset would move the
     *         index outside the range
     * @stable ICU 3.0
     */
    public static int offsetByCodePoints(char[] text, int start, int count, int index,
                                         int codePointOffset) {
        int limit = start + count;
        // limit < start also rejects a negative count and an overflowed sum.
        if (start < 0 || limit < start || limit > text.length || index < start || index > limit) {
            throw new IndexOutOfBoundsException("index ( " + index +
                                                ") out of range " + start +
                                                ", " + limit +
                                                " in array 0, " + text.length);
        }

        if (codePointOffset < 0) {
            // Backward: each outer iteration consumes one code point.
            while (++codePointOffset <= 0) {
                // The array is read before the range check below, so stepping
                // below index 0 fails inside the array access itself.
                char ch = text[--index];
                if (index < start) {
                    throw new IndexOutOfBoundsException("index ( " + index +
                                                        ") < start (" + start +
                                                        ")");
                }
                // A low surrogate may pair with the char before it; look back.
                while (ch >= MIN_LOW_SURROGATE && ch <= MAX_LOW_SURROGATE && index > start) {
                    ch = text[--index];
                    if (ch < MIN_HIGH_SURROGATE || ch > MAX_HIGH_SURROGATE) {
                        // Not a pair: the char just read is a further code point.
                        // If the offset is already used up, undo that read.
                        if (++codePointOffset > 0) {
                            return index+1;
                        }
                    }
                }
            }
        } else {
            // Forward: each outer iteration consumes one code point.
            while (--codePointOffset >= 0) {
                // The array is read before the range check below, so stepping
                // past the end of the array fails inside the array access itself.
                char ch = text[index++];
                if (index > limit) {
                    throw new IndexOutOfBoundsException("index ( " + index +
                                                        ") > limit (" + limit +
                                                        ")");
                }
                // A high surrogate may pair with the char after it; look ahead.
                while (ch >= MIN_HIGH_SURROGATE && ch <= MAX_HIGH_SURROGATE && index < limit) {
                    ch = text[index++];
                    if (ch < MIN_LOW_SURROGATE || ch > MAX_LOW_SURROGATE) {
                        // Not a pair: the char just read is a further code point.
                        // If the offset is already used up, undo that read.
                        if (--codePointOffset < 0) {
                            return index-1;
                        }
                    }
                }
            }
        }

        return index;
    }
5936
5937     // private variables -------------------------------------------------
5938
    /**
     * Mask that keeps only the lowest 16 bits (one char) of an int value.
     */
    private static final int LAST_CHAR_MASK_ = 0xFFFF;
5943
5944 //    /**
5945 //     * To get the last byte out from a data type
5946 //     */
5947 //    private static final int LAST_BYTE_MASK_ = 0xFF;
5948 //
5949 //    /**
5950 //     * Shift 16 bits
5951 //     */
5952 //    private static final int SHIFT_16_ = 16;
5953 //
5954 //    /**
5955 //     * Shift 24 bits
5956 //     */
5957 //    private static final int SHIFT_24_ = 24;
5958 //
5959 //    /**
5960 //     * Decimal radix
5961 //     */
5962 //    private static final int DECIMAL_RADIX_ = 10;
5963
    /**
     * No-break space code point (U+00A0)
     */
    private static final int NO_BREAK_SPACE_ = 0xA0;

    /**
     * Figure space code point (U+2007)
     */
    private static final int FIGURE_SPACE_ = 0x2007;

    /**
     * Narrow no-break space code point (U+202F)
     */
    private static final int NARROW_NO_BREAK_SPACE_ = 0x202F;

    /**
     * Ideographic number zero code point (U+3007)
     */
    private static final int IDEOGRAPHIC_NUMBER_ZERO_ = 0x3007;

    /**
     * CJK ideograph for the number one (U+4E00)
     */
    private static final int CJK_IDEOGRAPH_FIRST_ = 0x4e00;

    /**
     * CJK ideograph for the number two (U+4E8C)
     */
    private static final int CJK_IDEOGRAPH_SECOND_ = 0x4e8c;

    /**
     * CJK ideograph for the number three (U+4E09)
     */
    private static final int CJK_IDEOGRAPH_THIRD_ = 0x4e09;

    /**
     * CJK ideograph intended for the number four.
     * NOTE(review): the value is U+56D8, but the ideograph for four appears to
     * be U+56DB; confirm against the Unicode code charts before relying on it.
     */
    private static final int CJK_IDEOGRAPH_FOURTH_ = 0x56d8;

    /**
     * CJK ideograph for the number five (U+4E94)
     */
    private static final int CJK_IDEOGRAPH_FIFTH_ = 0x4e94;

    /**
     * CJK ideograph for the number six (U+516D)
     */
    private static final int CJK_IDEOGRAPH_SIXTH_ = 0x516d;

    /**
     * CJK ideograph for the number seven (U+4E03)
     */
    private static final int CJK_IDEOGRAPH_SEVENTH_ = 0x4e03;

    /**
     * CJK ideograph for the number eight (U+516B)
     */
    private static final int CJK_IDEOGRAPH_EIGHTH_ = 0x516b;

    /**
     * CJK ideograph for the number nine (U+4E5D)
     */
    private static final int CJK_IDEOGRAPH_NINETH_ = 0x4e5d;

    /**
     * Application Program Command code point (U+009F)
     */
    private static final int APPLICATION_PROGRAM_COMMAND_ = 0x009F;

    /**
     * Unit separator code point (U+001F)
     */
    private static final int UNIT_SEPARATOR_ = 0x001F;

    /**
     * Delete code point (U+007F)
     */
    private static final int DELETE_ = 0x007F;

    /**
     * Han digit characters: the "complex" forms of zero through nine,
     * followed by the plain and complex forms of ten, hundred and thousand,
     * and finally ten thousand and hundred million.
     */
    private static final int CJK_IDEOGRAPH_COMPLEX_ZERO_     = 0x96f6;
    private static final int CJK_IDEOGRAPH_COMPLEX_ONE_      = 0x58f9;
    private static final int CJK_IDEOGRAPH_COMPLEX_TWO_      = 0x8cb3;
    private static final int CJK_IDEOGRAPH_COMPLEX_THREE_    = 0x53c3;
    private static final int CJK_IDEOGRAPH_COMPLEX_FOUR_     = 0x8086;
    private static final int CJK_IDEOGRAPH_COMPLEX_FIVE_     = 0x4f0d;
    private static final int CJK_IDEOGRAPH_COMPLEX_SIX_      = 0x9678;
    private static final int CJK_IDEOGRAPH_COMPLEX_SEVEN_    = 0x67d2;
    private static final int CJK_IDEOGRAPH_COMPLEX_EIGHT_    = 0x634c;
    private static final int CJK_IDEOGRAPH_COMPLEX_NINE_     = 0x7396;
    private static final int CJK_IDEOGRAPH_TEN_              = 0x5341;
    private static final int CJK_IDEOGRAPH_COMPLEX_TEN_      = 0x62fe;
    private static final int CJK_IDEOGRAPH_HUNDRED_          = 0x767e;
    private static final int CJK_IDEOGRAPH_COMPLEX_HUNDRED_  = 0x4f70;
    private static final int CJK_IDEOGRAPH_THOUSAND_         = 0x5343;
    private static final int CJK_IDEOGRAPH_COMPLEX_THOUSAND_ = 0x4edf;
    // NOTE(review): the value below is U+824C, but the ideograph for ten
    // thousand appears to be U+842C (digits transposed?); confirm against the
    // Unicode code charts.
    private static final int CJK_IDEOGRAPH_TEN_THOUSAND_     = 0x824c;
    private static final int CJK_IDEOGRAPH_HUNDRED_MILLION_  = 0x5104;
6065
6066     // private constructor -----------------------------------------------
6067     ///CLOVER:OFF
    /**
     * Private constructor to prevent instantiation.  It takes no arguments
     * and has an empty body.
     */
    private UCharacter()
    {
    }
6074     ///CLOVER:ON
6075 }