jars/icu4j-4_2_1-src/src/com/ibm/icu/lang/UCharacter.java

   1 //##header J2SE15
   2 /**
   3 *******************************************************************************
   4 * Copyright (C) 1996-2009, International Business Machines Corporation and    *
   5 * others. All Rights Reserved.                                                *
   6 *******************************************************************************
   7 */
   8
   9 package com.ibm.icu.lang;
  10
  11 import java.io.IOException;
  12 import java.lang.ref.SoftReference;
  13 import java.util.HashMap;
  14 import java.util.Locale;
  15 import java.util.Map;
  16 import java.util.MissingResourceException;
  17
  18 import com.ibm.icu.impl.UBiDiProps;
  19 import com.ibm.icu.impl.UCaseProps;
  20 import com.ibm.icu.impl.NormalizerImpl;
  21 import com.ibm.icu.impl.UCharacterUtility;
  22 import com.ibm.icu.impl.UCharacterName;
  23 import com.ibm.icu.impl.UCharacterNameChoice;
  24 import com.ibm.icu.impl.UPropertyAliases;
  25 import com.ibm.icu.lang.UCharacterEnums.*;
  26 import com.ibm.icu.text.BreakIterator;
  27 import com.ibm.icu.text.UTF16;
  28 import com.ibm.icu.impl.UCharacterProperty;
  29 import com.ibm.icu.util.RangeValueIterator;
  30 import com.ibm.icu.util.ULocale;
  31 import com.ibm.icu.util.ValueIterator;
  32 import com.ibm.icu.util.VersionInfo;
  33
  34 /**
  35  * <p>
  36  * The UCharacter class provides extensions to the
  37  * <a href="http://java.sun.com/j2se/1.5/docs/api/java/lang/Character.html">
  38  * java.lang.Character</a> class. These extensions provide support for
  39  * more Unicode properties and together with the <a href="../text/UTF16.html">UTF16</a>
  40  * class, provide support for supplementary characters (those with code
  41  * points above U+FFFF).
  42  * Each ICU release supports the latest version of Unicode available at that time.
  43  * </p>
  44  * <p>
  45  * Code points are represented in this API using ints. While it would be
  46  * more convenient in Java to have a separate primitive datatype for them,
  47  * ints suffice in the meantime.
  48  * </p>
  49  * <p>
  50  * To use this class, please add the jar file icu4j.jar to the
  51  * class path, since it contains the data files that supply the information
  52  * used by this class.<br>
  53  * E.g. on Windows: <br>
  54  * <code>set CLASSPATH=%CLASSPATH%;$JAR_FILE_PATH/icu4j.jar</code>.<br>
  55  * Otherwise, another method would be to copy the files uprops.dat and
  56  * unames.icu from the icu4j source subdirectory
  57  * <i>$ICU4J_SRC/src/com.ibm.icu.impl.data</i> to your class directory
  58  * <i>$ICU4J_CLASS/com.ibm.icu.impl.data</i>.
  59  * </p>
  60  * <p>
  61  * Aside from the additions for UTF-16 support, and the updated Unicode
  62  * properties, the main differences between UCharacter and Character are:
  63  * <ul>
  64  * <li> UCharacter is not designed to be a char wrapper and does not have
  65  *      APIs that involve management of that single char.<br>
  66  *      These include:
  67  *      <ul>
  68  *        <li> char charValue(),
  69  *        <li> int compareTo(java.lang.Character, java.lang.Character), etc.
  70  *      </ul>
  71  * <li> UCharacter does not include Character APIs that are deprecated, nor
  72  *      does it include the Java-specific character information, such as
  73  *      boolean isJavaIdentifierPart(char ch).
  74  * <li> Character maps characters 'A' - 'Z' and 'a' - 'z' to the numeric
  75  *      values '10' - '35'. UCharacter also does this in digit and
  76  *      getNumericValue, to adhere to the java semantics of these
  77  *      methods.  New methods unicodeDigit, and
  78  *      getUnicodeNumericValue do not treat the above code points
  79  *      as having numeric values.  This is a semantic change from ICU4J 1.3.1.
  80  * </ul>
  81  * <p>
  82  * Further detailed differences can be determined from the program
  83  *        <a href="http://source.icu-project.org/repos/icu/icu4j/trunk/src/com/ibm/icu/dev/test/lang/UCharacterCompare.java">
  84  *        com.ibm.icu.dev.test.lang.UCharacterCompare</a>
  85  * </p>
  86  * <p>
  87  * In addition to Java compatibility functions, which calculate derived properties,
  88  * this API provides low-level access to the Unicode Character Database.
  89  * </p>
  90  * <p>
  91  * Unicode assigns each code point (not just each assigned character) values for
  92  * many properties.
  93  * Most of them are simple boolean flags, or constants from a small enumerated list.
  94  * For some properties, values are strings or other relatively more complex types.
  95  * </p>
  96  * <p>
  97  * For more information see
  98  * "About the Unicode Character Database" (http://www.unicode.org/ucd/)
  99  * and the ICU User Guide chapter on Properties (http://www.icu-project.org/userguide/properties.html).
 100  * </p>
 101  * <p>
 102  * There are also functions that provide easy migration from C/POSIX functions
 103  * like isblank(). Their use is generally discouraged because the C/POSIX
 104  * standards do not define their semantics beyond the ASCII range, which means
 105  * that different implementations exhibit very different behavior.
 106  * Instead, Unicode properties should be used directly.
 107  * </p>
 108  * <p>
 109  * There are also only a few, broad C/POSIX character classes, and they tend
 110  * to be used for conflicting purposes. For example, the "isalpha()" class
 111  * is sometimes used to determine word boundaries, while a more sophisticated
 112  * approach would at least distinguish initial letters from continuation
 113  * characters (the latter including combining marks).
 114  * (In ICU, BreakIterator is the most sophisticated API for word boundaries.)
 115  * Another example: There is no "istitle()" class for titlecase characters.
 116  * </p>
 117  * <p>
 118  * ICU 3.4 and later provides API access for all twelve C/POSIX character classes.
 119  * ICU implements them according to the Standard Recommendations in
 120  * Annex C: Compatibility Properties of UTS #18 Unicode Regular Expressions
 121  * (http://www.unicode.org/reports/tr18/#Compatibility_Properties).
 122  * </p>
 123  * <p>
 124  * API access for C/POSIX character classes is as follows:
 125  * - alpha:     isUAlphabetic(c) or hasBinaryProperty(c, UProperty.ALPHABETIC)
 126  * - lower:     isULowercase(c) or hasBinaryProperty(c, UProperty.LOWERCASE)
 127  * - upper:     isUUppercase(c) or hasBinaryProperty(c, UProperty.UPPERCASE)
 128  * - punct:     ((1<<getType(c)) & ((1<<DASH_PUNCTUATION)|(1<<START_PUNCTUATION)|(1<<END_PUNCTUATION)|(1<<CONNECTOR_PUNCTUATION)|(1<<OTHER_PUNCTUATION)|(1<<INITIAL_PUNCTUATION)|(1<<FINAL_PUNCTUATION)))!=0
 129  * - digit:     isDigit(c) or getType(c)==DECIMAL_DIGIT_NUMBER
 130  * - xdigit:    hasBinaryProperty(c, UProperty.POSIX_XDIGIT)
 131  * - alnum:     hasBinaryProperty(c, UProperty.POSIX_ALNUM)
 132  * - space:     isUWhiteSpace(c) or hasBinaryProperty(c, UProperty.WHITE_SPACE)
 133  * - blank:     hasBinaryProperty(c, UProperty.POSIX_BLANK)
 134  * - cntrl:     getType(c)==CONTROL
 135  * - graph:     hasBinaryProperty(c, UProperty.POSIX_GRAPH)
 136  * - print:     hasBinaryProperty(c, UProperty.POSIX_PRINT)
 137  * </p>
 138  * <p>
 139  * The C/POSIX character classes are also available in UnicodeSet patterns,
 140  * using patterns like [:graph:] or \p{graph}.
 141  * </p>
 142  * <p>
 143  * Note: There are several ICU (and Java) whitespace functions.
 144  * Comparison:
 145  * - isUWhiteSpace=UCHAR_WHITE_SPACE: Unicode White_Space property;
 146  *       most of general categories "Z" (separators) + most whitespace ISO controls
 147  *       (including no-break spaces, but excluding IS1..IS4 and ZWSP)
 148  * - isWhitespace: Java isWhitespace; Z + whitespace ISO controls but excluding no-break spaces
 149  * - isSpaceChar: just Z (including no-break spaces)
 150  * </p>
 151  * <p>
 152  * This class is not subclassable.
 153  * </p>
 154  * @author Syn Wee Quek
 155  * @stable ICU 2.1
 156  * @see com.ibm.icu.lang.UCharacterEnums
 157  */
 158
 159 public final class UCharacter implements ECharacterCategory, ECharacterDirection
 160 {
 161     // public inner classes ----------------------------------------------
 162
 163     /**
 164      * A family of character subsets representing the character blocks in the
 165      * Unicode specification, generated from Unicode Data file Blocks.txt.
 166      * Character blocks generally define characters used for a specific script
 167      * or purpose. A character is contained by at most one Unicode block.
 168      * @stable ICU 2.4
 169      */
 170     public static final class UnicodeBlock extends Character.Subset
 171     {
 172         // block id corresponding to icu4c -----------------------------------
 173
 174         /**
 175          * @stable ICU 2.4
 176          */
 177         public static final int INVALID_CODE_ID = -1;
 178         /**
 179          * @stable ICU 2.4
 180          */
 181         public static final int BASIC_LATIN_ID = 1;
 182         /**
 183          * @stable ICU 2.4
 184          */
 185         public static final int LATIN_1_SUPPLEMENT_ID = 2;
 186         /**
 187          * @stable ICU 2.4
 188          */
 189         public static final int LATIN_EXTENDED_A_ID = 3;
 190         /**
 191          * @stable ICU 2.4
 192          */
 193         public static final int LATIN_EXTENDED_B_ID = 4;
 194         /**
 195          * @stable ICU 2.4
 196          */
 197         public static final int IPA_EXTENSIONS_ID = 5;
 198         /**
 199          * @stable ICU 2.4
 200          */
 201         public static final int SPACING_MODIFIER_LETTERS_ID = 6;
 202         /**
 203          * @stable ICU 2.4
 204          */
 205         public static final int COMBINING_DIACRITICAL_MARKS_ID = 7;
 206         /**
 207          * Unicode 3.2 renames this block to "Greek and Coptic".
 208          * @stable ICU 2.4
 209          */
 210         public static final int GREEK_ID = 8;
 211         /**
 212          * @stable ICU 2.4
 213          */
 214         public static final int CYRILLIC_ID = 9;
 215         /**
 216          * @stable ICU 2.4
 217          */
 218         public static final int ARMENIAN_ID = 10;
 219         /**
 220          * @stable ICU 2.4
 221          */
 222         public static final int HEBREW_ID = 11;
 223         /**
 224          * @stable ICU 2.4
 225          */
 226         public static final int ARABIC_ID = 12;
 227         /**
 228          * @stable ICU 2.4
 229          */
 230         public static final int SYRIAC_ID = 13;
 231         /**
 232          * @stable ICU 2.4
 233          */
 234         public static final int THAANA_ID = 14;
 235         /**
 236          * @stable ICU 2.4
 237          */
 238         public static final int DEVANAGARI_ID = 15;
 239         /**
 240          * @stable ICU 2.4
 241          */
 242         public static final int BENGALI_ID = 16;
 243         /**
 244          * @stable ICU 2.4
 245          */
 246         public static final int GURMUKHI_ID = 17;
 247         /**
 248          * @stable ICU 2.4
 249          */
 250         public static final int GUJARATI_ID = 18;
 251         /**
 252          * @stable ICU 2.4
 253          */
 254         public static final int ORIYA_ID = 19;
 255         /**
 256          * @stable ICU 2.4
 257          */
 258         public static final int TAMIL_ID = 20;
 259         /**
 260          * @stable ICU 2.4
 261          */
 262         public static final int TELUGU_ID = 21;
 263         /**
 264          * @stable ICU 2.4
 265          */
 266         public static final int KANNADA_ID = 22;
 267         /**
 268          * @stable ICU 2.4
 269          */
 270         public static final int MALAYALAM_ID = 23;
 271         /**
 272          * @stable ICU 2.4
 273          */
 274         public static final int SINHALA_ID = 24;
 275         /**
 276          * @stable ICU 2.4
 277          */
 278         public static final int THAI_ID = 25;
 279         /**
 280          * @stable ICU 2.4
 281          */
 282         public static final int LAO_ID = 26;
 283         /**
 284          * @stable ICU 2.4
 285          */
 286         public static final int TIBETAN_ID = 27;
 287         /**
 288          * @stable ICU 2.4
 289          */
 290         public static final int MYANMAR_ID = 28;
 291         /**
 292          * @stable ICU 2.4
 293          */
 294         public static final int GEORGIAN_ID = 29;
 295         /**
 296          * @stable ICU 2.4
 297          */
 298         public static final int HANGUL_JAMO_ID = 30;
 299         /**
 300          * @stable ICU 2.4
 301          */
 302         public static final int ETHIOPIC_ID = 31;
 303         /**
 304          * @stable ICU 2.4
 305          */
 306         public static final int CHEROKEE_ID = 32;
 307         /**
 308          * @stable ICU 2.4
 309          */
 310         public static final int UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_ID = 33;
 311         /**
 312          * @stable ICU 2.4
 313          */
 314         public static final int OGHAM_ID = 34;
 315         /**
 316          * @stable ICU 2.4
 317          */
 318         public static final int RUNIC_ID = 35;
 319         /**
 320          * @stable ICU 2.4
 321          */
 322         public static final int KHMER_ID = 36;
 323         /**
 324          * @stable ICU 2.4
 325          */
 326         public static final int MONGOLIAN_ID = 37;
 327         /**
 328          * @stable ICU 2.4
 329          */
 330         public static final int LATIN_EXTENDED_ADDITIONAL_ID = 38;
 331         /**
 332          * @stable ICU 2.4
 333          */
 334         public static final int GREEK_EXTENDED_ID = 39;
 335         /**
 336          * @stable ICU 2.4
 337          */
 338         public static final int GENERAL_PUNCTUATION_ID = 40;
 339         /**
 340          * @stable ICU 2.4
 341          */
 342         public static final int SUPERSCRIPTS_AND_SUBSCRIPTS_ID = 41;
 343         /**
 344          * @stable ICU 2.4
 345          */
 346         public static final int CURRENCY_SYMBOLS_ID = 42;
 347         /**
 348          * Unicode 3.2 renames this block to "Combining Diacritical Marks for
 349          * Symbols".
 350          * @stable ICU 2.4
 351          */
 352         public static final int COMBINING_MARKS_FOR_SYMBOLS_ID = 43;
 353         /**
 354          * @stable ICU 2.4
 355          */
 356         public static final int LETTERLIKE_SYMBOLS_ID = 44;
 357         /**
 358          * @stable ICU 2.4
 359          */
 360         public static final int NUMBER_FORMS_ID = 45;
 361         /**
 362          * @stable ICU 2.4
 363          */
 364         public static final int ARROWS_ID = 46;
 365         /**
 366          * @stable ICU 2.4
 367          */
 368         public static final int MATHEMATICAL_OPERATORS_ID = 47;
 369         /**
 370          * @stable ICU 2.4
 371          */
 372         public static final int MISCELLANEOUS_TECHNICAL_ID = 48;
 373         /**
 374          * @stable ICU 2.4
 375          */
 376         public static final int CONTROL_PICTURES_ID = 49;
 377         /**
 378          * @stable ICU 2.4
 379          */
 380         public static final int OPTICAL_CHARACTER_RECOGNITION_ID = 50;
 381         /**
 382          * @stable ICU 2.4
 383          */
 384         public static final int ENCLOSED_ALPHANUMERICS_ID = 51;
 385         /**
 386          * @stable ICU 2.4
 387          */
 388         public static final int BOX_DRAWING_ID = 52;
 389         /**
 390          * @stable ICU 2.4
 391          */
 392         public static final int BLOCK_ELEMENTS_ID = 53;
 393         /**
 394          * @stable ICU 2.4
 395          */
 396         public static final int GEOMETRIC_SHAPES_ID = 54;
 397         /**
 398          * @stable ICU 2.4
 399          */
 400         public static final int MISCELLANEOUS_SYMBOLS_ID = 55;
 401         /**
 402          * @stable ICU 2.4
 403          */
 404         public static final int DINGBATS_ID = 56;
 405         /**
 406          * @stable ICU 2.4
 407          */
 408         public static final int BRAILLE_PATTERNS_ID = 57;
 409         /**
 410          * @stable ICU 2.4
 411          */
 412         public static final int CJK_RADICALS_SUPPLEMENT_ID = 58;
 413         /**
 414          * @stable ICU 2.4
 415          */
 416         public static final int KANGXI_RADICALS_ID = 59;
 417         /**
 418          * @stable ICU 2.4
 419          */
 420         public static final int IDEOGRAPHIC_DESCRIPTION_CHARACTERS_ID = 60;
 421         /**
 422          * @stable ICU 2.4
 423          */
 424         public static final int CJK_SYMBOLS_AND_PUNCTUATION_ID = 61;
 425         /**
 426          * @stable ICU 2.4
 427          */
 428         public static final int HIRAGANA_ID = 62;
 429         /**
 430          * @stable ICU 2.4
 431          */
 432         public static final int KATAKANA_ID = 63;
 433         /**
 434          * @stable ICU 2.4
 435          */
 436         public static final int BOPOMOFO_ID = 64;
 437         /**
 438          * @stable ICU 2.4
 439          */
 440         public static final int HANGUL_COMPATIBILITY_JAMO_ID = 65;
 441         /**
 442          * @stable ICU 2.4
 443          */
 444         public static final int KANBUN_ID = 66;
 445         /**
 446          * @stable ICU 2.4
 447          */
 448         public static final int BOPOMOFO_EXTENDED_ID = 67;
 449         /**
 450          * @stable ICU 2.4
 451          */
 452         public static final int ENCLOSED_CJK_LETTERS_AND_MONTHS_ID = 68;
 453         /**
 454          * @stable ICU 2.4
 455          */
 456         public static final int CJK_COMPATIBILITY_ID = 69;
 457         /**
 458          * @stable ICU 2.4
 459          */
 460         public static final int CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A_ID = 70;
 461         /**
 462          * @stable ICU 2.4
 463          */
 464         public static final int CJK_UNIFIED_IDEOGRAPHS_ID = 71;
 465         /**
 466          * @stable ICU 2.4
 467          */
 468         public static final int YI_SYLLABLES_ID = 72;
 469         /**
 470          * @stable ICU 2.4
 471          */
 472         public static final int YI_RADICALS_ID = 73;
 473         /**
 474          * @stable ICU 2.4
 475          */
 476         public static final int HANGUL_SYLLABLES_ID = 74;
 477         /**
 478          * @stable ICU 2.4
 479          */
 480         public static final int HIGH_SURROGATES_ID = 75;
 481         /**
 482          * @stable ICU 2.4
 483          */
 484         public static final int HIGH_PRIVATE_USE_SURROGATES_ID = 76;
 485         /**
 486          * @stable ICU 2.4
 487          */
 488         public static final int LOW_SURROGATES_ID = 77;
 489         /**
 490          * Same value as {@link #PRIVATE_USE_ID}.
 491          * Until Unicode 3.1.1, the corresponding block name was "Private Use",
 492          * and multiple code point ranges had this block.
 493          * Unicode 3.2 renames the block for the BMP PUA to "Private Use Area"
 494          * and adds separate blocks for the supplementary PUAs.
 495          * @stable ICU 2.4
 496          */
 497         public static final int PRIVATE_USE_AREA_ID = 78;
 498         /**
 499          * Alias of {@link #PRIVATE_USE_AREA_ID}; it is initialized from that constant.
 500          * Until Unicode 3.1.1, the corresponding block name was "Private Use",
 501          * and multiple code point ranges had this block.
 502          * Unicode 3.2 renames the block for the BMP PUA to "Private Use Area"
 503          * and adds separate blocks for the supplementary PUAs.
 504          * @stable ICU 2.4
 505          */
 506         public static final int PRIVATE_USE_ID = PRIVATE_USE_AREA_ID;
 507         /**
 508          * @stable ICU 2.4
 509          */
 510         public static final int CJK_COMPATIBILITY_IDEOGRAPHS_ID = 79;
 511         /**
 512          * @stable ICU 2.4
 513          */
 514         public static final int ALPHABETIC_PRESENTATION_FORMS_ID = 80;
 515         /**
 516          * @stable ICU 2.4
 517          */
 518         public static final int ARABIC_PRESENTATION_FORMS_A_ID = 81;
 519         /**
 520          * @stable ICU 2.4
 521          */
 522         public static final int COMBINING_HALF_MARKS_ID = 82;
 523         /**
 524          * @stable ICU 2.4
 525          */
 526         public static final int CJK_COMPATIBILITY_FORMS_ID = 83;
 527         /**
 528          * @stable ICU 2.4
 529          */
 530         public static final int SMALL_FORM_VARIANTS_ID = 84;
 531         /**
 532          * @stable ICU 2.4
 533          */
 534         public static final int ARABIC_PRESENTATION_FORMS_B_ID = 85;
 535         /**
 536          * @stable ICU 2.4
 537          */
 538         public static final int SPECIALS_ID = 86;
 539         /**
 540          * @stable ICU 2.4
 541          */
 542         public static final int HALFWIDTH_AND_FULLWIDTH_FORMS_ID = 87;
 543         /**
 544          * @stable ICU 2.4
 545          */
 546         public static final int OLD_ITALIC_ID = 88;
 547         /**
 548          * @stable ICU 2.4
 549          */
 550         public static final int GOTHIC_ID = 89;
 551         /**
 552          * @stable ICU 2.4
 553          */
 554         public static final int DESERET_ID = 90;
 555         /**
 556          * @stable ICU 2.4
 557          */
 558         public static final int BYZANTINE_MUSICAL_SYMBOLS_ID = 91;
 559         /**
 560          * @stable ICU 2.4
 561          */
 562         public static final int MUSICAL_SYMBOLS_ID = 92;
 563         /**
 564          * @stable ICU 2.4
 565          */
 566         public static final int MATHEMATICAL_ALPHANUMERIC_SYMBOLS_ID = 93;
 567         /**
 568          * @stable ICU 2.4
 569          */
 570         public static final int CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B_ID = 94;
 571         /**
 572          * @stable ICU 2.4
 573          */
 574         public static final int
 575             CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT_ID = 95;
 576         /**
 577          * @stable ICU 2.4
 578          */
 579         public static final int TAGS_ID = 96;
 580
 581         // New blocks in Unicode 3.2
 582
 583         /** Id under the older block name; same value (97) as {@link #CYRILLIC_SUPPLEMENT_ID}.
 584          * Unicode 4.0.1 renames the "Cyrillic Supplementary" block to "Cyrillic Supplement".
 585          * @stable ICU 2.4
 586          */
 587         public static final int CYRILLIC_SUPPLEMENTARY_ID = 97;
 588         /** Id under the renamed block name; same value (97) as {@link #CYRILLIC_SUPPLEMENTARY_ID}.
 589          * Unicode 4.0.1 renames the "Cyrillic Supplementary" block to "Cyrillic Supplement".
 590          * @stable ICU 3.0
 591          */
 592
 593         public static final int CYRILLIC_SUPPLEMENT_ID = 97;
 594         /**
 595          * @stable ICU 2.4
 596          */
 597         public static final int TAGALOG_ID = 98;
 598         /**
 599          * @stable ICU 2.4
 600          */
 601         public static final int HANUNOO_ID = 99;
 602         /**
 603          * @stable ICU 2.4
 604          */
 605         public static final int BUHID_ID = 100;
 606         /**
 607          * @stable ICU 2.4
 608          */
 609         public static final int TAGBANWA_ID = 101;
 610         /**
 611          * @stable ICU 2.4
 612          */
 613         public static final int MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A_ID = 102;
 614         /**
 615          * @stable ICU 2.4
 616          */
 617         public static final int SUPPLEMENTAL_ARROWS_A_ID = 103;
 618         /**
 619          * @stable ICU 2.4
 620          */
 621         public static final int SUPPLEMENTAL_ARROWS_B_ID = 104;
 622         /**
 623          * @stable ICU 2.4
 624          */
 625         public static final int MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B_ID = 105;
 626         /**
 627          * @stable ICU 2.4
 628          */
 629         public static final int SUPPLEMENTAL_MATHEMATICAL_OPERATORS_ID = 106;
 630         /**
 631          * @stable ICU 2.4
 632          */
 633         public static final int KATAKANA_PHONETIC_EXTENSIONS_ID = 107;
 634         /**
 635          * @stable ICU 2.4
 636          */
 637         public static final int VARIATION_SELECTORS_ID = 108;
 638         /**
 639          * @stable ICU 2.4
 640          */
 641         public static final int SUPPLEMENTARY_PRIVATE_USE_AREA_A_ID = 109;
 642         /**
 643          * @stable ICU 2.4
 644          */
 645         public static final int SUPPLEMENTARY_PRIVATE_USE_AREA_B_ID = 110;
 646
 647         /**
 648          * @stable ICU 2.6
 649          */
 650         public static final int LIMBU_ID = 111; /*[1900]*/
 651         /**
 652          * @stable ICU 2.6
 653          */
 654         public static final int TAI_LE_ID = 112; /*[1950]*/
 655         /**
 656          * @stable ICU 2.6
 657          */
 658         public static final int KHMER_SYMBOLS_ID = 113; /*[19E0]*/
 659         /**
 660          * @stable ICU 2.6
 661          */
 662         public static final int PHONETIC_EXTENSIONS_ID = 114; /*[1D00]*/
 663         /**
 664          * @stable ICU 2.6
 665          */
 666         public static final int MISCELLANEOUS_SYMBOLS_AND_ARROWS_ID = 115; /*[2B00]*/
 667         /**
 668          * @stable ICU 2.6
 669          */
 670         public static final int YIJING_HEXAGRAM_SYMBOLS_ID = 116; /*[4DC0]*/
 671         /**
 672          * @stable ICU 2.6
 673          */
 674         public static final int LINEAR_B_SYLLABARY_ID = 117; /*[10000]*/
 675         /**
 676          * @stable ICU 2.6
 677          */
 678         public static final int LINEAR_B_IDEOGRAMS_ID = 118; /*[10080]*/
 679         /**
 680          * @stable ICU 2.6
 681          */
 682         public static final int AEGEAN_NUMBERS_ID = 119; /*[10100]*/
 683         /**
 684          * @stable ICU 2.6
 685          */
 686         public static final int UGARITIC_ID = 120; /*[10380]*/
 687         /**
 688          * @stable ICU 2.6
 689          */
 690         public static final int SHAVIAN_ID = 121; /*[10450]*/
 691         /**
 692          * @stable ICU 2.6
 693          */
 694         public static final int OSMANYA_ID = 122; /*[10480]*/
 695         /**
 696          * @stable ICU 2.6
 697          */
 698         public static final int CYPRIOT_SYLLABARY_ID = 123; /*[10800]*/
 699         /**
 700          * @stable ICU 2.6
 701          */
 702         public static final int TAI_XUAN_JING_SYMBOLS_ID = 124; /*[1D300]*/
 703         /**
 704          * @stable ICU 2.6
 705          */
 706         public static final int VARIATION_SELECTORS_SUPPLEMENT_ID = 125; /*[E0100]*/
 707
 708         /* New blocks in Unicode 4.1 */
 709
 710         /**
 711          * @stable ICU 3.4
 712          */
 713         public static final int ANCIENT_GREEK_MUSICAL_NOTATION_ID = 126; /*[1D200]*/
 714
 715         /**
 716          * @stable ICU 3.4
 717          */
 718         public static final int ANCIENT_GREEK_NUMBERS_ID = 127; /*[10140]*/
 719
 720         /**
 721          * @stable ICU 3.4
 722          */
 723         public static final int ARABIC_SUPPLEMENT_ID = 128; /*[0750]*/
 724
 725         /**
 726          * @stable ICU 3.4
 727          */
 728         public static final int BUGINESE_ID = 129; /*[1A00]*/
 729
 730         /**
 731          * @stable ICU 3.4
 732          */
 733         public static final int CJK_STROKES_ID = 130; /*[31C0]*/
 734
 735         /**
 736          * @stable ICU 3.4
 737          */
 738         public static final int COMBINING_DIACRITICAL_MARKS_SUPPLEMENT_ID = 131; /*[1DC0]*/
 739
 740         /**
 741          * @stable ICU 3.4
 742          */
 743         public static final int COPTIC_ID = 132; /*[2C80]*/
 744
 745         /**
 746          * @stable ICU 3.4
 747          */
 748         public static final int ETHIOPIC_EXTENDED_ID = 133; /*[2D80]*/
 749
 750         /**
 751          * @stable ICU 3.4
 752          */
 753         public static final int ETHIOPIC_SUPPLEMENT_ID = 134; /*[1380]*/
 754
 755         /**
 756          * @stable ICU 3.4
 757          */
 758         public static final int GEORGIAN_SUPPLEMENT_ID = 135; /*[2D00]*/
 759
 760         /**
 761          * @stable ICU 3.4
 762          */
 763         public static final int GLAGOLITIC_ID = 136; /*[2C00]*/
 764
 765         /**
 766          * @stable ICU 3.4
 767          */
 768         public static final int KHAROSHTHI_ID = 137; /*[10A00]*/
 769
 770         /**
 771          * @stable ICU 3.4
 772          */
 773         public static final int MODIFIER_TONE_LETTERS_ID = 138; /*[A700]*/
 774
 775         /**
 776          * @stable ICU 3.4
 777          */
 778         public static final int NEW_TAI_LUE_ID = 139; /*[1980]*/
 779
 780         /**
 781          * @stable ICU 3.4
 782          */
 783         public static final int OLD_PERSIAN_ID = 140; /*[103A0]*/
 784
 785         /**
 786          * @stable ICU 3.4
 787          */
 788         public static final int PHONETIC_EXTENSIONS_SUPPLEMENT_ID = 141; /*[1D80]*/
 789
 790         /**
 791          * @stable ICU 3.4
 792          */
 793         public static final int SUPPLEMENTAL_PUNCTUATION_ID = 142; /*[2E00]*/
 794
 795         /**
 796          * @stable ICU 3.4
 797          */
 798         public static final int SYLOTI_NAGRI_ID = 143; /*[A800]*/
 799
 800         /**
 801          * @stable ICU 3.4
 802          */
 803         public static final int TIFINAGH_ID = 144; /*[2D30]*/
 804
 805         /**
 806          * @stable ICU 3.4
 807          */
 808         public static final int VERTICAL_FORMS_ID = 145; /*[FE10]*/
 809
 810         /* New blocks in Unicode 5.0 */
 811
 812         /**
 813          * @stable ICU 3.6
 814          */
 815         public static final int NKO_ID = 146; /*[07C0]*/
 816         /**
 817          * @stable ICU 3.6
 818          */
 819         public static final int BALINESE_ID = 147; /*[1B00]*/
 820         /**
 821          * @stable ICU 3.6
 822          */
 823         public static final int LATIN_EXTENDED_C_ID = 148; /*[2C60]*/
 824         /**
 825          * @stable ICU 3.6
 826          */
 827         public static final int LATIN_EXTENDED_D_ID = 149; /*[A720]*/
 828         /**
 829          * @stable ICU 3.6
 830          */
 831         public static final int PHAGS_PA_ID = 150; /*[A840]*/
 832         /**
 833          * @stable ICU 3.6
 834          */
 835         public static final int PHOENICIAN_ID = 151; /*[10900]*/
 836         /**
 837          * @stable ICU 3.6
 838          */
 839         public static final int CUNEIFORM_ID = 152; /*[12000]*/
 840         /**
 841          * @stable ICU 3.6
 842          */
 843         public static final int CUNEIFORM_NUMBERS_AND_PUNCTUATION_ID = 153; /*[12400]*/
 844         /**
 845          * @stable ICU 3.6
 846          */
 847         public static final int COUNTING_ROD_NUMERALS_ID = 154; /*[1D360]*/
 848
 849         /** ID of the Sundanese block (starts at U+1B80).
 850          * @stable ICU 4.0
 851          */
 852         public static final int SUNDANESE_ID = 155; /* [1B80] */
 853
 854         /** ID of the Lepcha block (starts at U+1C00).
 855          * @stable ICU 4.0
 856          */
 857         public static final int LEPCHA_ID = 156; /* [1C00] */
 858
 859         /** ID of the Ol Chiki block (starts at U+1C50).
 860          * @stable ICU 4.0
 861          */
 862         public static final int OL_CHIKI_ID = 157; /* [1C50] */
 863
 864         /** ID of the Cyrillic Extended-A block (starts at U+2DE0).
 865          * @stable ICU 4.0
 866          */
 867         public static final int CYRILLIC_EXTENDED_A_ID = 158; /* [2DE0] */
 868
 869         /** ID of the Vai block (starts at U+A500).
 870          * @stable ICU 4.0
 871          */
 872         public static final int VAI_ID = 159; /* [A500] */
 873
 874         /** ID of the Cyrillic Extended-B block (starts at U+A640).
 875          * @stable ICU 4.0
 876          */
 877         public static final int CYRILLIC_EXTENDED_B_ID = 160; /* [A640] */
 878
 879         /** ID of the Saurashtra block (starts at U+A880).
 880          * @stable ICU 4.0
 881          */
 882         public static final int SAURASHTRA_ID = 161; /* [A880] */
 883
 884         /** ID of the Kayah Li block (starts at U+A900).
 885          * @stable ICU 4.0
 886          */
 887         public static final int KAYAH_LI_ID = 162; /* [A900] */
 888
 889         /** ID of the Rejang block (starts at U+A930).
 890          * @stable ICU 4.0
 891          */
 892         public static final int REJANG_ID = 163; /* [A930] */
 893
 894         /** ID of the Cham block (starts at U+AA00).
 895          * @stable ICU 4.0
 896          */
 897         public static final int CHAM_ID = 164; /* [AA00] */
 898
 899         /** ID of the Ancient Symbols block (starts at U+10190).
 900          * @stable ICU 4.0
 901          */
 902         public static final int ANCIENT_SYMBOLS_ID = 165; /* [10190] */
 903
 904         /** ID of the Phaistos Disc block (starts at U+101D0).
 905          * @stable ICU 4.0
 906          */
 907         public static final int PHAISTOS_DISC_ID = 166; /* [101D0] */
 908
 909         /** ID of the Lycian block (starts at U+10280).
 910          * @stable ICU 4.0
 911          */
 912         public static final int LYCIAN_ID = 167; /* [10280] */
 913
 914         /** ID of the Carian block (starts at U+102A0).
 915          * @stable ICU 4.0
 916          */
 917         public static final int CARIAN_ID = 168; /* [102A0] */
 918
 919         /** ID of the Lydian block (starts at U+10920).
 920          * @stable ICU 4.0
 921          */
 922         public static final int LYDIAN_ID = 169; /* [10920] */
 923
 924         /** ID of the Mahjong Tiles block (starts at U+1F000).
 925          * @stable ICU 4.0
 926          */
 927         public static final int MAHJONG_TILES_ID = 170; /* [1F000] */
 928
 929         /** ID of the Domino Tiles block (starts at U+1F030).
 930          * @stable ICU 4.0
 931          */
 932         public static final int DOMINO_TILES_ID = 171; /* [1F030] */
 933
 934         /** One more than the highest block ID declared above (DOMINO_TILES_ID = 171); presumably the number of block IDs.
 935          * @stable ICU 2.4
 936          */
 937         public static final int COUNT = 172;
 938
 939         // block objects ----------------------------------------------------
 940
 941         /**
 942          * @stable ICU 2.6
 943          */
 944         public static final UnicodeBlock NO_BLOCK
 945             = new UnicodeBlock("NO_BLOCK", 0);
 946
 947         /**
 948          * @stable ICU 2.4
 949          */
 950         public static final UnicodeBlock BASIC_LATIN
 951             = new UnicodeBlock("BASIC_LATIN", BASIC_LATIN_ID);
 952         /**
 953          * @stable ICU 2.4
 954          */
 955         public static final UnicodeBlock LATIN_1_SUPPLEMENT
 956             = new UnicodeBlock("LATIN_1_SUPPLEMENT", LATIN_1_SUPPLEMENT_ID);
 957         /**
 958          * @stable ICU 2.4
 959          */
 960         public static final UnicodeBlock LATIN_EXTENDED_A
 961             = new UnicodeBlock("LATIN_EXTENDED_A", LATIN_EXTENDED_A_ID);
 962         /**
 963          * @stable ICU 2.4
 964          */
 965         public static final UnicodeBlock LATIN_EXTENDED_B
 966             = new UnicodeBlock("LATIN_EXTENDED_B", LATIN_EXTENDED_B_ID);
 967         /**
 968          * @stable ICU 2.4
 969          */
 970         public static final UnicodeBlock IPA_EXTENSIONS
 971             = new UnicodeBlock("IPA_EXTENSIONS", IPA_EXTENSIONS_ID);
 972         /**
 973          * @stable ICU 2.4
 974          */
 975         public static final UnicodeBlock SPACING_MODIFIER_LETTERS
 976             = new UnicodeBlock("SPACING_MODIFIER_LETTERS", SPACING_MODIFIER_LETTERS_ID);
 977         /**
 978          * @stable ICU 2.4
 979          */
 980         public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS
 981             = new UnicodeBlock("COMBINING_DIACRITICAL_MARKS", COMBINING_DIACRITICAL_MARKS_ID);
 982         /**
 983          * Unicode 3.2 renames this block to "Greek and Coptic".
 984          * @stable ICU 2.4
 985          */
 986         public static final UnicodeBlock GREEK
 987             = new UnicodeBlock("GREEK", GREEK_ID);
 988         /**
 989          * @stable ICU 2.4
 990          */
 991         public static final UnicodeBlock CYRILLIC
 992             = new UnicodeBlock("CYRILLIC", CYRILLIC_ID);
 993         /**
 994          * @stable ICU 2.4
 995          */
 996         public static final UnicodeBlock ARMENIAN
 997             = new UnicodeBlock("ARMENIAN", ARMENIAN_ID);
 998         /**
 999          * @stable ICU 2.4
1000          */
1001         public static final UnicodeBlock HEBREW
1002             = new UnicodeBlock("HEBREW", HEBREW_ID);
1003         /**
1004          * @stable ICU 2.4
1005          */
1006         public static final UnicodeBlock ARABIC
1007             = new UnicodeBlock("ARABIC", ARABIC_ID);
1008         /**
1009          * @stable ICU 2.4
1010          */
1011         public static final UnicodeBlock SYRIAC
1012             = new UnicodeBlock("SYRIAC", SYRIAC_ID);
1013         /**
1014          * @stable ICU 2.4
1015          */
1016         public static final UnicodeBlock THAANA
1017             = new UnicodeBlock("THAANA", THAANA_ID);
1018         /**
1019          * @stable ICU 2.4
1020          */
1021         public static final UnicodeBlock DEVANAGARI
1022             = new UnicodeBlock("DEVANAGARI", DEVANAGARI_ID);
1023         /**
1024          * @stable ICU 2.4
1025          */
1026         public static final UnicodeBlock BENGALI
1027             = new UnicodeBlock("BENGALI", BENGALI_ID);
1028         /**
1029          * @stable ICU 2.4
1030          */
1031         public static final UnicodeBlock GURMUKHI
1032             = new UnicodeBlock("GURMUKHI", GURMUKHI_ID);
1033         /**
1034          * @stable ICU 2.4
1035          */
1036         public static final UnicodeBlock GUJARATI
1037             = new UnicodeBlock("GUJARATI", GUJARATI_ID);
1038         /**
1039          * @stable ICU 2.4
1040          */
1041         public static final UnicodeBlock ORIYA
1042             = new UnicodeBlock("ORIYA", ORIYA_ID);
1043         /**
1044          * @stable ICU 2.4
1045          */
1046         public static final UnicodeBlock TAMIL
1047             = new UnicodeBlock("TAMIL", TAMIL_ID);
1048         /**
1049          * @stable ICU 2.4
1050          */
1051         public static final UnicodeBlock TELUGU
1052             = new UnicodeBlock("TELUGU", TELUGU_ID);
1053         /**
1054          * @stable ICU 2.4
1055          */
1056         public static final UnicodeBlock KANNADA
1057             = new UnicodeBlock("KANNADA", KANNADA_ID);
1058         /**
1059          * @stable ICU 2.4
1060          */
1061         public static final UnicodeBlock MALAYALAM
1062             = new UnicodeBlock("MALAYALAM", MALAYALAM_ID);
1063         /**
1064          * @stable ICU 2.4
1065          */
1066         public static final UnicodeBlock SINHALA
1067             = new UnicodeBlock("SINHALA", SINHALA_ID);
1068         /**
1069          * @stable ICU 2.4
1070          */
1071         public static final UnicodeBlock THAI
1072             = new UnicodeBlock("THAI", THAI_ID);
1073         /**
1074          * @stable ICU 2.4
1075          */
1076         public static final UnicodeBlock LAO
1077             = new UnicodeBlock("LAO", LAO_ID);
1078         /**
1079          * @stable ICU 2.4
1080          */
1081         public static final UnicodeBlock TIBETAN
1082             = new UnicodeBlock("TIBETAN", TIBETAN_ID);
1083         /**
1084          * @stable ICU 2.4
1085          */
1086         public static final UnicodeBlock MYANMAR
1087             = new UnicodeBlock("MYANMAR", MYANMAR_ID);
1088         /**
1089          * @stable ICU 2.4
1090          */
1091         public static final UnicodeBlock GEORGIAN
1092             = new UnicodeBlock("GEORGIAN", GEORGIAN_ID);
1093         /**
1094          * @stable ICU 2.4
1095          */
1096         public static final UnicodeBlock HANGUL_JAMO
1097             = new UnicodeBlock("HANGUL_JAMO", HANGUL_JAMO_ID);
1098         /**
1099          * @stable ICU 2.4
1100          */
1101         public static final UnicodeBlock ETHIOPIC
1102             = new UnicodeBlock("ETHIOPIC", ETHIOPIC_ID);
1103         /**
1104          * @stable ICU 2.4
1105          */
1106         public static final UnicodeBlock CHEROKEE
1107             = new UnicodeBlock("CHEROKEE", CHEROKEE_ID);
1108         /**
1109          * @stable ICU 2.4
1110          */
1111         public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS
1112             = new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS", UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_ID);
1113         /**
1114          * @stable ICU 2.4
1115          */
1116         public static final UnicodeBlock OGHAM
1117             = new UnicodeBlock("OGHAM", OGHAM_ID);
1118         /**
1119          * @stable ICU 2.4
1120          */
1121         public static final UnicodeBlock RUNIC
1122             = new UnicodeBlock("RUNIC", RUNIC_ID);
1123         /**
1124          * @stable ICU 2.4
1125          */
1126         public static final UnicodeBlock KHMER
1127             = new UnicodeBlock("KHMER", KHMER_ID);
1128         /**
1129          * @stable ICU 2.4
1130          */
1131         public static final UnicodeBlock MONGOLIAN
1132             = new UnicodeBlock("MONGOLIAN", MONGOLIAN_ID);
1133         /**
1134          * @stable ICU 2.4
1135          */
1136         public static final UnicodeBlock LATIN_EXTENDED_ADDITIONAL
1137             = new UnicodeBlock("LATIN_EXTENDED_ADDITIONAL", LATIN_EXTENDED_ADDITIONAL_ID);
1138         /**
1139          * @stable ICU 2.4
1140          */
1141         public static final UnicodeBlock GREEK_EXTENDED
1142             = new UnicodeBlock("GREEK_EXTENDED", GREEK_EXTENDED_ID);
1143         /**
1144          * @stable ICU 2.4
1145          */
1146         public static final UnicodeBlock GENERAL_PUNCTUATION
1147             = new UnicodeBlock("GENERAL_PUNCTUATION", GENERAL_PUNCTUATION_ID);
1148         /**
1149          * @stable ICU 2.4
1150          */
1151         public static final UnicodeBlock SUPERSCRIPTS_AND_SUBSCRIPTS
1152             = new UnicodeBlock("SUPERSCRIPTS_AND_SUBSCRIPTS", SUPERSCRIPTS_AND_SUBSCRIPTS_ID);
1153         /**
1154          * @stable ICU 2.4
1155          */
1156         public static final UnicodeBlock CURRENCY_SYMBOLS
1157             = new UnicodeBlock("CURRENCY_SYMBOLS", CURRENCY_SYMBOLS_ID);
1158         /**
1159          * Unicode 3.2 renames this block to "Combining Diacritical Marks for
1160          * Symbols".
1161          * @stable ICU 2.4
1162          */
1163         public static final UnicodeBlock COMBINING_MARKS_FOR_SYMBOLS
1164             = new UnicodeBlock("COMBINING_MARKS_FOR_SYMBOLS", COMBINING_MARKS_FOR_SYMBOLS_ID);
1165         /**
1166          * @stable ICU 2.4
1167          */
1168         public static final UnicodeBlock LETTERLIKE_SYMBOLS
1169             = new UnicodeBlock("LETTERLIKE_SYMBOLS", LETTERLIKE_SYMBOLS_ID);
1170         /**
1171          * @stable ICU 2.4
1172          */
1173         public static final UnicodeBlock NUMBER_FORMS
1174             = new UnicodeBlock("NUMBER_FORMS", NUMBER_FORMS_ID);
1175         /**
1176          * @stable ICU 2.4
1177          */
1178         public static final UnicodeBlock ARROWS
1179             = new UnicodeBlock("ARROWS", ARROWS_ID);
1180         /**
1181          * @stable ICU 2.4
1182          */
1183         public static final UnicodeBlock MATHEMATICAL_OPERATORS
1184             = new UnicodeBlock("MATHEMATICAL_OPERATORS", MATHEMATICAL_OPERATORS_ID);
1185         /**
1186          * @stable ICU 2.4
1187          */
1188         public static final UnicodeBlock MISCELLANEOUS_TECHNICAL
1189             = new UnicodeBlock("MISCELLANEOUS_TECHNICAL", MISCELLANEOUS_TECHNICAL_ID);
1190         /**
1191          * @stable ICU 2.4
1192          */
1193         public static final UnicodeBlock CONTROL_PICTURES
1194             = new UnicodeBlock("CONTROL_PICTURES", CONTROL_PICTURES_ID);
1195         /**
1196          * @stable ICU 2.4
1197          */
1198         public static final UnicodeBlock OPTICAL_CHARACTER_RECOGNITION
1199             = new UnicodeBlock("OPTICAL_CHARACTER_RECOGNITION", OPTICAL_CHARACTER_RECOGNITION_ID);
1200         /**
1201          * @stable ICU 2.4
1202          */
1203         public static final UnicodeBlock ENCLOSED_ALPHANUMERICS
1204             = new UnicodeBlock("ENCLOSED_ALPHANUMERICS", ENCLOSED_ALPHANUMERICS_ID);
1205         /**
1206          * @stable ICU 2.4
1207          */
1208         public static final UnicodeBlock BOX_DRAWING
1209             = new UnicodeBlock("BOX_DRAWING", BOX_DRAWING_ID);
1210         /**
1211          * @stable ICU 2.4
1212          */
1213         public static final UnicodeBlock BLOCK_ELEMENTS
1214             = new UnicodeBlock("BLOCK_ELEMENTS", BLOCK_ELEMENTS_ID);
1215         /**
1216          * @stable ICU 2.4
1217          */
1218         public static final UnicodeBlock GEOMETRIC_SHAPES
1219             = new UnicodeBlock("GEOMETRIC_SHAPES", GEOMETRIC_SHAPES_ID);
1220         /**
1221          * @stable ICU 2.4
1222          */
1223         public static final UnicodeBlock MISCELLANEOUS_SYMBOLS
1224             = new UnicodeBlock("MISCELLANEOUS_SYMBOLS", MISCELLANEOUS_SYMBOLS_ID);
1225         /**
1226          * @stable ICU 2.4
1227          */
1228         public static final UnicodeBlock DINGBATS
1229             = new UnicodeBlock("DINGBATS", DINGBATS_ID);
1230         /**
1231          * @stable ICU 2.4
1232          */
1233         public static final UnicodeBlock BRAILLE_PATTERNS
1234             = new UnicodeBlock("BRAILLE_PATTERNS", BRAILLE_PATTERNS_ID);
1235         /**
1236          * @stable ICU 2.4
1237          */
1238         public static final UnicodeBlock CJK_RADICALS_SUPPLEMENT
1239             = new UnicodeBlock("CJK_RADICALS_SUPPLEMENT", CJK_RADICALS_SUPPLEMENT_ID);
1240         /**
1241          * @stable ICU 2.4
1242          */
1243         public static final UnicodeBlock KANGXI_RADICALS
1244             = new UnicodeBlock("KANGXI_RADICALS", KANGXI_RADICALS_ID);
1245         /**
1246          * @stable ICU 2.4
1247          */
1248         public static final UnicodeBlock IDEOGRAPHIC_DESCRIPTION_CHARACTERS
1249             = new UnicodeBlock("IDEOGRAPHIC_DESCRIPTION_CHARACTERS", IDEOGRAPHIC_DESCRIPTION_CHARACTERS_ID);
1250         /**
1251          * @stable ICU 2.4
1252          */
1253         public static final UnicodeBlock CJK_SYMBOLS_AND_PUNCTUATION
1254             = new UnicodeBlock("CJK_SYMBOLS_AND_PUNCTUATION", CJK_SYMBOLS_AND_PUNCTUATION_ID);
1255         /**
1256          * @stable ICU 2.4
1257          */
1258         public static final UnicodeBlock HIRAGANA
1259             = new UnicodeBlock("HIRAGANA", HIRAGANA_ID);
1260         /**
1261          * @stable ICU 2.4
1262          */
1263         public static final UnicodeBlock KATAKANA
1264             = new UnicodeBlock("KATAKANA", KATAKANA_ID);
1265         /**
1266          * @stable ICU 2.4
1267          */
1268         public static final UnicodeBlock BOPOMOFO
1269             = new UnicodeBlock("BOPOMOFO", BOPOMOFO_ID);
1270         /**
1271          * @stable ICU 2.4
1272          */
1273         public static final UnicodeBlock HANGUL_COMPATIBILITY_JAMO
1274             = new UnicodeBlock("HANGUL_COMPATIBILITY_JAMO", HANGUL_COMPATIBILITY_JAMO_ID);
1275         /**
1276          * @stable ICU 2.4
1277          */
1278         public static final UnicodeBlock KANBUN
1279             = new UnicodeBlock("KANBUN", KANBUN_ID);
1280         /**
1281          * @stable ICU 2.4
1282          */
1283         public static final UnicodeBlock BOPOMOFO_EXTENDED
1284             = new UnicodeBlock("BOPOMOFO_EXTENDED", BOPOMOFO_EXTENDED_ID);
1285         /**
1286          * @stable ICU 2.4
1287          */
1288         public static final UnicodeBlock ENCLOSED_CJK_LETTERS_AND_MONTHS
1289             = new UnicodeBlock("ENCLOSED_CJK_LETTERS_AND_MONTHS", ENCLOSED_CJK_LETTERS_AND_MONTHS_ID);
1290         /**
1291          * @stable ICU 2.4
1292          */
1293         public static final UnicodeBlock CJK_COMPATIBILITY
1294             = new UnicodeBlock("CJK_COMPATIBILITY", CJK_COMPATIBILITY_ID);
1295         /**
1296          * @stable ICU 2.4
1297          */
1298         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A
1299             = new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A", CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A_ID);
1300         /**
1301          * @stable ICU 2.4
1302          */
1303         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS
1304             = new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS", CJK_UNIFIED_IDEOGRAPHS_ID);
1305         /**
1306          * @stable ICU 2.4
1307          */
1308         public static final UnicodeBlock YI_SYLLABLES
1309             = new UnicodeBlock("YI_SYLLABLES", YI_SYLLABLES_ID);
1310         /**
1311          * @stable ICU 2.4
1312          */
1313         public static final UnicodeBlock YI_RADICALS
1314             = new UnicodeBlock("YI_RADICALS", YI_RADICALS_ID);
1315         /**
1316          * @stable ICU 2.4
1317          */
1318         public static final UnicodeBlock HANGUL_SYLLABLES
1319             = new UnicodeBlock("HANGUL_SYLLABLES", HANGUL_SYLLABLES_ID);
1320         /**
1321          * @stable ICU 2.4
1322          */
1323         public static final UnicodeBlock HIGH_SURROGATES
1324             = new UnicodeBlock("HIGH_SURROGATES", HIGH_SURROGATES_ID);
1325         /**
1326          * @stable ICU 2.4
1327          */
1328         public static final UnicodeBlock HIGH_PRIVATE_USE_SURROGATES
1329             = new UnicodeBlock("HIGH_PRIVATE_USE_SURROGATES", HIGH_PRIVATE_USE_SURROGATES_ID);
1330         /**
1331          * @stable ICU 2.4
1332          */
1333         public static final UnicodeBlock LOW_SURROGATES
1334             = new UnicodeBlock("LOW_SURROGATES", LOW_SURROGATES_ID);
1335         /**
1336          * Block object for the BMP Private Use Area; {@link #PRIVATE_USE} is an alias for this same object.
1337          * Until Unicode 3.1.1, the corresponding block name was "Private Use",
1338          * and multiple code point ranges had this block.
1339          * Unicode 3.2 renames the block for the BMP PUA to "Private Use Area"
1340          * and adds separate blocks for the supplementary PUAs.
1341          * @stable ICU 2.4
1342          */
1343         public static final UnicodeBlock PRIVATE_USE_AREA
1344             = new UnicodeBlock("PRIVATE_USE_AREA", PRIVATE_USE_AREA_ID);
1345         /**
1346          * Same as public static final int PRIVATE_USE_AREA.
1347          * Until Unicode 3.1.1; the corresponding block name was "Private Use";
1348          * and multiple code point ranges had this block.
1349          * Unicode 3.2 renames the block for the BMP PUA to "Private Use Area"
1350          * and adds separate blocks for the supplementary PUAs.
1351          * @stable ICU 2.4
1352          */
1353         public static final UnicodeBlock PRIVATE_USE
1354             = PRIVATE_USE_AREA;
1355         /**
1356          * @stable ICU 2.4
1357          */
1358         public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS
1359             = new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS", CJK_COMPATIBILITY_IDEOGRAPHS_ID);
1360         /**
1361          * @stable ICU 2.4
1362          */
1363         public static final UnicodeBlock ALPHABETIC_PRESENTATION_FORMS
1364             = new UnicodeBlock("ALPHABETIC_PRESENTATION_FORMS", ALPHABETIC_PRESENTATION_FORMS_ID);
1365         /**
1366          * @stable ICU 2.4
1367          */
1368         public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_A
1369             = new UnicodeBlock("ARABIC_PRESENTATION_FORMS_A", ARABIC_PRESENTATION_FORMS_A_ID);
1370         /**
1371          * @stable ICU 2.4
1372          */
1373         public static final UnicodeBlock COMBINING_HALF_MARKS
1374             = new UnicodeBlock("COMBINING_HALF_MARKS", COMBINING_HALF_MARKS_ID);
1375         /**
1376          * @stable ICU 2.4
1377          */
1378         public static final UnicodeBlock CJK_COMPATIBILITY_FORMS
1379             = new UnicodeBlock("CJK_COMPATIBILITY_FORMS", CJK_COMPATIBILITY_FORMS_ID);
1380         /**
1381          * @stable ICU 2.4
1382          */
1383         public static final UnicodeBlock SMALL_FORM_VARIANTS
1384             = new UnicodeBlock("SMALL_FORM_VARIANTS", SMALL_FORM_VARIANTS_ID);
1385         /**
1386          * @stable ICU 2.4
1387          */
1388         public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_B
1389             = new UnicodeBlock("ARABIC_PRESENTATION_FORMS_B", ARABIC_PRESENTATION_FORMS_B_ID);
1390         /**
1391          * @stable ICU 2.4
1392          */
1393         public static final UnicodeBlock SPECIALS
1394             = new UnicodeBlock("SPECIALS", SPECIALS_ID);
1395         /**
1396          * @stable ICU 2.4
1397          */
1398         public static final UnicodeBlock HALFWIDTH_AND_FULLWIDTH_FORMS
1399             = new UnicodeBlock("HALFWIDTH_AND_FULLWIDTH_FORMS", HALFWIDTH_AND_FULLWIDTH_FORMS_ID);
1400         /**
1401          * @stable ICU 2.4
1402          */
1403         public static final UnicodeBlock OLD_ITALIC
1404             = new UnicodeBlock("OLD_ITALIC", OLD_ITALIC_ID);
1405         /**
1406          * @stable ICU 2.4
1407          */
1408         public static final UnicodeBlock GOTHIC
1409             = new UnicodeBlock("GOTHIC", GOTHIC_ID);
1410         /**
1411          * @stable ICU 2.4
1412          */
1413         public static final UnicodeBlock DESERET
1414             = new UnicodeBlock("DESERET", DESERET_ID);
1415         /**
1416          * @stable ICU 2.4
1417          */
1418         public static final UnicodeBlock BYZANTINE_MUSICAL_SYMBOLS
1419             = new UnicodeBlock("BYZANTINE_MUSICAL_SYMBOLS", BYZANTINE_MUSICAL_SYMBOLS_ID);
1420         /**
1421          * @stable ICU 2.4
1422          */
1423         public static final UnicodeBlock MUSICAL_SYMBOLS
1424             = new UnicodeBlock("MUSICAL_SYMBOLS", MUSICAL_SYMBOLS_ID);
1425         /**
1426          * @stable ICU 2.4
1427          */
1428         public static final UnicodeBlock MATHEMATICAL_ALPHANUMERIC_SYMBOLS
1429             = new UnicodeBlock("MATHEMATICAL_ALPHANUMERIC_SYMBOLS", MATHEMATICAL_ALPHANUMERIC_SYMBOLS_ID);
1430         /**
1431          * @stable ICU 2.4
1432          */
1433         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B
1434             = new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B", CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B_ID);
1435         /**
1436          * @stable ICU 2.4
1437          */
1438         public static final UnicodeBlock
1439             CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT
1440             = new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT", CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT_ID);
1441         /**
1442          * @stable ICU 2.4
1443          */
1444         public static final UnicodeBlock TAGS
1445             = new UnicodeBlock("TAGS", TAGS_ID);
1446
1447         // New blocks in Unicode 3.2
1448
1449         /**
1450          * Unicode 4.0.1 renames the "Cyrillic Supplementary" block to "Cyrillic Supplement".
1451          * @stable ICU 2.4
1452          */
1453         public static final UnicodeBlock CYRILLIC_SUPPLEMENTARY
1454             = new UnicodeBlock("CYRILLIC_SUPPLEMENTARY", CYRILLIC_SUPPLEMENTARY_ID);
1455         /**
1456          * Unicode 4.0.1 renames the "Cyrillic Supplementary" block to "Cyrillic Supplement".
1457          * @stable ICU 3.0
1458          */
1459         public static final UnicodeBlock CYRILLIC_SUPPLEMENT
1460             = new UnicodeBlock("CYRILLIC_SUPPLEMENT", CYRILLIC_SUPPLEMENT_ID);
1461         /**
1462          * @stable ICU 2.4
1463          */
1464         public static final UnicodeBlock TAGALOG
1465             = new UnicodeBlock("TAGALOG", TAGALOG_ID);
1466         /**
1467          * @stable ICU 2.4
1468          */
1469         public static final UnicodeBlock HANUNOO
1470             = new UnicodeBlock("HANUNOO", HANUNOO_ID);
1471         /**
1472          * @stable ICU 2.4
1473          */
1474         public static final UnicodeBlock BUHID
1475             = new UnicodeBlock("BUHID", BUHID_ID);
1476         /**
1477          * @stable ICU 2.4
1478          */
1479         public static final UnicodeBlock TAGBANWA
1480             = new UnicodeBlock("TAGBANWA", TAGBANWA_ID);
1481         /**
1482          * @stable ICU 2.4
1483          */
1484         public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A
1485             = new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A", MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A_ID);
1486         /**
1487          * @stable ICU 2.4
1488          */
1489         public static final UnicodeBlock SUPPLEMENTAL_ARROWS_A
1490             = new UnicodeBlock("SUPPLEMENTAL_ARROWS_A", SUPPLEMENTAL_ARROWS_A_ID);
1491         /**
1492          * @stable ICU 2.4
1493          */
1494         public static final UnicodeBlock SUPPLEMENTAL_ARROWS_B
1495             = new UnicodeBlock("SUPPLEMENTAL_ARROWS_B", SUPPLEMENTAL_ARROWS_B_ID);
1496         /**
1497          * @stable ICU 2.4
1498          */
1499         public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B
1500             = new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B", MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B_ID);
1501         /**
1502          * @stable ICU 2.4
1503          */
1504         public static final UnicodeBlock SUPPLEMENTAL_MATHEMATICAL_OPERATORS
1505             = new UnicodeBlock("SUPPLEMENTAL_MATHEMATICAL_OPERATORS", SUPPLEMENTAL_MATHEMATICAL_OPERATORS_ID);
1506         /**
1507          * @stable ICU 2.4
1508          */
1509         public static final UnicodeBlock KATAKANA_PHONETIC_EXTENSIONS
1510             = new UnicodeBlock("KATAKANA_PHONETIC_EXTENSIONS", KATAKANA_PHONETIC_EXTENSIONS_ID);
1511         /**
1512          * @stable ICU 2.4
1513          */
1514         public static final UnicodeBlock VARIATION_SELECTORS
1515             = new UnicodeBlock("VARIATION_SELECTORS", VARIATION_SELECTORS_ID);
1516         /**
1517          * @stable ICU 2.4
1518          */
1519         public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_A
1520             = new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_A", SUPPLEMENTARY_PRIVATE_USE_AREA_A_ID);
1521         /**
1522          * @stable ICU 2.4
1523          */
1524         public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_B
1525             = new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_B", SUPPLEMENTARY_PRIVATE_USE_AREA_B_ID);
1526
1527         /**
1528          * @stable ICU 2.6
1529          */
1530         public static final UnicodeBlock LIMBU
1531             = new UnicodeBlock("LIMBU", LIMBU_ID);
1532         /**
1533          * @stable ICU 2.6
1534          */
1535         public static final UnicodeBlock TAI_LE
1536             = new UnicodeBlock("TAI_LE", TAI_LE_ID);
1537         /**
1538          * @stable ICU 2.6
1539          */
1540         public static final UnicodeBlock KHMER_SYMBOLS
1541             = new UnicodeBlock("KHMER_SYMBOLS", KHMER_SYMBOLS_ID);
1542
1543         /**
1544          * @stable ICU 2.6
1545          */
1546         public static final UnicodeBlock PHONETIC_EXTENSIONS
1547             = new UnicodeBlock("PHONETIC_EXTENSIONS", PHONETIC_EXTENSIONS_ID);
1548
1549         /**
1550          * @stable ICU 2.6
1551          */
1552         public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_ARROWS
1553             = new UnicodeBlock("MISCELLANEOUS_SYMBOLS_AND_ARROWS", MISCELLANEOUS_SYMBOLS_AND_ARROWS_ID);
1554         /**
1555          * @stable ICU 2.6
1556          */
1557         public static final UnicodeBlock YIJING_HEXAGRAM_SYMBOLS
1558             = new UnicodeBlock("YIJING_HEXAGRAM_SYMBOLS", YIJING_HEXAGRAM_SYMBOLS_ID);
1559         /**
1560          * @stable ICU 2.6
1561          */
1562         public static final UnicodeBlock LINEAR_B_SYLLABARY
1563             = new UnicodeBlock("LINEAR_B_SYLLABARY", LINEAR_B_SYLLABARY_ID);
1564         /**
1565          * @stable ICU 2.6
1566          */
1567         public static final UnicodeBlock LINEAR_B_IDEOGRAMS
1568             = new UnicodeBlock("LINEAR_B_IDEOGRAMS", LINEAR_B_IDEOGRAMS_ID);
1569         /**
1570          * @stable ICU 2.6
1571          */
1572         public static final UnicodeBlock AEGEAN_NUMBERS
1573             = new UnicodeBlock("AEGEAN_NUMBERS", AEGEAN_NUMBERS_ID);
1574         /**
1575          * @stable ICU 2.6
1576          */
1577         public static final UnicodeBlock UGARITIC
1578             = new UnicodeBlock("UGARITIC", UGARITIC_ID);
1579         /**
1580          * @stable ICU 2.6
1581          */
1582         public static final UnicodeBlock SHAVIAN
1583             = new UnicodeBlock("SHAVIAN", SHAVIAN_ID);
1584         /**
1585          * @stable ICU 2.6
1586          */
1587         public static final UnicodeBlock OSMANYA
1588             = new UnicodeBlock("OSMANYA", OSMANYA_ID);
1589         /**
1590          * @stable ICU 2.6
1591          */
1592         public static final UnicodeBlock CYPRIOT_SYLLABARY
1593             = new UnicodeBlock("CYPRIOT_SYLLABARY", CYPRIOT_SYLLABARY_ID);
1594         /**
1595          * @stable ICU 2.6
1596          */
1597         public static final UnicodeBlock TAI_XUAN_JING_SYMBOLS
1598             = new UnicodeBlock("TAI_XUAN_JING_SYMBOLS", TAI_XUAN_JING_SYMBOLS_ID);
1599
1600         /**
1601          * @stable ICU 2.6
1602          */
1603         public static final UnicodeBlock VARIATION_SELECTORS_SUPPLEMENT
1604             = new UnicodeBlock("VARIATION_SELECTORS_SUPPLEMENT", VARIATION_SELECTORS_SUPPLEMENT_ID);
1605
1606         /* New blocks in Unicode 4.1 */
1607
1608         /** Block object for Ancient Greek Musical Notation (block starts at U+1D200).
1609          * @stable ICU 3.4
1610          */
1611         public static final UnicodeBlock ANCIENT_GREEK_MUSICAL_NOTATION = new UnicodeBlock("ANCIENT_GREEK_MUSICAL_NOTATION", ANCIENT_GREEK_MUSICAL_NOTATION_ID); /*[1D200]*/
1612
1613         /** Block object for Ancient Greek Numbers (block starts at U+10140).
1614          * @stable ICU 3.4
1615          */
1616         public static final UnicodeBlock ANCIENT_GREEK_NUMBERS = new UnicodeBlock("ANCIENT_GREEK_NUMBERS", ANCIENT_GREEK_NUMBERS_ID); /*[10140]*/
1617
1618         /** Block object for Arabic Supplement (block starts at U+0750).
1619          * @stable ICU 3.4
1620          */
1621         public static final UnicodeBlock ARABIC_SUPPLEMENT = new UnicodeBlock("ARABIC_SUPPLEMENT", ARABIC_SUPPLEMENT_ID); /*[0750]*/
1622
1623         /** Block object for Buginese (block starts at U+1A00).
1624          * @stable ICU 3.4
1625          */
1626         public static final UnicodeBlock BUGINESE = new UnicodeBlock("BUGINESE", BUGINESE_ID); /*[1A00]*/
1627
1628         /** Block object for CJK Strokes (block starts at U+31C0).
1629          * @stable ICU 3.4
1630          */
1631         public static final UnicodeBlock CJK_STROKES = new UnicodeBlock("CJK_STROKES", CJK_STROKES_ID); /*[31C0]*/
1632
1633         /** Block object for Combining Diacritical Marks Supplement (block starts at U+1DC0).
1634          * @stable ICU 3.4
1635          */
1636         public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS_SUPPLEMENT = new UnicodeBlock("COMBINING_DIACRITICAL_MARKS_SUPPLEMENT", COMBINING_DIACRITICAL_MARKS_SUPPLEMENT_ID); /*[1DC0]*/
1637
1638         /** Block object for Coptic (block starts at U+2C80).
1639          * @stable ICU 3.4
1640          */
1641         public static final UnicodeBlock COPTIC = new UnicodeBlock("COPTIC", COPTIC_ID); /*[2C80]*/
1642
1643         /** Block object for Ethiopic Extended (block starts at U+2D80).
1644          * @stable ICU 3.4
1645          */
1646         public static final UnicodeBlock ETHIOPIC_EXTENDED = new UnicodeBlock("ETHIOPIC_EXTENDED", ETHIOPIC_EXTENDED_ID); /*[2D80]*/
1647
1648         /** Block object for Ethiopic Supplement (block starts at U+1380).
1649          * @stable ICU 3.4
1650          */
1651         public static final UnicodeBlock ETHIOPIC_SUPPLEMENT = new UnicodeBlock("ETHIOPIC_SUPPLEMENT", ETHIOPIC_SUPPLEMENT_ID); /*[1380]*/
1652
1653         /** Block object for Georgian Supplement (block starts at U+2D00).
1654          * @stable ICU 3.4
1655          */
1656         public static final UnicodeBlock GEORGIAN_SUPPLEMENT = new UnicodeBlock("GEORGIAN_SUPPLEMENT", GEORGIAN_SUPPLEMENT_ID); /*[2D00]*/
1657
1658         /** Block object for Glagolitic (block starts at U+2C00).
1659          * @stable ICU 3.4
1660          */
1661         public static final UnicodeBlock GLAGOLITIC = new UnicodeBlock("GLAGOLITIC", GLAGOLITIC_ID); /*[2C00]*/
1662
1663         /** Block object for Kharoshthi (block starts at U+10A00).
1664          * @stable ICU 3.4
1665          */
1666         public static final UnicodeBlock KHAROSHTHI = new UnicodeBlock("KHAROSHTHI", KHAROSHTHI_ID); /*[10A00]*/
1667
1668         /** Block object for Modifier Tone Letters (block starts at U+A700).
1669          * @stable ICU 3.4
1670          */
1671         public static final UnicodeBlock MODIFIER_TONE_LETTERS = new UnicodeBlock("MODIFIER_TONE_LETTERS", MODIFIER_TONE_LETTERS_ID); /*[A700]*/
1672
1673         /** Block object for New Tai Lue (block starts at U+1980).
1674          * @stable ICU 3.4
1675          */
1676         public static final UnicodeBlock NEW_TAI_LUE = new UnicodeBlock("NEW_TAI_LUE", NEW_TAI_LUE_ID); /*[1980]*/
1677
1678         /** Block object for Old Persian (block starts at U+103A0).
1679          * @stable ICU 3.4
1680          */
1681         public static final UnicodeBlock OLD_PERSIAN = new UnicodeBlock("OLD_PERSIAN", OLD_PERSIAN_ID); /*[103A0]*/
1682
1683         /** Block object for Phonetic Extensions Supplement (block starts at U+1D80).
1684          * @stable ICU 3.4
1685          */
1686         public static final UnicodeBlock PHONETIC_EXTENSIONS_SUPPLEMENT = new UnicodeBlock("PHONETIC_EXTENSIONS_SUPPLEMENT", PHONETIC_EXTENSIONS_SUPPLEMENT_ID); /*[1D80]*/
1687
1688         /** Block object for Supplemental Punctuation (block starts at U+2E00).
1689          * @stable ICU 3.4
1690          */
1691         public static final UnicodeBlock SUPPLEMENTAL_PUNCTUATION = new UnicodeBlock("SUPPLEMENTAL_PUNCTUATION", SUPPLEMENTAL_PUNCTUATION_ID); /*[2E00]*/
1692
1693         /** Block object for Syloti Nagri (block starts at U+A800).
1694          * @stable ICU 3.4
1695          */
1696         public static final UnicodeBlock SYLOTI_NAGRI = new UnicodeBlock("SYLOTI_NAGRI", SYLOTI_NAGRI_ID); /*[A800]*/
1697
1698         /** Block object for Tifinagh (block starts at U+2D30).
1699          * @stable ICU 3.4
1700          */
1701         public static final UnicodeBlock TIFINAGH = new UnicodeBlock("TIFINAGH", TIFINAGH_ID); /*[2D30]*/
1702
1703         /** Block object for Vertical Forms (block starts at U+FE10).
1704          * @stable ICU 3.4
1705          */
1706         public static final UnicodeBlock VERTICAL_FORMS = new UnicodeBlock("VERTICAL_FORMS", VERTICAL_FORMS_ID); /*[FE10]*/
1707
1708         /**
1709          * @stable ICU 3.6
1710          */
1711         public static final UnicodeBlock NKO = new UnicodeBlock("NKO", NKO_ID); /*[07C0]*/
1712         /**
1713          * @stable ICU 3.6
1714          */
1715         public static final UnicodeBlock BALINESE = new UnicodeBlock("BALINESE", BALINESE_ID); /*[1B00]*/
1716         /**
1717          * @stable ICU 3.6
1718          */
1719         public static final UnicodeBlock LATIN_EXTENDED_C = new UnicodeBlock("LATIN_EXTENDED_C", LATIN_EXTENDED_C_ID); /*[2C60]*/
1720         /**
1721          * @stable ICU 3.6
1722          */
1723         public static final UnicodeBlock LATIN_EXTENDED_D = new UnicodeBlock("LATIN_EXTENDED_D", LATIN_EXTENDED_D_ID); /*[A720]*/
1724         /**
1725          * @stable ICU 3.6
1726          */
1727         public static final UnicodeBlock PHAGS_PA = new UnicodeBlock("PHAGS_PA", PHAGS_PA_ID); /*[A840]*/
1728         /**
1729          * @stable ICU 3.6
1730          */
1731         public static final UnicodeBlock PHOENICIAN = new UnicodeBlock("PHOENICIAN", PHOENICIAN_ID); /*[10900]*/
1732         /**
1733          * @stable ICU 3.6
1734          */
1735         public static final UnicodeBlock CUNEIFORM = new UnicodeBlock("CUNEIFORM", CUNEIFORM_ID); /*[12000]*/
1736         /**
1737          * @stable ICU 3.6
1738          */
1739         public static final UnicodeBlock CUNEIFORM_NUMBERS_AND_PUNCTUATION = new UnicodeBlock("CUNEIFORM_NUMBERS_AND_PUNCTUATION", CUNEIFORM_NUMBERS_AND_PUNCTUATION_ID); /*[12400]*/
1740         /**
1741          * @stable ICU 3.6
1742          */
1743         public static final UnicodeBlock COUNTING_ROD_NUMERALS = new UnicodeBlock("COUNTING_ROD_NUMERALS", COUNTING_ROD_NUMERALS_ID); /*[1D360]*/
1744
1745         /**
1746          * @stable ICU 4.0
1747          */
1748         public static final UnicodeBlock SUNDANESE = new UnicodeBlock("SUNDANESE", SUNDANESE_ID); /* [1B80] */
1749
1750         /**
1751          * @stable ICU 4.0
1752          */
1753         public static final UnicodeBlock LEPCHA = new UnicodeBlock("LEPCHA", LEPCHA_ID); /* [1C00] */
1754
1755         /**
1756          * @stable ICU 4.0
1757          */
1758         public static final UnicodeBlock OL_CHIKI = new UnicodeBlock("OL_CHIKI", OL_CHIKI_ID); /* [1C50] */
1759
1760         /**
1761          * @stable ICU 4.0
1762          */
1763         public static final UnicodeBlock CYRILLIC_EXTENDED_A = new UnicodeBlock("CYRILLIC_EXTENDED_A", CYRILLIC_EXTENDED_A_ID); /* [2DE0] */
1764
1765         /**
1766          * @stable ICU 4.0
1767          */
1768         public static final UnicodeBlock VAI = new UnicodeBlock("VAI", VAI_ID); /* [A500] */
1769
1770         /**
1771          * @stable ICU 4.0
1772          */
1773         public static final UnicodeBlock CYRILLIC_EXTENDED_B = new UnicodeBlock("CYRILLIC_EXTENDED_B", CYRILLIC_EXTENDED_B_ID); /* [A640] */
1774
1775         /**
1776          * @stable ICU 4.0
1777          */
1778         public static final UnicodeBlock SAURASHTRA = new UnicodeBlock("SAURASHTRA", SAURASHTRA_ID); /* [A880] */
1779
1780         /**
1781          * @stable ICU 4.0
1782          */
1783         public static final UnicodeBlock KAYAH_LI = new UnicodeBlock("KAYAH_LI", KAYAH_LI_ID); /* [A900] */
1784
1785         /**
1786          * @stable ICU 4.0
1787          */
1788         public static final UnicodeBlock REJANG = new UnicodeBlock("REJANG", REJANG_ID); /* [A930] */
1789
1790         /**
1791          * @stable ICU 4.0
1792          */
1793         public static final UnicodeBlock CHAM = new UnicodeBlock("CHAM", CHAM_ID); /* [AA00] */
1794
1795         /**
1796          * @stable ICU 4.0
1797          */
1798         public static final UnicodeBlock ANCIENT_SYMBOLS = new UnicodeBlock("ANCIENT_SYMBOLS", ANCIENT_SYMBOLS_ID); /* [10190] */
1799
1800         /**
1801          * @stable ICU 4.0
1802          */
1803         public static final UnicodeBlock PHAISTOS_DISC = new UnicodeBlock("PHAISTOS_DISC", PHAISTOS_DISC_ID); /* [101D0] */
1804
1805         /**
1806          * @stable ICU 4.0
1807          */
1808         public static final UnicodeBlock LYCIAN = new UnicodeBlock("LYCIAN", LYCIAN_ID); /* [10280] */
1809
1810         /**
1811          * @stable ICU 4.0
1812          */
1813         public static final UnicodeBlock CARIAN = new UnicodeBlock("CARIAN", CARIAN_ID); /* [102A0] */
1814
1815         /**
1816          * @stable ICU 4.0
1817          */
1818         public static final UnicodeBlock LYDIAN = new UnicodeBlock("LYDIAN", LYDIAN_ID); /* [10920] */
1819
1820         /**
1821          * @stable ICU 4.0
1822          */
1823         public static final UnicodeBlock MAHJONG_TILES = new UnicodeBlock("MAHJONG_TILES", MAHJONG_TILES_ID); /* [1F000] */
1824
1825         /**
1826          * @stable ICU 4.0
1827          */
1828         public static final UnicodeBlock DOMINO_TILES = new UnicodeBlock("DOMINO_TILES", DOMINO_TILES_ID); /* [1F030] */
1829         /**
1830          * @stable ICU 2.4
1831          */
1832         public static final UnicodeBlock INVALID_CODE
1833             = new UnicodeBlock("INVALID_CODE", INVALID_CODE_ID);
1834
1835         // public methods --------------------------------------------------
1836
1837         /**
1838          * Gets the only instance of the UnicodeBlock with the argument ID.
1839          * If no such ID exists, a INVALID_CODE UnicodeBlock will be returned.
1840          * @param id UnicodeBlock ID
1841          * @return the only instance of the UnicodeBlock with the argument ID
1842          *         if it exists, otherwise a INVALID_CODE UnicodeBlock will be
1843          *         returned.
1844          * @stable ICU 2.4
1845          */
1846         public static UnicodeBlock getInstance(int id)
1847         {
1848             if (id >= 0 && id < BLOCKS_.length) {
1849                 return BLOCKS_[id];
1850             }
1851             return INVALID_CODE;
1852         }
1853
1854         /**
1855          * Returns the Unicode allocation block that contains the code point,
1856          * or null if the code point is not a member of a defined block.
1857          * @param ch code point to be tested
1858          * @return the Unicode allocation block that contains the code point
1859          * @stable ICU 2.4
1860          */
1861         public static UnicodeBlock of(int ch)
1862         {
1863             if (ch > MAX_VALUE) {
1864                 return INVALID_CODE;
1865             }
1866
1867             return UnicodeBlock.getInstance((PROPERTY_.getAdditional(ch, 0)
1868                          & BLOCK_MASK_) >> BLOCK_SHIFT_);
1869         }
1870
1871         /**
1872          * Internal function returning of(ch).getID().
1873          *
1874          * @param ch
1875          * @return numeric block value
1876          * @internal
1877          */
1878         static int idOf(int ch) {
1879             if (ch < 0 || ch > MAX_VALUE) {
1880                 return -1;
1881             }
1882
1883             return (PROPERTY_.getAdditional(ch, 0) & BLOCK_MASK_) >> BLOCK_SHIFT_;
1884         }
1885
1886         /**
1887          * Cover the JDK 1.5 API.  Return the Unicode block with the
1888          * given name.  <br/><b>Note</b>: Unlike JDK 1.5, this only matches
1889          * against the official UCD name and the Java block name
1890          * (ignoring case).
1891          * @param blockName the name of the block to match
1892          * @return the UnicodeBlock with that name
1893          * @throws IllegalArgumentException if the blockName could not be matched
1894          * @stable ICU 3.0
1895          */
1896         public static final UnicodeBlock forName(String blockName) {
1897             Map m = null;
1898             if (mref != null) {
1899                 m = (Map)mref.get();
1900             }
1901             if (m == null) {
1902                 m = new HashMap(BLOCKS_.length);
1903                 for (int i = 0; i < BLOCKS_.length; ++i) {
1904                     UnicodeBlock b = BLOCKS_[i];
1905                     String name = trimBlockName(getPropertyValueName(UProperty.BLOCK, b.getID(), UProperty.NameChoice.LONG));
1906                     m.put(name, b);
1907                 }
1908                 mref = new SoftReference(m);
1909             }
1910             UnicodeBlock b = (UnicodeBlock)m.get(trimBlockName(blockName));
1911             if (b == null) {
1912                 throw new IllegalArgumentException();
1913             }
1914             return b;
1915         }
1916         private static SoftReference mref;
1917
1918         private static String trimBlockName(String name) {
1919             String upper = name.toUpperCase();
1920             StringBuffer result = new StringBuffer(upper.length());
1921             for (int i = 0; i < upper.length(); i++) {
1922                 char c = upper.charAt(i);
1923                 if (c != ' ' && c != '_' && c != '-') {
1924                     result.append(c);
1925                 }
1926             }
1927             return result.toString();
1928         }
1929
1930         /**
1931          * Returns the type ID of this Unicode block
1932          * @return integer type ID of this Unicode block
1933          * @stable ICU 2.4
1934          */
1935         public int getID()
1936         {
1937             return m_id_;
1938         }
1939
1940         // private data members ---------------------------------------------
1941
1942         /**
1943          * Array of UnicodeBlocks, for easy access in getInstance(int)
1944          */
1945         private final static UnicodeBlock BLOCKS_[] = {
1946             NO_BLOCK, BASIC_LATIN,
1947             LATIN_1_SUPPLEMENT, LATIN_EXTENDED_A,
1948             LATIN_EXTENDED_B, IPA_EXTENSIONS,
1949             SPACING_MODIFIER_LETTERS, COMBINING_DIACRITICAL_MARKS,
1950             GREEK, CYRILLIC,
1951             ARMENIAN, HEBREW,
1952             ARABIC, SYRIAC,
1953             THAANA, DEVANAGARI,
1954             BENGALI, GURMUKHI,
1955             GUJARATI, ORIYA,
1956             TAMIL, TELUGU,
1957             KANNADA, MALAYALAM,
1958             SINHALA, THAI,
1959             LAO, TIBETAN,
1960             MYANMAR, GEORGIAN,
1961             HANGUL_JAMO, ETHIOPIC,
1962             CHEROKEE, UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS,
1963             OGHAM, RUNIC,
1964             KHMER, MONGOLIAN,
1965             LATIN_EXTENDED_ADDITIONAL, GREEK_EXTENDED,
1966             GENERAL_PUNCTUATION, SUPERSCRIPTS_AND_SUBSCRIPTS,
1967             CURRENCY_SYMBOLS, COMBINING_MARKS_FOR_SYMBOLS,
1968             LETTERLIKE_SYMBOLS, NUMBER_FORMS,
1969             ARROWS, MATHEMATICAL_OPERATORS,
1970             MISCELLANEOUS_TECHNICAL, CONTROL_PICTURES,
1971             OPTICAL_CHARACTER_RECOGNITION, ENCLOSED_ALPHANUMERICS,
1972             BOX_DRAWING, BLOCK_ELEMENTS,
1973             GEOMETRIC_SHAPES, MISCELLANEOUS_SYMBOLS,
1974             DINGBATS, BRAILLE_PATTERNS,
1975             CJK_RADICALS_SUPPLEMENT, KANGXI_RADICALS,
1976             IDEOGRAPHIC_DESCRIPTION_CHARACTERS, CJK_SYMBOLS_AND_PUNCTUATION,
1977             HIRAGANA, KATAKANA,
1978             BOPOMOFO, HANGUL_COMPATIBILITY_JAMO,
1979             KANBUN, BOPOMOFO_EXTENDED,
1980             ENCLOSED_CJK_LETTERS_AND_MONTHS, CJK_COMPATIBILITY,
1981             CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A, CJK_UNIFIED_IDEOGRAPHS,
1982             YI_SYLLABLES, YI_RADICALS,
1983             HANGUL_SYLLABLES, HIGH_SURROGATES,
1984             HIGH_PRIVATE_USE_SURROGATES, LOW_SURROGATES,
1985             PRIVATE_USE_AREA, CJK_COMPATIBILITY_IDEOGRAPHS,
1986             ALPHABETIC_PRESENTATION_FORMS, ARABIC_PRESENTATION_FORMS_A,
1987             COMBINING_HALF_MARKS, CJK_COMPATIBILITY_FORMS,
1988             SMALL_FORM_VARIANTS, ARABIC_PRESENTATION_FORMS_B,
1989             SPECIALS, HALFWIDTH_AND_FULLWIDTH_FORMS,
1990             OLD_ITALIC, GOTHIC,
1991             DESERET, BYZANTINE_MUSICAL_SYMBOLS,
1992             MUSICAL_SYMBOLS, MATHEMATICAL_ALPHANUMERIC_SYMBOLS,
1993             CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B,
1994             CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT,
1995             TAGS, CYRILLIC_SUPPLEMENT,
1996             TAGALOG, HANUNOO,
1997             BUHID, TAGBANWA,
1998             MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A, SUPPLEMENTAL_ARROWS_A,
1999             SUPPLEMENTAL_ARROWS_B, MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B,
2000             SUPPLEMENTAL_MATHEMATICAL_OPERATORS,
2001             KATAKANA_PHONETIC_EXTENSIONS,
2002             VARIATION_SELECTORS, SUPPLEMENTARY_PRIVATE_USE_AREA_A,
2003             SUPPLEMENTARY_PRIVATE_USE_AREA_B,
2004             LIMBU, TAI_LE, KHMER_SYMBOLS, PHONETIC_EXTENSIONS,
2005             MISCELLANEOUS_SYMBOLS_AND_ARROWS, YIJING_HEXAGRAM_SYMBOLS,
2006             LINEAR_B_SYLLABARY, LINEAR_B_IDEOGRAMS, AEGEAN_NUMBERS,
2007             UGARITIC, SHAVIAN, OSMANYA, CYPRIOT_SYLLABARY,
2008             TAI_XUAN_JING_SYMBOLS, VARIATION_SELECTORS_SUPPLEMENT,
2009
2010             /* New blocks in Unicode 4.1 */
2011             ANCIENT_GREEK_MUSICAL_NOTATION,
2012             ANCIENT_GREEK_NUMBERS,
2013             ARABIC_SUPPLEMENT,
2014             BUGINESE,
2015             CJK_STROKES,
2016             COMBINING_DIACRITICAL_MARKS_SUPPLEMENT,
2017             COPTIC,
2018             ETHIOPIC_EXTENDED,
2019             ETHIOPIC_SUPPLEMENT,
2020             GEORGIAN_SUPPLEMENT,
2021             GLAGOLITIC,
2022             KHAROSHTHI,
2023             MODIFIER_TONE_LETTERS,
2024             NEW_TAI_LUE,
2025             OLD_PERSIAN,
2026             PHONETIC_EXTENSIONS_SUPPLEMENT,
2027             SUPPLEMENTAL_PUNCTUATION,
2028             SYLOTI_NAGRI,
2029             TIFINAGH,
2030             VERTICAL_FORMS,
2031             NKO,
2032             BALINESE,
2033             LATIN_EXTENDED_C,
2034             LATIN_EXTENDED_D,
2035             PHAGS_PA,
2036             PHOENICIAN,
2037             CUNEIFORM,
2038             CUNEIFORM_NUMBERS_AND_PUNCTUATION,
2039             COUNTING_ROD_NUMERALS,
2040
2041             /* New blocks in Unicode 5.8 */
2042             SUNDANESE,
2043             LEPCHA,
2044             OL_CHIKI,
2045             CYRILLIC_EXTENDED_A,
2046             VAI,
2047             CYRILLIC_EXTENDED_B,
2048             SAURASHTRA,
2049             KAYAH_LI,
2050             REJANG,
2051             CHAM,
2052             ANCIENT_SYMBOLS,
2053             PHAISTOS_DISC,
2054             LYCIAN,
2055             CARIAN,
2056             LYDIAN,
2057             MAHJONG_TILES,
2058             DOMINO_TILES,
2059         };
2060
2061         static {
2062             if (COUNT!=BLOCKS_.length) {
2063                 throw new java.lang.IllegalStateException("UnicodeBlock fields are inconsistent!");
2064             }
2065         }
2066         /**
2067          * Identification code for this UnicodeBlock
2068          */
2069         private int m_id_;
2070
2071         // private constructor ----------------------------------------------
2072
2073         /**
2074          * UnicodeBlock constructor
2075          * @param name name of this UnicodeBlock
2076          * @param id unique id of this UnicodeBlock
2077          * @exception NullPointerException if name is <code>null</code>
2078          */
2079         private UnicodeBlock(String name, int id)
2080         {
2081             super(name);
2082             m_id_ = id;
2083         }
2084     }
2085
2086     /**
2087      * East Asian Width constants.
2088      * @see UProperty#EAST_ASIAN_WIDTH
2089      * @see UCharacter#getIntPropertyValue
2090      * @stable ICU 2.4
2091      */
2092     public static interface EastAsianWidth
2093     {
2094         /**
2095          * @stable ICU 2.4
2096          */
2097         public static final int NEUTRAL = 0;
2098         /**
2099          * @stable ICU 2.4
2100          */
2101         public static final int AMBIGUOUS = 1;
2102         /**
2103          * @stable ICU 2.4
2104          */
2105         public static final int HALFWIDTH = 2;
2106         /**
2107          * @stable ICU 2.4
2108          */
2109         public static final int FULLWIDTH = 3;
2110         /**
2111          * @stable ICU 2.4
2112          */
2113         public static final int NARROW = 4;
2114         /**
2115          * @stable ICU 2.4
2116          */
2117         public static final int WIDE = 5;
2118         /**
2119          * @stable ICU 2.4
2120          */
2121         public static final int COUNT = 6;
2122     }
2123
2124     /**
2125      * Decomposition Type constants.
2126      * @see UProperty#DECOMPOSITION_TYPE
2127      * @stable ICU 2.4
2128      */
2129     public static interface DecompositionType
2130     {
2131         /**
2132          * @stable ICU 2.4
2133          */
2134         public static final int NONE = 0;
2135         /**
2136          * @stable ICU 2.4
2137          */
2138         public static final int CANONICAL = 1;
2139         /**
2140          * @stable ICU 2.4
2141          */
2142         public static final int COMPAT = 2;
2143         /**
2144          * @stable ICU 2.4
2145          */
2146         public static final int CIRCLE = 3;
2147         /**
2148          * @stable ICU 2.4
2149          */
2150         public static final int FINAL = 4;
2151         /**
2152          * @stable ICU 2.4
2153          */
2154         public static final int FONT = 5;
2155         /**
2156          * @stable ICU 2.4
2157          */
2158         public static final int FRACTION = 6;
2159         /**
2160          * @stable ICU 2.4
2161          */
2162         public static final int INITIAL = 7;
2163         /**
2164          * @stable ICU 2.4
2165          */
2166         public static final int ISOLATED = 8;
2167         /**
2168          * @stable ICU 2.4
2169          */
2170         public static final int MEDIAL = 9;
2171         /**
2172          * @stable ICU 2.4
2173          */
2174         public static final int NARROW = 10;
2175         /**
2176          * @stable ICU 2.4
2177          */
2178         public static final int NOBREAK = 11;
2179         /**
2180          * @stable ICU 2.4
2181          */
2182         public static final int SMALL = 12;
2183         /**
2184          * @stable ICU 2.4
2185          */
2186         public static final int SQUARE = 13;
2187         /**
2188          * @stable ICU 2.4
2189          */
2190         public static final int SUB = 14;
2191         /**
2192          * @stable ICU 2.4
2193          */
2194         public static final int SUPER = 15;
2195         /**
2196          * @stable ICU 2.4
2197          */
2198         public static final int VERTICAL = 16;
2199         /**
2200          * @stable ICU 2.4
2201          */
2202         public static final int WIDE = 17;
2203         /**
2204          * @stable ICU 2.4
2205          */
2206         public static final int COUNT = 18;
2207     }
2208
2209     /**
2210      * Joining Type constants.
2211      * @see UProperty#JOINING_TYPE
2212      * @stable ICU 2.4
2213      */
2214     public static interface JoiningType
2215     {
2216         /**
2217          * @stable ICU 2.4
2218          */
2219         public static final int NON_JOINING = 0;
2220         /**
2221          * @stable ICU 2.4
2222          */
2223         public static final int JOIN_CAUSING = 1;
2224         /**
2225          * @stable ICU 2.4
2226          */
2227         public static final int DUAL_JOINING = 2;
2228         /**
2229          * @stable ICU 2.4
2230          */
2231         public static final int LEFT_JOINING = 3;
2232         /**
2233          * @stable ICU 2.4
2234          */
2235         public static final int RIGHT_JOINING = 4;
2236         /**
2237          * @stable ICU 2.4
2238          */
2239         public static final int TRANSPARENT = 5;
2240         /**
2241          * @stable ICU 2.4
2242          */
2243         public static final int COUNT = 6;
2244     }
2245
2246     /**
2247      * Joining Group constants.
2248      * @see UProperty#JOINING_GROUP
2249      * @stable ICU 2.4
2250      */
2251     public static interface JoiningGroup
2252     {
2253         /**
2254          * @stable ICU 2.4
2255          */
2256         public static final int NO_JOINING_GROUP = 0;
2257         /**
2258          * @stable ICU 2.4
2259          */
2260         public static final int AIN = 1;
2261         /**
2262          * @stable ICU 2.4
2263          */
2264         public static final int ALAPH = 2;
2265         /**
2266          * @stable ICU 2.4
2267          */
2268         public static final int ALEF = 3;
2269         /**
2270          * @stable ICU 2.4
2271          */
2272         public static final int BEH = 4;
2273         /**
2274          * @stable ICU 2.4
2275          */
2276         public static final int BETH = 5;
2277         /**
2278          * @stable ICU 2.4
2279          */
2280         public static final int DAL = 6;
2281         /**
2282          * @stable ICU 2.4
2283          */
2284         public static final int DALATH_RISH = 7;
2285         /**
2286          * @stable ICU 2.4
2287          */
2288         public static final int E = 8;
2289         /**
2290          * @stable ICU 2.4
2291          */
2292         public static final int FEH = 9;
2293         /**
2294          * @stable ICU 2.4
2295          */
2296         public static final int FINAL_SEMKATH = 10;
2297         /**
2298          * @stable ICU 2.4
2299          */
2300         public static final int GAF = 11;
2301         /**
2302          * @stable ICU 2.4
2303          */
2304         public static final int GAMAL = 12;
2305         /**
2306          * @stable ICU 2.4
2307          */
2308         public static final int HAH = 13;
2309         /**
2310          * @stable ICU 2.4
2311          */
2312         public static final int HAMZA_ON_HEH_GOAL = 14;
2313         /**
2314          * @stable ICU 2.4
2315          */
2316         public static final int HE = 15;
2317         /**
2318          * @stable ICU 2.4
2319          */
2320         public static final int HEH = 16;
2321         /**
2322          * @stable ICU 2.4
2323          */
2324         public static final int HEH_GOAL = 17;
2325         /**
2326          * @stable ICU 2.4
2327          */
2328         public static final int HETH = 18;
2329         /**
2330          * @stable ICU 2.4
2331          */
2332         public static final int KAF = 19;
2333         /**
2334          * @stable ICU 2.4
2335          */
2336         public static final int KAPH = 20;
2337         /**
2338          * @stable ICU 2.4
2339          */
2340         public static final int KNOTTED_HEH = 21;
2341         /**
2342          * @stable ICU 2.4
2343          */
2344         public static final int LAM = 22;
2345         /**
2346          * @stable ICU 2.4
2347          */
2348         public static final int LAMADH = 23;
2349         /**
2350          * @stable ICU 2.4
2351          */
2352         public static final int MEEM = 24;
2353         /**
2354          * @stable ICU 2.4
2355          */
2356         public static final int MIM = 25;
2357         /**
2358          * @stable ICU 2.4
2359          */
2360         public static final int NOON = 26;
2361         /**
2362          * @stable ICU 2.4
2363          */
2364         public static final int NUN = 27;
2365         /**
2366          * @stable ICU 2.4
2367          */
2368         public static final int PE = 28;
2369         /**
2370          * @stable ICU 2.4
2371          */
2372         public static final int QAF = 29;
2373         /**
2374          * @stable ICU 2.4
2375          */
2376         public static final int QAPH = 30;
2377         /**
2378          * @stable ICU 2.4
2379          */
2380         public static final int REH = 31;
2381         /**
2382          * @stable ICU 2.4
2383          */
2384         public static final int REVERSED_PE = 32;
2385         /**
2386          * @stable ICU 2.4
2387          */
2388         public static final int SAD = 33;
2389         /**
2390          * @stable ICU 2.4
2391          */
2392         public static final int SADHE = 34;
2393         /**
2394          * @stable ICU 2.4
2395          */
2396         public static final int SEEN = 35;
2397         /**
2398          * @stable ICU 2.4
2399          */
2400         public static final int SEMKATH = 36;
2401         /**
2402          * @stable ICU 2.4
2403          */
2404         public static final int SHIN = 37;
2405         /**
2406          * @stable ICU 2.4
2407          */
2408         public static final int SWASH_KAF = 38;
2409         /**
2410          * @stable ICU 2.4
2411          */
2412         public static final int SYRIAC_WAW = 39;
2413         /**
2414          * @stable ICU 2.4
2415          */
2416         public static final int TAH = 40;
2417         /**
2418          * @stable ICU 2.4
2419          */
2420         public static final int TAW = 41;
2421         /**
2422          * @stable ICU 2.4
2423          */
2424         public static final int TEH_MARBUTA = 42;
2425         /**
2426          * @stable ICU 2.4
2427          */
2428         public static final int TETH = 43;
2429         /**
2430          * @stable ICU 2.4
2431          */
2432         public static final int WAW = 44;
2433         /**
2434          * @stable ICU 2.4
2435          */
2436         public static final int YEH = 45;
2437         /**
2438          * @stable ICU 2.4
2439          */
2440         public static final int YEH_BARREE = 46;
2441         /**
2442          * @stable ICU 2.4
2443          */
2444         public static final int YEH_WITH_TAIL = 47;
2445         /**
2446          * @stable ICU 2.4
2447          */
2448         public static final int YUDH = 48;
2449         /**
2450          * @stable ICU 2.4
2451          */
2452         public static final int YUDH_HE = 49;
2453         /**
2454          * @stable ICU 2.4
2455          */
2456         public static final int ZAIN = 50;
2457         /**
2458          * @stable ICU 2.6
2459          */
2460         public static final int FE = 51;
2461         /**
2462          * @stable ICU 2.6
2463          */
2464         public static final int KHAPH = 52;
2465         /**
2466          * @stable ICU 2.6
2467          */
2468         public static final int ZHAIN = 53;
2469         /**
2470          * @stable ICU 4.0
2471          */
2472         public static final int BURUSHASKI_YEH_BARREE = 54;
2473         /**
2474          * @stable ICU 4.0
2475          */
2476         public static final int COUNT = 55;
2477     }
2478
2479     /**
2480      * Grapheme Cluster Break constants.
2481      * @see UProperty#GRAPHEME_CLUSTER_BREAK
2482      * @stable ICU 3.4
2483      */
2484     public static interface GraphemeClusterBreak {
2485         /**
2486          * @stable ICU 3.4
2487          */
2488         public static final int OTHER = 0;
2489         /**
2490          * @stable ICU 3.4
2491          */
2492         public static final int CONTROL = 1;
2493         /**
2494          * @stable ICU 3.4
2495          */
2496         public static final int CR = 2;
2497         /**
2498          * @stable ICU 3.4
2499          */
2500         public static final int EXTEND = 3;
2501         /**
2502          * @stable ICU 3.4
2503          */
2504         public static final int L = 4;
2505         /**
2506          * @stable ICU 3.4
2507          */
2508         public static final int LF = 5;
2509         /**
2510          * @stable ICU 3.4
2511          */
2512         public static final int LV = 6;
2513         /**
2514          * @stable ICU 3.4
2515          */
2516         public static final int LVT = 7;
2517         /**
2518          * @stable ICU 3.4
2519          */
2520         public static final int T = 8;
2521         /**
2522          * @stable ICU 3.4
2523          */
2524         public static final int V = 9;
2525         /**
2526          * @stable ICU 4.0
2527          */
2528         public static final int SPACING_MARK = 10;
2529         /**
2530          * @stable ICU 4.0
2531          */
2532         public static final int PREPEND = 11;
2533         /**
2534          * @stable ICU 3.4
2535          */
2536         public static final int COUNT = 12;
2537     }
2538
2539     /**
2540      * Word Break constants.
2541      * @see UProperty#WORD_BREAK
2542      * @stable ICU 3.4
2543      */
2544     public static interface WordBreak {
2545         /**
2546          * @stable ICU 3.8
2547          */
2548         public static final int OTHER = 0;
2549         /**
2550          * @stable ICU 3.8
2551          */
2552         public static final int ALETTER = 1;
2553         /**
2554          * @stable ICU 3.8
2555          */
2556         public static final int FORMAT = 2;
2557         /**
2558          * @stable ICU 3.8
2559          */
2560         public static final int KATAKANA = 3;
2561         /**
2562          * @stable ICU 3.8
2563          */
2564         public static final int MIDLETTER = 4;
2565         /**
2566          * @stable ICU 3.8
2567          */
2568         public static final int MIDNUM = 5;
2569         /**
2570          * @stable ICU 3.8
2571          */
2572         public static final int NUMERIC = 6;
2573         /**
2574          * @stable ICU 3.8
2575          */
2576         public static final int EXTENDNUMLET = 7;
2577         /**
2578          * @stable ICU 4.0
2579          */
2580         public static final int CR = 8;
2581         /**
2582          * @stable ICU 4.0
2583          */
2584         public static final int EXTEND = 9;
2585         /**
2586          * @stable ICU 4.0
2587          */
2588         public static final int LF = 10;
2589         /**
2590          * @stable ICU 4.0
2591          */
2592         public static final int MIDNUMLET = 11;
2593         /**
2594          * @stable ICU 4.0
2595          */
2596         public static final int NEWLINE = 12;
2597         /**
2598          * @stable ICU 4.0
2599          */
2600         public static final int COUNT = 13;
2601     }
2602
2603     /**
2604      * Sentence Break constants.
2605      * @see UProperty#SENTENCE_BREAK
2606      * @stable ICU 3.4
2607      */
2608     public static interface SentenceBreak {
2609         /**
2610          * @stable ICU 3.8
2611          */
2612         public static final int OTHER = 0;
2613         /**
2614          * @stable ICU 3.8
2615          */
2616         public static final int ATERM = 1;
2617         /**
2618          * @stable ICU 3.8
2619          */
2620         public static final int CLOSE = 2;
2621         /**
2622          * @stable ICU 3.8
2623          */
2624         public static final int FORMAT = 3;
2625         /**
2626          * @stable ICU 3.8
2627          */
2628         public static final int LOWER = 4;
2629         /**
2630          * @stable ICU 3.8
2631          */
2632         public static final int NUMERIC = 5;
2633         /**
2634          * @stable ICU 3.8
2635          */
2636         public static final int OLETTER = 6;
2637         /**
2638          * @stable ICU 3.8
2639          */
2640         public static final int SEP = 7;
2641         /**
2642          * @stable ICU 3.8
2643          */
2644         public static final int SP = 8;
2645         /**
2646          * @stable ICU 3.8
2647          */
2648         public static final int STERM = 9;
2649         /**
2650          * @stable ICU 3.8
2651          */
2652         public static final int UPPER = 10;
2653         /**
2654          * @stable ICU 4.0
2655          */
2656         public static final int CR = 11;
2657         /**
2658          * @stable ICU 4.0
2659          */
2660         public static final int EXTEND = 12;
2661         /**
2662          * @stable ICU 4.0
2663          */
2664         public static final int LF = 13;
2665         /**
2666          * @stable ICU 4.0
2667          */
2668         public static final int SCONTINUE = 14;
2669         /**
2670          * @stable ICU 4.0
2671          */
2672         public static final int COUNT = 15;
2673     }
2674
2675     /**
2676      * Line Break constants.
2677      * @see UProperty#LINE_BREAK
2678      * @stable ICU 2.4
2679      */
2680     public static interface LineBreak
2681     {
2682         /**
2683          * @stable ICU 2.4
2684          */
2685         public static final int UNKNOWN = 0;
2686         /**
2687          * @stable ICU 2.4
2688          */
2689         public static final int AMBIGUOUS = 1;
2690         /**
2691          * @stable ICU 2.4
2692          */
2693         public static final int ALPHABETIC = 2;
2694         /**
2695          * @stable ICU 2.4
2696          */
2697         public static final int BREAK_BOTH = 3;
2698         /**
2699          * @stable ICU 2.4
2700          */
2701         public static final int BREAK_AFTER = 4;
2702         /**
2703          * @stable ICU 2.4
2704          */
2705         public static final int BREAK_BEFORE = 5;
2706         /**
2707          * @stable ICU 2.4
2708          */
2709         public static final int MANDATORY_BREAK = 6;
2710         /**
2711          * @stable ICU 2.4
2712          */
2713         public static final int CONTINGENT_BREAK = 7;
2714         /**
2715          * @stable ICU 2.4
2716          */
2717         public static final int CLOSE_PUNCTUATION = 8;
2718         /**
2719          * @stable ICU 2.4
2720          */
2721         public static final int COMBINING_MARK = 9;
2722         /**
2723          * @stable ICU 2.4
2724          */
2725         public static final int CARRIAGE_RETURN = 10;
2726         /**
2727          * @stable ICU 2.4
2728          */
2729         public static final int EXCLAMATION = 11;
2730         /**
2731          * @stable ICU 2.4
2732          */
2733         public static final int GLUE = 12;
2734         /**
2735          * @stable ICU 2.4
2736          */
2737         public static final int HYPHEN = 13;
2738         /**
2739          * @stable ICU 2.4
2740          */
2741         public static final int IDEOGRAPHIC = 14;
2742         /**
2743          * @see #INSEPARABLE
2744          * @stable ICU 2.4
2745          */
2746         public static final int INSEPERABLE = 15;
2747         /**
2748          * Renamed from the misspelled "inseperable" in Unicode 4.0.1.
2749          * @stable ICU 3.0
2750          */
2751         public static final int INSEPARABLE = 15;
2752         /**
2753          * @stable ICU 2.4
2754          */
2755         public static final int INFIX_NUMERIC = 16;
2756         /**
2757          * @stable ICU 2.4
2758          */
2759         public static final int LINE_FEED = 17;
2760         /**
2761          * @stable ICU 2.4
2762          */
2763         public static final int NONSTARTER = 18;
2764         /**
2765          * @stable ICU 2.4
2766          */
2767         public static final int NUMERIC = 19;
2768         /**
2769          * @stable ICU 2.4
2770          */
2771         public static final int OPEN_PUNCTUATION = 20;
2772         /**
2773          * @stable ICU 2.4
2774          */
2775         public static final int POSTFIX_NUMERIC = 21;
2776         /**
2777          * @stable ICU 2.4
2778          */
2779         public static final int PREFIX_NUMERIC = 22;
2780         /**
2781          * @stable ICU 2.4
2782          */
2783         public static final int QUOTATION = 23;
2784         /**
2785          * @stable ICU 2.4
2786          */
2787         public static final int COMPLEX_CONTEXT = 24;
2788         /**
2789          * @stable ICU 2.4
2790          */
2791         public static final int SURROGATE = 25;
2792         /**
2793          * @stable ICU 2.4
2794          */
2795         public static final int SPACE = 26;
2796         /**
2797          * @stable ICU 2.4
2798          */
2799         public static final int BREAK_SYMBOLS = 27;
2800         /**
2801          * @stable ICU 2.4
2802          */
2803         public static final int ZWSPACE = 28;
2804
2805         /**
2806          * @stable ICU 2.6
2807          */
2808         public static final int NEXT_LINE = 29;       /*[NL]*/ /* from here on: new in Unicode 4/ICU 2.6 */
2809
2810         /**
2811          * @stable ICU 2.6
2812          */
2813         public static final int  WORD_JOINER = 30;      /*[WJ]*/
2814
2815         /* from here on: new in Unicode 4.1/ICU 3.4 */
2816
2817         /**
2818          * @stable ICU 3.4
2819          */
2820         public static final int  H2 = 31;
2821         /**
2822          * @stable ICU 3.4
2823          */
2824         public static final int  H3 = 32;
2825         /**
2826          * @stable ICU 3.4
2827          */
2828         public static final int  JL = 33;
2829         /**
2830          * @stable ICU 3.4
2831          */
2832         public static final int  JT = 34;
2833         /**
2834          * @stable ICU 3.4
2835          */
2836         public static final int  JV = 35;
2837
2838         /**
2839          * @stable ICU 2.4
2840          */
2841         public static final int COUNT = 36;
2842     }
2843
2844     /**
2845      * Numeric Type constants.
2846      * @see UProperty#NUMERIC_TYPE
2847      * @stable ICU 2.4
2848      */
2849     public static interface NumericType
2850     {
2851         /**
2852          * @stable ICU 2.4
2853          */
2854         public static final int NONE = 0;
2855         /**
2856          * @stable ICU 2.4
2857          */
2858         public static final int DECIMAL = 1;
2859         /**
2860          * @stable ICU 2.4
2861          */
2862         public static final int DIGIT = 2;
2863         /**
2864          * @stable ICU 2.4
2865          */
2866         public static final int NUMERIC = 3;
2867         /**
2868          * @stable ICU 2.4
2869          */
2870         public static final int COUNT = 4;
2871     }
2872
2873     /**
2874      * Hangul Syllable Type constants.
2875      *
2876      * @see UProperty#HANGUL_SYLLABLE_TYPE
2877      * @stable ICU 2.6
2878      */
2879     public static interface HangulSyllableType
2880     {
2881         /**
2882          * @stable ICU 2.6
2883          */
2884         public static final int NOT_APPLICABLE      = 0;   /*[NA]*/ /*See note !!*/
2885         /**
2886          * @stable ICU 2.6
2887          */
2888         public static final int LEADING_JAMO        = 1;   /*[L]*/
2889         /**
2890          * @stable ICU 2.6
2891          */
2892         public static final int VOWEL_JAMO          = 2;   /*[V]*/
2893         /**
2894          * @stable ICU 2.6
2895          */
2896         public static final int TRAILING_JAMO       = 3;   /*[T]*/
2897         /**
2898          * @stable ICU 2.6
2899          */
2900         public static final int LV_SYLLABLE         = 4;   /*[LV]*/
2901         /**
2902          * @stable ICU 2.6
2903          */
2904         public static final int LVT_SYLLABLE        = 5;   /*[LVT]*/
2905         /**
2906          * @stable ICU 2.6
2907          */
2908         public static final int COUNT               = 6;
2909     }
2910
2911     // public data members -----------------------------------------------
2912
2913     /**
2914      * The lowest Unicode code point value.
2915      * @stable ICU 2.1
2916      */
2917     public static final int MIN_VALUE = UTF16.CODEPOINT_MIN_VALUE;
2918
2919     /**
2920      * The highest Unicode code point value (scalar value) according to the
2921      * Unicode Standard.
2922      * This is a 21-bit value (21 bits, rounded up).<br>
2923      * Up-to-date Unicode implementation of java.lang.Character.MIN_VALUE
2924      * @stable ICU 2.1
2925      */
2926     public static final int MAX_VALUE = UTF16.CODEPOINT_MAX_VALUE;
2927
2928     /**
2929      * The minimum value for Supplementary code points
2930      * @stable ICU 2.1
2931      */
2932     public static final int SUPPLEMENTARY_MIN_VALUE =
2933         UTF16.SUPPLEMENTARY_MIN_VALUE;
2934
2935     /**
2936      * Unicode value used when translating into Unicode encoding form and there
2937      * is no existing character.
2938      * @stable ICU 2.1
2939      */
2940     public static final int REPLACEMENT_CHAR = '\uFFFD';
2941
2942     /**
2943      * Special value that is returned by getUnicodeNumericValue(int) when no
2944      * numeric value is defined for a code point.
2945      * @stable ICU 2.4
2946      * @see #getUnicodeNumericValue
2947      */
2948     public static final double NO_NUMERIC_VALUE = -123456789;
2949
2950     /**
2951      * Compatibility constant for Java Character's MIN_RADIX.
2952      * @stable ICU 3.4
2953      */
2954     public static final int MIN_RADIX = java.lang.Character.MIN_RADIX;
2955
2956     /**
2957      * Compatibility constant for Java Character's MAX_RADIX.
2958      * @stable ICU 3.4
2959      */
2960     public static final int MAX_RADIX = java.lang.Character.MAX_RADIX;
2961
2962     /**
2963      * Do not lowercase non-initial parts of words when titlecasing.
2964      * Option bit for titlecasing APIs that take an options bit set.
2965      *
2966      * By default, titlecasing will titlecase the first cased character
2967      * of a word and lowercase all other characters.
2968      * With this option, the other characters will not be modified.
2969      *
2970      * @see #toTitleCase
2971      * @stable ICU 3.8
2972      */
2973     public static final int TITLECASE_NO_LOWERCASE = 0x100;
2974
2975     /**
2976      * Do not adjust the titlecasing indexes from BreakIterator::next() indexes;
2977      * titlecase exactly the characters at breaks from the iterator.
2978      * Option bit for titlecasing APIs that take an options bit set.
2979      *
2980      * By default, titlecasing will take each break iterator index,
2981      * adjust it by looking for the next cased character, and titlecase that one.
2982      * Other characters are lowercased.
2983      *
2984      * This follows Unicode 4 & 5 section 3.13 Default Case Operations:
2985      *
2986      * R3  toTitlecase(X): Find the word boundaries based on Unicode Standard Annex
2987      * #29, "Text Boundaries." Between each pair of word boundaries, find the first
2988      * cased character F. If F exists, map F to default_title(F); then map each
2989      * subsequent character C to default_lower(C).
2990      *
2991      * @see #toTitleCase
2992      * @see #TITLECASE_NO_LOWERCASE
2993      * @stable ICU 3.8
2994      */
2995     public static final int TITLECASE_NO_BREAK_ADJUSTMENT = 0x200;
2996
2997     // public methods ----------------------------------------------------
2998
2999     /**
3000      * Retrieves the numeric value of a decimal digit code point.
3001      * <br>This method observes the semantics of
3002      * <code>java.lang.Character.digit()</code>.  Note that this
3003      * will return positive values for code points for which isDigit
3004      * returns false, just like java.lang.Character.
3005      * <br><em>Semantic Change:</em> In release 1.3.1 and
3006      * prior, this did not treat the European letters as having a
3007      * digit value, and also treated numeric letters and other numbers as
3008      * digits.
3009      * This has been changed to conform to the java semantics.
3010      * <br>A code point is a valid digit if and only if:
3011      * <ul>
3012      *   <li>ch is a decimal digit or one of the european letters, and
3013      *   <li>the value of ch is less than the specified radix.
3014      * </ul>
3015      * @param ch the code point to query
3016      * @param radix the radix
3017      * @return the numeric value represented by the code point in the
3018      * specified radix, or -1 if the code point is not a decimal digit
3019      * or if its value is too large for the radix
3020      * @stable ICU 2.1
3021      */
3022     public static int digit(int ch, int radix)
3023     {
3024         // when ch is out of bounds getProperty == 0
3025         int props = getProperty(ch);
3026         int value;
3027         if (getNumericType(props) == NumericType.DECIMAL) {
3028             value = UCharacterProperty.getUnsignedValue(props);
3029         } else {
3030             value = getEuropeanDigit(ch);
3031         }
3032         return (0 <= value && value < radix) ? value : -1;
3033     }
3034
3035     /**
3036      * Retrieves the numeric value of a decimal digit code point.
3037      * <br>This is a convenience overload of <code>digit(int, int)</code>
3038      * that provides a decimal radix.
3039      * <br><em>Semantic Change:</em> In release 1.3.1 and prior, this
3040      * treated numeric letters and other numbers as digits.  This has
3041      * been changed to conform to the java semantics.
3042      * @param ch the code point to query
3043      * @return the numeric value represented by the code point,
3044      * or -1 if the code point is not a decimal digit or if its
3045      * value is too large for a decimal radix
3046      * @stable ICU 2.1
3047      */
3048     public static int digit(int ch)
3049     {
3050         int props = getProperty(ch);
3051         if (getNumericType(props) == NumericType.DECIMAL) {
3052             return UCharacterProperty.getUnsignedValue(props);
3053         } else {
3054             return -1;
3055         }
3056     }
3057
3058     /**
3059      * Returns the numeric value of the code point as a nonnegative
3060      * integer.
3061      * <br>If the code point does not have a numeric value, then -1 is returned.
3062      * <br>
3063      * If the code point has a numeric value that cannot be represented as a
3064      * nonnegative integer (for example, a fractional value), then -2 is
3065      * returned.
3066      * @param ch the code point to query
3067      * @return the numeric value of the code point, or -1 if it has no numeric
3068      * value, or -2 if it has a numeric value that cannot be represented as a
3069      * nonnegative integer
3070      * @stable ICU 2.1
3071      */
3072     public static int getNumericValue(int ch)
3073     {
3074         // slightly pruned version of getUnicodeNumericValue(), plus getEuropeanDigit()
3075         int props = PROPERTY_.getProperty(ch);
3076         int numericType = getNumericType(props);
3077
3078         if(numericType==0) {
3079             return getEuropeanDigit(ch);
3080         }
3081         if(numericType==UCharacterProperty.NT_FRACTION || numericType>=UCharacterProperty.NT_COUNT) {
3082             return -2;
3083         }
3084
3085         int numericValue = UCharacterProperty.getUnsignedValue(props);
3086
3087         if(numericType<NumericType.COUNT) {
3088             /* normal type, the value is stored directly */
3089             return numericValue;
3090         } else /* numericType==NT_LARGE */ {
3091             /* large value with exponent */
3092             long numValue;
3093             int mant, exp;
3094
3095             mant=numericValue>>LARGE_MANT_SHIFT;
3096             exp=numericValue&LARGE_EXP_MASK;
3097             if(mant==0) {
3098                 mant=1;
3099                 exp+=LARGE_EXP_OFFSET_EXTRA;
3100             } else if(mant>9) {
3101                 return -2; /* reserved mantissa value */
3102             } else {
3103                 exp+=LARGE_EXP_OFFSET;
3104             }
3105             if(exp>9) {
3106                 return -2;
3107             }
3108
3109             numValue=mant;
3110
3111             /* multiply by 10^exp without math.h */
3112             while(exp>=4) {
3113                 numValue*=10000.;
3114                 exp-=4;
3115             }
3116             switch(exp) {
3117             case 3:
3118                 numValue*=1000.;
3119                 break;
3120             case 2:
3121                 numValue*=100.;
3122                 break;
3123             case 1:
3124                 numValue*=10.;
3125                 break;
3126             case 0:
3127             default:
3128                 break;
3129             }
3130             if(numValue<=Integer.MAX_VALUE) {
3131                 return (int)numValue;
3132             } else {
3133                 return -2;
3134             }
3135         }
3136     }
3137
3138     /**
3139      * <p>Get the numeric value for a Unicode code point as defined in the
3140      * Unicode Character Database.</p>
3141      * <p>A "double" return type is necessary because some numeric values are
3142      * fractions, negative, or too large for int.</p>
3143      * <p>For characters without any numeric values in the Unicode Character
3144      * Database, this function will return NO_NUMERIC_VALUE.</p>
3145      * <p><em>API Change:</em> In release 2.2 and prior, this API has a
3146      * return type int and returns -1 when the argument ch does not have a
3147      * corresponding numeric value. This has been changed to synch with ICU4C
3148      * </p>
3149      * This corresponds to the ICU4C function u_getNumericValue.
3150      * @param ch Code point to get the numeric value for.
3151      * @return numeric value of ch, or NO_NUMERIC_VALUE if none is defined.
3152      * @stable ICU 2.4
3153      */
3154     public static double getUnicodeNumericValue(int ch)
3155     {
3156         // equivalent to c version double u_getNumericValue(UChar32 c)
3157         int props = PROPERTY_.getProperty(ch);
3158         int numericType = getNumericType(props);
3159
3160         if(numericType==0 || numericType>=UCharacterProperty.NT_COUNT) {
3161             return NO_NUMERIC_VALUE;
3162         }
3163
3164         int numericValue = UCharacterProperty.getUnsignedValue(props);
3165
3166         if(numericType<NumericType.COUNT) {
3167             /* normal type, the value is stored directly */
3168             return numericValue;
3169         } else if(numericType==UCharacterProperty.NT_FRACTION) {
3170             /* fraction value */
3171             int numerator, denominator;
3172
3173             numerator=numericValue>>FRACTION_NUM_SHIFT;
3174             denominator=(numericValue&FRACTION_DEN_MASK)+FRACTION_DEN_OFFSET;
3175
3176             if(numerator==0) {
3177                 numerator=-1;
3178             }
3179             return (double)numerator/(double)denominator;
3180         } else /* numericType==NT_LARGE */ {
3181             /* large value with exponent */
3182             double numValue;
3183             int mant, exp;
3184
3185             mant=numericValue>>LARGE_MANT_SHIFT;
3186             exp=numericValue&LARGE_EXP_MASK;
3187             if(mant==0) {
3188                 mant=1;
3189                 exp+=LARGE_EXP_OFFSET_EXTRA;
3190             } else if(mant>9) {
3191                 return NO_NUMERIC_VALUE; /* reserved mantissa value */
3192             } else {
3193                 exp+=LARGE_EXP_OFFSET;
3194             }
3195
3196             numValue=mant;
3197
3198             /* multiply by 10^exp without math.h */
3199             while(exp>=4) {
3200                 numValue*=10000.;
3201                 exp-=4;
3202             }
3203             switch(exp) {
3204             case 3:
3205                 numValue*=1000.;
3206                 break;
3207             case 2:
3208                 numValue*=100.;
3209                 break;
3210             case 1:
3211                 numValue*=10.;
3212                 break;
3213             case 0:
3214             default:
3215                 break;
3216             }
3217
3218             return numValue;
3219         }
3220     }
3221
3222     /**
3223      * Compatibility override of Java deprecated method.  This
3224      * method will always remain deprecated.  Delegates to
3225      * java.lang.Character.isSpace.
3226      * @param ch the code point
3227      * @return true if the code point is a space character as
3228      * defined by java.lang.Character.isSpace.
3229      * @deprecated ICU 3.4 (Java)
3230      */
3231     public static boolean isSpace(int ch) {
3232         return ch <= 0x20 &&
3233             (ch == 0x20 || ch == 0x09 || ch == 0x0a || ch == 0x0c || ch == 0x0d);
3234     }
3235
3236     /**
3237      * Returns a value indicating a code point's Unicode category.
3238      * Up-to-date Unicode implementation of java.lang.Character.getType()
3239      * except for the above mentioned code points that had their category
3240      * changed.<br>
3241      * Return results are constants from the interface
3242      * <a href=UCharacterCategory.html>UCharacterCategory</a><br>
3243      * <em>NOTE:</em> the UCharacterCategory values are <em>not</em> compatible with
3244      * those returned by java.lang.Character.getType.  UCharacterCategory values
3245      * match the ones used in ICU4C, while java.lang.Character type
3246      * values, though similar, skip the value 17.</p>
3247      * @param ch code point whose type is to be determined
3248      * @return category which is a value of UCharacterCategory
3249      * @stable ICU 2.1
3250      */
3251     public static int getType(int ch)
3252     {
3253         return getProperty(ch) & UCharacterProperty.TYPE_MASK;
3254     }
3255
3256     /**
3257      * Determines if a code point has a defined meaning in the up-to-date
3258      * Unicode standard.
3259      * E.g. supplementary code points though allocated space are not defined in
3260      * Unicode yet.<br>
3261      * Up-to-date Unicode implementation of java.lang.Character.isDefined()
3262      * @param ch code point to be determined if it is defined in the most
3263      *        current version of Unicode
3264      * @return true if this code point is defined in unicode
3265      * @stable ICU 2.1
3266      */
3267     public static boolean isDefined(int ch)
3268     {
3269         return getType(ch) != 0;
3270     }
3271
3272     /**
3273      * Determines if a code point is a Java digit.
3274      * <br>This method observes the semantics of
3275      * <code>java.lang.Character.isDigit()</code>. It returns true for decimal
3276      * digits only.
3277      * <br><em>Semantic Change:</em> In release 1.3.1 and prior, this treated
3278      * numeric letters and other numbers as digits.
3279      * This has been changed to conform to the java semantics.
3280      * @param ch code point to query
3281      * @return true if this code point is a digit
3282      * @stable ICU 2.1
3283      */
3284     public static boolean isDigit(int ch)
3285     {
3286         return getType(ch) == UCharacterCategory.DECIMAL_DIGIT_NUMBER;
3287     }
3288
3289     /**
3290      * Determines if the specified code point is an ISO control character.
3291      * A code point is considered to be an ISO control character if it is in
3292      * the range &#92u0000 through &#92u001F or in the range &#92u007F through
3293      * &#92u009F.<br>
3294      * Up-to-date Unicode implementation of java.lang.Character.isISOControl()
3295      * @param ch code point to determine if it is an ISO control character
3296      * @return true if code point is a ISO control character
3297      * @stable ICU 2.1
3298      */
3299     public static boolean isISOControl(int ch)
3300     {
3301         return ch >= 0 && ch <= APPLICATION_PROGRAM_COMMAND_ &&
3302             ((ch <= UNIT_SEPARATOR_) || (ch >= DELETE_));
3303     }
3304
3305     /**
3306      * Determines if the specified code point is a letter.
3307      * Up-to-date Unicode implementation of java.lang.Character.isLetter()
3308      * @param ch code point to determine if it is a letter
3309      * @return true if code point is a letter
3310      * @stable ICU 2.1
3311      */
3312     public static boolean isLetter(int ch)
3313     {
3314         // if props == 0, it will just fall through and return false
3315         return ((1 << getType(ch))
3316         & ((1 << UCharacterCategory.UPPERCASE_LETTER)
3317            | (1 << UCharacterCategory.LOWERCASE_LETTER)
3318            | (1 << UCharacterCategory.TITLECASE_LETTER)
3319            | (1 << UCharacterCategory.MODIFIER_LETTER)
3320            | (1 << UCharacterCategory.OTHER_LETTER))) != 0;
3321     }
3322
3323     /**
3324      * Determines if the specified code point is a letter or digit.
3325      * Note this method, unlike java.lang.Character does not regard the ascii
3326      * characters 'A' - 'Z' and 'a' - 'z' as digits.
3327      * @param ch code point to determine if it is a letter or a digit
3328      * @return true if code point is a letter or a digit
3329      * @stable ICU 2.1
3330      */
3331     public static boolean isLetterOrDigit(int ch)
3332     {
3333         return ((1 << getType(ch))
3334         & ((1 << UCharacterCategory.UPPERCASE_LETTER)
3335            | (1 << UCharacterCategory.LOWERCASE_LETTER)
3336            | (1 << UCharacterCategory.TITLECASE_LETTER)
3337            | (1 << UCharacterCategory.MODIFIER_LETTER)
3338            | (1 << UCharacterCategory.OTHER_LETTER)
3339            | (1 << UCharacterCategory.DECIMAL_DIGIT_NUMBER))) != 0;
3340     }
3341
3342     /**
3343      * Compatibility override of Java deprecated method.  This
3344      * method will always remain deprecated.  Delegates to
3345      * java.lang.Character.isJavaIdentifierStart.
3346      * @param cp the code point
3347      * @return true if the code point can start a java identifier.
3348      * @deprecated ICU 3.4 (Java)
3349      */
3350     public static boolean isJavaLetter(int cp) {
3351         return isJavaIdentifierStart(cp);
3352     }
3353
3354     /**
3355      * Compatibility override of Java deprecated method.  This
3356      * method will always remain deprecated.  Delegates to
3357      * java.lang.Character.isJavaIdentifierPart.
3358      * @param cp the code point
3359      * @return true if the code point can continue a java identifier.
3360      * @deprecated ICU 3.4 (Java)
3361      */
3362     public static boolean isJavaLetterOrDigit(int cp) {
3363         return isJavaIdentifierPart(cp);
3364     }
3365
3366     /**
3367      * Compatibility override of Java method, delegates to
3368      * java.lang.Character.isJavaIdentifierStart.
3369      * @param cp the code point
3370      * @return true if the code point can start a java identifier.
3371      * @stable ICU 3.4
3372      */
3373     public static boolean isJavaIdentifierStart(int cp) {
3374         // note, downcast to char for jdk 1.4 compatibility
3375         return java.lang.Character.isJavaIdentifierStart((char)cp);
3376     }
3377
3378     /**
3379      * Compatibility override of Java method, delegates to
3380      * java.lang.Character.isJavaIdentifierPart.
3381      * @param cp the code point
3382      * @return true if the code point can continue a java identifier.
3383      * @stable ICU 3.4
3384      */
3385     public static boolean isJavaIdentifierPart(int cp) {
3386         // note, downcast to char for jdk 1.4 compatibility
3387         return java.lang.Character.isJavaIdentifierPart((char)cp);
3388     }
3389
3390     /**
3391      * Determines if the specified code point is a lowercase character.
3392      * UnicodeData only contains case mappings for code points where they are
3393      * one-to-one mappings; it also omits information about context-sensitive
3394      * case mappings.<br> For more information about Unicode case mapping
3395      * please refer to the
3396      * <a href=http://www.unicode.org/unicode/reports/tr21/>Technical report
3397      * #21</a>.<br>
3398      * Up-to-date Unicode implementation of java.lang.Character.isLowerCase()
3399      * @param ch code point to determine if it is in lowercase
3400      * @return true if code point is a lowercase character
3401      * @stable ICU 2.1
3402      */
3403     public static boolean isLowerCase(int ch)
3404     {
3405         // if props == 0, it will just fall through and return false
3406         return getType(ch) == UCharacterCategory.LOWERCASE_LETTER;
3407     }
3408
3409     /**
3410      * Determines if the specified code point is a white space character.
3411      * A code point is considered to be an whitespace character if and only
3412      * if it satisfies one of the following criteria:
3413      * <ul>
3414      * <li> It is a Unicode space character (categories "Zs" or "Zl" or "Zp"), but is not
3415      *      also a no-break space (&#92u00A0 or &#92u2007 or &#92u202F).
3416      * <li> It is &#92u0009, HORIZONTAL TABULATION.
3417      * <li> It is &#92u000A, LINE FEED.
3418      * <li> It is &#92u000B, VERTICAL TABULATION.
3419      * <li> It is &#92u000C, FORM FEED.
3420      * <li> It is &#92u000D, CARRIAGE RETURN.
3421      * <li> It is &#92u001C, FILE SEPARATOR.
3422      * <li> It is &#92u001D, GROUP SEPARATOR.
3423      * <li> It is &#92u001E, RECORD SEPARATOR.
3424      * <li> It is &#92u001F, UNIT SEPARATOR.
3425      * </ul>
3426      *
3427      * This API tries to synch to the semantics of the Java API,
3428      * java.lang.Character.isWhitespace(), but it may not return
3429      * the exactly same results because of the Unicode version
3430      * difference.
3431      * @param ch code point to determine if it is a white space
3432      * @return true if the specified code point is a white space character
3433      * @stable ICU 2.1
3434      */
3435     public static boolean isWhitespace(int ch)
3436     {
3437         // exclude no-break spaces
3438         // if props == 0, it will just fall through and return false
3439         return ((1 << getType(ch)) &
3440                 ((1 << UCharacterCategory.SPACE_SEPARATOR)
3441                  | (1 << UCharacterCategory.LINE_SEPARATOR)
3442                  | (1 << UCharacterCategory.PARAGRAPH_SEPARATOR))) != 0
3443         && (ch != NO_BREAK_SPACE_) && (ch != FIGURE_SPACE_) && (ch != NARROW_NO_BREAK_SPACE_)
3444         // TAB VT LF FF CR FS GS RS US NL are all control characters
3445         // that are white spaces.
3446         || (ch >= 0x9 && ch <= 0xd) || (ch >= 0x1c && ch <= 0x1f);
3447     }
3448
3449     /**
3450      * Determines if the specified code point is a Unicode specified space
3451      * character, i.e. if code point is in the category Zs, Zl and Zp.
3452      * Up-to-date Unicode implementation of java.lang.Character.isSpaceChar().
3453      * @param ch code point to determine if it is a space
3454      * @return true if the specified code point is a space character
3455      * @stable ICU 2.1
3456      */
3457     public static boolean isSpaceChar(int ch)
3458     {
3459         // if props == 0, it will just fall through and return false
3460         return ((1 << getType(ch)) & ((1 << UCharacterCategory.SPACE_SEPARATOR)
3461                       | (1 << UCharacterCategory.LINE_SEPARATOR)
3462                       | (1 << UCharacterCategory.PARAGRAPH_SEPARATOR)))
3463         != 0;
3464     }
3465
3466     /**
3467      * Determines if the specified code point is a titlecase character.
3468      * UnicodeData only contains case mappings for code points where they are
3469      * one-to-one mappings; it also omits information about context-sensitive
3470      * case mappings.<br>
3471      * For more information about Unicode case mapping please refer to the
3472      * <a href=http://www.unicode.org/unicode/reports/tr21/>
3473      * Technical report #21</a>.<br>
3474      * Up-to-date Unicode implementation of java.lang.Character.isTitleCase().
3475      * @param ch code point to determine if it is in title case
3476      * @return true if the specified code point is a titlecase character
3477      * @stable ICU 2.1
3478      */
3479     public static boolean isTitleCase(int ch)
3480     {
3481         // if props == 0, it will just fall through and return false
3482         return getType(ch) == UCharacterCategory.TITLECASE_LETTER;
3483     }
3484
3485     /**
3486      * Determines if the specified code point may be any part of a Unicode
3487      * identifier other than the starting character.
3488      * A code point may be part of a Unicode identifier if and only if it is
3489      * one of the following:
3490      * <ul>
3491      * <li> Lu Uppercase letter
3492      * <li> Ll Lowercase letter
3493      * <li> Lt Titlecase letter
3494      * <li> Lm Modifier letter
3495      * <li> Lo Other letter
3496      * <li> Nl Letter number
3497      * <li> Pc Connecting punctuation character
3498      * <li> Nd decimal number
3499      * <li> Mc Spacing combining mark
3500      * <li> Mn Non-spacing mark
3501      * <li> Cf formatting code
3502      * </ul>
3503      * Up-to-date Unicode implementation of
3504      * java.lang.Character.isUnicodeIdentifierPart().<br>
3505      * See <a href=http://www.unicode.org/unicode/reports/tr8/>UTR #8</a>.
3506      * @param ch code point to determine if it can be part of a Unicode
3507      *        identifier
3508      * @return true if code point is any character belonging a unicode
3509      *         identifier suffix after the first character
3510      * @stable ICU 2.1
3511      */
3512     public static boolean isUnicodeIdentifierPart(int ch)
3513     {
3514         // if props == 0, it will just fall through and return false
3515         // cat == format
3516         return ((1 << getType(ch))
3517         & ((1 << UCharacterCategory.UPPERCASE_LETTER)
3518            | (1 << UCharacterCategory.LOWERCASE_LETTER)
3519            | (1 << UCharacterCategory.TITLECASE_LETTER)
3520            | (1 << UCharacterCategory.MODIFIER_LETTER)
3521            | (1 << UCharacterCategory.OTHER_LETTER)
3522            | (1 << UCharacterCategory.LETTER_NUMBER)
3523            | (1 << UCharacterCategory.CONNECTOR_PUNCTUATION)
3524            | (1 << UCharacterCategory.DECIMAL_DIGIT_NUMBER)
3525            | (1 << UCharacterCategory.COMBINING_SPACING_MARK)
3526            | (1 << UCharacterCategory.NON_SPACING_MARK))) != 0
3527         || isIdentifierIgnorable(ch);
3528     }
3529
3530     /**
3531      * Determines if the specified code point is permissible as the first
3532      * character in a Unicode identifier.
3533      * A code point may start a Unicode identifier if it is of type either
3534      * <ul>
3535      * <li> Lu Uppercase letter
3536      * <li> Ll Lowercase letter
3537      * <li> Lt Titlecase letter
3538      * <li> Lm Modifier letter
3539      * <li> Lo Other letter
3540      * <li> Nl Letter number
3541      * </ul>
3542      * Up-to-date Unicode implementation of
3543      * java.lang.Character.isUnicodeIdentifierStart().<br>
3544      * See <a href=http://www.unicode.org/unicode/reports/tr8/>UTR #8</a>.
3545      * @param ch code point to determine if it can start a Unicode identifier
3546      * @return true if code point is the first character belonging a unicode
3547      *              identifier
3548      * @stable ICU 2.1
3549      */
3550     public static boolean isUnicodeIdentifierStart(int ch)
3551     {
3552         /*int cat = getType(ch);*/
3553         // if props == 0, it will just fall through and return false
3554         return ((1 << getType(ch))
3555         & ((1 << UCharacterCategory.UPPERCASE_LETTER)
3556            | (1 << UCharacterCategory.LOWERCASE_LETTER)
3557            | (1 << UCharacterCategory.TITLECASE_LETTER)
3558            | (1 << UCharacterCategory.MODIFIER_LETTER)
3559            | (1 << UCharacterCategory.OTHER_LETTER)
3560            | (1 << UCharacterCategory.LETTER_NUMBER))) != 0;
3561     }
3562
3563     /**
3564      * Determines if the specified code point should be regarded as an
3565      * ignorable character in a Unicode identifier.
3566      * A character is ignorable in the Unicode standard if it is of the type
3567      * Cf, Formatting code.<br>
3568      * Up-to-date Unicode implementation of
3569      * java.lang.Character.isIdentifierIgnorable().<br>
3570      * See <a href=http://www.unicode.org/unicode/reports/tr8/>UTR #8</a>.
3571      * @param ch code point to be determined if it can be ignored in a Unicode
3572      *        identifier.
3573      * @return true if the code point is ignorable
3574      * @stable ICU 2.1
3575      */
3576     public static boolean isIdentifierIgnorable(int ch)
3577     {
3578         // see java.lang.Character.isIdentifierIgnorable() on range of
3579         // ignorable characters.
3580         if (ch <= 0x9f) {
3581         return isISOControl(ch)
3582         && !((ch >= 0x9 && ch <= 0xd)
3583              || (ch >= 0x1c && ch <= 0x1f));
3584         }
3585         return getType(ch) == UCharacterCategory.FORMAT;
3586     }
3587
3588     /**
3589      * Determines if the specified code point is an uppercase character.
3590      * UnicodeData only contains case mappings for code point where they are
3591      * one-to-one mappings; it also omits information about context-sensitive
3592      * case mappings.<br>
3593      * For language specific case conversion behavior, use
3594      * toUpperCase(locale, str). <br>
3595      * For example, the case conversion for dot-less i and dotted I in Turkish,
3596      * or for final sigma in Greek.
3597      * For more information about Unicode case mapping please refer to the
3598      * <a href=http://www.unicode.org/unicode/reports/tr21/>
3599      * Technical report #21</a>.<br>
3600      * Up-to-date Unicode implementation of java.lang.Character.isUpperCase().
3601      * @param ch code point to determine if it is in uppercase
3602      * @return true if the code point is an uppercase character
3603      * @stable ICU 2.1
3604      */
3605     public static boolean isUpperCase(int ch)
3606     {
3607         // if props == 0, it will just fall through and return false
3608         return getType(ch) == UCharacterCategory.UPPERCASE_LETTER;
3609     }
3610
3611     /**
3612      * The given code point is mapped to its lowercase equivalent; if the code
3613      * point has no lowercase equivalent, the code point itself is returned.
3614      * Up-to-date Unicode implementation of java.lang.Character.toLowerCase()
3615      *
3616      * <p>This function only returns the simple, single-code point case mapping.
3617      * Full case mappings should be used whenever possible because they produce
3618      * better results by working on whole strings.
3619      * They take into account the string context and the language and can map
3620      * to a result string with a different length as appropriate.
3621      * Full case mappings are applied by the case mapping functions
3622      * that take String parameters rather than code points (int).
3623      * See also the User Guide chapter on C/POSIX migration:
3624      * http://www.icu-project.org/userguide/posix.html#case_mappings
3625      *
3626      * @param ch code point whose lowercase equivalent is to be retrieved
3627      * @return the lowercase equivalent code point
3628      * @stable ICU 2.1
3629      */
3630     public static int toLowerCase(int ch) {
3631         return gCsp.tolower(ch);
3632     }
3633
3634     /**
3635      * Converts argument code point and returns a String object representing
3636      * the code point's value in UTF16 format.
3637      * The result is a string whose length is 1 for non-supplementary code
3638      * points, 2 otherwise.<br>
3639      * com.ibm.icu.text.UTF16 can be used to parse Strings generated by this
3640      * function.<br>
3641      * Up-to-date Unicode implementation of java.lang.Character.toString()
3642      * @param ch code point
3643      * @return string representation of the code point, null if code point is not
3644      *         defined in unicode
3645      * @stable ICU 2.1
3646      */
3647     public static String toString(int ch)
3648     {
3649         if (ch < MIN_VALUE || ch > MAX_VALUE) {
3650             return null;
3651         }
3652
3653         if (ch < SUPPLEMENTARY_MIN_VALUE) {
3654             return String.valueOf((char)ch);
3655         }
3656
3657         StringBuffer result = new StringBuffer();
3658         result.append(UTF16.getLeadSurrogate(ch));
3659         result.append(UTF16.getTrailSurrogate(ch));
3660         return result.toString();
3661     }
3662
3663     /**
3664      * Converts the code point argument to titlecase.
3665      * If no titlecase is available, the uppercase is returned. If no uppercase
3666      * is available, the code point itself is returned.
3667      * Up-to-date Unicode implementation of java.lang.Character.toTitleCase()
3668      *
3669      * <p>This function only returns the simple, single-code point case mapping.
3670      * Full case mappings should be used whenever possible because they produce
3671      * better results by working on whole strings.
3672      * They take into account the string context and the language and can map
3673      * to a result string with a different length as appropriate.
3674      * Full case mappings are applied by the case mapping functions
3675      * that take String parameters rather than code points (int).
3676      * See also the User Guide chapter on C/POSIX migration:
3677      * http://www.icu-project.org/userguide/posix.html#case_mappings
3678      *
3679      * @param ch code point  whose title case is to be retrieved
3680      * @return titlecase code point
3681      * @stable ICU 2.1
3682      */
3683     public static int toTitleCase(int ch) {
3684         return gCsp.totitle(ch);
3685     }
3686
3687     /**
3688      * Converts the character argument to uppercase.
3689      * If no uppercase is available, the character itself is returned.
3690      * Up-to-date Unicode implementation of java.lang.Character.toUpperCase()
3691      *
3692      * <p>This function only returns the simple, single-code point case mapping.
3693      * Full case mappings should be used whenever possible because they produce
3694      * better results by working on whole strings.
3695      * They take into account the string context and the language and can map
3696      * to a result string with a different length as appropriate.
3697      * Full case mappings are applied by the case mapping functions
3698      * that take String parameters rather than code points (int).
3699      * See also the User Guide chapter on C/POSIX migration:
3700      * http://www.icu-project.org/userguide/posix.html#case_mappings
3701      *
3702      * @param ch code point whose uppercase is to be retrieved
3703      * @return uppercase code point
3704      * @stable ICU 2.1
3705      */
3706     public static int toUpperCase(int ch) {
3707         return gCsp.toupper(ch);
3708     }
3709
3710     // extra methods not in java.lang.Character --------------------------
3711
3712     /**
3713      * Determines if the code point is a supplementary character.
3714      * A code point is a supplementary character if and only if it is greater
3715      * than or equal to <a href=#SUPPLEMENTARY_MIN_VALUE>SUPPLEMENTARY_MIN_VALUE</a>
3716      * @param ch code point to be determined if it is in the supplementary
3717      *        plane
3718      * @return true if code point is a supplementary character
3719      * @stable ICU 2.1
3720      */
3721     public static boolean isSupplementary(int ch)
3722     {
3723         return ch >= UCharacter.SUPPLEMENTARY_MIN_VALUE &&
3724             ch <= UCharacter.MAX_VALUE;
3725     }
3726
3727     /**
3728      * Determines if the code point is in the BMP plane.
3729      * @param ch code point to be determined if it is not a supplementary
3730      *        character
3731      * @return true if code point is not a supplementary character
3732      * @stable ICU 2.1
3733      */
3734     public static boolean isBMP(int ch)
3735     {
3736         return (ch >= 0 && ch <= LAST_CHAR_MASK_);
3737     }
3738
3739     /**
3740      * Determines whether the specified code point is a printable character
3741      * according to the Unicode standard.
3742      * @param ch code point to be determined if it is printable
3743      * @return true if the code point is a printable character
3744      * @stable ICU 2.1
3745      */
3746     public static boolean isPrintable(int ch)
3747     {
3748         int cat = getType(ch);
3749         // if props == 0, it will just fall through and return false
3750         return (cat != UCharacterCategory.UNASSIGNED &&
3751         cat != UCharacterCategory.CONTROL &&
3752         cat != UCharacterCategory.FORMAT &&
3753         cat != UCharacterCategory.PRIVATE_USE &&
3754         cat != UCharacterCategory.SURROGATE &&
3755         cat != UCharacterCategory.GENERAL_OTHER_TYPES);
3756     }
3757
3758     /**
3759      * Determines whether the specified code point is of base form.
3760      * A code point of base form does not graphically combine with preceding
3761      * characters, and is neither a control nor a format character.
3762      * @param ch code point to be determined if it is of base form
3763      * @return true if the code point is of base form
3764      * @stable ICU 2.1
3765      */
3766     public static boolean isBaseForm(int ch)
3767     {
3768         int cat = getType(ch);
3769         // if props == 0, it will just fall through and return false
3770         return cat == UCharacterCategory.DECIMAL_DIGIT_NUMBER ||
3771             cat == UCharacterCategory.OTHER_NUMBER ||
3772             cat == UCharacterCategory.LETTER_NUMBER ||
3773             cat == UCharacterCategory.UPPERCASE_LETTER ||
3774             cat == UCharacterCategory.LOWERCASE_LETTER ||
3775             cat == UCharacterCategory.TITLECASE_LETTER ||
3776             cat == UCharacterCategory.MODIFIER_LETTER ||
3777             cat == UCharacterCategory.OTHER_LETTER ||
3778             cat == UCharacterCategory.NON_SPACING_MARK ||
3779             cat == UCharacterCategory.ENCLOSING_MARK ||
3780             cat == UCharacterCategory.COMBINING_SPACING_MARK;
3781     }
3782
3783     /**
3784      * Returns the Bidirection property of a code point.
3785      * For example, 0x0041 (letter A) has the LEFT_TO_RIGHT directional
3786      * property.<br>
3787      * Result returned belongs to the interface
3788      * <a href=UCharacterDirection.html>UCharacterDirection</a>
3789      * @param ch the code point to be determined its direction
3790      * @return direction constant from UCharacterDirection.
3791      * @stable ICU 2.1
3792      */
3793     public static int getDirection(int ch)
3794     {
3795         return gBdp.getClass(ch);
3796     }
3797
3798     /**
3799      * Determines whether the code point has the "mirrored" property.
3800      * This property is set for characters that are commonly used in
3801      * Right-To-Left contexts and need to be displayed with a "mirrored"
3802      * glyph.
3803      * @param ch code point whose mirror is to be determined
3804      * @return true if the code point has the "mirrored" property
3805      * @stable ICU 2.1
3806      */
3807     public static boolean isMirrored(int ch)
3808     {
3809         return gBdp.isMirrored(ch);
3810     }
3811
3812     /**
3813      * Maps the specified code point to a "mirror-image" code point.
3814      * For code points with the "mirrored" property, implementations sometimes
3815      * need a "poor man's" mapping to another code point such that the default
3816      * glyph may serve as the mirror-image of the default glyph of the
3817      * specified code point.<br>
3818      * This is useful for text conversion to and from codepages with visual
3819      * order, and for displays without glyph selection capabilities.
3820      * @param ch code point whose mirror is to be retrieved
3821      * @return another code point that may serve as a mirror-image substitute,
3822      *         or ch itself if there is no such mapping or ch does not have the
3823      *         "mirrored" property
3824      * @stable ICU 2.1
3825      */
3826     public static int getMirror(int ch)
3827     {
3828         return gBdp.getMirror(ch);
3829     }
3830
3831     /**
3832      * Gets the combining class of the argument codepoint
3833      * @param ch code point whose combining is to be retrieved
3834      * @return the combining class of the codepoint
3835      * @stable ICU 2.1
3836      */
3837     public static int getCombiningClass(int ch)
3838     {
3839         if (ch < MIN_VALUE || ch > MAX_VALUE) {
3840         throw new IllegalArgumentException("Codepoint out of bounds");
3841         }
3842         return NormalizerImpl.getCombiningClass(ch);
3843     }
3844
3845     /**
3846      * A code point is illegal if and only if
3847      * <ul>
3848      * <li> Out of bounds, less than 0 or greater than UCharacter.MAX_VALUE
3849      * <li> A surrogate value, 0xD800 to 0xDFFF
3850      * <li> Not-a-character, having the form 0x xxFFFF or 0x xxFFFE
3851      * </ul>
3852      * Note: legal does not mean that it is assigned in this version of Unicode.
3853      * @param ch code point to determine if it is a legal code point by itself
3854      * @return true if and only if legal.
3855      * @stable ICU 2.1
3856      */
3857     public static boolean isLegal(int ch)
3858     {
3859         if (ch < MIN_VALUE) {
3860             return false;
3861         }
3862         if (ch < UTF16.SURROGATE_MIN_VALUE) {
3863             return true;
3864         }
3865         if (ch <= UTF16.SURROGATE_MAX_VALUE) {
3866             return false;
3867         }
3868         if (UCharacterUtility.isNonCharacter(ch)) {
3869             return false;
3870         }
3871         return (ch <= MAX_VALUE);
3872     }
3873
3874     /**
3875      * A string is legal iff all its code points are legal.
3876      * A code point is illegal if and only if
3877      * <ul>
3878      * <li> Out of bounds, less than 0 or greater than UCharacter.MAX_VALUE
3879      * <li> A surrogate value, 0xD800 to 0xDFFF
3880      * <li> Not-a-character, having the form 0x xxFFFF or 0x xxFFFE
3881      * </ul>
3882      * Note: legal does not mean that it is assigned in this version of Unicode.
3883      * @param str containing code points to examine
3884      * @return true if and only if legal.
3885      * @stable ICU 2.1
3886      */
3887     public static boolean isLegal(String str)
3888     {
3889         int size = str.length();
3890         int codepoint;
3891         for (int i = 0; i < size; i ++)
3892         {
3893         codepoint = UTF16.charAt(str, i);
3894         if (!isLegal(codepoint)) {
3895             return false;
3896         }
3897         if (isSupplementary(codepoint)) {
3898             i ++;
3899         }
3900         }
3901         return true;
3902     }
3903
3904     /**
3905      * Gets the version of Unicode data used.
3906      * @return the unicode version number used
3907      * @stable ICU 2.1
3908      */
3909     public static VersionInfo getUnicodeVersion()
3910     {
3911         return PROPERTY_.m_unicodeVersion_;
3912     }
3913
3914     /**
3915      * Retrieve the most current Unicode name of the argument code point, or
3916      * null if the character is unassigned or outside the range
3917      * UCharacter.MIN_VALUE and UCharacter.MAX_VALUE or does not have a name.
3918      * <br>
3919      * Note calling any methods related to code point names, e.g. get*Name*()
3920      * incurs a one-time initialisation cost to construct the name tables.
3921      * @param ch the code point for which to get the name
3922      * @return most current Unicode name
3923      * @stable ICU 2.1
3924      */
3925     public static String getName(int ch)
3926     {
3927         if(NAME_==null){
3928             throw new MissingResourceException("Could not load unames.icu","","");
3929         }
3930         return NAME_.getName(ch, UCharacterNameChoice.UNICODE_CHAR_NAME);
3931     }
3932
3933     /**
3934      * Gets the names for each of the characters in a string
3935      * @param s string to format
3936      * @param separator string to go between names
3937      * @return string of names
3938      * @stable ICU 3.8
3939      */
3940     public static String getName(String s, String separator) {
3941         if (s.length() == 1) { // handle common case
3942             return getName(s.charAt(0));
3943         }
3944         int cp;
3945         StringBuffer sb = new StringBuffer();
3946         for (int i = 0; i < s.length(); i += UTF16.getCharCount(cp)) {
3947             cp = UTF16.charAt(s,i);
3948             if (i != 0) sb.append(separator);
3949             sb.append(UCharacter.getName(cp));
3950         }
3951         return sb.toString();
3952     }
3953
3954     /**
3955      * Retrieve the earlier version 1.0 Unicode name of the argument code
3956      * point, or null if the character is unassigned or outside the range
3957      * UCharacter.MIN_VALUE and UCharacter.MAX_VALUE or does not have a name.
3958      * <br>
3959      * Note calling any methods related to code point names, e.g. get*Name*()
3960      * incurs a one-time initialisation cost to construct the name tables.
3961      * @param ch the code point for which to get the name
3962      * @return version 1.0 Unicode name
3963      * @stable ICU 2.1
3964      */
3965     public static String getName1_0(int ch)
3966     {
3967         if(NAME_==null){
3968             throw new MissingResourceException("Could not load unames.icu","","");
3969         }
3970         return NAME_.getName(ch,
3971                              UCharacterNameChoice.UNICODE_10_CHAR_NAME);
3972     }
3973
3974     /**
3975      * <p>Retrieves a name for a valid codepoint. Unlike, getName(int) and
3976      * getName1_0(int), this method will return a name even for codepoints that
3977      * are not assigned a name in UnicodeData.txt.
3978      * </p>
3979      * The names are returned in the following order.
3980      * <ul>
3981      * <li> Most current Unicode name if there is any
3982      * <li> Unicode 1.0 name if there is any
3983      * <li> Extended name in the form of
3984      *      "<codepoint_type-codepoint_hex_digits>". E.g. <noncharacter-FFFE>
3985      * </ul>
3986      * Note calling any methods related to code point names, e.g. get*Name*()
3987      * incurs a one-time initialisation cost to construct the name tables.
3988      * @param ch the code point for which to get the name
3989      * @return a name for the argument codepoint
3990      * @stable ICU 2.6
3991      */
3992     public static String getExtendedName(int ch)
3993     {
3994         if(NAME_==null){
3995             throw new MissingResourceException("Could not load unames.icu","","");
3996         }
3997         return NAME_.getName(ch, UCharacterNameChoice.EXTENDED_CHAR_NAME);
3998     }
3999
4000     /**
4001      * Get the ISO 10646 comment for a character.
4002      * The ISO 10646 comment is an informative field in the Unicode Character
4003      * Database (UnicodeData.txt field 11) and is from the ISO 10646 names list.
4004      * @param ch The code point for which to get the ISO comment.
4005      *           It must be <code>0&lt;=ch&lt;=0x10ffff</code>.
4006      * @return The ISO comment, or null if there is no comment for this
4007      *         character.
4008      * @stable ICU 2.4
4009      */
4010     public static String getISOComment(int ch)
4011     {
4012         if (ch < UCharacter.MIN_VALUE || ch > UCharacter.MAX_VALUE) {
4013             return null;
4014         }
4015         if(NAME_==null){
4016             throw new MissingResourceException("Could not load unames.icu","","");
4017         }
4018         String result = NAME_.getGroupName(ch,
4019                                            UCharacterNameChoice.ISO_COMMENT_);
4020         return result;
4021     }
4022
4023     /**
4024      * <p>Find a Unicode code point by its most current Unicode name and
4025      * return its code point value. All Unicode names are in uppercase.</p>
4026      * Note calling any methods related to code point names, e.g. get*Name*()
4027      * incurs a one-time initialisation cost to construct the name tables.
4028      * @param name most current Unicode character name whose code point is to
4029      *        be returned
4030      * @return code point or -1 if name is not found
4031      * @stable ICU 2.1
4032      */
4033     public static int getCharFromName(String name)
4034     {
4035         if(NAME_==null){
4036             throw new MissingResourceException("Could not load unames.icu","","");
4037         }
4038         return NAME_.getCharFromName(
4039                      UCharacterNameChoice.UNICODE_CHAR_NAME, name);
4040     }
4041
4042     /**
4043      * <p>Find a Unicode character by its version 1.0 Unicode name and return
4044      * its code point value. All Unicode names are in uppercase.</p>
4045      * Note calling any methods related to code point names, e.g. get*Name*()
4046      * incurs a one-time initialisation cost to construct the name tables.
4047      * @param name Unicode 1.0 code point name whose code point is to
4048      *             be returned
4049      * @return code point or -1 if name is not found
4050      * @stable ICU 2.1
4051      */
4052     public static int getCharFromName1_0(String name)
4053     {
4054         if(NAME_==null){
4055             throw new MissingResourceException("Could not load unames.icu","","");
4056         }
4057         return NAME_.getCharFromName(
4058                      UCharacterNameChoice.UNICODE_10_CHAR_NAME, name);
4059     }
4060
4061     /**
4062      * <p>Find a Unicode character by any of its names and return its code
4063      * point value. All Unicode names are in uppercase.
4064      * Extended names are all lowercase except for numbers and are contained
4065      * within angle brackets.</p>
4066      * The names are searched in the following order
4067      * <ul>
4068      * <li> Most current Unicode name if there is any
4069      * <li> Unicode 1.0 name if there is any
4070      * <li> Extended name in the form of
4071      *      "<codepoint_type-codepoint_hex_digits>". E.g. <noncharacter-FFFE>
4072      * </ul>
4073      * Note calling any methods related to code point names, e.g. get*Name*()
4074      * incurs a one-time initialisation cost to construct the name tables.
4075      * @param name codepoint name
4076      * @return code point associated with the name or -1 if the name is not
4077      *         found.
4078      * @stable ICU 2.6
4079      */
4080     public static int getCharFromExtendedName(String name)
4081     {
4082         if(NAME_==null){
4083             throw new MissingResourceException("Could not load unames.icu","","");
4084         }
4085         return NAME_.getCharFromName(
4086                      UCharacterNameChoice.EXTENDED_CHAR_NAME, name);
4087     }
4088
4089     /**
4090      * Return the Unicode name for a given property, as given in the
4091      * Unicode database file PropertyAliases.txt.  Most properties
4092      * have more than one name.  The nameChoice determines which one
4093      * is returned.
4094      *
4095      * In addition, this function maps the property
4096      * UProperty.GENERAL_CATEGORY_MASK to the synthetic names "gcm" /
4097      * "General_Category_Mask".  These names are not in
4098      * PropertyAliases.txt.
4099      *
4100      * @param property UProperty selector.
4101      *
4102      * @param nameChoice UProperty.NameChoice selector for which name
4103      * to get.  All properties have a long name.  Most have a short
4104      * name, but some do not.  Unicode allows for additional names; if
4105      * present these will be returned by UProperty.NameChoice.LONG + i,
4106      * where i=1, 2,...
4107      *
4108      * @return a name, or null if Unicode explicitly defines no name
4109      * ("n/a") for a given property/nameChoice.  If a given nameChoice
4110      * throws an exception, then all larger values of nameChoice will
4111      * throw an exception.  If null is returned for a given
4112      * nameChoice, then other nameChoice values may return non-null
4113      * results.
4114      *
4115      * @exception IllegalArgumentException thrown if property or
4116      * nameChoice are invalid.
4117      *
4118      * @see UProperty
4119      * @see UProperty.NameChoice
4120      * @stable ICU 2.4
4121      */
4122     public static String getPropertyName(int property,
4123                                          int nameChoice) {
4124         return PNAMES_.getPropertyName(property, nameChoice);
4125     }
4126
4127     /**
4128      * Return the UProperty selector for a given property name, as
4129      * specified in the Unicode database file PropertyAliases.txt.
4130      * Short, long, and any other variants are recognized.
4131      *
4132      * In addition, this function maps the synthetic names "gcm" /
4133      * "General_Category_Mask" to the property
4134      * UProperty.GENERAL_CATEGORY_MASK.  These names are not in
4135      * PropertyAliases.txt.
4136      *
4137      * @param propertyAlias the property name to be matched.  The name
4138      * is compared using "loose matching" as described in
4139      * PropertyAliases.txt.
4140      *
4141      * @return a UProperty enum.
4142      *
4143      * @exception IllegalArgumentException thrown if propertyAlias
4144      * is not recognized.
4145      *
4146      * @see UProperty
4147      * @stable ICU 2.4
4148      */
4149     public static int getPropertyEnum(String propertyAlias) {
4150         return PNAMES_.getPropertyEnum(propertyAlias);
4151     }
4152
4153     /**
4154      * Return the Unicode name for a given property value, as given in
4155      * the Unicode database file PropertyValueAliases.txt.  Most
4156      * values have more than one name.  The nameChoice determines
4157      * which one is returned.
4158      *
4159      * Note: Some of the names in PropertyValueAliases.txt can only be
4160      * retrieved using UProperty.GENERAL_CATEGORY_MASK, not
4161      * UProperty.GENERAL_CATEGORY.  These include: "C" / "Other", "L" /
4162      * "Letter", "LC" / "Cased_Letter", "M" / "Mark", "N" / "Number", "P"
4163      * / "Punctuation", "S" / "Symbol", and "Z" / "Separator".
4164      *
4165      * @param property UProperty selector constant.
4166      * UProperty.INT_START &lt;= property &lt; UProperty.INT_LIMIT or
4167      * UProperty.BINARY_START &lt;= property &lt; UProperty.BINARY_LIMIT or
4168      * UProperty.MASK_START &lt; = property &lt; UProperty.MASK_LIMIT.
4169      * If out of range, null is returned.
4170      *
4171      * @param value selector for a value for the given property.  In
4172      * general, valid values range from 0 up to some maximum.  There
4173      * are a few exceptions: (1.) UProperty.BLOCK values begin at the
4174      * non-zero value BASIC_LATIN.getID().  (2.)
4175      * UProperty.CANONICAL_COMBINING_CLASS values are not contiguous
4176      * and range from 0..240.  (3.)  UProperty.GENERAL_CATEGORY_MASK values
4177      * are mask values produced by left-shifting 1 by
4178      * UCharacter.getType().  This allows grouped categories such as
4179      * [:L:] to be represented.  Mask values are non-contiguous.
4180      *
4181      * @param nameChoice UProperty.NameChoice selector for which name
4182      * to get.  All values have a long name.  Most have a short name,
4183      * but some do not.  Unicode allows for additional names; if
4184      * present these will be returned by UProperty.NameChoice.LONG + i,
4185      * where i=1, 2,...
4186      *
4187      * @return a name, or null if Unicode explicitly defines no name
4188      * ("n/a") for a given property/value/nameChoice.  If a given
4189      * nameChoice throws an exception, then all larger values of
4190      * nameChoice will throw an exception.  If null is returned for a
4191      * given nameChoice, then other nameChoice values may return
4192      * non-null results.
4193      *
4194      * @exception IllegalArgumentException thrown if property, value,
4195      * or nameChoice are invalid.
4196      *
4197      * @see UProperty
4198      * @see UProperty.NameChoice
4199      * @stable ICU 2.4
4200      */
4201     public static String getPropertyValueName(int property,
4202                                               int value,
4203                                               int nameChoice)
4204     {
4205         if ((property == UProperty.CANONICAL_COMBINING_CLASS
4206              || property == UProperty.LEAD_CANONICAL_COMBINING_CLASS
4207              || property == UProperty.TRAIL_CANONICAL_COMBINING_CLASS)
4208             && value >= UCharacter.getIntPropertyMinValue(
4209                               UProperty.CANONICAL_COMBINING_CLASS)
4210             && value <= UCharacter.getIntPropertyMaxValue(
4211                               UProperty.CANONICAL_COMBINING_CLASS)
4212             && nameChoice >= 0 && nameChoice < UProperty.NameChoice.COUNT) {
4213             // this is hard coded for the valid cc
4214             // because PropertyValueAliases.txt does not contain all of them
4215             try {
4216                 return PNAMES_.getPropertyValueName(property, value,
4217                                                     nameChoice);
4218             }
4219             catch (IllegalArgumentException e) {
4220                 return null;
4221             }
4222         }
4223         return PNAMES_.getPropertyValueName(property, value, nameChoice);
4224     }
4225
4226     /**
4227      * Return the property value integer for a given value name, as
4228      * specified in the Unicode database file PropertyValueAliases.txt.
4229      * Short, long, and any other variants are recognized.
4230      *
4231      * Note: Some of the names in PropertyValueAliases.txt will only be
4232      * recognized with UProperty.GENERAL_CATEGORY_MASK, not
4233      * UProperty.GENERAL_CATEGORY.  These include: "C" / "Other", "L" /
4234      * "Letter", "LC" / "Cased_Letter", "M" / "Mark", "N" / "Number", "P"
4235      * / "Punctuation", "S" / "Symbol", and "Z" / "Separator".
4236      *
4237      * @param property UProperty selector constant.
4238      * UProperty.INT_START &lt;= property &lt; UProperty.INT_LIMIT or
4239      * UProperty.BINARY_START &lt;= property &lt; UProperty.BINARY_LIMIT or
4240      * UProperty.MASK_START &lt; = property &lt; UProperty.MASK_LIMIT.
4241      * Only these properties can be enumerated.
4242      *
4243      * @param valueAlias the value name to be matched.  The name is
4244      * compared using "loose matching" as described in
4245      * PropertyValueAliases.txt.
4246      *
4247      * @return a value integer.  Note: UProperty.GENERAL_CATEGORY
4248      * values are mask values produced by left-shifting 1 by
4249      * UCharacter.getType().  This allows grouped categories such as
4250      * [:L:] to be represented.
4251      *
4252      * @see UProperty
4253      * @throws IllegalArgumentException if property is not a valid UProperty
4254      *         selector
4255      * @stable ICU 2.4
4256      */
4257     public static int getPropertyValueEnum(int property, String valueAlias) {
4258         return PNAMES_.getPropertyValueEnum(property, valueAlias);
4259     }
4260
4261     /**
4262      * Returns a code point corresponding to the two UTF16 characters.
4263      * @param lead the lead char
4264      * @param trail the trail char
4265      * @return code point if surrogate characters are valid.
4266      * @exception IllegalArgumentException thrown when argument characters do
4267      *            not form a valid codepoint
4268      * @stable ICU 2.1
4269      */
4270     public static int getCodePoint(char lead, char trail)
4271     {
4272         if (UTF16.isLeadSurrogate(lead) && UTF16.isTrailSurrogate(trail)) {
4273             return UCharacterProperty.getRawSupplementary(lead, trail);
4274         }
4275         throw new IllegalArgumentException("Illegal surrogate characters");
4276     }
4277
4278     /**
4279      * Returns the code point corresponding to the UTF16 character.
4280      * @param char16 the UTF16 character
4281      * @return code point if argument is a valid character.
4282      * @exception IllegalArgumentException thrown when char16 is not a valid
4283      *            codepoint
4284      * @stable ICU 2.1
4285      */
4286     public static int getCodePoint(char char16)
4287     {
4288         if (UCharacter.isLegal(char16)) {
4289             return char16;
4290         }
4291         throw new IllegalArgumentException("Illegal codepoint");
4292     }
4293
4294     /**
4295      * Implementation of UCaseProps.ContextIterator, iterates over a String.
4296      * See ustrcase.c/utf16_caseContextIterator().
4297      */
4298     private static class StringContextIterator implements UCaseProps.ContextIterator {
4299         /**
4300          * Constructor.
4301          * @param s String to iterate over.
4302          */
4303         StringContextIterator(String s) {
4304             this.s=s;
4305             limit=s.length();
4306             cpStart=cpLimit=index=0;
4307             dir=0;
4308         }
4309
4310         /**
4311          * Set the iteration limit for nextCaseMapCP() to an index within the string.
4312          * If the limit parameter is negative or past the string, then the
4313          * string length is restored as the iteration limit.
4314          *
4315          * This limit does not affect the next() function which always
4316          * iterates to the very end of the string.
4317          *
4318          * @param lim The iteration limit.
4319          */
4320         public void setLimit(int lim) {
4321             if(0<=lim && lim<=s.length()) {
4322                 limit=lim;
4323             } else {
4324                 limit=s.length();
4325             }
4326         }
4327
4328         /**
4329          * Move to the iteration limit without fetching code points up to there.
4330          */
4331         public void moveToLimit() {
4332             cpStart=cpLimit=limit;
4333         }
4334
4335         /**
4336          * Iterate forward through the string to fetch the next code point
4337          * to be case-mapped, and set the context indexes for it.
4338          * Performance optimization, to save on function calls and redundant
4339          * tests. Combines UTF16.charAt(), UTF16.getCharCount(), and setIndex().
4340          *
4341          * When the iteration limit is reached (and -1 is returned),
4342          * getCPStart() will be at the iteration limit.
4343          *
4344          * Iteration with next() does not affect the position for nextCaseMapCP().
4345          *
4346          * @return The next code point to be case-mapped, or <0 when the iteration is done.
4347          */
4348         public int nextCaseMapCP() {
4349             cpStart=cpLimit;
4350             if(cpLimit<limit) {
4351                 int c=s.charAt(cpLimit++);
4352                 if(UTF16.LEAD_SURROGATE_MIN_VALUE<=c || c<=UTF16.TRAIL_SURROGATE_MAX_VALUE) {
4353                     char c2;
4354                     if( c<=UTF16.LEAD_SURROGATE_MAX_VALUE && cpLimit<limit &&
4355                         UTF16.TRAIL_SURROGATE_MIN_VALUE<=(c2=s.charAt(cpLimit)) && c2<=UTF16.TRAIL_SURROGATE_MAX_VALUE
4356                     ) {
4357                         // supplementary code point
4358                         ++cpLimit;
4359                         c=UCharacterProperty.getRawSupplementary((char)c, c2);
4360                     // else unpaired surrogate code point
4361                     }
4362                 // else BMP code point
4363                 }
4364                 return c;
4365             } else {
4366                 return -1;
4367             }
4368         }
4369
4370         /**
4371          * Get the start of the code point that was last returned
4372          * by nextCaseMapCP().
4373          */
4374         public int getCPStart() {
4375             return cpStart;
4376         }
4377
4378         /**
4379          * Get the limit of the code point that was last returned
4380          * by nextCaseMapCP().
4381          */
4382         public int getCPLimit() {
4383             return cpLimit;
4384         }
4385
4386         // implement UCaseProps.ContextIterator
4387         public void reset(int direction) {
4388             if(direction>0) {
4389                 /* reset for forward iteration */
4390                 dir=1;
4391                 index=cpLimit;
4392             } else if(direction<0) {
4393                 /* reset for backward iteration */
4394                 dir=-1;
4395                 index=cpStart;
4396             } else {
4397                 // not a valid direction
4398                 dir=0;
4399                 index=0;
4400             }
4401         }
4402
4403         public int next() {
4404             int c;
4405
4406             if(dir>0 && index<s.length()) {
4407                 c=UTF16.charAt(s, index);
4408                 index+=UTF16.getCharCount(c);
4409                 return c;
4410             } else if(dir<0 && index>0) {
4411                 c=UTF16.charAt(s, index-1);
4412                 index-=UTF16.getCharCount(c);
4413                 return c;
4414             }
4415             return -1;
4416         }
4417
4418         // variables
4419         protected String s;
4420         protected int index, limit, cpStart, cpLimit;
4421         protected int dir; // 0=initial state  >0=forward  <0=backward
4422     }
4423
4424     /**
4425      * Gets uppercase version of the argument string.
4426      * Casing is dependent on the default locale and context-sensitive.
4427      * @param str source string to be performed on
4428      * @return uppercase version of the argument string
4429      * @stable ICU 2.1
4430      */
4431     public static String toUpperCase(String str)
4432     {
4433         return toUpperCase(ULocale.getDefault(), str);
4434     }
4435
4436     /**
4437      * Gets lowercase version of the argument string.
4438      * Casing is dependent on the default locale and context-sensitive
4439      * @param str source string to be performed on
4440      * @return lowercase version of the argument string
4441      * @stable ICU 2.1
4442      */
4443     public static String toLowerCase(String str)
4444     {
4445         return toLowerCase(ULocale.getDefault(), str);
4446     }
4447
4448     /**
4449      * <p>Gets the titlecase version of the argument string.</p>
4450      * <p>Position for titlecasing is determined by the argument break
4451      * iterator, hence the user can customize his break iterator for
4452      * a specialized titlecasing. In this case only the forward iteration
4453      * needs to be implemented.
4454      * If the break iterator passed in is null, the default Unicode algorithm
4455      * will be used to determine the titlecase positions.
4456      * </p>
4457      * <p>Only positions returned by the break iterator will be title cased,
4458      * character in between the positions will all be in lower case.</p>
4459      * <p>Casing is dependent on the default locale and context-sensitive</p>
4460      * @param str source string to be performed on
4461      * @param breakiter break iterator to determine the positions in which
4462      *        the character should be title cased.
4463      * @return lowercase version of the argument string
4464      * @stable ICU 2.6
4465      */
4466     public static String toTitleCase(String str, BreakIterator breakiter)
4467     {
4468         return toTitleCase(ULocale.getDefault(), str, breakiter);
4469     }
4470
4471     /**
4472      * Gets uppercase version of the argument string.
4473      * Casing is dependent on the argument locale and context-sensitive.
4474      * @param locale which string is to be converted in
4475      * @param str source string to be performed on
4476      * @return uppercase version of the argument string
4477      * @stable ICU 2.1
4478      */
4479     public static String toUpperCase(Locale locale, String str)
4480     {
4481         return toUpperCase(ULocale.forLocale(locale), str);
4482     }
4483
4484     /**
4485      * Gets uppercase version of the argument string.
4486      * Casing is dependent on the argument locale and context-sensitive.
4487      * @param locale which string is to be converted in
4488      * @param str source string to be performed on
4489      * @return uppercase version of the argument string
4490      * @stable ICU 3.2
4491      */
4492     public static String toUpperCase(ULocale locale, String str) {
4493         StringContextIterator iter = new StringContextIterator(str);
4494         StringBuffer result = new StringBuffer(str.length());
4495         int[] locCache = new int[1];
4496         int c;
4497
4498         if (locale == null) {
4499             locale = ULocale.getDefault();
4500         }
4501         locCache[0]=0;
4502
4503         while((c=iter.nextCaseMapCP())>=0) {
4504             c=gCsp.toFullUpper(c, iter, result, locale, locCache);
4505
4506             /* decode the result */
4507             if(c<0) {
4508                 /* (not) original code point */
4509                 c=~c;
4510             } else if(c<=UCaseProps.MAX_STRING_LENGTH) {
4511                 /* mapping already appended to result */
4512                 continue;
4513             /* } else { append single-code point mapping */
4514             }
4515             if(c<=0xffff) {
4516                 result.append((char)c);
4517             } else {
4518                 UTF16.append(result, c);
4519             }
4520         }
4521         return result.toString();
4522     }
4523
4524     /**
4525      * Gets lowercase version of the argument string.
4526      * Casing is dependent on the argument locale and context-sensitive
4527      * @param locale which string is to be converted in
4528      * @param str source string to be performed on
4529      * @return lowercase version of the argument string
4530      * @stable ICU 2.1
4531      */
4532     public static String toLowerCase(Locale locale, String str)
4533     {
4534         return toLowerCase(ULocale.forLocale(locale), str);
4535     }
4536
4537     /**
4538      * Gets lowercase version of the argument string.
4539      * Casing is dependent on the argument locale and context-sensitive
4540      * @param locale which string is to be converted in
4541      * @param str source string to be performed on
4542      * @return lowercase version of the argument string
4543      * @stable ICU 3.2
4544      */
4545     public static String toLowerCase(ULocale locale, String str) {
4546         StringContextIterator iter = new StringContextIterator(str);
4547         StringBuffer result = new StringBuffer(str.length());
4548         int[] locCache = new int[1];
4549         int c;
4550
4551         if (locale == null) {
4552             locale = ULocale.getDefault();
4553         }
4554         locCache[0]=0;
4555
4556         while((c=iter.nextCaseMapCP())>=0) {
4557             c=gCsp.toFullLower(c, iter, result, locale, locCache);
4558
4559             /* decode the result */
4560             if(c<0) {
4561                 /* (not) original code point */
4562                 c=~c;
4563             } else if(c<=UCaseProps.MAX_STRING_LENGTH) {
4564                 /* mapping already appended to result */
4565                 continue;
4566             /* } else { append single-code point mapping */
4567             }
4568             if(c<=0xffff) {
4569                 result.append((char)c);
4570             } else {
4571                 UTF16.append(result, c);
4572             }
4573         }
4574         return result.toString();
4575     }
4576
4577     /**
4578      * <p>Gets the titlecase version of the argument string.</p>
4579      * <p>Position for titlecasing is determined by the argument break
4580      * iterator, hence the user can customize his break iterator for
4581      * a specialized titlecasing. In this case only the forward iteration
4582      * needs to be implemented.
4583      * If the break iterator passed in is null, the default Unicode algorithm
4584      * will be used to determine the titlecase positions.
4585      * </p>
4586      * <p>Only positions returned by the break iterator will be title cased,
4587      * character in between the positions will all be in lower case.</p>
4588      * <p>Casing is dependent on the argument locale and context-sensitive</p>
4589      * @param locale which string is to be converted in
4590      * @param str source string to be performed on
4591      * @param breakiter break iterator to determine the positions in which
4592      *        the character should be title cased.
4593      * @return lowercase version of the argument string
4594      * @stable ICU 2.6
4595      */
4596     public static String toTitleCase(Locale locale, String str,
4597                                      BreakIterator breakiter)
4598     {
4599         return toTitleCase(ULocale.forLocale(locale), str, breakiter);
4600     }
4601
4602     /**
4603      * <p>Gets the titlecase version of the argument string.</p>
4604      * <p>Position for titlecasing is determined by the argument break
4605      * iterator, hence the user can customize his break iterator for
4606      * a specialized titlecasing. In this case only the forward iteration
4607      * needs to be implemented.
4608      * If the break iterator passed in is null, the default Unicode algorithm
4609      * will be used to determine the titlecase positions.
4610      * </p>
4611      * <p>Only positions returned by the break iterator will be title cased,
4612      * character in between the positions will all be in lower case.</p>
4613      * <p>Casing is dependent on the argument locale and context-sensitive</p>
4614      * @param locale which string is to be converted in
4615      * @param str source string to be performed on
4616      * @param titleIter break iterator to determine the positions in which
4617      *        the character should be title cased.
4618      * @return lowercase version of the argument string
4619      * @stable ICU 3.2
4620      */
4621     public static String toTitleCase(ULocale locale, String str,
4622                                      BreakIterator titleIter) {
4623         return toTitleCase(locale, str, titleIter, 0);
4624     }
4625
4626     /**
4627      * <p>Gets the titlecase version of the argument string.</p>
4628      * <p>Position for titlecasing is determined by the argument break
4629      * iterator, hence the user can customize his break iterator for
4630      * a specialized titlecasing. In this case only the forward iteration
4631      * needs to be implemented.
4632      * If the break iterator passed in is null, the default Unicode algorithm
4633      * will be used to determine the titlecase positions.
4634      * </p>
4635      * <p>Only positions returned by the break iterator will be title cased,
4636      * character in between the positions will all be in lower case.</p>
4637      * <p>Casing is dependent on the argument locale and context-sensitive</p>
4638      * @param locale which string is to be converted in
4639      * @param str source string to be performed on
4640      * @param titleIter break iterator to determine the positions in which
4641      *        the character should be title cased.
4642      * @param options bit set to modify the titlecasing operation
4643      * @return lowercase version of the argument string
4644      * @stable ICU 3.8
4645      * @see #TITLECASE_NO_LOWERCASE
4646      * @see #TITLECASE_NO_BREAK_ADJUSTMENT
4647      */
4648     public static String toTitleCase(ULocale locale, String str,
4649                                      BreakIterator titleIter,
4650                                      int options) {
4651         StringContextIterator iter = new StringContextIterator(str);
4652         StringBuffer result = new StringBuffer(str.length());
4653         int[] locCache = new int[1];
4654         int c, nc, srcLength = str.length();
4655
4656         if (locale == null) {
4657             locale = ULocale.getDefault();
4658         }
4659         locCache[0]=0;
4660
4661         if(titleIter == null) {
4662             titleIter = BreakIterator.getWordInstance(locale);
4663         }
4664         titleIter.setText(str);
4665
4666         int prev, titleStart, index;
4667         boolean isFirstIndex;
4668         boolean isDutch = locale.getLanguage().equals("nl");
4669         boolean FirstIJ = true;
4670
4671         /* set up local variables */
4672         prev=0;
4673         isFirstIndex=true;
4674
4675         /* titlecasing loop */
4676         while(prev<srcLength) {
4677             /* find next index where to titlecase */
4678             if(isFirstIndex) {
4679                 isFirstIndex=false;
4680                 index=titleIter.first();
4681             } else {
4682                 index=titleIter.next();
4683             }
4684             if(index==BreakIterator.DONE || index>srcLength) {
4685                 index=srcLength;
4686             }
4687
4688             /*
4689              * Unicode 4 & 5 section 3.13 Default Case Operations:
4690              *
4691              * R3  toTitlecase(X): Find the word boundaries based on Unicode Standard Annex
4692              * #29, "Text Boundaries." Between each pair of word boundaries, find the first
4693              * cased character F. If F exists, map F to default_title(F); then map each
4694              * subsequent character C to default_lower(C).
4695              *
4696              * In this implementation, segment [prev..index[ into 3 parts:
4697              * a) uncased characters (copy as-is) [prev..titleStart[
4698              * b) first case letter (titlecase)         [titleStart..titleLimit[
4699              * c) subsequent characters (lowercase)                 [titleLimit..index[
4700              */
4701             if(prev<index) {
4702                 /* find and copy uncased characters [prev..titleStart[ */
4703                 iter.setLimit(index);
4704                 c=iter.nextCaseMapCP();
4705                 if((options&TITLECASE_NO_BREAK_ADJUSTMENT)==0 && UCaseProps.NONE==gCsp.getType(c)) {
4706                     while((c=iter.nextCaseMapCP())>=0 && UCaseProps.NONE==gCsp.getType(c)) {}
4707                     titleStart=iter.getCPStart();
4708                     if(prev<titleStart) {
4709                         // TODO: With Java 5, this would want to be result.append(str, prev, titleStart);
4710                         result.append(str.substring(prev, titleStart));
4711                     }
4712                 } else {
4713                     titleStart=prev;
4714                 }
4715
4716                 if(titleStart<index) {
4717                     FirstIJ = true;
4718                     /* titlecase c which is from titleStart */
4719                     c=gCsp.toFullTitle(c, iter, result, locale, locCache);
4720
4721                     /* decode the result and lowercase up to index */
4722                     for(;;) {
4723                         if(c<0) {
4724                             /* (not) original code point */
4725                             c=~c;
4726                             if(c<=0xffff) {
4727                                 result.append((char)c);
4728                             } else {
4729                                 UTF16.append(result, c);
4730                             }
4731                         } else if(c<=UCaseProps.MAX_STRING_LENGTH) {
4732                             /* mapping already appended to result */
4733                         } else {
4734                             /* append single-code point mapping */
4735                             if(c<=0xffff) {
4736                                 result.append((char)c);
4737                             } else {
4738                                 UTF16.append(result, c);
4739                             }
4740                         }
4741
4742                         if((options&TITLECASE_NO_LOWERCASE)!=0) {
4743                             /* Optionally just copy the rest of the word unchanged. */
4744
4745                             int titleLimit=iter.getCPLimit();
4746                             if(titleLimit<index) {
4747                             // TODO: With Java 5, this would want to be result.append(str, titleLimit, index);
4748                                 String appendStr = str.substring(titleLimit,index);
4749                                 /* Special Case - Dutch IJ Titlecasing */
4750                                 if ( isDutch && c == 0x0049 && appendStr.startsWith("j")) {
4751                                    appendStr = "J" + appendStr.substring(1);
4752                                 }
4753                                 result.append(appendStr);
4754                                 iter.moveToLimit();
4755                                 break;
4756                             }
4757                         } else if((nc=iter.nextCaseMapCP())>=0) {
4758                             if ( isDutch && ( nc == 0x004A ||  nc == 0x006A ) && ( c == 0x0049 ) && ( FirstIJ == true )) {
4759                                 c = 0x004A; /* J */
4760                                 FirstIJ = false;
4761                             } else {
4762                                 /* Normal operation: Lowercase the rest of the word. */
4763                                 c=gCsp.toFullLower(nc, iter, result, locale, locCache);
4764                             }
4765                         } else {
4766                             break;
4767                         }
4768                     }
4769                 }
4770             }
4771
4772             prev=index;
4773         }
4774         return result.toString();
4775     }
4776
4777     /**
4778      * The given character is mapped to its case folding equivalent according
4779      * to UnicodeData.txt and CaseFolding.txt; if the character has no case
4780      * folding equivalent, the character itself is returned.
4781      *
4782      * <p>This function only returns the simple, single-code point case mapping.
4783      * Full case mappings should be used whenever possible because they produce
4784      * better results by working on whole strings.
4785      * They can map to a result string with a different length as appropriate.
4786      * Full case mappings are applied by the case mapping functions
4787      * that take String parameters rather than code points (int).
4788      * See also the User Guide chapter on C/POSIX migration:
4789      * http://www.icu-project.org/userguide/posix.html#case_mappings
4790      *
4791      * @param ch             the character to be converted
4792      * @param defaultmapping Indicates if all mappings defined in
4793      *                       CaseFolding.txt is to be used, otherwise the
4794      *                       mappings for dotted I  and dotless i marked with
4795      *                       'I' in CaseFolding.txt will be skipped.
4796      * @return               the case folding equivalent of the character, if
4797      *                       any; otherwise the character itself.
4798      * @see                  #foldCase(String, boolean)
4799      * @stable ICU 2.1
4800      */
4801     public static int foldCase(int ch, boolean defaultmapping) {
4802         return foldCase(ch, defaultmapping ? FOLD_CASE_DEFAULT : FOLD_CASE_EXCLUDE_SPECIAL_I);
4803     }
4804
4805     /**
4806      * The given string is mapped to its case folding equivalent according to
4807      * UnicodeData.txt and CaseFolding.txt; if any character has no case
4808      * folding equivalent, the character itself is returned.
4809      * "Full", multiple-code point case folding mappings are returned here.
4810      * For "simple" single-code point mappings use the API
4811      * foldCase(int ch, boolean defaultmapping).
4812      * @param str            the String to be converted
4813      * @param defaultmapping Indicates if all mappings defined in
4814      *                       CaseFolding.txt is to be used, otherwise the
4815      *                       mappings for dotted I and dotless i marked with
4816      *                       'I' in CaseFolding.txt will be skipped.
4817      * @return               the case folding equivalent of the character, if
4818      *                       any; otherwise the character itself.
4819      * @see                  #foldCase(int, boolean)
4820      * @stable ICU 2.1
4821      */
4822     public static String foldCase(String str, boolean defaultmapping) {
4823         return foldCase(str, defaultmapping ? FOLD_CASE_DEFAULT : FOLD_CASE_EXCLUDE_SPECIAL_I);
4824     }
4825
4826     /**
4827      * Option value for case folding: use default mappings defined in CaseFolding.txt.
4828      * @stable ICU 2.6
4829      */
4830     public static final int FOLD_CASE_DEFAULT    =      0x0000;
4831     /**
4832      * Option value for case folding: exclude the mappings for dotted I
4833      * and dotless i marked with 'I' in CaseFolding.txt.
4834      * @stable ICU 2.6
4835      */
4836     public static final int FOLD_CASE_EXCLUDE_SPECIAL_I = 0x0001;
4837
4838     /**
4839      * The given character is mapped to its case folding equivalent according
4840      * to UnicodeData.txt and CaseFolding.txt; if the character has no case
4841      * folding equivalent, the character itself is returned.
4842      *
4843      * <p>This function only returns the simple, single-code point case mapping.
4844      * Full case mappings should be used whenever possible because they produce
4845      * better results by working on whole strings.
4846      * They can map to a result string with a different length as appropriate.
4847      * Full case mappings are applied by the case mapping functions
4848      * that take String parameters rather than code points (int).
4849      * See also the User Guide chapter on C/POSIX migration:
4850      * http://www.icu-project.org/userguide/posix.html#case_mappings
4851      *
4852      * @param ch             the character to be converted
4853      * @param options        A bit set for special processing. Currently the recognised options are
4854      *                        FOLD_CASE_EXCLUDE_SPECIAL_I and FOLD_CASE_DEFAULT
4855      * @return               the case folding equivalent of the character, if
4856      *                       any; otherwise the character itself.
4857      * @see #foldCase(String, boolean)
4858      * @stable ICU 2.6
4859      */
4860     public static int foldCase(int ch, int options) {
4861         return gCsp.fold(ch, options);
4862     }
4863
4864     /**
4865      * The given string is mapped to its case folding equivalent according to
4866      * UnicodeData.txt and CaseFolding.txt; if any character has no case
4867      * folding equivalent, the character itself is returned.
4868      * "Full", multiple-code point case folding mappings are returned here.
4869      * For "simple" single-code point mappings use the API
4870      * foldCase(int ch, boolean defaultmapping).
4871      * @param str            the String to be converted
4872      * @param options        A bit set for special processing. Currently the recognised options are
4873      *                        FOLD_CASE_EXCLUDE_SPECIAL_I and FOLD_CASE_DEFAULT
4874      * @return               the case folding equivalent of the character, if
4875      *                       any; otherwise the character itself.
4876      * @see #foldCase(int, boolean)
4877      * @stable ICU 2.6
4878      */
4879     public static final String foldCase(String str, int options) {
4880         StringBuffer result = new StringBuffer(str.length());
4881         int c, i, length;
4882
4883         length = str.length();
4884         for(i=0; i<length;) {
4885             c=UTF16.charAt(str, i);
4886             i+=UTF16.getCharCount(c);
4887             c=gCsp.toFullFolding(c, result, options);
4888
4889             /* decode the result */
4890             if(c<0) {
4891                 /* (not) original code point */
4892                 c=~c;
4893             } else if(c<=UCaseProps.MAX_STRING_LENGTH) {
4894                 /* mapping already appended to result */
4895                 continue;
4896             /* } else { append single-code point mapping */
4897             }
4898             if(c<=0xffff) {
4899                 result.append((char)c);
4900             } else {
4901                 UTF16.append(result, c);
4902             }
4903         }
4904         return result.toString();
4905     }
4906
4907     /**
4908      * Return numeric value of Han code points.
4909      * <br> This returns the value of Han 'numeric' code points,
4910      * including those for zero, ten, hundred, thousand, ten thousand,
4911      * and hundred million.
4912      * This includes both the standard and 'checkwriting'
4913      * characters, the 'big circle' zero character, and the standard
4914      * zero character.
4915      * @param ch code point to query
4916      * @return value if it is a Han 'numeric character,' otherwise return -1.
4917      * @stable ICU 2.4
4918      */
4919     public static int getHanNumericValue(int ch)
4920     {
4921         // TODO: Are these all covered by Unicode numeric value data?
4922         switch(ch)
4923         {
4924         case IDEOGRAPHIC_NUMBER_ZERO_ :
4925         case CJK_IDEOGRAPH_COMPLEX_ZERO_ :
4926         return 0; // Han Zero
4927         case CJK_IDEOGRAPH_FIRST_ :
4928         case CJK_IDEOGRAPH_COMPLEX_ONE_ :
4929         return 1; // Han One
4930         case CJK_IDEOGRAPH_SECOND_ :
4931         case CJK_IDEOGRAPH_COMPLEX_TWO_ :
4932         return 2; // Han Two
4933         case CJK_IDEOGRAPH_THIRD_ :
4934         case CJK_IDEOGRAPH_COMPLEX_THREE_ :
4935         return 3; // Han Three
4936         case CJK_IDEOGRAPH_FOURTH_ :
4937         case CJK_IDEOGRAPH_COMPLEX_FOUR_ :
4938         return 4; // Han Four
4939         case CJK_IDEOGRAPH_FIFTH_ :
4940         case CJK_IDEOGRAPH_COMPLEX_FIVE_ :
4941         return 5; // Han Five
4942         case CJK_IDEOGRAPH_SIXTH_ :
4943         case CJK_IDEOGRAPH_COMPLEX_SIX_ :
4944         return 6; // Han Six
4945         case CJK_IDEOGRAPH_SEVENTH_ :
4946         case CJK_IDEOGRAPH_COMPLEX_SEVEN_ :
4947         return 7; // Han Seven
4948         case CJK_IDEOGRAPH_EIGHTH_ :
4949         case CJK_IDEOGRAPH_COMPLEX_EIGHT_ :
4950         return 8; // Han Eight
4951         case CJK_IDEOGRAPH_NINETH_ :
4952         case CJK_IDEOGRAPH_COMPLEX_NINE_ :
4953         return 9; // Han Nine
4954         case CJK_IDEOGRAPH_TEN_ :
4955         case CJK_IDEOGRAPH_COMPLEX_TEN_ :
4956         return 10;
4957         case CJK_IDEOGRAPH_HUNDRED_ :
4958         case CJK_IDEOGRAPH_COMPLEX_HUNDRED_ :
4959         return 100;
4960         case CJK_IDEOGRAPH_THOUSAND_ :
4961         case CJK_IDEOGRAPH_COMPLEX_THOUSAND_ :
4962         return 1000;
4963         case CJK_IDEOGRAPH_TEN_THOUSAND_ :
4964         return 10000;
4965         case CJK_IDEOGRAPH_HUNDRED_MILLION_ :
4966         return 100000000;
4967         }
4968         return -1; // no value
4969     }
4970
4971     /**
4972      * <p>Gets an iterator for character types, iterating over codepoints.</p>
4973      * Example of use:<br>
4974      * <pre>
4975      * RangeValueIterator iterator = UCharacter.getTypeIterator();
4976      * RangeValueIterator.Element element = new RangeValueIterator.Element();
4977      * while (iterator.next(element)) {
4978      *     System.out.println("Codepoint \\u" +
4979      *                        Integer.toHexString(element.start) +
4980      *                        " to codepoint \\u" +
4981      *                        Integer.toHexString(element.limit - 1) +
4982      *                        " has the character type " +
4983      *                        element.value);
4984      * }
4985      * </pre>
4986      * @return an iterator
4987      * @stable ICU 2.6
4988      */
4989     public static RangeValueIterator getTypeIterator()
4990     {
4991         return new UCharacterTypeIterator(PROPERTY_);
4992     }
4993
4994     /**
4995      * <p>Gets an iterator for character names, iterating over codepoints.</p>
4996      * <p>This API only gets the iterator for the modern, most up-to-date
4997      * Unicode names. For older 1.0 Unicode names use get1_0NameIterator() or
4998      * for extended names use getExtendedNameIterator().</p>
4999      * Example of use:<br>
5000      * <pre>
5001      * ValueIterator iterator = UCharacter.getNameIterator();
5002      * ValueIterator.Element element = new ValueIterator.Element();
5003      * while (iterator.next(element)) {
5004      *     System.out.println("Codepoint \\u" +
5005      *                        Integer.toHexString(element.codepoint) +
5006      *                        " has the name " + (String)element.value);
5007      * }
5008      * </pre>
5009      * <p>The maximal range which the name iterator iterates is from
5010      * UCharacter.MIN_VALUE to UCharacter.MAX_VALUE.</p>
5011      * @return an iterator
5012      * @stable ICU 2.6
5013      */
5014     public static ValueIterator getNameIterator()
5015     {
5016         if(NAME_==null){
5017             throw new RuntimeException("Could not load unames.icu");
5018         }
5019         return new UCharacterNameIterator(NAME_,
5020                       UCharacterNameChoice.UNICODE_CHAR_NAME);
5021     }
5022
5023     /**
5024      * <p>Gets an iterator for character names, iterating over codepoints.</p>
5025      * <p>This API only gets the iterator for the older 1.0 Unicode names.
5026      * For modern, most up-to-date Unicode names use getNameIterator() or
5027      * for extended names use getExtendedNameIterator().</p>
5028      * Example of use:<br>
5029      * <pre>
5030      * ValueIterator iterator = UCharacter.get1_0NameIterator();
5031      * ValueIterator.Element element = new ValueIterator.Element();
5032      * while (iterator.next(element)) {
5033      *     System.out.println("Codepoint \\u" +
5034      *                        Integer.toHexString(element.codepoint) +
5035      *                        " has the name " + (String)element.value);
5036      * }
5037      * </pre>
5038      * <p>The maximal range which the name iterator iterates is from
5039      * @return an iterator
5040      * @stable ICU 2.6
5041      */
5042     public static ValueIterator getName1_0Iterator()
5043     {
5044         if(NAME_==null){
5045             throw new RuntimeException("Could not load unames.icu");
5046         }
5047         return new UCharacterNameIterator(NAME_,
5048                       UCharacterNameChoice.UNICODE_10_CHAR_NAME);
5049     }
5050
5051     /**
5052      * <p>Gets an iterator for character names, iterating over codepoints.</p>
5053      * <p>This API only gets the iterator for the extended names.
5054      * For modern, most up-to-date Unicode names use getNameIterator() or
5055      * for older 1.0 Unicode names use get1_0NameIterator().</p>
5056      * Example of use:<br>
5057      * <pre>
5058      * ValueIterator iterator = UCharacter.getExtendedNameIterator();
5059      * ValueIterator.Element element = new ValueIterator.Element();
5060      * while (iterator.next(element)) {
5061      *     System.out.println("Codepoint \\u" +
5062      *                        Integer.toHexString(element.codepoint) +
5063      *                        " has the name " + (String)element.value);
5064      * }
5065      * </pre>
5066      * <p>The maximal range which the name iterator iterates is from
5067      * @return an iterator
5068      * @stable ICU 2.6
5069      */
5070     public static ValueIterator getExtendedNameIterator()
5071     {
5072         if(NAME_==null){
5073             throw new MissingResourceException("Could not load unames.icu","","");
5074         }
5075         return new UCharacterNameIterator(NAME_,
5076                       UCharacterNameChoice.EXTENDED_CHAR_NAME);
5077     }
5078
5079     /**
5080      * <p>Get the "age" of the code point.</p>
5081      * <p>The "age" is the Unicode version when the code point was first
5082      * designated (as a non-character or for Private Use) or assigned a
5083      * character.
5084      * <p>This can be useful to avoid emitting code points to receiving
5085      * processes that do not accept newer characters.</p>
5086      * <p>The data is from the UCD file DerivedAge.txt.</p>
5087      * @param ch The code point.
5088      * @return the Unicode version number
5089      * @stable ICU 2.6
5090      */
5091     public static VersionInfo getAge(int ch)
5092     {
5093         if (ch < MIN_VALUE || ch > MAX_VALUE) {
5094         throw new IllegalArgumentException("Codepoint out of bounds");
5095         }
5096         return PROPERTY_.getAge(ch);
5097     }
5098
5099     /**
5100      * <p>Check a binary Unicode property for a code point.</p>
5101      * <p>Unicode, especially in version 3.2, defines many more properties
5102      * than the original set in UnicodeData.txt.</p>
5103      * <p>This API is intended to reflect Unicode properties as defined in
5104      * the Unicode Character Database (UCD) and Unicode Technical Reports
5105      * (UTR).</p>
5106      * <p>For details about the properties see
5107      * <a href=http://www.unicode.org/>http://www.unicode.org/</a>.</p>
5108      * <p>For names of Unicode properties see the UCD file
5109      * PropertyAliases.txt.</p>
5110      * <p>This API does not check the validity of the codepoint.</p>
5111      * <p>Important: If ICU is built with UCD files from Unicode versions
5112      * below 3.2, then properties marked with "new" are not or
5113      * not fully available.</p>
5114      * @param ch code point to test.
5115      * @param property selector constant from com.ibm.icu.lang.UProperty,
5116      *        identifies which binary property to check.
5117      * @return true or false according to the binary Unicode property value
5118      *         for ch. Also false if property is out of bounds or if the
5119      *         Unicode version does not have data for the property at all, or
5120      *         not for this code point.
5121      * @see com.ibm.icu.lang.UProperty
5122      * @stable ICU 2.6
5123      */
5124     public static boolean hasBinaryProperty(int ch, int property)
5125     {
5126     if (ch < MIN_VALUE || ch > MAX_VALUE) {
5127         throw new IllegalArgumentException("Codepoint out of bounds");
5128         }
5129         return PROPERTY_.hasBinaryProperty(ch, property);
5130     }
5131
5132     /**
5133      * <p>Check if a code point has the Alphabetic Unicode property.</p>
5134      * <p>Same as UCharacter.hasBinaryProperty(ch, UProperty.ALPHABETIC).</p>
5135      * <p>Different from UCharacter.isLetter(ch)!</p>
5136      * @stable ICU 2.6
5137      * @param ch codepoint to be tested
5138      */
5139     public static boolean isUAlphabetic(int ch)
5140     {
5141     return hasBinaryProperty(ch, UProperty.ALPHABETIC);
5142     }
5143
5144     /**
5145      * <p>Check if a code point has the Lowercase Unicode property.</p>
5146      * <p>Same as UCharacter.hasBinaryProperty(ch, UProperty.LOWERCASE).</p>
5147      * <p>This is different from UCharacter.isLowerCase(ch)!</p>
5148      * @param ch codepoint to be tested
5149      * @stable ICU 2.6
5150      */
5151     public static boolean isULowercase(int ch)
5152     {
5153     return hasBinaryProperty(ch, UProperty.LOWERCASE);
5154     }
5155
5156     /**
5157      * <p>Check if a code point has the Uppercase Unicode property.</p>
5158      * <p>Same as UCharacter.hasBinaryProperty(ch, UProperty.UPPERCASE).</p>
5159      * <p>This is different from UCharacter.isUpperCase(ch)!</p>
5160      * @param ch codepoint to be tested
5161      * @stable ICU 2.6
5162      */
5163     public static boolean isUUppercase(int ch)
5164     {
5165     return hasBinaryProperty(ch, UProperty.UPPERCASE);
5166     }
5167
5168     /**
5169      * <p>Check if a code point has the White_Space Unicode property.</p>
5170      * <p>Same as UCharacter.hasBinaryProperty(ch, UProperty.WHITE_SPACE).</p>
5171      * <p>This is different from both UCharacter.isSpace(ch) and
5172      * UCharacter.isWhitespace(ch)!</p>
5173      * @param ch codepoint to be tested
5174      * @stable ICU 2.6
5175      */
5176     public static boolean isUWhiteSpace(int ch)
5177     {
5178     return hasBinaryProperty(ch, UProperty.WHITE_SPACE);
5179     }
5180
5181
5182     /**
5183      * <p>Gets the property value for an Unicode property type of a code point.
5184      * Also returns binary and mask property values.</p>
5185      * <p>Unicode, especially in version 3.2, defines many more properties than
5186      * the original set in UnicodeData.txt.</p>
5187      * <p>The properties APIs are intended to reflect Unicode properties as
5188      * defined in the Unicode Character Database (UCD) and Unicode Technical
5189      * Reports (UTR). For details about the properties see
5190      * http://www.unicode.org/.</p>
5191      * <p>For names of Unicode properties see the UCD file PropertyAliases.txt.
5192      * </p>
5193      * <pre>
5194      * Sample usage:
5195      * int ea = UCharacter.getIntPropertyValue(c, UProperty.EAST_ASIAN_WIDTH);
5196      * int ideo = UCharacter.getIntPropertyValue(c, UProperty.IDEOGRAPHIC);
5197      * boolean b = (ideo == 1) ? true : false;
5198      * </pre>
5199      * @param ch code point to test.
5200      * @param type UProperty selector constant, identifies which binary
5201      *        property to check. Must be
5202      *        UProperty.BINARY_START &lt;= type &lt; UProperty.BINARY_LIMIT or
5203      *        UProperty.INT_START &lt;= type &lt; UProperty.INT_LIMIT or
5204      *        UProperty.MASK_START &lt;= type &lt; UProperty.MASK_LIMIT.
5205      * @return numeric value that is directly the property value or,
5206      *         for enumerated properties, corresponds to the numeric value of
5207      *         the enumerated constant of the respective property value
5208      *         enumeration type (cast to enum type if necessary).
5209      *         Returns 0 or 1 (for false / true) for binary Unicode properties.
5210      *         Returns a bit-mask for mask properties.
5211      *         Returns 0 if 'type' is out of bounds or if the Unicode version
5212      *         does not have data for the property at all, or not for this code
5213      *         point.
5214      * @see UProperty
5215      * @see #hasBinaryProperty
5216      * @see #getIntPropertyMinValue
5217      * @see #getIntPropertyMaxValue
5218      * @see #getUnicodeVersion
5219      * @stable ICU 2.4
5220      */
5221     public static int getIntPropertyValue(int ch, int type)
5222     {
5223         if (type < UProperty.BINARY_START) {
5224             return 0; // undefined
5225         }
5226         else if (type < UProperty.BINARY_LIMIT) {
5227             return hasBinaryProperty(ch, type) ? 1 : 0;
5228         }
5229         else if (type < UProperty.INT_START) {
5230             return 0; // undefined
5231         }
5232         else if (type < UProperty.INT_LIMIT) {
5233             //int result = 0;
5234             switch (type) {
5235             case UProperty.BIDI_CLASS:
5236                 return getDirection(ch);
5237             case UProperty.BLOCK:
5238                 return UnicodeBlock.idOf(ch);
5239             case UProperty.CANONICAL_COMBINING_CLASS:
5240                 return getCombiningClass(ch);
5241             case UProperty.DECOMPOSITION_TYPE:
5242                 return PROPERTY_.getAdditional(ch, 2)
5243             & DECOMPOSITION_TYPE_MASK_;
5244             case UProperty.EAST_ASIAN_WIDTH:
5245                 return (PROPERTY_.getAdditional(ch, 0)
5246             & EAST_ASIAN_MASK_) >> EAST_ASIAN_SHIFT_;
5247             case UProperty.GENERAL_CATEGORY:
5248                 return getType(ch);
5249             case UProperty.JOINING_GROUP:
5250                 return gBdp.getJoiningGroup(ch);
5251             case UProperty.JOINING_TYPE:
5252                 return gBdp.getJoiningType(ch);
5253             case UProperty.LINE_BREAK:
5254                 return (int)(PROPERTY_.getAdditional(ch, LB_VWORD)& LB_MASK)>>LB_SHIFT;
5255             case UProperty.NUMERIC_TYPE:
5256                 type=getNumericType(PROPERTY_.getProperty(ch));
5257                 if(type>NumericType.NUMERIC) {
5258                     /* keep internal variants of NumericType.NUMERIC from becoming visible */
5259                     type=NumericType.NUMERIC;
5260                 }
5261                 return type;
5262             case UProperty.SCRIPT:
5263                 return UScript.getScript(ch);
5264             case UProperty.HANGUL_SYLLABLE_TYPE:
5265         /* purely algorithmic; hardcode known characters, check for assigned new ones */
5266         if(ch<NormalizerImpl.JAMO_L_BASE) {
5267             /* NA */
5268         } else if(ch<=0x11ff) {
5269             /* Jamo range */
5270             if(ch<=0x115f) {
5271             /* Jamo L range, HANGUL CHOSEONG ... */
5272             if(ch==0x115f || ch<=0x1159 || getType(ch)==UCharacterCategory.OTHER_LETTER) {
5273                 return HangulSyllableType.LEADING_JAMO;
5274             }
5275             } else if(ch<=0x11a7) {
5276             /* Jamo V range, HANGUL JUNGSEONG ... */
5277             if(ch<=0x11a2 || getType(ch)==UCharacterCategory.OTHER_LETTER) {
5278                 return HangulSyllableType.VOWEL_JAMO;
5279             }
5280             } else {
5281             /* Jamo T range */
5282             if(ch<=0x11f9 || getType(ch)==UCharacterCategory.OTHER_LETTER) {
5283                 return HangulSyllableType.TRAILING_JAMO;
5284             }
5285             }
5286         } else if((ch-=NormalizerImpl.HANGUL_BASE)<0) {
5287             /* NA */
5288         } else if(ch<NormalizerImpl.HANGUL_COUNT) {
5289             /* Hangul syllable */
5290             return ch%NormalizerImpl.JAMO_T_COUNT==0 ? HangulSyllableType.LV_SYLLABLE : HangulSyllableType.LVT_SYLLABLE;
5291         }
5292         return 0; /* NA */
5293
5294             case UProperty.NFD_QUICK_CHECK:
5295             case UProperty.NFKD_QUICK_CHECK:
5296             case UProperty.NFC_QUICK_CHECK:
5297             case UProperty.NFKC_QUICK_CHECK:
5298                 return NormalizerImpl.quickCheck(ch, (type-UProperty.NFD_QUICK_CHECK)+2); // 2=UNORM_NFD
5299             case UProperty.LEAD_CANONICAL_COMBINING_CLASS:
5300                 return NormalizerImpl.getFCD16(ch)>>8;
5301             case UProperty.TRAIL_CANONICAL_COMBINING_CLASS:
5302                 return NormalizerImpl.getFCD16(ch)&0xff;
5303             case UProperty.GRAPHEME_CLUSTER_BREAK:
5304                 return (int)(PROPERTY_.getAdditional(ch, 2)& GCB_MASK)>>GCB_SHIFT;
5305             case UProperty.SENTENCE_BREAK:
5306                 return (int)(PROPERTY_.getAdditional(ch, 2)& SB_MASK)>>SB_SHIFT;
5307             case UProperty.WORD_BREAK:
5308                 return (int)(PROPERTY_.getAdditional(ch, 2)& WB_MASK)>>WB_SHIFT;
5309             default:
5310
5311         return 0; /* undefined */
5312             }
5313         } else if (type == UProperty.GENERAL_CATEGORY_MASK) {
5314             return UCharacterProperty.getMask(getType(ch));
5315         }
5316         return 0; // undefined
5317     }
5318     /**
5319      * Returns a string version of the property value.
5320      * @param propertyEnum
5321      * @param codepoint
5322      * @param nameChoice
5323      * @return value as string
5324      * @internal
5325      * @deprecated This API is ICU internal only.
5326      */
5327     public static String getStringPropertyValue(int propertyEnum, int codepoint, int nameChoice) {
5328         // TODO some of these are less efficient, since a string is forced!
5329         if ((propertyEnum >= UProperty.BINARY_START && propertyEnum < UProperty.BINARY_LIMIT) ||
5330                 (propertyEnum >= UProperty.INT_START && propertyEnum < UProperty.INT_LIMIT)) {
5331             return getPropertyValueName(propertyEnum, getIntPropertyValue(codepoint, propertyEnum), nameChoice);
5332         }
5333         if (propertyEnum == UProperty.NUMERIC_VALUE) {
5334                 return String.valueOf(getUnicodeNumericValue(codepoint));
5335         }
5336         // otherwise must be string property
5337         switch (propertyEnum) {
5338         case UProperty.AGE: return getAge(codepoint).toString();
5339         case UProperty.ISO_COMMENT: return getISOComment(codepoint);
5340         case UProperty.BIDI_MIRRORING_GLYPH: return UTF16.valueOf(getMirror(codepoint));
5341         case UProperty.CASE_FOLDING: return foldCase(UTF16.valueOf(codepoint), true);
5342         case UProperty.LOWERCASE_MAPPING: return toLowerCase(UTF16.valueOf(codepoint));
5343         case UProperty.NAME: return getName(codepoint);
5344         case UProperty.SIMPLE_CASE_FOLDING: return UTF16.valueOf(foldCase(codepoint,true));
5345         case UProperty.SIMPLE_LOWERCASE_MAPPING: return UTF16.valueOf(toLowerCase(codepoint));
5346         case UProperty.SIMPLE_TITLECASE_MAPPING: return UTF16.valueOf(toTitleCase(codepoint));
5347         case UProperty.SIMPLE_UPPERCASE_MAPPING: return UTF16.valueOf(toUpperCase(codepoint));
5348         case UProperty.TITLECASE_MAPPING: return toTitleCase(UTF16.valueOf(codepoint),null);
5349         case UProperty.UNICODE_1_NAME: return getName1_0(codepoint);
5350         case UProperty.UPPERCASE_MAPPING: return toUpperCase(UTF16.valueOf(codepoint));
5351         }
5352         throw new IllegalArgumentException("Illegal Property Enum");
5353     }
5354
5355     /**
5356      * Get the minimum value for an integer/binary Unicode property type.
5357      * Can be used together with UCharacter.getIntPropertyMaxValue(int)
5358      * to allocate arrays of com.ibm.icu.text.UnicodeSet or similar.
5359      * @param type UProperty selector constant, identifies which binary
5360      *        property to check. Must be
5361      *        UProperty.BINARY_START &lt;= type &lt; UProperty.BINARY_LIMIT or
5362      *        UProperty.INT_START &lt;= type &lt; UProperty.INT_LIMIT.
5363      * @return Minimum value returned by UCharacter.getIntPropertyValue(int)
5364      *         for a Unicode property. 0 if the property
5365      *         selector 'type' is out of range.
5366      * @see UProperty
5367      * @see #hasBinaryProperty
5368      * @see #getUnicodeVersion
5369      * @see #getIntPropertyMaxValue
5370      * @see #getIntPropertyValue
5371      * @stable ICU 2.4
5372      */
5373     public static int getIntPropertyMinValue(int type)
5374     {
5375
5376         return 0; // undefined; and: all other properties have a minimum value
5377     // of 0
5378     }
5379
5380
5381     /**
5382      * Get the maximum value for an integer/binary Unicode property.
5383      * Can be used together with UCharacter.getIntPropertyMinValue(int)
5384      * to allocate arrays of com.ibm.icu.text.UnicodeSet or similar.
5385      * Examples for min/max values (for Unicode 3.2):
5386      * <ul>
5387      * <li> UProperty.BIDI_CLASS:    0/18 (UCharacterDirection.LEFT_TO_RIGHT/UCharacterDirection.BOUNDARY_NEUTRAL)
5388      * <li> UProperty.SCRIPT:        0/45 (UScript.COMMON/UScript.TAGBANWA)
5389      * <li> UProperty.IDEOGRAPHIC:   0/1  (false/true)
5390      * </ul>
5391      * For undefined UProperty constant values, min/max values will be 0/-1.
5392      * @param type UProperty selector constant, identifies which binary
5393      *        property to check. Must be
5394      *        UProperty.BINARY_START &lt;= type &lt; UProperty.BINARY_LIMIT or
5395      *        UProperty.INT_START &lt;= type &lt; UProperty.INT_LIMIT.
5396      * @return Maximum value returned by u_getIntPropertyValue for a Unicode
5397      *         property. &lt;= 0 if the property selector 'type' is out of range.
5398      * @see UProperty
5399      * @see #hasBinaryProperty
5400      * @see #getUnicodeVersion
5401      * @see #getIntPropertyMaxValue
5402      * @see #getIntPropertyValue
5403      * @stable ICU 2.4
5404      */
5405     public static int getIntPropertyMaxValue(int type)
5406     {
5407         if (type < UProperty.BINARY_START) {
5408             return -1; // undefined
5409         }
5410         else if (type < UProperty.BINARY_LIMIT) {
5411             return 1; // maximum TRUE for all binary properties
5412         }
5413         else if (type < UProperty.INT_START) {
5414             return -1; // undefined
5415         }
5416         else if (type < UProperty.INT_LIMIT) {
5417             switch (type) {
5418             case UProperty.BIDI_CLASS:
5419             case UProperty.JOINING_GROUP:
5420             case UProperty.JOINING_TYPE:
5421                 return gBdp.getMaxValue(type);
5422             case UProperty.BLOCK:
5423                 return (PROPERTY_.getMaxValues(0) & BLOCK_MASK_) >> BLOCK_SHIFT_;
5424             case UProperty.CANONICAL_COMBINING_CLASS:
5425             case UProperty.LEAD_CANONICAL_COMBINING_CLASS:
5426             case UProperty.TRAIL_CANONICAL_COMBINING_CLASS:
5427                 return 0xff; // TODO do we need to be more precise,
5428                              // getting the actual maximum?
5429             case UProperty.DECOMPOSITION_TYPE:
5430                 return PROPERTY_.getMaxValues(2) & DECOMPOSITION_TYPE_MASK_;
5431             case UProperty.EAST_ASIAN_WIDTH:
5432                 return (PROPERTY_.getMaxValues(0) & EAST_ASIAN_MASK_) >> EAST_ASIAN_SHIFT_;
5433             case UProperty.GENERAL_CATEGORY:
5434                 return UCharacterCategory.CHAR_CATEGORY_COUNT - 1;
5435             case UProperty.LINE_BREAK:
5436                 return (PROPERTY_.getMaxValues(LB_VWORD) & LB_MASK) >> LB_SHIFT;
5437             case UProperty.NUMERIC_TYPE:
5438                 return NumericType.COUNT - 1;
5439             case UProperty.SCRIPT:
5440                 return PROPERTY_.getMaxValues(0) & SCRIPT_MASK_;
5441             case UProperty.HANGUL_SYLLABLE_TYPE:
5442                 return HangulSyllableType.COUNT-1;
5443             case UProperty.NFD_QUICK_CHECK:
5444             case UProperty.NFKD_QUICK_CHECK:
5445                 return 1; // YES -- these are never "maybe", only "no" or "yes"
5446             case UProperty.NFC_QUICK_CHECK:
5447             case UProperty.NFKC_QUICK_CHECK:
5448                 return 2; // MAYBE
5449             case UProperty.GRAPHEME_CLUSTER_BREAK:
5450                 return (PROPERTY_.getMaxValues(2) & GCB_MASK) >> GCB_SHIFT;
5451             case UProperty.SENTENCE_BREAK:
5452                 return (PROPERTY_.getMaxValues(2) & SB_MASK) >> SB_SHIFT;
5453             case UProperty.WORD_BREAK:
5454                 return (PROPERTY_.getMaxValues(2) & WB_MASK) >> WB_SHIFT;
5455             default:
5456                 return -1; // undefined
5457             }
5458
5459         }
5460         return -1; // undefined
5461     }
5462
5463     /**
5464      * Provide the java.lang.Character forDigit API, for convenience.
5465      * @stable ICU 3.0
5466      */
5467     public static char forDigit(int digit, int radix) {
5468         return java.lang.Character.forDigit(digit, radix);
5469     }
5470
    // JDK 1.5 API coverage

    /**
     * Cover the JDK 1.5 API, for convenience.
     * Minimum value of a high (lead) surrogate; same value as
     * UTF16.LEAD_SURROGATE_MIN_VALUE.
     * @see UTF16#LEAD_SURROGATE_MIN_VALUE
     * @stable ICU 3.0
     */
    public static final char MIN_HIGH_SURROGATE = UTF16.LEAD_SURROGATE_MIN_VALUE;

    /**
     * Cover the JDK 1.5 API, for convenience.
     * Maximum value of a high (lead) surrogate; same value as
     * UTF16.LEAD_SURROGATE_MAX_VALUE.
     * @see UTF16#LEAD_SURROGATE_MAX_VALUE
     * @stable ICU 3.0
     */
    public static final char MAX_HIGH_SURROGATE = UTF16.LEAD_SURROGATE_MAX_VALUE;

    /**
     * Cover the JDK 1.5 API, for convenience.
     * Minimum value of a low (trail) surrogate; same value as
     * UTF16.TRAIL_SURROGATE_MIN_VALUE.
     * @see UTF16#TRAIL_SURROGATE_MIN_VALUE
     * @stable ICU 3.0
     */
    public static final char MIN_LOW_SURROGATE = UTF16.TRAIL_SURROGATE_MIN_VALUE;

    /**
     * Cover the JDK 1.5 API, for convenience.
     * Maximum value of a low (trail) surrogate; same value as
     * UTF16.TRAIL_SURROGATE_MAX_VALUE.
     * @see UTF16#TRAIL_SURROGATE_MAX_VALUE
     * @stable ICU 3.0
     */
    public static final char MAX_LOW_SURROGATE = UTF16.TRAIL_SURROGATE_MAX_VALUE;

    /**
     * Cover the JDK 1.5 API, for convenience.
     * Minimum value of any surrogate; same value as
     * UTF16.SURROGATE_MIN_VALUE.
     * @see UTF16#SURROGATE_MIN_VALUE
     * @stable ICU 3.0
     */
    public static final char MIN_SURROGATE = UTF16.SURROGATE_MIN_VALUE;

    /**
     * Cover the JDK 1.5 API, for convenience.
     * Maximum value of any surrogate; same value as
     * UTF16.SURROGATE_MAX_VALUE.
     * @see UTF16#SURROGATE_MAX_VALUE
     * @stable ICU 3.0
     */
    public static final char MAX_SURROGATE = UTF16.SURROGATE_MAX_VALUE;

    /**
     * Cover the JDK 1.5 API, for convenience.
     * Minimum value of a supplementary code point; same value as
     * UTF16.SUPPLEMENTARY_MIN_VALUE.
     * @see UTF16#SUPPLEMENTARY_MIN_VALUE
     * @stable ICU 3.0
     */
    public static final int  MIN_SUPPLEMENTARY_CODE_POINT = UTF16.SUPPLEMENTARY_MIN_VALUE;

    /**
     * Cover the JDK 1.5 API, for convenience.
     * Maximum code point value; same value as UTF16.CODEPOINT_MAX_VALUE.
     * @see UTF16#CODEPOINT_MAX_VALUE
     * @stable ICU 3.0
     */
    public static final int  MAX_CODE_POINT = UTF16.CODEPOINT_MAX_VALUE;

    /**
     * Cover the JDK 1.5 API, for convenience.
     * Minimum code point value; same value as UTF16.CODEPOINT_MIN_VALUE.
     * @see UTF16#CODEPOINT_MIN_VALUE
     * @stable ICU 3.0
     */
    public static final int  MIN_CODE_POINT = UTF16.CODEPOINT_MIN_VALUE;
5535
5536     /**
5537      * Cover the JDK 1.5 API, for convenience.
5538      * @param cp the code point to check
5539      * @return true if cp is a valid code point
5540      * @stable ICU 3.0
5541      */
5542     public static final boolean isValidCodePoint(int cp) {
5543         return cp >= 0 && cp <= MAX_CODE_POINT;
5544     }
5545
5546     /**
5547      * Cover the JDK 1.5 API, for convenience.
5548      * @param cp the code point to check
5549      * @return true if cp is a supplementary code point
5550      * @stable ICU 3.0
5551      */
5552     public static final boolean isSupplementaryCodePoint(int cp) {
5553         return cp >= UTF16.SUPPLEMENTARY_MIN_VALUE
5554             && cp <= UTF16.CODEPOINT_MAX_VALUE;
5555     }
5556
5557     /**
5558      * Cover the JDK 1.5 API, for convenience.
5559      * @param ch the char to check
5560      * @return true if ch is a high (lead) surrogate
5561      * @stable ICU 3.0
5562      */
5563     public static boolean isHighSurrogate(char ch) {
5564         return ch >= MIN_HIGH_SURROGATE && ch <= MAX_HIGH_SURROGATE;
5565     }
5566
5567     /**
5568      * Cover the JDK 1.5 API, for convenience.
5569      * @param ch the char to check
5570      * @return true if ch is a low (trail) surrogate
5571      * @stable ICU 3.0
5572      */
5573     public static boolean isLowSurrogate(char ch) {
5574         return ch >= MIN_LOW_SURROGATE && ch <= MAX_LOW_SURROGATE;
5575     }
5576
5577     /**
5578      * Cover the JDK 1.5 API, for convenience.  Return true if the chars
5579      * form a valid surrogate pair.
5580      * @param high the high (lead) char
5581      * @param low the low (trail) char
5582      * @return true if high, low form a surrogate pair
5583      * @stable ICU 3.0
5584      */
5585     public static final boolean isSurrogatePair(char high, char low) {
5586         return isHighSurrogate(high) && isLowSurrogate(low);
5587     }
5588
5589     /**
5590      * Cover the JDK 1.5 API, for convenience.  Return the number of chars needed
5591      * to represent the code point.  This does not check the
5592      * code point for validity.
5593      * @param cp the code point to check
5594      * @return the number of chars needed to represent the code point
5595      * @see UTF16#getCharCount
5596      * @stable ICU 3.0
5597      */
5598     public static int charCount(int cp) {
5599         return UTF16.getCharCount(cp);
5600     }
5601
5602     /**
5603      * Cover the JDK 1.5 API, for convenience.  Return the code point represented by
5604      * the characters.  This does not check the surrogate pair for validity.
5605      * @param high the high (lead) surrogate
5606      * @param low the low (trail) surrogate
5607      * @return the code point formed by the surrogate pair
5608      * @stable ICU 3.0
5609      */
5610     public static final int toCodePoint(char high, char low) {
5611         return UCharacterProperty.getRawSupplementary(high, low);
5612     }
5613
5614     /**
5615      * Cover the JDK 1.5 API, for convenience.  Return the code point at index.
5616      * <br/><b>Note</b>: the semantics of this API is different from the related UTF16
5617      * API.  This examines only the characters at index and index+1.
5618      * @param seq the characters to check
5619      * @param index the index of the first or only char forming the code point
5620      * @return the code point at the index
5621      * @stable ICU 3.0
5622      */
5623 //#if defined(FOUNDATION10) || defined(J2SE13)
5624 //##    public static final int codePointAt(String seq, int index) {
5625 //##        char c1 = seq.charAt(index++);
5626 //##        if (isHighSurrogate(c1)) {
5627 //##            if (index < seq.length()) {
5628 //##                char c2 = seq.charAt(index);
5629 //##                if (isLowSurrogate(c2)) {
5630 //##                    return toCodePoint(c1, c2);
5631 //##                }
5632 //##            }
5633 //##        }
5634 //##        return c1;
5635 //##    }
5636 //##    public static final int codePointAt(StringBuffer seq, int index) {
5637 //##        return codePointAt(seq.toString(), index);
5638 //##    }
5639 //#else
5640 //#if defined(ECLIPSE_FRAGMENT)
5641 //##    public static final int codePointAt(String seq, int index) {
5642 //##        return codePointAt((CharSequence)seq, index);
5643 //##    }
5644 //##    public static final int codePointAt(StringBuffer seq, int index) {
5645 //##        return codePointAt((CharSequence)seq, index);
5646 //##    }
5647 //#endif
5648     public static final int codePointAt(CharSequence seq, int index) {
5649         char c1 = seq.charAt(index++);
5650         if (isHighSurrogate(c1)) {
5651             if (index < seq.length()) {
5652                 char c2 = seq.charAt(index);
5653                 if (isLowSurrogate(c2)) {
5654                     return toCodePoint(c1, c2);
5655                 }
5656             }
5657         }
5658         return c1;
5659     }
5660 //#endif
5661
5662     /**
5663      * Cover the JDK 1.5 API, for convenience.  Return the code point at index.
5664      * <br/><b>Note</b>: the semantics of this API is different from the related UTF16
5665      * API.  This examines only the characters at index and index+1.
5666      * @param text the characters to check
5667      * @param index the index of the first or only char forming the code point
5668      * @return the code point at the index
5669      * @stable ICU 3.0
5670      */
5671     public static final int codePointAt(char[] text, int index) {
5672         char c1 = text[index++];
5673         if (isHighSurrogate(c1)) {
5674             if (index < text.length) {
5675                 char c2 = text[index];
5676                 if (isLowSurrogate(c2)) {
5677                     return toCodePoint(c1, c2);
5678                 }
5679             }
5680         }
5681         return c1;
5682     }
5683
5684     /**
5685      * Cover the JDK 1.5 API, for convenience.  Return the code point at index.
5686      * <br/><b>Note</b>: the semantics of this API is different from the related UTF16
5687      * API.  This examines only the characters at index and index+1.
5688      * @param text the characters to check
5689      * @param index the index of the first or only char forming the code point
5690      * @param limit the limit of the valid text
5691      * @return the code point at the index
5692      * @stable ICU 3.0
5693      */
5694     public static final int codePointAt(char[] text, int index, int limit) {
5695     if (index >= limit || limit > text.length) {
5696         throw new IndexOutOfBoundsException();
5697     }
5698         char c1 = text[index++];
5699         if (isHighSurrogate(c1)) {
5700             if (index < limit) {
5701                 char c2 = text[index];
5702                 if (isLowSurrogate(c2)) {
5703                     return toCodePoint(c1, c2);
5704                 }
5705             }
5706         }
5707         return c1;
5708     }
5709
5710     /**
5711      * Cover the JDK 1.5 API, for convenience.  Return the code point before index.
5712      * <br/><b>Note</b>: the semantics of this API is different from the related UTF16
5713      * API.  This examines only the characters at index-1 and index-2.
5714      * @param seq the characters to check
5715      * @param index the index after the last or only char forming the code point
5716      * @return the code point before the index
5717      * @stable ICU 3.0
5718      */
5719 //#if defined(FOUNDATION10) || defined(J2SE13)
5720 //##    public static final int codePointBefore(String seq, int index) {
5721 //##        char c2 = seq.charAt(--index);
5722 //##        if (isLowSurrogate(c2)) {
5723 //##            if (index > 0) {
5724 //##                char c1 = seq.charAt(--index);
5725 //##                if (isHighSurrogate(c1)) {
5726 //##                    return toCodePoint(c1, c2);
5727 //##                }
5728 //##            }
5729 //##        }
5730 //##        return c2;
5731 //##    }
5732 //##    public static final int codePointBefore(StringBuffer seq, int index) {
5733 //##        return codePointBefore(seq.toString(), index);
5734 //##    }
5735 //#else
5736 //#if defined(ECLIPSE_FRAGMENT)
5737 //##    public static final int codePointBefore(String seq, int index) {
5738 //##        return codePointBefore((CharSequence)seq, index);
5739 //##    }
5740 //##    public static final int codePointBefore(StringBuffer seq, int index) {
5741 //##        return codePointBefore((CharSequence)seq, index);
5742 //##    }
5743 //#endif
5744     public static final int codePointBefore(CharSequence seq, int index) {
5745         char c2 = seq.charAt(--index);
5746         if (isLowSurrogate(c2)) {
5747             if (index > 0) {
5748                 char c1 = seq.charAt(--index);
5749                 if (isHighSurrogate(c1)) {
5750                     return toCodePoint(c1, c2);
5751                 }
5752             }
5753         }
5754         return c2;
5755     }
5756 //#endif
5757
5758     /**
5759      * Cover the JDK 1.5 API, for convenience.  Return the code point before index.
5760      * <br/><b>Note</b>: the semantics of this API is different from the related UTF16
5761      * API.  This examines only the characters at index-1 and index-2.
5762      * @param text the characters to check
5763      * @param index the index after the last or only char forming the code point
5764      * @return the code point before the index
5765      * @stable ICU 3.0
5766      */
5767     public static final int codePointBefore(char[] text, int index) {
5768         char c2 = text[--index];
5769         if (isLowSurrogate(c2)) {
5770             if (index > 0) {
5771                 char c1 = text[--index];
5772                 if (isHighSurrogate(c1)) {
5773                     return toCodePoint(c1, c2);
5774                 }
5775             }
5776         }
5777         return c2;
5778     }
5779
5780     /**
5781      * Cover the JDK 1.5 API, for convenience.  Return the code point before index.
5782      * <br/><b>Note</b>: the semantics of this API is different from the related UTF16
5783      * API.  This examines only the characters at index-1 and index-2.
5784      * @param text the characters to check
5785      * @param index the index after the last or only char forming the code point
5786      * @param limit the start of the valid text
5787      * @return the code point before the index
5788      * @stable ICU 3.0
5789      */
5790     public static final int codePointBefore(char[] text, int index, int limit) {
5791     if (index <= limit || limit < 0) {
5792         throw new IndexOutOfBoundsException();
5793     }
5794         char c2 = text[--index];
5795         if (isLowSurrogate(c2)) {
5796             if (index > limit) {
5797                 char c1 = text[--index];
5798                 if (isHighSurrogate(c1)) {
5799                     return toCodePoint(c1, c2);
5800                 }
5801             }
5802         }
5803         return c2;
5804     }
5805
5806     /**
5807      * Cover the JDK 1.5 API, for convenience.  Writes the chars representing the
5808      * code point into the destination at the given index.
5809      * @param cp the code point to convert
5810      * @param dst the destination array into which to put the char(s) representing the code point
5811      * @param dstIndex the index at which to put the first (or only) char
5812      * @return the count of the number of chars written (1 or 2)
5813      * @throws IllegalArgumentException if cp is not a valid code point
5814      * @stable ICU 3.0
5815      */
5816     public static final int toChars(int cp, char[] dst, int dstIndex) {
5817         if (cp >= 0) {
5818             if (cp < MIN_SUPPLEMENTARY_CODE_POINT) {
5819                 dst[dstIndex] = (char)cp;
5820                 return 1;
5821             }
5822             if (cp <= MAX_CODE_POINT) {
5823                 dst[dstIndex] = UTF16.getLeadSurrogate(cp);
5824                 dst[dstIndex+1] = UTF16.getTrailSurrogate(cp);
5825                 return 2;
5826             }
5827         }
5828         throw new IllegalArgumentException();
5829     }
5830
5831     /**
5832      * Cover the JDK 1.5 API, for convenience.  Returns a char array
5833      * representing the code point.
5834      * @param cp the code point to convert
5835      * @return an array containing the char(s) representing the code point
5836      * @throws IllegalArgumentException if cp is not a valid code point
5837      * @stable ICU 3.0
5838      */
5839     public static final char[] toChars(int cp) {
5840         if (cp >= 0) {
5841             if (cp < MIN_SUPPLEMENTARY_CODE_POINT) {
5842                 return new char[] { (char)cp };
5843             }
5844             if (cp <= MAX_CODE_POINT) {
5845                 return new char[] {
5846                     UTF16.getLeadSurrogate(cp),
5847                     UTF16.getTrailSurrogate(cp)
5848                 };
5849             }
5850         }
5851         throw new IllegalArgumentException();
5852     }
5853
5854     /**
5855      * Cover the JDK API, for convenience.  Return a byte representing the directionality of
5856      * the character.
5857      * <br/><b>Note</b>: Unlike the JDK, this returns DIRECTIONALITY_LEFT_TO_RIGHT for undefined or
5858      * out-of-bounds characters.  <br/><b>Note</b>: The return value must be
5859      * tested using the constants defined in {@link UCharacterEnums.ECharacterDirection}
5860      * since the values are different from the ones defined by <code>java.lang.Character</code>.
5861      * @param cp the code point to check
5862      * @return the directionality of the code point
5863      * @see #getDirection
5864      * @stable ICU 3.0
5865      */
5866     public static byte getDirectionality(int cp)
5867     {
5868         return (byte)getDirection(cp);
5869     }
5870
5871     /**
5872      * Cover the JDK API, for convenience.  Count the number of code points in the range of text.
5873      * @param text the characters to check
5874      * @param start the start of the range
5875      * @param limit the limit of the range
5876      * @return the number of code points in the range
5877      * @stable ICU 3.0
5878      */
5879 //#if defined(FOUNDATION10) || defined(J2SE13)
5880 //##    public static int codePointCount(String text, int start, int limit) {
5881 //##        if (start < 0 || limit < start || limit > text.length()) {
5882 //##            throw new IndexOutOfBoundsException("start (" + start +
5883 //##                ") or limit (" + limit +
5884 //##                ") invalid or out of range 0, " + text.length());
5885 //##        }
5886 //##
5887 //##        int len = limit - start;
5888 //##        while (limit > start) {
5889 //##            char ch = text.charAt(--limit);
5890 //##            while (ch >= MIN_LOW_SURROGATE && ch <= MAX_LOW_SURROGATE && limit > start) {
5891 //##                ch = text.charAt(--limit);
5892 //##                if (ch >= MIN_HIGH_SURROGATE && ch <= MAX_HIGH_SURROGATE) {
5893 //##                    --len;
5894 //##                    break;
5895 //##                }
5896 //##            }
5897 //##        }
5898 //##        return len;
5899 //##    }
5900 //##    public static int codePointCount(StringBuffer text, int start, int limit) {
5901 //##        return codePointCount(text.toString(), start, limit);
5902 //##    }
5903 //#else
5904 //#if defined(ECLIPSE_FRAGMENT)
5905 //##    public static int codePointCount(String text, int start, int limit) {
5906 //##        return codePointCount((CharSequence)text, start, limit);
5907 //##    }
5908 //##    public static int codePointCount(StringBuffer text, int start, int limit) {
5909 //##        return codePointCount((CharSequence)text, start, limit);
5910 //##    }
5911 //#endif
5912     public static int codePointCount(CharSequence text, int start, int limit) {
5913         if (start < 0 || limit < start || limit > text.length()) {
5914             throw new IndexOutOfBoundsException("start (" + start +
5915                 ") or limit (" + limit +
5916                 ") invalid or out of range 0, " + text.length());
5917         }
5918
5919         int len = limit - start;
5920         while (limit > start) {
5921             char ch = text.charAt(--limit);
5922             while (ch >= MIN_LOW_SURROGATE && ch <= MAX_LOW_SURROGATE && limit > start) {
5923                 ch = text.charAt(--limit);
5924                 if (ch >= MIN_HIGH_SURROGATE && ch <= MAX_HIGH_SURROGATE) {
5925                     --len;
5926                     break;
5927                 }
5928             }
5929         }
5930         return len;
5931     }
5932 //#endif
5933
5934     /**
5935      * Cover the JDK API, for convenience.  Count the number of code points in the range of text.
5936      * @param text the characters to check
5937      * @param start the start of the range
5938      * @param limit the limit of the range
5939      * @return the number of code points in the range
5940      * @stable ICU 3.0
5941      */
5942     public static int codePointCount(char[] text, int start, int limit) {
5943         if (start < 0 || limit < start || limit > text.length) {
5944             throw new IndexOutOfBoundsException("start (" + start +
5945                                                 ") or limit (" + limit +
5946                                                 ") invalid or out of range 0, " + text.length);
5947         }
5948
5949         int len = limit - start;
5950         while (limit > start) {
5951             char ch = text[--limit];
5952             while (ch >= MIN_LOW_SURROGATE && ch <= MAX_LOW_SURROGATE && limit > start) {
5953                 ch = text[--limit];
5954                 if (ch >= MIN_HIGH_SURROGATE && ch <= MAX_HIGH_SURROGATE) {
5955                     --len;
5956                     break;
5957                 }
5958             }
5959         }
5960         return len;
5961     }
5962
5963     /**
5964      * Cover the JDK API, for convenience.  Adjust the char index by a code point offset.
5965      * @param text the characters to check
5966      * @param index the index to adjust
5967      * @param codePointOffset the number of code points by which to offset the index
5968      * @return the adjusted index
5969      * @stable ICU 3.0
5970      */
5971 //#if defined(FOUNDATION10) || defined(J2SE13)
5972 //##    public static int offsetByCodePoints(String text, int index, int codePointOffset) {
5973 //##        if (index < 0 || index > text.length()) {
5974 //##            throw new IndexOutOfBoundsException("index ( " + index +
5975 //##                                                ") out of range 0, " + text.length());
5976 //##        }
5977 //##
5978 //##        if (codePointOffset < 0) {
5979 //##            while (++codePointOffset <= 0) {
5980 //##                char ch = text.charAt(--index);
5981 //##                while (ch >= MIN_LOW_SURROGATE && ch <= MAX_LOW_SURROGATE && index > 0) {
5982 //##                    ch = text.charAt(--index);
5983 //##                    if (ch < MIN_HIGH_SURROGATE || ch > MAX_HIGH_SURROGATE) {
5984 //##                        if (++codePointOffset > 0) {
5985 //##                            return index+1;
5986 //##                        }
5987 //##                    }
5988 //##                }
5989 //##            }
5990 //##        } else {
5991 //##            int limit = text.length();
5992 //##            while (--codePointOffset >= 0) {
5993 //##                char ch = text.charAt(index++);
5994 //##                while (ch >= MIN_HIGH_SURROGATE && ch <= MAX_HIGH_SURROGATE && index < limit) {
5995 //##                    ch = text.charAt(index++);
5996 //##                    if (ch < MIN_LOW_SURROGATE || ch > MAX_LOW_SURROGATE) {
5997 //##                        if (--codePointOffset < 0) {
5998 //##                            return index-1;
5999 //##                        }
6000 //##                    }
6001 //##                }
6002 //##            }
6003 //##        }
6004 //##
6005 //##        return index;
6006 //##    }
6007 //##    public static int offsetByCodePoints(StringBuffer text, int index, int codePointOffset) {
6008 //##        return offsetByCodePoints(text.toString(), index, codePointOffset);
6009 //##    }
6010 //#else
6011 //#if defined(ECLIPSE_FRAGMENT)
6012 //##    public static int offsetByCodePoints(String text, int index, int codePointOffset) {
6013 //##        return offsetByCodePoints((CharSequence)text, index, codePointOffset);
6014 //##    }
6015 //##    public static int offsetByCodePoints(StringBuffer text, int index, int codePointOffset) {
6016 //##        return offsetByCodePoints((CharSequence)text, index, codePointOffset);
6017 //##    }
6018 //#endif
6019     public static int offsetByCodePoints(CharSequence text, int index, int codePointOffset) {
6020         if (index < 0 || index > text.length()) {
6021             throw new IndexOutOfBoundsException("index ( " + index +
6022                                                 ") out of range 0, " + text.length());
6023         }
6024
6025         if (codePointOffset < 0) {
6026             while (++codePointOffset <= 0) {
6027                 char ch = text.charAt(--index);
6028                 while (ch >= MIN_LOW_SURROGATE && ch <= MAX_LOW_SURROGATE && index > 0) {
6029                     ch = text.charAt(--index);
6030                     if (ch < MIN_HIGH_SURROGATE || ch > MAX_HIGH_SURROGATE) {
6031                         if (++codePointOffset > 0) {
6032                             return index+1;
6033                         }
6034                     }
6035                 }
6036             }
6037         } else {
6038             int limit = text.length();
6039             while (--codePointOffset >= 0) {
6040                 char ch = text.charAt(index++);
6041                 while (ch >= MIN_HIGH_SURROGATE && ch <= MAX_HIGH_SURROGATE && index < limit) {
6042                     ch = text.charAt(index++);
6043                     if (ch < MIN_LOW_SURROGATE || ch > MAX_LOW_SURROGATE) {
6044                         if (--codePointOffset < 0) {
6045                             return index-1;
6046                         }
6047                     }
6048                 }
6049             }
6050         }
6051
6052         return index;
6053     }
6054 //#endif
6055
6056     /**
6057      * Cover the JDK API, for convenience.  Adjust the char index by a code point offset.
6058      * @param text the characters to check
6059      * @param start the start of the range to check
6060      * @param count the length of the range to check
6061      * @param index the index to adjust
6062      * @param codePointOffset the number of code points by which to offset the index
6063      * @return the adjusted index
6064      * @stable ICU 3.0
6065      */
6066     public static int offsetByCodePoints(char[] text, int start, int count, int index, int codePointOffset) {
6067         int limit = start + count;
6068         if (start < 0 || limit < start || limit > text.length || index < start || index > limit) {
6069             throw new IndexOutOfBoundsException("index ( " + index +
6070                                                 ") out of range " + start +
6071                                                 ", " + limit +
6072                                                 " in array 0, " + text.length);
6073         }
6074
6075         if (codePointOffset < 0) {
6076             while (++codePointOffset <= 0) {
6077                 char ch = text[--index];
6078                 if (index < start) {
6079                     throw new IndexOutOfBoundsException("index ( " + index +
6080                                                         ") < start (" + start +
6081                                                         ")");
6082                 }
6083                 while (ch >= MIN_LOW_SURROGATE && ch <= MAX_LOW_SURROGATE && index > start) {
6084                     ch = text[--index];
6085                     if (ch < MIN_HIGH_SURROGATE || ch > MAX_HIGH_SURROGATE) {
6086                         if (++codePointOffset > 0) {
6087                             return index+1;
6088                         }
6089                     }
6090                 }
6091             }
6092         } else {
6093             while (--codePointOffset >= 0) {
6094                 char ch = text[index++];
6095                 if (index > limit) {
6096                     throw new IndexOutOfBoundsException("index ( " + index +
6097                                                         ") > limit (" + limit +
6098                                                         ")");
6099                 }
6100                 while (ch >= MIN_HIGH_SURROGATE && ch <= MAX_HIGH_SURROGATE && index < limit) {
6101                     ch = text[index++];
6102                     if (ch < MIN_LOW_SURROGATE || ch > MAX_LOW_SURROGATE) {
6103                         if (--codePointOffset < 0) {
6104                             return index-1;
6105                         }
6106                     }
6107                 }
6108             }
6109         }
6110
6111         return index;
6112     }
6113
    // package-private data members --------------------------------------
6115
    /**
     * Database storing the sets of character names.
     * Starts out null and is assigned by the static initializer that follows,
     * from UCharacterName.getInstance().
     */
    static UCharacterName NAME_ = null;

    /**
     * Singleton object encapsulating the imported pnames.icu property aliases.
     * Starts out null and is assigned by the static initializer that follows,
     * from a new UPropertyAliases instance.
     */
    static UPropertyAliases PNAMES_ = null;
6125
6126     // block to initialise name database and unicode 1.0 data
6127     static {
6128         try {
6129             PNAMES_ = new UPropertyAliases();
6130             NAME_ = UCharacterName.getInstance();
6131         } catch (IOException e) {
6132             // e.printStackTrace();
6133             throw new MissingResourceException(e.getMessage(),"","");
6134             //throw new RuntimeException(e.getMessage());
6135             // DONOT throw an exception
6136             // we might be building ICU modularly wothout names.icu and
6137             // pnames.icu
6138         }
6139     }
6140
6141     // private variables -------------------------------------------------
6142
    /**
     * Database storing the sets of character properties.
     * Assigned once, in the static initializer that follows, from
     * UCharacterProperty.getInstance().
     */
    private static final UCharacterProperty PROPERTY_;
    /**
     * For optimization: copies of PROPERTY_'s m_trieIndex_, m_trieData_ and
     * m_trieInitialValue_ members, taken in the static initializer.
     */
    private static final char[] PROPERTY_TRIE_INDEX_;
    private static final char[] PROPERTY_TRIE_DATA_;
    private static final int PROPERTY_INITIAL_VALUE_;

    // Case-mapping properties: UCaseProps.getSingleton(), or
    // UCaseProps.getDummy() when loading throws an IOException.
    private static final UCaseProps gCsp;
    // Bidi/shaping properties: UBiDiProps.getSingleton(), or
    // UBiDiProps.getDummy() when loading throws an IOException.
    private static final UBiDiProps gBdp;
6156
6157     // block to initialise character property database
6158     static
6159     {
6160         try
6161         {
6162             PROPERTY_ = UCharacterProperty.getInstance();
6163             PROPERTY_TRIE_INDEX_ = PROPERTY_.m_trieIndex_;
6164             PROPERTY_TRIE_DATA_ = PROPERTY_.m_trieData_;
6165             PROPERTY_INITIAL_VALUE_ = PROPERTY_.m_trieInitialValue_;
6166         }
6167         catch (Exception e)
6168         {
6169             throw new MissingResourceException(e.getMessage(),"","");
6170         }
6171
6172         /*
6173          * In ICU4J 3.2, most Unicode properties were loaded from uprops.icu.
6174          * ICU4J 3.4 adds ucase.icu for case mapping properties and
6175          * ubidi.icu for bidi/shaping properties and
6176          * removes case/bidi/shaping properties from uprops.icu.
6177          *
6178          * Loading of uprops.icu was always done during class loading of UCharacter.class.
6179          * In order to maintain performance for all such properties,
6180          * ucase.icu and ubidi.icu are also loaded during class loading of UCharacter.class.
6181          * It will not fail if they are missing.
6182          * These data items are loaded early to avoid having to synchronize access to them,
6183          * for thread safety and performance.
6184          *
6185          * We try to load these data items at most once.
6186          * If it works, we use the resulting singleton object.
6187          * If it fails, then we get a dummy object, which always works unless
6188          * we are seriously out of memory.
6189          * After UCharacter.class loading, we have a never-changing pointer to either the
6190          * real singleton or the dummy.
6191          *
6192          * This method is used in Unicode properties APIs that
6193          * do not have a service object and also do not have an error code parameter.
6194          * Other API implementations get the singleton themselves
6195          * (synchronized), store it in the service object, and report errors.
6196          */
6197         UCaseProps csp;
6198         try {
6199             csp=UCaseProps.getSingleton();
6200         } catch(IOException e) {
6201             csp=UCaseProps.getDummy();
6202         }
6203         gCsp=csp;
6204
6205         UBiDiProps bdp;
6206         try {
6207             bdp=UBiDiProps.getSingleton();
6208         } catch(IOException e) {
6209             bdp=UBiDiProps.getDummy();
6210         }
6211         gBdp=bdp;
6212     }
6213
    /**
     * Mask selecting the low 16 bits of a value, i.e. its last
     * (least significant) char
     */
    private static final int LAST_CHAR_MASK_ = 0xFFFF;
6218
6219 //    /**
6220 //     * To get the last byte out from a data type
6221 //     */
6222 //    private static final int LAST_BYTE_MASK_ = 0xFF;
6223 //
6224 //    /**
6225 //     * Shift 16 bits
6226 //     */
6227 //    private static final int SHIFT_16_ = 16;
6228 //
6229 //    /**
6230 //     * Shift 24 bits
6231 //     */
6232 //    private static final int SHIFT_24_ = 24;
6233 //
6234 //    /**
6235 //     * Decimal radix
6236 //     */
6237 //    private static final int DECIMAL_RADIX_ = 10;
6238
    /**
     * No-break space code point (U+00A0)
     */
    private static final int NO_BREAK_SPACE_ = 0xA0;

    /**
     * Figure space code point (U+2007)
     */
    private static final int FIGURE_SPACE_ = 0x2007;

    /**
     * Narrow no-break space code point (U+202F)
     */
    private static final int NARROW_NO_BREAK_SPACE_ = 0x202F;

    /**
     * Ideographic number zero code point (U+3007)
     */
    private static final int IDEOGRAPHIC_NUMBER_ZERO_ = 0x3007;
6258
6259     /**
6260      * CJK Ideograph, First code point
6261      */
6262     private static final int CJK_IDEOGRAPH_FIRST_ = 0x4e00;
6263
6264     /**
6265      * CJK Ideograph, Second code point
6266      */
6267     private static final int CJK_IDEOGRAPH_SECOND_ = 0x4e8c;
6268
6269     /**
6270      * CJK Ideograph, Third code point
6271      */
6272     private static final int CJK_IDEOGRAPH_THIRD_ = 0x4e09;
6273
6274     /**
6275      * CJK Ideograph, Fourth code point
6276      */
6277     private static final int CJK_IDEOGRAPH_FOURTH_ = 0x56d8;
6278
6279     /**
6280      * CJK Ideograph, FIFTH code point
6281      */
6282     private static final int CJK_IDEOGRAPH_FIFTH_ = 0x4e94;
6283
6284     /**
6285      * CJK Ideograph, Sixth code point
6286      */
6287     private static final int CJK_IDEOGRAPH_SIXTH_ = 0x516d;
6288
6289     /**
6290      * CJK Ideograph, Seventh code point
6291      */
6292     private static final int CJK_IDEOGRAPH_SEVENTH_ = 0x4e03;
6293
6294     /**
6295      * CJK Ideograph, Eighth code point
6296      */
6297     private static final int CJK_IDEOGRAPH_EIGHTH_ = 0x516b;
6298
6299     /**
6300      * CJK Ideograph, Nineth code point
6301      */
6302     private static final int CJK_IDEOGRAPH_NINETH_ = 0x4e5d;
6303
    /**
     * Application Program Command code point (U+009F)
     */
    private static final int APPLICATION_PROGRAM_COMMAND_ = 0x009F;

    /**
     * Unit separator code point (U+001F)
     */
    private static final int UNIT_SEPARATOR_ = 0x001F;

    /**
     * Delete code point (U+007F)
     */
    private static final int DELETE_ = 0x007F;
6318     /*
6319      * ISO control character first range upper limit 0x0 - 0x1F
6320      */
6321     //private static final int ISO_CONTROL_FIRST_RANGE_MAX_ = 0x1F;
6322     /**
6323      * Right shift that brings the numeric type field of a property word down to bit 0
6324      */
6325     private static final int NUMERIC_TYPE_SHIFT_ = 5;
6326     /**
6327      * Mask selecting the numeric type field of a property word: three bits, 7..5
6328      */
6329     private static final int NUMERIC_TYPE_MASK_ = 0x7 << NUMERIC_TYPE_SHIFT_;
6330
6331     /* Encoding of fractional and large numeric values */
6332     //private static final int MAX_SMALL_NUMBER=0xff;
6333
6334     private static final int FRACTION_NUM_SHIFT=3;        /* numerator field is bits 7..3 */
6335     private static final int FRACTION_DEN_MASK=7;         /* denominator field is bits 2..0 */
6336
6337     //private static final int FRACTION_MAX_NUM=31;
6338     private static final int FRACTION_DEN_OFFSET=2;       /* field values 0..7 stand for denominators 2..9 */
6339
6340     //private static final int FRACTION_MIN_DEN=FRACTION_DEN_OFFSET;
6341     //private static final int FRACTION_MAX_DEN=FRACTION_MIN_DEN+FRACTION_DEN_MASK;
6342
6343     private static final int LARGE_MANT_SHIFT=4;          /* mantissa field is bits 7..4 */
6344     private static final int LARGE_EXP_MASK=0xf;          /* exponent field is bits 3..0 */
6345     private static final int LARGE_EXP_OFFSET=2;          /* field values 0..15 stand for exponents 2..17 */
6346     private static final int LARGE_EXP_OFFSET_EXTRA=18;   /* same field, extra large exponents 18..33 */
6347
6348     //private static final int LARGE_MIN_EXP=LARGE_EXP_OFFSET;
6349     //private static final int LARGE_MAX_EXP=LARGE_MIN_EXP+LARGE_EXP_MASK;
6350     //private static final int LARGE_MAX_EXP_EXTRA=LARGE_EXP_OFFSET_EXTRA+LARGE_EXP_MASK;
6351
6352     /**
6353      * Han digit characters
6354      */
6355     private static final int CJK_IDEOGRAPH_COMPLEX_ZERO_     = 0x96f6;
6356     private static final int CJK_IDEOGRAPH_COMPLEX_ONE_      = 0x58f9;
6357     private static final int CJK_IDEOGRAPH_COMPLEX_TWO_      = 0x8cb3;
6358     private static final int CJK_IDEOGRAPH_COMPLEX_THREE_    = 0x53c3;
6359     private static final int CJK_IDEOGRAPH_COMPLEX_FOUR_     = 0x8086;
6360     private static final int CJK_IDEOGRAPH_COMPLEX_FIVE_     = 0x4f0d;
6361     private static final int CJK_IDEOGRAPH_COMPLEX_SIX_      = 0x9678;
6362     private static final int CJK_IDEOGRAPH_COMPLEX_SEVEN_    = 0x67d2;
6363     private static final int CJK_IDEOGRAPH_COMPLEX_EIGHT_    = 0x634c;
6364     private static final int CJK_IDEOGRAPH_COMPLEX_NINE_     = 0x7396;
6365     private static final int CJK_IDEOGRAPH_TEN_              = 0x5341;
6366     private static final int CJK_IDEOGRAPH_COMPLEX_TEN_      = 0x62fe;
6367     private static final int CJK_IDEOGRAPH_HUNDRED_          = 0x767e;
6368     private static final int CJK_IDEOGRAPH_COMPLEX_HUNDRED_  = 0x4f70;
6369     private static final int CJK_IDEOGRAPH_THOUSAND_         = 0x5343;
6370     private static final int CJK_IDEOGRAPH_COMPLEX_THOUSAND_ = 0x4edf;
6371     private static final int CJK_IDEOGRAPH_TEN_THOUSAND_     = 0x824c;
6372     private static final int CJK_IDEOGRAPH_HUNDRED_MILLION_  = 0x5104;
6373
6374 //    /**
6375 //     * Zero Width Non Joiner.
6376 //     * Equivalent to icu4c ZWNJ.
6377 //     */
6378 //    private static final int ZERO_WIDTH_NON_JOINER_ = 0x200c;
6379 //    /**
6380 //     * Zero Width Joiner
6381 //     * Equivalent to icu4c ZWJ.
6382 //     */
6383 //    private static final int ZERO_WIDTH_JOINER_ = 0x200d;
6384
6385     /*
6386      * Properties in vector word 2
6387      * Bits
6388      * 31..26   reserved
6389      * 25..20   Line Break
6390      * 19..15   Sentence Break
6391      * 14..10   Word Break
6392      *  9.. 5   Grapheme Cluster Break
6393      *  4.. 0   Decomposition Type
6394      */
6395     private static final int LB_MASK          = 0x03f00000;
6396     private static final int LB_SHIFT         = 20;
6397     private static final int LB_VWORD         = 2;
6398
6399     private static final int SB_MASK          = 0x000f8000;
6400     private static final int SB_SHIFT         = 15;
6401
6402     private static final int WB_MASK          = 0x00007c00;
6403     private static final int WB_SHIFT         = 10;
6404
6405     private static final int GCB_MASK         = 0x000003e0;
6406     private static final int GCB_SHIFT        = 5;
6407
6408     /**
6409      * Integer properties mask for decomposition type.
6410      * Equivalent to icu4c UPROPS_DT_MASK.
6411      */
6412     private static final int DECOMPOSITION_TYPE_MASK_ = 0x0000001f;
6413
6414     /*
6415      * Properties in vector word 0
6416      * Bits
6417      * 31..24   DerivedAge version major/minor one nibble each
6418      * 23..20   reserved
6419      * 19..17   East Asian Width
6420      * 16.. 8   UBlockCode
6421      *  7.. 0   UScriptCode
6422      */
6423
6424     /**
6425      * Integer properties mask and shift values for East Asian cell width.
6426      * Equivalent to icu4c UPROPS_EA_MASK
6427      */
6428     private static final int EAST_ASIAN_MASK_ = 0x000e0000;
6429     /**
6430      * Integer properties mask and shift values for East Asian cell width.
6431      * Equivalent to icu4c UPROPS_EA_SHIFT
6432      */
6433     private static final int EAST_ASIAN_SHIFT_ = 17;
6434     /**
6435      * Integer properties mask and shift values for blocks.
6436      * Equivalent to icu4c UPROPS_BLOCK_MASK
6437      */
6438     private static final int BLOCK_MASK_ = 0x0001ff00;
6439     /**
6440      * Integer properties mask and shift values for blocks.
6441      * Equivalent to icu4c UPROPS_BLOCK_SHIFT
6442      */
6443     private static final int BLOCK_SHIFT_ = 8;
6444     /**
6445      * Integer properties mask and shift values for scripts.
6446      * Equivalent to icu4c UPROPS_SHIFT_MASK
6447      */
6448     private static final int SCRIPT_MASK_ = 0x000000ff;
6449
6450     // private constructor -----------------------------------------------
6451     ///CLOVER:OFF
6452     /**
6453      * Private constructor to prevent instantiation
6454      */
6455     private UCharacter()
6456     {
6457     }
6458     ///CLOVER:ON
6459     // private methods ---------------------------------------------------
6460
6461     /**
6462      * Maps a Latin letter to a digit value: 'A'/'a' gives 10, up to 'Z'/'z'
6463      * which gives 35. Accepts the ASCII letters and the letters in
6464      * U+FF21..U+FF3A and U+FF41..U+FF5A (the full-width forms). Other digit
6465      * characters are not handled here; the caller is expected to test them.
6466      * @param ch character to test
6467      * @return 10..35 for an accepted letter, -1 for every other value of ch
6468      */
6469     private static int getEuropeanDigit(int ch) {
6470         if (0x41 <= ch && ch <= 0x5a) {          // 'A'..'Z'
6471             return ch - 0x41 + 10;
6472         }
6473         if (0x61 <= ch && ch <= 0x7a) {          // 'a'..'z'
6474             return ch - 0x61 + 10;
6475         }
6476         if (0xff21 <= ch && ch <= 0xff3a) {      // U+FF21..U+FF3A
6477             return ch - 0xff21 + 10;
6478         }
6479         if (0xff41 <= ch && ch <= 0xff5a) {      // U+FF41..U+FF5A
6480             return ch - 0xff41 + 10;
6481         }
6482
6483         // anything outside the four letter ranges, negatives included
6484         return -1;
6485     }
6486
6487     /**
6488      * Gets the numeric type of the property argument
6489      * @param props 32 bit property
6490      * @return the numeric type
6491      */
6492     private static int getNumericType(int props)
6493     {
6494         return (props & NUMERIC_TYPE_MASK_) >> NUMERIC_TYPE_SHIFT_;
6495     }
6496
6497     /**
6498      * Gets the property value at the index.
6499      * This is optimized.
6500      * Note this is alittle different from CharTrie the index m_trieData_
6501      * is never negative.
6502      * This is a duplicate of UCharacterProperty.getProperty. For optimization
6503      * purposes, this method calls the trie data directly instead of through
6504      * UCharacterProperty.getProperty.
6505      * @param ch code point whose property value is to be retrieved
6506      * @return property value of code point
6507      * @stable ICU 2.6
6508      */
6509     private static final int getProperty(int ch)
6510     {
6511         if (ch < UTF16.LEAD_SURROGATE_MIN_VALUE
6512             || (ch > UTF16.LEAD_SURROGATE_MAX_VALUE
6513                 && ch < UTF16.SUPPLEMENTARY_MIN_VALUE)) {
6514             // BMP codepoint 0000..D7FF or DC00..FFFF
6515             try { // using try for ch < 0 is faster than using an if statement
6516                 return PROPERTY_TRIE_DATA_[
6517                               (PROPERTY_TRIE_INDEX_[ch >> 5] << 2)
6518                               + (ch & 0x1f)];
6519             } catch (ArrayIndexOutOfBoundsException e) {
6520                 return PROPERTY_INITIAL_VALUE_;
6521             }
6522         }
6523         if (ch <= UTF16.LEAD_SURROGATE_MAX_VALUE) {
6524             // lead surrogate D800..DBFF
6525             return PROPERTY_TRIE_DATA_[
6526                               (PROPERTY_TRIE_INDEX_[(0x2800 >> 5) + (ch >> 5)] << 2)
6527                               + (ch & 0x1f)];
6528         }
6529         // for optimization
6530         if (ch <= UTF16.CODEPOINT_MAX_VALUE) {
6531             // supplementary code point 10000..10FFFF
6532             // look at the construction of supplementary characters
6533             // trail forms the ends of it.
6534             return PROPERTY_.m_trie_.getSurrogateValue(
6535                                       UTF16.getLeadSurrogate(ch),
6536                                       (char)(ch & 0x3ff));
6537         }
6538         // return m_dataOffset_ if there is an error, in this case we return
6539         // the default value: m_initialValue_
6540         // we cannot assume that m_initialValue_ is at offset 0
6541         // this is for optimization.
6542         return PROPERTY_INITIAL_VALUE_;
6543     }
6544 }