jars/icu4j-4_2_1-src/src/com/ibm/icu/text/UTF16.java

   1 //##header\r
   2 /**\r
   3  *******************************************************************************\r
   4  * Copyright (C) 1996-2009, International Business Machines Corporation and    *\r
   5  * others. All Rights Reserved.                                                *\r
   6  *******************************************************************************\r
   7  */\r
   8 \r
   9 package com.ibm.icu.text;\r
  10 \r
  11 import com.ibm.icu.impl.UCharacterProperty;\r
  12 import com.ibm.icu.impl.NormalizerImpl;\r
  13 \r
  14 /**\r
  15  * <p>\r
  16  * Standalone utility class providing UTF16 character conversions and indexing conversions.\r
  17  * </p>\r
  18  * <p>\r
   19  * Code that uses strings alone rarely needs modification. By design, UTF-16 does not allow overlap,\r
  20  * so searching for strings is a safe operation. Similarly, concatenation is always safe.\r
  21  * Substringing is safe if the start and end are both on UTF-32 boundaries. In normal code, the\r
  22  * values for start and end are on those boundaries, since they arose from operations like\r
  23  * searching. If not, the nearest UTF-32 boundaries can be determined using <code>bounds()</code>.\r
  24  * </p>\r
  25  * <strong>Examples:</strong>\r
  26  * <p>\r
  27  * The following examples illustrate use of some of these methods.\r
  28  * \r
  29  * <pre>\r
  30  * // iteration forwards: Original\r
  31  * for (int i = 0; i &lt; s.length(); ++i) {\r
  32  *     char ch = s.charAt(i);\r
  33  *     doSomethingWith(ch);\r
  34  * }\r
  35  * \r
  36  * // iteration forwards: Changes for UTF-32\r
  37  * int ch;\r
  38  * for (int i = 0; i &lt; s.length(); i += UTF16.getCharCount(ch)) {\r
  39  *     ch = UTF16.charAt(s, i);\r
  40  *     doSomethingWith(ch);\r
  41  * }\r
  42  * \r
  43  * // iteration backwards: Original\r
  44  * for (int i = s.length() - 1; i &gt;= 0; --i) {\r
  45  *     char ch = s.charAt(i);\r
  46  *     doSomethingWith(ch);\r
  47  * }\r
  48  * \r
  49  * // iteration backwards: Changes for UTF-32\r
  50  * int ch;\r
   51  * for (int i = s.length() - 1; i &gt;= 0; i -= UTF16.getCharCount(ch)) {\r
  52  *     ch = UTF16.charAt(s, i);\r
  53  *     doSomethingWith(ch);\r
  54  * }\r
  55  * </pre>\r
  56  * \r
  57  * <strong>Notes:</strong>\r
  58  * <ul>\r
  59  * <li> <strong>Naming:</strong> For clarity, High and Low surrogates are called <code>Lead</code>\r
  60  * and <code>Trail</code> in the API, which gives a better sense of their ordering in a string.\r
  61  * <code>offset16</code> and <code>offset32</code> are used to distinguish offsets to UTF-16\r
  62  * boundaries vs offsets to UTF-32 boundaries. <code>int char32</code> is used to contain UTF-32\r
  63  * characters, as opposed to <code>char16</code>, which is a UTF-16 code unit. </li>\r
  64  * <li> <strong>Roundtripping Offsets:</strong> You can always roundtrip from a UTF-32 offset to a\r
  65  * UTF-16 offset and back. Because of the difference in structure, you can roundtrip from a UTF-16\r
  66  * offset to a UTF-32 offset and back if and only if <code>bounds(string, offset16) != TRAIL</code>.\r
  67  * </li>\r
  68  * <li> <strong>Exceptions:</strong> The error checking will throw an exception if indices are out\r
   69  * of bounds. Other than that, all methods will behave reasonably, even if unmatched surrogates\r
  70  * or out-of-bounds UTF-32 values are present. <code>UCharacter.isLegal()</code> can be used to\r
  71  * check for validity if desired. </li>\r
  72  * <li> <strong>Unmatched Surrogates:</strong> If the string contains unmatched surrogates, then\r
  73  * these are counted as one UTF-32 value. This matches their iteration behavior, which is vital. It\r
  74  * also matches common display practice as missing glyphs (see the Unicode Standard Section 5.4,\r
  75  * 5.5). </li>\r
  76  * <li> <strong>Optimization:</strong> The method implementations may need optimization if the\r
   77  * compiler doesn't fold static final methods. Since surrogate pairs will form an exceedingly small\r
  78  * percentage of all the text in the world, the singleton case should always be optimized for. </li>\r
  79  * </ul>\r
  80  * \r
  81  * @author Mark Davis, with help from Markus Scherer\r
  82  * @stable ICU 2.1\r
  83  */\r
  84 \r
  85 public final class UTF16 {\r
  86     // public variables ---------------------------------------------------\r
  87 \r
  88     /**\r
  89      * Value returned in <code><a href="#bounds(java.lang.String, int)">\r
  90      * bounds()</a></code>.\r
  91      * These values are chosen specifically so that it actually represents the position of the\r
  92      * character [offset16 - (value >> 2), offset16 + (value & 3)]\r
  93      * \r
  94      * @stable ICU 2.1\r
  95      */\r
  96     public static final int SINGLE_CHAR_BOUNDARY = 1, LEAD_SURROGATE_BOUNDARY = 2,\r
  97             TRAIL_SURROGATE_BOUNDARY = 5;\r
  98 \r
  99     /**\r
 100      * The lowest Unicode code point value.\r
 101      * \r
 102      * @stable ICU 2.1\r
 103      */\r
 104     public static final int CODEPOINT_MIN_VALUE = 0;\r
 105 \r
 106     /**\r
 107      * The highest Unicode code point value (scalar value) according to the Unicode Standard.\r
 108      * \r
 109      * @stable ICU 2.1\r
 110      */\r
 111     public static final int CODEPOINT_MAX_VALUE = 0x10ffff;\r
 112 \r
 113     /**\r
 114      * The minimum value for Supplementary code points\r
 115      * \r
 116      * @stable ICU 2.1\r
 117      */\r
 118     public static final int SUPPLEMENTARY_MIN_VALUE = 0x10000;\r
 119 \r
 120     /**\r
 121      * Lead surrogate minimum value\r
 122      * \r
 123      * @stable ICU 2.1\r
 124      */\r
 125     public static final int LEAD_SURROGATE_MIN_VALUE = 0xD800;\r
 126 \r
 127     /**\r
 128      * Trail surrogate minimum value\r
 129      * \r
 130      * @stable ICU 2.1\r
 131      */\r
 132     public static final int TRAIL_SURROGATE_MIN_VALUE = 0xDC00;\r
 133 \r
 134     /**\r
 135      * Lead surrogate maximum value\r
 136      * \r
 137      * @stable ICU 2.1\r
 138      */\r
 139     public static final int LEAD_SURROGATE_MAX_VALUE = 0xDBFF;\r
 140 \r
 141     /**\r
 142      * Trail surrogate maximum value\r
 143      * \r
 144      * @stable ICU 2.1\r
 145      */\r
 146     public static final int TRAIL_SURROGATE_MAX_VALUE = 0xDFFF;\r
 147 \r
 148     /**\r
 149      * Surrogate minimum value\r
 150      * \r
 151      * @stable ICU 2.1\r
 152      */\r
 153     public static final int SURROGATE_MIN_VALUE = LEAD_SURROGATE_MIN_VALUE;\r
 154 \r
 155     /**\r
 156      * Maximum surrogate value\r
 157      * \r
 158      * @stable ICU 2.1\r
 159      */\r
 160     public static final int SURROGATE_MAX_VALUE = TRAIL_SURROGATE_MAX_VALUE;\r
 161 \r
 162     /**\r
 163      * Lead surrogate bitmask\r
 164      */\r
 165     private static final int LEAD_SURROGATE_BITMASK = 0xFFFFFC00;\r
 166 \r
 167     /**\r
 168      * Trail surrogate bitmask\r
 169      */\r
 170     private static final int TRAIL_SURROGATE_BITMASK = 0xFFFFFC00;\r
 171 \r
 172     /**\r
 173      * Surrogate bitmask\r
 174      */\r
 175     private static final int SURROGATE_BITMASK = 0xFFFFF800;\r
 176 \r
 177     /**\r
 178      * Lead surrogate bits\r
 179      */\r
 180     private static final int LEAD_SURROGATE_BITS = 0xD800;\r
 181 \r
 182     /**\r
 183      * Trail surrogate bits\r
 184      */\r
 185     private static final int TRAIL_SURROGATE_BITS = 0xDC00;\r
 186 \r
 187     /**\r
 188      * Surrogate bits\r
 189      */\r
 190     private static final int SURROGATE_BITS = 0xD800;\r
 191 \r
 192     // constructor --------------------------------------------------------\r
 193 \r
 194     // /CLOVER:OFF\r
 195     /**\r
 196      * Prevent instance from being created.\r
 197      */\r
 198     private UTF16() {\r
 199     }\r
 200 \r
 201     // /CLOVER:ON\r
 202     // public method ------------------------------------------------------\r
 203 \r
 204     /**\r
 205      * Extract a single UTF-32 value from a string. Used when iterating forwards or backwards (with\r
 206      * <code>UTF16.getCharCount()</code>, as well as random access. If a validity check is\r
 207      * required, use <code><a href="../lang/UCharacter.html#isLegal(char)">\r
 208      * UCharacter.isLegal()</a></code>\r
 209      * on the return value. If the char retrieved is part of a surrogate pair, its supplementary\r
 210      * character will be returned. If a complete supplementary character is not found the incomplete\r
 211      * character will be returned\r
 212      * \r
 213      * @param source\r
 214      *            array of UTF-16 chars\r
 215      * @param offset16\r
 216      *            UTF-16 offset to the start of the character.\r
 217      * @return UTF-32 value for the UTF-32 value that contains the char at offset16. The boundaries\r
 218      *         of that codepoint are the same as in <code>bounds32()</code>.\r
 219      * @exception IndexOutOfBoundsException\r
 220      *                thrown if offset16 is out of bounds.\r
 221      * @stable ICU 2.1\r
 222      */\r
 223     public static int charAt(String source, int offset16) {\r
 224         char single = source.charAt(offset16);\r
 225         if (single < LEAD_SURROGATE_MIN_VALUE) {\r
 226             return single;\r
 227         }\r
 228         return _charAt(source, offset16, single);\r
 229     }\r
 230 \r
 231     private static int _charAt(String source, int offset16, char single) {\r
 232         if (single > TRAIL_SURROGATE_MAX_VALUE) {\r
 233             return single;\r
 234         }\r
 235 \r
 236         // Convert the UTF-16 surrogate pair if necessary.\r
 237         // For simplicity in usage, and because the frequency of pairs is\r
 238         // low, look both directions.\r
 239 \r
 240         if (single <= LEAD_SURROGATE_MAX_VALUE) {\r
 241             ++offset16;\r
 242             if (source.length() != offset16) {\r
 243                 char trail = source.charAt(offset16);\r
 244                 if (trail >= TRAIL_SURROGATE_MIN_VALUE && trail <= TRAIL_SURROGATE_MAX_VALUE) {\r
 245                     return UCharacterProperty.getRawSupplementary(single, trail);\r
 246                 }\r
 247             }\r
 248         } else {\r
 249             --offset16;\r
 250             if (offset16 >= 0) {\r
 251                 // single is a trail surrogate so\r
 252                 char lead = source.charAt(offset16);\r
 253                 if (lead >= LEAD_SURROGATE_MIN_VALUE && lead <= LEAD_SURROGATE_MAX_VALUE) {\r
 254                     return UCharacterProperty.getRawSupplementary(lead, single);\r
 255                 }\r
 256             }\r
 257         }\r
 258         return single; // return unmatched surrogate\r
 259     }\r
 260 \r
 261 //#if defined(FOUNDATION10) || defined(J2SE13)\r
 262 //#else\r
 263     /**\r
 264      * Extract a single UTF-32 value from a string. Used when iterating forwards or backwards (with\r
 265      * <code>UTF16.getCharCount()</code>, as well as random access. If a validity check is\r
 266      * required, use <code><a href="../lang/UCharacter.html#isLegal(char)">\r
 267      * UCharacter.isLegal()</a></code>\r
 268      * on the return value. If the char retrieved is part of a surrogate pair, its supplementary\r
 269      * character will be returned. If a complete supplementary character is not found the incomplete\r
 270      * character will be returned\r
 271      * \r
 272      * @param source\r
 273      *            array of UTF-16 chars\r
 274      * @param offset16\r
 275      *            UTF-16 offset to the start of the character.\r
 276      * @return UTF-32 value for the UTF-32 value that contains the char at offset16. The boundaries\r
 277      *         of that codepoint are the same as in <code>bounds32()</code>.\r
 278      * @exception IndexOutOfBoundsException\r
 279      *                thrown if offset16 is out of bounds.\r
 280      * @stable ICU 2.1\r
 281      */\r
 282     public static int charAt(CharSequence source, int offset16) {\r
 283         char single = source.charAt(offset16);\r
 284         if (single < UTF16.LEAD_SURROGATE_MIN_VALUE) {\r
 285             return single;\r
 286         }\r
 287         return _charAt(source, offset16, single);\r
 288     }\r
 289 \r
 290     private static int _charAt(CharSequence source, int offset16, char single) {\r
 291         if (single > UTF16.TRAIL_SURROGATE_MAX_VALUE) {\r
 292             return single;\r
 293         }\r
 294 \r
 295         // Convert the UTF-16 surrogate pair if necessary.\r
 296         // For simplicity in usage, and because the frequency of pairs is\r
 297         // low, look both directions.\r
 298 \r
 299         if (single <= UTF16.LEAD_SURROGATE_MAX_VALUE) {\r
 300             ++offset16;\r
 301             if (source.length() != offset16) {\r
 302                 char trail = source.charAt(offset16);\r
 303                 if (trail >= UTF16.TRAIL_SURROGATE_MIN_VALUE\r
 304                         && trail <= UTF16.TRAIL_SURROGATE_MAX_VALUE) {\r
 305                     return UCharacterProperty.getRawSupplementary(single, trail);\r
 306                 }\r
 307             }\r
 308         } else {\r
 309             --offset16;\r
 310             if (offset16 >= 0) {\r
 311                 // single is a trail surrogate so\r
 312                 char lead = source.charAt(offset16);\r
 313                 if (lead >= UTF16.LEAD_SURROGATE_MIN_VALUE\r
 314                         && lead <= UTF16.LEAD_SURROGATE_MAX_VALUE) {\r
 315                     return UCharacterProperty.getRawSupplementary(lead, single);\r
 316                 }\r
 317             }\r
 318         }\r
 319         return single; // return unmatched surrogate\r
 320     }\r
 321 \r
 322 //#endif\r
 323 \r
 324     /**\r
 325      * Extract a single UTF-32 value from a string. Used when iterating forwards or backwards (with\r
 326      * <code>UTF16.getCharCount()</code>, as well as random access. If a validity check is\r
 327      * required, use <code><a href="../lang/UCharacter.html#isLegal(char)">UCharacter.isLegal()\r
 328      * </a></code>\r
 329      * on the return value. If the char retrieved is part of a surrogate pair, its supplementary\r
 330      * character will be returned. If a complete supplementary character is not found the incomplete\r
 331      * character will be returned\r
 332      * \r
 333      * @param source\r
 334      *            UTF-16 chars string buffer\r
 335      * @param offset16\r
 336      *            UTF-16 offset to the start of the character.\r
 337      * @return UTF-32 value for the UTF-32 value that contains the char at offset16. The boundaries\r
 338      *         of that codepoint are the same as in <code>bounds32()</code>.\r
 339      * @exception IndexOutOfBoundsException\r
 340      *                thrown if offset16 is out of bounds.\r
 341      * @stable ICU 2.1\r
 342      */\r
 343     public static int charAt(StringBuffer source, int offset16) {\r
 344         if (offset16 < 0 || offset16 >= source.length()) {\r
 345             throw new StringIndexOutOfBoundsException(offset16);\r
 346         }\r
 347 \r
 348         char single = source.charAt(offset16);\r
 349         if (!isSurrogate(single)) {\r
 350             return single;\r
 351         }\r
 352 \r
 353         // Convert the UTF-16 surrogate pair if necessary.\r
 354         // For simplicity in usage, and because the frequency of pairs is\r
 355         // low, look both directions.\r
 356 \r
 357         if (single <= LEAD_SURROGATE_MAX_VALUE) {\r
 358             ++offset16;\r
 359             if (source.length() != offset16) {\r
 360                 char trail = source.charAt(offset16);\r
 361                 if (isTrailSurrogate(trail))\r
 362                     return UCharacterProperty.getRawSupplementary(single, trail);\r
 363             }\r
 364         } else {\r
 365             --offset16;\r
 366             if (offset16 >= 0) {\r
 367                 // single is a trail surrogate so\r
 368                 char lead = source.charAt(offset16);\r
 369                 if (isLeadSurrogate(lead)) {\r
 370                     return UCharacterProperty.getRawSupplementary(lead, single);\r
 371                 }\r
 372             }\r
 373         }\r
 374         return single; // return unmatched surrogate\r
 375     }\r
 376 \r
 377     /**\r
 378      * Extract a single UTF-32 value from a substring. Used when iterating forwards or backwards\r
 379      * (with <code>UTF16.getCharCount()</code>, as well as random access. If a validity check is\r
 380      * required, use <code><a href="../lang/UCharacter.html#isLegal(char)">UCharacter.isLegal()\r
 381      * </a></code>\r
 382      * on the return value. If the char retrieved is part of a surrogate pair, its supplementary\r
 383      * character will be returned. If a complete supplementary character is not found the incomplete\r
 384      * character will be returned\r
 385      * \r
 386      * @param source\r
 387      *            array of UTF-16 chars\r
 388      * @param start\r
 389      *            offset to substring in the source array for analyzing\r
 390      * @param limit\r
 391      *            offset to substring in the source array for analyzing\r
 392      * @param offset16\r
 393      *            UTF-16 offset relative to start\r
 394      * @return UTF-32 value for the UTF-32 value that contains the char at offset16. The boundaries\r
 395      *         of that codepoint are the same as in <code>bounds32()</code>.\r
 396      * @exception IndexOutOfBoundsException\r
 397      *                thrown if offset16 is not within the range of start and limit.\r
 398      * @stable ICU 2.1\r
 399      */\r
 400     public static int charAt(char source[], int start, int limit, int offset16) {\r
 401         offset16 += start;\r
 402         if (offset16 < start || offset16 >= limit) {\r
 403             throw new ArrayIndexOutOfBoundsException(offset16);\r
 404         }\r
 405 \r
 406         char single = source[offset16];\r
 407         if (!isSurrogate(single)) {\r
 408             return single;\r
 409         }\r
 410 \r
 411         // Convert the UTF-16 surrogate pair if necessary.\r
 412         // For simplicity in usage, and because the frequency of pairs is\r
 413         // low, look both directions.\r
 414         if (single <= LEAD_SURROGATE_MAX_VALUE) {\r
 415             offset16++;\r
 416             if (offset16 >= limit) {\r
 417                 return single;\r
 418             }\r
 419             char trail = source[offset16];\r
 420             if (isTrailSurrogate(trail)) {\r
 421                 return UCharacterProperty.getRawSupplementary(single, trail);\r
 422             }\r
 423         } else { // isTrailSurrogate(single), so\r
 424             if (offset16 == start) {\r
 425                 return single;\r
 426             }\r
 427             offset16--;\r
 428             char lead = source[offset16];\r
 429             if (isLeadSurrogate(lead))\r
 430                 return UCharacterProperty.getRawSupplementary(lead, single);\r
 431         }\r
 432         return single; // return unmatched surrogate\r
 433     }\r
 434 \r
 435     /**\r
 436      * Extract a single UTF-32 value from a string. Used when iterating forwards or backwards (with\r
 437      * <code>UTF16.getCharCount()</code>, as well as random access. If a validity check is\r
 438      * required, use <code><a href="../lang/UCharacter.html#isLegal(char)">UCharacter.isLegal()\r
 439      * </a></code>\r
 440      * on the return value. If the char retrieved is part of a surrogate pair, its supplementary\r
 441      * character will be returned. If a complete supplementary character is not found the incomplete\r
 442      * character will be returned\r
 443      * \r
 444      * @param source\r
 445      *            UTF-16 chars string buffer\r
 446      * @param offset16\r
 447      *            UTF-16 offset to the start of the character.\r
 448      * @return UTF-32 value for the UTF-32 value that contains the char at offset16. The boundaries\r
 449      *         of that codepoint are the same as in <code>bounds32()</code>.\r
 450      * @exception IndexOutOfBoundsException\r
 451      *                thrown if offset16 is out of bounds.\r
 452      * @stable ICU 2.1\r
 453      */\r
 454     public static int charAt(Replaceable source, int offset16) {\r
 455         if (offset16 < 0 || offset16 >= source.length()) {\r
 456             throw new StringIndexOutOfBoundsException(offset16);\r
 457         }\r
 458 \r
 459         char single = source.charAt(offset16);\r
 460         if (!isSurrogate(single)) {\r
 461             return single;\r
 462         }\r
 463 \r
 464         // Convert the UTF-16 surrogate pair if necessary.\r
 465         // For simplicity in usage, and because the frequency of pairs is\r
 466         // low, look both directions.\r
 467 \r
 468         if (single <= LEAD_SURROGATE_MAX_VALUE) {\r
 469             ++offset16;\r
 470             if (source.length() != offset16) {\r
 471                 char trail = source.charAt(offset16);\r
 472                 if (isTrailSurrogate(trail))\r
 473                     return UCharacterProperty.getRawSupplementary(single, trail);\r
 474             }\r
 475         } else {\r
 476             --offset16;\r
 477             if (offset16 >= 0) {\r
 478                 // single is a trail surrogate so\r
 479                 char lead = source.charAt(offset16);\r
 480                 if (isLeadSurrogate(lead)) {\r
 481                     return UCharacterProperty.getRawSupplementary(lead, single);\r
 482                 }\r
 483             }\r
 484         }\r
 485         return single; // return unmatched surrogate\r
 486     }\r
 487 \r
 488     /**\r
 489      * Determines how many chars this char32 requires. If a validity check is required, use <code>\r
 490      * <a href="../lang/UCharacter.html#isLegal(char)">isLegal()</a></code>\r
 491      * on char32 before calling.\r
 492      * \r
 493      * @param char32\r
 494      *            the input codepoint.\r
 495      * @return 2 if is in supplementary space, otherwise 1.\r
 496      * @stable ICU 2.1\r
 497      */\r
 498     public static int getCharCount(int char32) {\r
 499         if (char32 < SUPPLEMENTARY_MIN_VALUE) {\r
 500             return 1;\r
 501         }\r
 502         return 2;\r
 503     }\r
 504 \r
 505     /**\r
 506      * Returns the type of the boundaries around the char at offset16. Used for random access.\r
 507      * \r
 508      * @param source\r
 509      *            text to analyse\r
 510      * @param offset16\r
 511      *            UTF-16 offset\r
 512      * @return\r
 513      *            <ul>\r
 514      *            <li> SINGLE_CHAR_BOUNDARY : a single char; the bounds are [offset16, offset16+1]\r
 515      *            <li> LEAD_SURROGATE_BOUNDARY : a surrogate pair starting at offset16; the bounds\r
 516      *            are [offset16, offset16 + 2]\r
 517      *            <li> TRAIL_SURROGATE_BOUNDARY : a surrogate pair starting at offset16 - 1; the\r
 518      *            bounds are [offset16 - 1, offset16 + 1]\r
 519      *            </ul>\r
 520      *            For bit-twiddlers, the return values for these are chosen so that the boundaries\r
 521      *            can be gotten by: [offset16 - (value >> 2), offset16 + (value & 3)].\r
 522      * @exception IndexOutOfBoundsException\r
 523      *                if offset16 is out of bounds.\r
 524      * @stable ICU 2.1\r
 525      */\r
 526     public static int bounds(String source, int offset16) {\r
 527         char ch = source.charAt(offset16);\r
 528         if (isSurrogate(ch)) {\r
 529             if (isLeadSurrogate(ch)) {\r
 530                 if (++offset16 < source.length() && isTrailSurrogate(source.charAt(offset16))) {\r
 531                     return LEAD_SURROGATE_BOUNDARY;\r
 532                 }\r
 533             } else {\r
 534                 // isTrailSurrogate(ch), so\r
 535                 --offset16;\r
 536                 if (offset16 >= 0 && isLeadSurrogate(source.charAt(offset16))) {\r
 537                     return TRAIL_SURROGATE_BOUNDARY;\r
 538                 }\r
 539             }\r
 540         }\r
 541         return SINGLE_CHAR_BOUNDARY;\r
 542     }\r
 543 \r
 544     /**\r
 545      * Returns the type of the boundaries around the char at offset16. Used for random access.\r
 546      * \r
 547      * @param source\r
 548      *            string buffer to analyse\r
 549      * @param offset16\r
 550      *            UTF16 offset\r
 551      * @return\r
 552      *            <ul>\r
 553      *            <li> SINGLE_CHAR_BOUNDARY : a single char; the bounds are [offset16, offset16 + 1]\r
 554      *            <li> LEAD_SURROGATE_BOUNDARY : a surrogate pair starting at offset16; the bounds\r
 555      *            are [offset16, offset16 + 2]\r
 556      *            <li> TRAIL_SURROGATE_BOUNDARY : a surrogate pair starting at offset16 - 1; the\r
 557      *            bounds are [offset16 - 1, offset16 + 1]\r
 558      *            </ul>\r
 559      *            For bit-twiddlers, the return values for these are chosen so that the boundaries\r
 560      *            can be gotten by: [offset16 - (value >> 2), offset16 + (value & 3)].\r
 561      * @exception IndexOutOfBoundsException\r
 562      *                if offset16 is out of bounds.\r
 563      * @stable ICU 2.1\r
 564      */\r
 565     public static int bounds(StringBuffer source, int offset16) {\r
 566         char ch = source.charAt(offset16);\r
 567         if (isSurrogate(ch)) {\r
 568             if (isLeadSurrogate(ch)) {\r
 569                 if (++offset16 < source.length() && isTrailSurrogate(source.charAt(offset16))) {\r
 570                     return LEAD_SURROGATE_BOUNDARY;\r
 571                 }\r
 572             } else {\r
 573                 // isTrailSurrogate(ch), so\r
 574                 --offset16;\r
 575                 if (offset16 >= 0 && isLeadSurrogate(source.charAt(offset16))) {\r
 576                     return TRAIL_SURROGATE_BOUNDARY;\r
 577                 }\r
 578             }\r
 579         }\r
 580         return SINGLE_CHAR_BOUNDARY;\r
 581     }\r
 582 \r
 583     /**\r
 584      * Returns the type of the boundaries around the char at offset16. Used for random access. Note\r
 585      * that the boundaries are determined with respect to the subarray, hence the char array\r
 586      * {0xD800, 0xDC00} has the result SINGLE_CHAR_BOUNDARY for start = offset16 = 0 and limit = 1.\r
 587      * \r
 588      * @param source\r
 589      *            char array to analyse\r
 590      * @param start\r
 591      *            offset to substring in the source array for analyzing\r
 592      * @param limit\r
 593      *            offset to substring in the source array for analyzing\r
 594      * @param offset16\r
 595      *            UTF16 offset relative to start\r
 596      * @return\r
 597      *            <ul>\r
 598      *            <li> SINGLE_CHAR_BOUNDARY : a single char; the bounds are\r
 599      *            <li> LEAD_SURROGATE_BOUNDARY : a surrogate pair starting at offset16; the bounds\r
 600      *            are [offset16, offset16 + 2]\r
 601      *            <li> TRAIL_SURROGATE_BOUNDARY : a surrogate pair starting at offset16 - 1; the\r
 602      *            bounds are [offset16 - 1, offset16 + 1]\r
 603      *            </ul>\r
 604      *            For bit-twiddlers, the boundary values for these are chosen so that the boundaries\r
 605      *            can be gotten by: [offset16 - (boundvalue >> 2), offset16 + (boundvalue & 3)].\r
 606      * @exception IndexOutOfBoundsException\r
 607      *                if offset16 is not within the range of start and limit.\r
 608      * @stable ICU 2.1\r
 609      */\r
 610     public static int bounds(char source[], int start, int limit, int offset16) {\r
 611         offset16 += start;\r
 612         if (offset16 < start || offset16 >= limit) {\r
 613             throw new ArrayIndexOutOfBoundsException(offset16);\r
 614         }\r
 615         char ch = source[offset16];\r
 616         if (isSurrogate(ch)) {\r
 617             if (isLeadSurrogate(ch)) {\r
 618                 ++offset16;\r
 619                 if (offset16 < limit && isTrailSurrogate(source[offset16])) {\r
 620                     return LEAD_SURROGATE_BOUNDARY;\r
 621                 }\r
 622             } else { // isTrailSurrogate(ch), so\r
 623                 --offset16;\r
 624                 if (offset16 >= start && isLeadSurrogate(source[offset16])) {\r
 625                     return TRAIL_SURROGATE_BOUNDARY;\r
 626                 }\r
 627             }\r
 628         }\r
 629         return SINGLE_CHAR_BOUNDARY;\r
 630     }\r
 631 \r
 632     /**\r
 633      * Determines whether the code value is a surrogate.\r
 634      * \r
 635      * @param char16\r
 636      *            the input character.\r
 637      * @return true iff the input character is a surrogate.\r
 638      * @stable ICU 2.1\r
 639      */\r
 640     public static boolean isSurrogate(char char16) {\r
 641         return (char16 & SURROGATE_BITMASK) == SURROGATE_BITS;\r
 642     }\r
 643 \r
 644     /**\r
 645      * Determines whether the character is a trail surrogate.\r
 646      * \r
 647      * @param char16\r
 648      *            the input character.\r
 649      * @return true iff the input character is a trail surrogate.\r
 650      * @stable ICU 2.1\r
 651      */\r
 652     public static boolean isTrailSurrogate(char char16) {\r
 653         return (char16 & TRAIL_SURROGATE_BITMASK) == TRAIL_SURROGATE_BITS;\r
 654     }\r
 655 \r
 656     /**\r
 657      * Determines whether the character is a lead surrogate.\r
 658      * \r
 659      * @param char16\r
 660      *            the input character.\r
 661      * @return true iff the input character is a lead surrogate\r
 662      * @stable ICU 2.1\r
 663      */\r
 664     public static boolean isLeadSurrogate(char char16) {\r
 665         return (char16 & LEAD_SURROGATE_BITMASK) == LEAD_SURROGATE_BITS;\r
 666     }\r
 667 \r
 668     /**\r
 669      * Returns the lead surrogate. If a validity check is required, use\r
 670      * <code><a href="../lang/UCharacter.html#isLegal(char)">isLegal()</a></code> on char32\r
 671      * before calling.\r
 672      * \r
 673      * @param char32\r
 674      *            the input character.\r
 675      * @return lead surrogate if the getCharCount(ch) is 2; <br>\r
 676      *         and 0 otherwise (note: 0 is not a valid lead surrogate).\r
 677      * @stable ICU 2.1\r
 678      */\r
 679     public static char getLeadSurrogate(int char32) {\r
 680         if (char32 >= SUPPLEMENTARY_MIN_VALUE) {\r
 681             return (char) (LEAD_SURROGATE_OFFSET_ + (char32 >> LEAD_SURROGATE_SHIFT_));\r
 682         }\r
 683         return 0;\r
 684     }\r
 685 \r
 686     /**\r
 687      * Returns the trail surrogate. If a validity check is required, use\r
 688      * <code><a href="../lang/UCharacter.html#isLegal(char)">isLegal()</a></code> on char32\r
 689      * before calling.\r
 690      * \r
 691      * @param char32\r
 692      *            the input character.\r
 693      * @return the trail surrogate if the getCharCount(ch) is 2; <br>\r
 694      *         otherwise the character itself\r
 695      * @stable ICU 2.1\r
 696      */\r
 697     public static char getTrailSurrogate(int char32) {\r
 698         if (char32 >= SUPPLEMENTARY_MIN_VALUE) {\r
 699             return (char) (TRAIL_SURROGATE_MIN_VALUE + (char32 & TRAIL_SURROGATE_MASK_));\r
 700         }\r
 701         return (char) char32;\r
 702     }\r
 703 \r
 704     /**\r
 705      * Convenience method corresponding to String.valueOf(char). Returns a one or two char string\r
 706      * containing the UTF-32 value in UTF16 format. If a validity check is required, use <a\r
 707      * href="../lang/UCharacter.html#isLegal(char)">isLegal()</a></code> on char32 before calling.\r
 708      * \r
 709      * @param char32\r
 710      *            the input character.\r
 711      * @return string value of char32 in UTF16 format\r
 712      * @exception IllegalArgumentException\r
 713      *                thrown if char32 is a invalid codepoint.\r
 714      * @stable ICU 2.1\r
 715      */\r
 716     public static String valueOf(int char32) {\r
 717         if (char32 < CODEPOINT_MIN_VALUE || char32 > CODEPOINT_MAX_VALUE) {\r
 718             throw new IllegalArgumentException("Illegal codepoint");\r
 719         }\r
 720         return toString(char32);\r
 721     }\r
 722 \r
 723     /**\r
 724      * Convenience method corresponding to String.valueOf(codepoint at offset16). Returns a one or\r
 725      * two char string containing the UTF-32 value in UTF16 format. If offset16 indexes a surrogate\r
 726      * character, the whole supplementary codepoint will be returned. If a validity check is\r
 727      * required, use <a href="../lang/UCharacter.html#isLegal(char)">isLegal()</a></code> on the\r
 728      * codepoint at offset16 before calling. The result returned will be a newly created String\r
 729      * obtained by calling source.substring(..) with the appropriate indexes.\r
 730      * \r
 731      * @param source\r
 732      *            the input string.\r
 733      * @param offset16\r
 734      *            the UTF16 index to the codepoint in source\r
 735      * @return string value of char32 in UTF16 format\r
 736      * @stable ICU 2.1\r
 737      */\r
 738     public static String valueOf(String source, int offset16) {\r
 739         switch (bounds(source, offset16)) {\r
 740         case LEAD_SURROGATE_BOUNDARY:\r
 741             return source.substring(offset16, offset16 + 2);\r
 742         case TRAIL_SURROGATE_BOUNDARY:\r
 743             return source.substring(offset16 - 1, offset16 + 1);\r
 744         default:\r
 745             return source.substring(offset16, offset16 + 1);\r
 746         }\r
 747     }\r
 748 \r
 749     /**\r
 750      * Convenience method corresponding to StringBuffer.valueOf(codepoint at offset16). Returns a\r
 751      * one or two char string containing the UTF-32 value in UTF16 format. If offset16 indexes a\r
 752      * surrogate character, the whole supplementary codepoint will be returned. If a validity check\r
 753      * is required, use <a href="../lang/UCharacter.html#isLegal(char)">isLegal()</a></code> on\r
 754      * the codepoint at offset16 before calling. The result returned will be a newly created String\r
 755      * obtained by calling source.substring(..) with the appropriate indexes.\r
 756      * \r
 757      * @param source\r
 758      *            the input string buffer.\r
 759      * @param offset16\r
 760      *            the UTF16 index to the codepoint in source\r
 761      * @return string value of char32 in UTF16 format\r
 762      * @stable ICU 2.1\r
 763      */\r
 764     public static String valueOf(StringBuffer source, int offset16) {\r
 765         switch (bounds(source, offset16)) {\r
 766         case LEAD_SURROGATE_BOUNDARY:\r
 767             return source.substring(offset16, offset16 + 2);\r
 768         case TRAIL_SURROGATE_BOUNDARY:\r
 769             return source.substring(offset16 - 1, offset16 + 1);\r
 770         default:\r
 771             return source.substring(offset16, offset16 + 1);\r
 772         }\r
 773     }\r
 774 \r
 775     /**\r
 776      * Convenience method. Returns a one or two char string containing the UTF-32 value in UTF16\r
 777      * format. If offset16 indexes a surrogate character, the whole supplementary codepoint will be\r
 778      * returned, except when either the leading or trailing surrogate character lies out of the\r
 779      * specified subarray. In the latter case, only the surrogate character within bounds will be\r
 780      * returned. If a validity check is required, use <a\r
 781      * href="../lang/UCharacter.html#isLegal(char)">isLegal()</a></code> on the codepoint at\r
 782      * offset16 before calling. The result returned will be a newly created String containing the\r
 783      * relevant characters.\r
 784      * \r
 785      * @param source\r
 786      *            the input char array.\r
 787      * @param start\r
 788      *            start index of the subarray\r
 789      * @param limit\r
 790      *            end index of the subarray\r
 791      * @param offset16\r
 792      *            the UTF16 index to the codepoint in source relative to start\r
 793      * @return string value of char32 in UTF16 format\r
 794      * @stable ICU 2.1\r
 795      */\r
 796     public static String valueOf(char source[], int start, int limit, int offset16) {\r
 797         switch (bounds(source, start, limit, offset16)) {\r
 798         case LEAD_SURROGATE_BOUNDARY:\r
 799             return new String(source, start + offset16, 2);\r
 800         case TRAIL_SURROGATE_BOUNDARY:\r
 801             return new String(source, start + offset16 - 1, 2);\r
 802         }\r
 803         return new String(source, start + offset16, 1);\r
 804     }\r
 805 \r
 806     /**\r
 807      * Returns the UTF-16 offset that corresponds to a UTF-32 offset. Used for random access. See\r
 808      * the <a name="_top_">class description</a> for notes on roundtripping.\r
 809      * \r
 810      * @param source\r
 811      *            the UTF-16 string\r
 812      * @param offset32\r
 813      *            UTF-32 offset\r
 814      * @return UTF-16 offset\r
 815      * @exception IndexOutOfBoundsException\r
 816      *                if offset32 is out of bounds.\r
 817      * @stable ICU 2.1\r
 818      */\r
 819     public static int findOffsetFromCodePoint(String source, int offset32) {\r
 820         char ch;\r
 821         int size = source.length(), result = 0, count = offset32;\r
 822         if (offset32 < 0 || offset32 > size) {\r
 823             throw new StringIndexOutOfBoundsException(offset32);\r
 824         }\r
 825         while (result < size && count > 0) {\r
 826             ch = source.charAt(result);\r
 827             if (isLeadSurrogate(ch) && ((result + 1) < size)\r
 828                     && isTrailSurrogate(source.charAt(result + 1))) {\r
 829                 result++;\r
 830             }\r
 831 \r
 832             count--;\r
 833             result++;\r
 834         }\r
 835         if (count != 0) {\r
 836             throw new StringIndexOutOfBoundsException(offset32);\r
 837         }\r
 838         return result;\r
 839     }\r
 840 \r
 841     /**\r
 842      * Returns the UTF-16 offset that corresponds to a UTF-32 offset. Used for random access. See\r
 843      * the <a name="_top_">class description</a> for notes on roundtripping.\r
 844      * \r
 845      * @param source\r
 846      *            the UTF-16 string buffer\r
 847      * @param offset32\r
 848      *            UTF-32 offset\r
 849      * @return UTF-16 offset\r
 850      * @exception IndexOutOfBoundsException\r
 851      *                if offset32 is out of bounds.\r
 852      * @stable ICU 2.1\r
 853      */\r
 854     public static int findOffsetFromCodePoint(StringBuffer source, int offset32) {\r
 855         char ch;\r
 856         int size = source.length(), result = 0, count = offset32;\r
 857         if (offset32 < 0 || offset32 > size) {\r
 858             throw new StringIndexOutOfBoundsException(offset32);\r
 859         }\r
 860         while (result < size && count > 0) {\r
 861             ch = source.charAt(result);\r
 862             if (isLeadSurrogate(ch) && ((result + 1) < size)\r
 863                     && isTrailSurrogate(source.charAt(result + 1))) {\r
 864                 result++;\r
 865             }\r
 866 \r
 867             count--;\r
 868             result++;\r
 869         }\r
 870         if (count != 0) {\r
 871             throw new StringIndexOutOfBoundsException(offset32);\r
 872         }\r
 873         return result;\r
 874     }\r
 875 \r
 876     /**\r
 877      * Returns the UTF-16 offset that corresponds to a UTF-32 offset. Used for random access. See\r
 878      * the <a name="_top_">class description</a> for notes on roundtripping.\r
 879      * \r
 880      * @param source\r
 881      *            the UTF-16 char array whose substring is to be analysed\r
 882      * @param start\r
 883      *            offset of the substring to be analysed\r
 884      * @param limit\r
 885      *            offset of the substring to be analysed\r
 886      * @param offset32\r
 887      *            UTF-32 offset relative to start\r
 888      * @return UTF-16 offset relative to start\r
 889      * @exception IndexOutOfBoundsException\r
 890      *                if offset32 is out of bounds.\r
 891      * @stable ICU 2.1\r
 892      */\r
 893     public static int findOffsetFromCodePoint(char source[], int start, int limit, int offset32) {\r
 894         char ch;\r
 895         int result = start, count = offset32;\r
 896         if (offset32 > limit - start) {\r
 897             throw new ArrayIndexOutOfBoundsException(offset32);\r
 898         }\r
 899         while (result < limit && count > 0) {\r
 900             ch = source[result];\r
 901             if (isLeadSurrogate(ch) && ((result + 1) < limit)\r
 902                     && isTrailSurrogate(source[result + 1])) {\r
 903                 result++;\r
 904             }\r
 905 \r
 906             count--;\r
 907             result++;\r
 908         }\r
 909         if (count != 0) {\r
 910             throw new ArrayIndexOutOfBoundsException(offset32);\r
 911         }\r
 912         return result - start;\r
 913     }\r
 914 \r
 915     /**\r
 916      * Returns the UTF-32 offset corresponding to the first UTF-32 boundary at or after the given\r
 917      * UTF-16 offset. Used for random access. See the <a name="_top_">class description</a> for\r
 918      * notes on roundtripping.<br>\r
 919      * <i>Note: If the UTF-16 offset is into the middle of a surrogate pair, then the UTF-32 offset\r
 920      * of the <strong>lead</strong> of the pair is returned. </i>\r
 921      * <p>\r
 922      * To find the UTF-32 length of a string, use:\r
 923      * \r
 924      * <pre>\r
 925      * len32 = countCodePoint(source, source.length());\r
 926      * </pre>\r
 927      * \r
 928      * </p>\r
 929      * <p>\r
 930      * \r
 931      * @param source\r
 932      *            text to analyse\r
 933      * @param offset16\r
 934      *            UTF-16 offset < source text length.\r
 935      * @return UTF-32 offset\r
 936      * @exception IndexOutOfBoundsException\r
 937      *                if offset16 is out of bounds.\r
 938      * @stable ICU 2.1\r
 939      */\r
 940     public static int findCodePointOffset(String source, int offset16) {\r
 941         if (offset16 < 0 || offset16 > source.length()) {\r
 942             throw new StringIndexOutOfBoundsException(offset16);\r
 943         }\r
 944 \r
 945         int result = 0;\r
 946         char ch;\r
 947         boolean hadLeadSurrogate = false;\r
 948 \r
 949         for (int i = 0; i < offset16; ++i) {\r
 950             ch = source.charAt(i);\r
 951             if (hadLeadSurrogate && isTrailSurrogate(ch)) {\r
 952                 hadLeadSurrogate = false; // count valid trail as zero\r
 953             } else {\r
 954                 hadLeadSurrogate = isLeadSurrogate(ch);\r
 955                 ++result; // count others as 1\r
 956             }\r
 957         }\r
 958 \r
 959         if (offset16 == source.length()) {\r
 960             return result;\r
 961         }\r
 962 \r
 963         // end of source being the less significant surrogate character\r
 964         // shift result back to the start of the supplementary character\r
 965         if (hadLeadSurrogate && (isTrailSurrogate(source.charAt(offset16)))) {\r
 966             result--;\r
 967         }\r
 968 \r
 969         return result;\r
 970     }\r
 971 \r
 972     /**\r
 973      * Returns the UTF-32 offset corresponding to the first UTF-32 boundary at the given UTF-16\r
 974      * offset. Used for random access. See the <a name="_top_">class description</a> for notes on\r
 975      * roundtripping.<br>\r
 976      * <i>Note: If the UTF-16 offset is into the middle of a surrogate pair, then the UTF-32 offset\r
 977      * of the <strong>lead</strong> of the pair is returned. </i>\r
 978      * <p>\r
 979      * To find the UTF-32 length of a string, use:\r
 980      * \r
 981      * <pre>\r
 982      * len32 = countCodePoint(source);\r
 983      * </pre>\r
 984      * \r
 985      * </p>\r
 986      * <p>\r
 987      * \r
 988      * @param source\r
 989      *            text to analyse\r
 990      * @param offset16\r
 991      *            UTF-16 offset < source text length.\r
 992      * @return UTF-32 offset\r
 993      * @exception IndexOutOfBoundsException\r
 994      *                if offset16 is out of bounds.\r
 995      * @stable ICU 2.1\r
 996      */\r
 997     public static int findCodePointOffset(StringBuffer source, int offset16) {\r
 998         if (offset16 < 0 || offset16 > source.length()) {\r
 999             throw new StringIndexOutOfBoundsException(offset16);\r
1000         }\r
1001 \r
1002         int result = 0;\r
1003         char ch;\r
1004         boolean hadLeadSurrogate = false;\r
1005 \r
1006         for (int i = 0; i < offset16; ++i) {\r
1007             ch = source.charAt(i);\r
1008             if (hadLeadSurrogate && isTrailSurrogate(ch)) {\r
1009                 hadLeadSurrogate = false; // count valid trail as zero\r
1010             } else {\r
1011                 hadLeadSurrogate = isLeadSurrogate(ch);\r
1012                 ++result; // count others as 1\r
1013             }\r
1014         }\r
1015 \r
1016         if (offset16 == source.length()) {\r
1017             return result;\r
1018         }\r
1019 \r
1020         // end of source being the less significant surrogate character\r
1021         // shift result back to the start of the supplementary character\r
1022         if (hadLeadSurrogate && (isTrailSurrogate(source.charAt(offset16)))) {\r
1023             result--;\r
1024         }\r
1025 \r
1026         return result;\r
1027     }\r
1028 \r
1029     /**\r
1030      * Returns the UTF-32 offset corresponding to the first UTF-32 boundary at the given UTF-16\r
1031      * offset. Used for random access. See the <a name="_top_">class description</a> for notes on\r
1032      * roundtripping.<br>\r
1033      * <i>Note: If the UTF-16 offset is into the middle of a surrogate pair, then the UTF-32 offset\r
1034      * of the <strong>lead</strong> of the pair is returned. </i>\r
1035      * <p>\r
1036      * To find the UTF-32 length of a substring, use:\r
1037      * \r
1038      * <pre>\r
1039      * len32 = countCodePoint(source, start, limit);\r
1040      * </pre>\r
1041      * \r
1042      * </p>\r
1043      * <p>\r
1044      * \r
1045      * @param source\r
1046      *            text to analyse\r
1047      * @param start\r
1048      *            offset of the substring\r
1049      * @param limit\r
1050      *            offset of the substring\r
1051      * @param offset16\r
1052      *            UTF-16 relative to start\r
1053      * @return UTF-32 offset relative to start\r
1054      * @exception IndexOutOfBoundsException\r
1055      *                if offset16 is not within the range of start and limit.\r
1056      * @stable ICU 2.1\r
1057      */\r
1058     public static int findCodePointOffset(char source[], int start, int limit, int offset16) {\r
1059         offset16 += start;\r
1060         if (offset16 > limit) {\r
1061             throw new StringIndexOutOfBoundsException(offset16);\r
1062         }\r
1063 \r
1064         int result = 0;\r
1065         char ch;\r
1066         boolean hadLeadSurrogate = false;\r
1067 \r
1068         for (int i = start; i < offset16; ++i) {\r
1069             ch = source[i];\r
1070             if (hadLeadSurrogate && isTrailSurrogate(ch)) {\r
1071                 hadLeadSurrogate = false; // count valid trail as zero\r
1072             } else {\r
1073                 hadLeadSurrogate = isLeadSurrogate(ch);\r
1074                 ++result; // count others as 1\r
1075             }\r
1076         }\r
1077 \r
1078         if (offset16 == limit) {\r
1079             return result;\r
1080         }\r
1081 \r
1082         // end of source being the less significant surrogate character\r
1083         // shift result back to the start of the supplementary character\r
1084         if (hadLeadSurrogate && (isTrailSurrogate(source[offset16]))) {\r
1085             result--;\r
1086         }\r
1087 \r
1088         return result;\r
1089     }\r
1090 \r
1091     /**\r
1092      * Append a single UTF-32 value to the end of a StringBuffer. If a validity check is required,\r
1093      * use <a href="../lang/UCharacter.html#isLegal(char)">isLegal()</a></code> on char32 before\r
1094      * calling.\r
1095      * \r
1096      * @param target\r
1097      *            the buffer to append to\r
1098      * @param char32\r
1099      *            value to append.\r
1100      * @return the updated StringBuffer\r
1101      * @exception IllegalArgumentException\r
1102      *                thrown when char32 does not lie within the range of the Unicode codepoints\r
1103      * @stable ICU 2.1\r
1104      */\r
1105     public static StringBuffer append(StringBuffer target, int char32) {\r
1106         // Check for irregular values\r
1107         if (char32 < CODEPOINT_MIN_VALUE || char32 > CODEPOINT_MAX_VALUE) {\r
1108             throw new IllegalArgumentException("Illegal codepoint: " + Integer.toHexString(char32));\r
1109         }\r
1110 \r
1111         // Write the UTF-16 values\r
1112         if (char32 >= SUPPLEMENTARY_MIN_VALUE) {\r
1113             target.append(getLeadSurrogate(char32));\r
1114             target.append(getTrailSurrogate(char32));\r
1115         } else {\r
1116             target.append((char) char32);\r
1117         }\r
1118         return target;\r
1119     }\r
1120 \r
1121     /**\r
1122      * Cover JDK 1.5 APIs. Append the code point to the buffer and return the buffer as a\r
1123      * convenience.\r
1124      * \r
1125      * @param target\r
1126      *            the buffer to append to\r
1127      * @param cp\r
1128      *            the code point to append\r
1129      * @return the updated StringBuffer\r
1130      * @throws IllegalArgumentException\r
1131      *             if cp is not a valid code point\r
1132      * @stable ICU 3.0\r
1133      */\r
1134     public static StringBuffer appendCodePoint(StringBuffer target, int cp) {\r
1135         return append(target, cp);\r
1136     }\r
1137 \r
1138     /**\r
1139      * Adds a codepoint to offset16 position of the argument char array.\r
1140      * \r
1141      * @param target\r
1142      *            char array to be append with the new code point\r
1143      * @param limit\r
1144      *            UTF16 offset which the codepoint will be appended.\r
1145      * @param char32\r
1146      *            code point to be appended\r
1147      * @return offset after char32 in the array.\r
1148      * @exception IllegalArgumentException\r
1149      *                thrown if there is not enough space for the append, or when char32 does not\r
1150      *                lie within the range of the Unicode codepoints.\r
1151      * @stable ICU 2.1\r
1152      */\r
1153     public static int append(char[] target, int limit, int char32) {\r
1154         // Check for irregular values\r
1155         if (char32 < CODEPOINT_MIN_VALUE || char32 > CODEPOINT_MAX_VALUE) {\r
1156             throw new IllegalArgumentException("Illegal codepoint");\r
1157         }\r
1158         // Write the UTF-16 values\r
1159         if (char32 >= SUPPLEMENTARY_MIN_VALUE) {\r
1160             target[limit++] = getLeadSurrogate(char32);\r
1161             target[limit++] = getTrailSurrogate(char32);\r
1162         } else {\r
1163             target[limit++] = (char) char32;\r
1164         }\r
1165         return limit;\r
1166     }\r
1167 \r
1168     /**\r
1169      * Number of codepoints in a UTF16 String\r
1170      * \r
1171      * @param source\r
1172      *            UTF16 string\r
1173      * @return number of codepoint in string\r
1174      * @stable ICU 2.1\r
1175      */\r
1176     public static int countCodePoint(String source) {\r
1177         if (source == null || source.length() == 0) {\r
1178             return 0;\r
1179         }\r
1180         return findCodePointOffset(source, source.length());\r
1181     }\r
1182 \r
1183     /**\r
1184      * Number of codepoints in a UTF16 String buffer\r
1185      * \r
1186      * @param source\r
1187      *            UTF16 string buffer\r
1188      * @return number of codepoint in string\r
1189      * @stable ICU 2.1\r
1190      */\r
1191     public static int countCodePoint(StringBuffer source) {\r
1192         if (source == null || source.length() == 0) {\r
1193             return 0;\r
1194         }\r
1195         return findCodePointOffset(source, source.length());\r
1196     }\r
1197 \r
1198     /**\r
1199      * Number of codepoints in a UTF16 char array substring\r
1200      * \r
1201      * @param source\r
1202      *            UTF16 char array\r
1203      * @param start\r
1204      *            offset of the substring\r
1205      * @param limit\r
1206      *            offset of the substring\r
1207      * @return number of codepoint in the substring\r
1208      * @exception IndexOutOfBoundsException\r
1209      *                if start and limit are not valid.\r
1210      * @stable ICU 2.1\r
1211      */\r
1212     public static int countCodePoint(char source[], int start, int limit) {\r
1213         if (source == null || source.length == 0) {\r
1214             return 0;\r
1215         }\r
1216         return findCodePointOffset(source, start, limit, limit - start);\r
1217     }\r
1218 \r
1219     /**\r
1220      * Set a code point into a UTF16 position. Adjusts target according if we are replacing a\r
1221      * non-supplementary codepoint with a supplementary and vice versa.\r
1222      * \r
1223      * @param target\r
1224      *            stringbuffer\r
1225      * @param offset16\r
1226      *            UTF16 position to insert into\r
1227      * @param char32\r
1228      *            code point\r
1229      * @stable ICU 2.1\r
1230      */\r
1231     public static void setCharAt(StringBuffer target, int offset16, int char32) {\r
1232         int count = 1;\r
1233         char single = target.charAt(offset16);\r
1234 \r
1235         if (isSurrogate(single)) {\r
1236             // pairs of the surrogate with offset16 at the lead char found\r
1237             if (isLeadSurrogate(single) && (target.length() > offset16 + 1)\r
1238                     && isTrailSurrogate(target.charAt(offset16 + 1))) {\r
1239                 count++;\r
1240             } else {\r
1241                 // pairs of the surrogate with offset16 at the trail char\r
1242                 // found\r
1243                 if (isTrailSurrogate(single) && (offset16 > 0)\r
1244                         && isLeadSurrogate(target.charAt(offset16 - 1))) {\r
1245                     offset16--;\r
1246                     count++;\r
1247                 }\r
1248             }\r
1249         }\r
1250         target.replace(offset16, offset16 + count, valueOf(char32));\r
1251     }\r
1252 \r
1253     /**\r
1254      * Set a code point into a UTF16 position in a char array. Adjusts target according if we are\r
1255      * replacing a non-supplementary codepoint with a supplementary and vice versa.\r
1256      * \r
1257      * @param target\r
1258      *            char array\r
1259      * @param limit\r
1260      *            numbers of valid chars in target, different from target.length. limit counts the\r
1261      *            number of chars in target that represents a string, not the size of array target.\r
1262      * @param offset16\r
1263      *            UTF16 position to insert into\r
1264      * @param char32\r
1265      *            code point\r
1266      * @return new number of chars in target that represents a string\r
1267      * @exception IndexOutOfBoundsException\r
1268      *                if offset16 is out of range\r
1269      * @stable ICU 2.1\r
1270      */\r
1271     public static int setCharAt(char target[], int limit, int offset16, int char32) {\r
1272         if (offset16 >= limit) {\r
1273             throw new ArrayIndexOutOfBoundsException(offset16);\r
1274         }\r
1275         int count = 1;\r
1276         char single = target[offset16];\r
1277 \r
1278         if (isSurrogate(single)) {\r
1279             // pairs of the surrogate with offset16 at the lead char found\r
1280             if (isLeadSurrogate(single) && (target.length > offset16 + 1)\r
1281                     && isTrailSurrogate(target[offset16 + 1])) {\r
1282                 count++;\r
1283             } else {\r
1284                 // pairs of the surrogate with offset16 at the trail char\r
1285                 // found\r
1286                 if (isTrailSurrogate(single) && (offset16 > 0)\r
1287                         && isLeadSurrogate(target[offset16 - 1])) {\r
1288                     offset16--;\r
1289                     count++;\r
1290                 }\r
1291             }\r
1292         }\r
1293 \r
1294         String str = valueOf(char32);\r
1295         int result = limit;\r
1296         int strlength = str.length();\r
1297         target[offset16] = str.charAt(0);\r
1298         if (count == strlength) {\r
1299             if (count == 2) {\r
1300                 target[offset16 + 1] = str.charAt(1);\r
1301             }\r
1302         } else {\r
1303             // this is not exact match in space, we'll have to do some\r
1304             // shifting\r
1305             System.arraycopy(target, offset16 + count, target, offset16 + strlength, limit\r
1306                     - (offset16 + count));\r
1307             if (count < strlength) {\r
1308                 // char32 is a supplementary character trying to squeeze into\r
1309                 // a non-supplementary space\r
1310                 target[offset16 + 1] = str.charAt(1);\r
1311                 result++;\r
1312                 if (result < target.length) {\r
1313                     target[result] = 0;\r
1314                 }\r
1315             } else {\r
1316                 // char32 is a non-supplementary character trying to fill\r
1317                 // into a supplementary space\r
1318                 result--;\r
1319                 target[result] = 0;\r
1320             }\r
1321         }\r
1322         return result;\r
1323     }\r
1324 \r
1325     /**\r
1326      * Shifts offset16 by the argument number of codepoints\r
1327      * \r
1328      * @param source\r
1329      *            string\r
1330      * @param offset16\r
1331      *            UTF16 position to shift\r
1332      * @param shift32\r
1333      *            number of codepoints to shift\r
1334      * @return new shifted offset16\r
1335      * @exception IndexOutOfBoundsException\r
1336      *                if the new offset16 is out of bounds.\r
1337      * @stable ICU 2.1\r
1338      */\r
1339     public static int moveCodePointOffset(String source, int offset16, int shift32) {\r
1340         int result = offset16;\r
1341         int size = source.length();\r
1342         int count;\r
1343         char ch;\r
1344         if (offset16 < 0 || offset16 > size) {\r
1345             throw new StringIndexOutOfBoundsException(offset16);\r
1346         }\r
1347         if (shift32 > 0) {\r
1348             if (shift32 + offset16 > size) {\r
1349                 throw new StringIndexOutOfBoundsException(offset16);\r
1350             }\r
1351             count = shift32;\r
1352             while (result < size && count > 0) {\r
1353                 ch = source.charAt(result);\r
1354                 if (isLeadSurrogate(ch) && ((result + 1) < size)\r
1355                         && isTrailSurrogate(source.charAt(result + 1))) {\r
1356                     result++;\r
1357                 }\r
1358                 count--;\r
1359                 result++;\r
1360             }\r
1361         } else {\r
1362             if (offset16 + shift32 < 0) {\r
1363                 throw new StringIndexOutOfBoundsException(offset16);\r
1364             }\r
1365             for (count = -shift32; count > 0; count--) {\r
1366                 result--;\r
1367                 if (result < 0) {\r
1368                     break;\r
1369                 }\r
1370                 ch = source.charAt(result);\r
1371                 if (isTrailSurrogate(ch) && result > 0\r
1372                         && isLeadSurrogate(source.charAt(result - 1))) {\r
1373                     result--;\r
1374                 }\r
1375             }\r
1376         }\r
1377         if (count != 0) {\r
1378             throw new StringIndexOutOfBoundsException(shift32);\r
1379         }\r
1380         return result;\r
1381     }\r
1382 \r
1383     /**\r
1384      * Shifts offset16 by the argument number of codepoints\r
1385      * \r
1386      * @param source\r
1387      *            string buffer\r
1388      * @param offset16\r
1389      *            UTF16 position to shift\r
1390      * @param shift32\r
1391      *            number of codepoints to shift\r
1392      * @return new shifted offset16\r
1393      * @exception IndexOutOfBoundsException\r
1394      *                if the new offset16 is out of bounds.\r
1395      * @stable ICU 2.1\r
1396      */\r
1397     public static int moveCodePointOffset(StringBuffer source, int offset16, int shift32) {\r
1398         int result = offset16;\r
1399         int size = source.length();\r
1400         int count;\r
1401         char ch;\r
1402         if (offset16 < 0 || offset16 > size) {\r
1403             throw new StringIndexOutOfBoundsException(offset16);\r
1404         }\r
1405         if (shift32 > 0) {\r
1406             if (shift32 + offset16 > size) {\r
1407                 throw new StringIndexOutOfBoundsException(offset16);\r
1408             }\r
1409             count = shift32;\r
1410             while (result < size && count > 0) {\r
1411                 ch = source.charAt(result);\r
1412                 if (isLeadSurrogate(ch) && ((result + 1) < size)\r
1413                         && isTrailSurrogate(source.charAt(result + 1))) {\r
1414                     result++;\r
1415                 }\r
1416                 count--;\r
1417                 result++;\r
1418             }\r
1419         } else {\r
1420             if (offset16 + shift32 < 0) {\r
1421                 throw new StringIndexOutOfBoundsException(offset16);\r
1422             }\r
1423             for (count = -shift32; count > 0; count--) {\r
1424                 result--;\r
1425                 if (result < 0) {\r
1426                     break;\r
1427                 }\r
1428                 ch = source.charAt(result);\r
1429                 if (isTrailSurrogate(ch) && result > 0\r
1430                         && isLeadSurrogate(source.charAt(result - 1))) {\r
1431                     result--;\r
1432                 }\r
1433             }\r
1434         }\r
1435         if (count != 0) {\r
1436             throw new StringIndexOutOfBoundsException(shift32);\r
1437         }\r
1438         return result;\r
1439     }\r
1440 \r
1441     /**\r
1442      * Shifts offset16 by the argument number of codepoints within a subarray.\r
1443      * \r
1444      * @param source\r
1445      *            char array\r
1446      * @param start\r
1447      *            position of the subarray to be performed on\r
1448      * @param limit\r
1449      *            position of the subarray to be performed on\r
1450      * @param offset16\r
1451      *            UTF16 position to shift relative to start\r
1452      * @param shift32\r
1453      *            number of codepoints to shift\r
1454      * @return new shifted offset16 relative to start\r
1455      * @exception IndexOutOfBoundsException\r
1456      *                if the new offset16 is out of bounds with respect to the subarray or the\r
1457      *                subarray bounds are out of range.\r
1458      * @stable ICU 2.1\r
1459      */\r
1460     public static int moveCodePointOffset(char source[], int start, int limit, int offset16,\r
1461             int shift32) {\r
1462         int size = source.length;\r
1463         int count;\r
1464         char ch;\r
1465         int result = offset16 + start;\r
1466         if (start < 0 || limit < start) {\r
1467             throw new StringIndexOutOfBoundsException(start);\r
1468         }\r
1469         if (limit > size) {\r
1470             throw new StringIndexOutOfBoundsException(limit);\r
1471         }\r
1472         if (offset16 < 0 || result > limit) {\r
1473             throw new StringIndexOutOfBoundsException(offset16);\r
1474         }\r
1475         if (shift32 > 0) {\r
1476             if (shift32 + result > size) {\r
1477                 throw new StringIndexOutOfBoundsException(result);\r
1478             }\r
1479             count = shift32;\r
1480             while (result < limit && count > 0) {\r
1481                 ch = source[result];\r
1482                 if (isLeadSurrogate(ch) && (result + 1 < limit)\r
1483                         && isTrailSurrogate(source[result + 1])) {\r
1484                     result++;\r
1485                 }\r
1486                 count--;\r
1487                 result++;\r
1488             }\r
1489         } else {\r
1490             if (result + shift32 < start) {\r
1491                 throw new StringIndexOutOfBoundsException(result);\r
1492             }\r
1493             for (count = -shift32; count > 0; count--) {\r
1494                 result--;\r
1495                 if (result < start) {\r
1496                     break;\r
1497                 }\r
1498                 ch = source[result];\r
1499                 if (isTrailSurrogate(ch) && result > start && isLeadSurrogate(source[result - 1])) {\r
1500                     result--;\r
1501                 }\r
1502             }\r
1503         }\r
1504         if (count != 0) {\r
1505             throw new StringIndexOutOfBoundsException(shift32);\r
1506         }\r
1507         result -= start;\r
1508         return result;\r
1509     }\r
1510 \r
1511     /**\r
1512      * Inserts char32 codepoint into target at the argument offset16. If the offset16 is in the\r
1513      * middle of a supplementary codepoint, char32 will be inserted after the supplementary\r
1514      * codepoint. The length of target increases by one if codepoint is non-supplementary, 2\r
1515      * otherwise.\r
1516      * <p>\r
1517      * The overall effect is exactly as if the argument were converted to a string by the method\r
1518      * valueOf(char) and the characters in that string were then inserted into target at the\r
1519      * position indicated by offset16.\r
1520      * </p>\r
1521      * <p>\r
     * The offset argument must be greater than or equal to 0, and less than or equal to the length
     * of target.
1524      * \r
1525      * @param target\r
1526      *            string buffer to insert to\r
1527      * @param offset16\r
1528      *            offset which char32 will be inserted in\r
1529      * @param char32\r
1530      *            codepoint to be inserted\r
1531      * @return a reference to target\r
1532      * @exception IndexOutOfBoundsException\r
1533      *                thrown if offset16 is invalid.\r
1534      * @stable ICU 2.1\r
1535      */\r
1536     public static StringBuffer insert(StringBuffer target, int offset16, int char32) {\r
1537         String str = valueOf(char32);\r
1538         if (offset16 != target.length() && bounds(target, offset16) == TRAIL_SURROGATE_BOUNDARY) {\r
1539             offset16++;\r
1540         }\r
1541         target.insert(offset16, str);\r
1542         return target;\r
1543     }\r
1544 \r
1545     /**\r
1546      * Inserts char32 codepoint into target at the argument offset16. If the offset16 is in the\r
1547      * middle of a supplementary codepoint, char32 will be inserted after the supplementary\r
1548      * codepoint. Limit increases by one if codepoint is non-supplementary, 2 otherwise.\r
1549      * <p>\r
1550      * The overall effect is exactly as if the argument were converted to a string by the method\r
1551      * valueOf(char) and the characters in that string were then inserted into target at the\r
1552      * position indicated by offset16.\r
1553      * </p>\r
1554      * <p>\r
1555      * The offset argument must be greater than or equal to 0, and less than or equal to the limit.\r
1556      * \r
1557      * @param target\r
1558      *            char array to insert to\r
1559      * @param limit\r
1560      *            end index of the char array, limit <= target.length\r
1561      * @param offset16\r
1562      *            offset which char32 will be inserted in\r
1563      * @param char32\r
1564      *            codepoint to be inserted\r
1565      * @return new limit size\r
1566      * @exception IndexOutOfBoundsException\r
1567      *                thrown if offset16 is invalid.\r
1568      * @stable ICU 2.1\r
1569      */\r
1570     public static int insert(char target[], int limit, int offset16, int char32) {\r
1571         String str = valueOf(char32);\r
1572         if (offset16 != limit && bounds(target, 0, limit, offset16) == TRAIL_SURROGATE_BOUNDARY) {\r
1573             offset16++;\r
1574         }\r
1575         int size = str.length();\r
1576         if (limit + size > target.length) {\r
1577             throw new ArrayIndexOutOfBoundsException(offset16 + size);\r
1578         }\r
1579         System.arraycopy(target, offset16, target, offset16 + size, limit - offset16);\r
1580         target[offset16] = str.charAt(0);\r
1581         if (size == 2) {\r
1582             target[offset16 + 1] = str.charAt(1);\r
1583         }\r
1584         return limit + size;\r
1585     }\r
1586 \r
1587     /**\r
1588      * Removes the codepoint at the specified position in this target (shortening target by 1\r
1589      * character if the codepoint is a non-supplementary, 2 otherwise).\r
1590      * \r
1591      * @param target\r
1592      *            string buffer to remove codepoint from\r
1593      * @param offset16\r
1594      *            offset which the codepoint will be removed\r
1595      * @return a reference to target\r
1596      * @exception IndexOutOfBoundsException\r
1597      *                thrown if offset16 is invalid.\r
1598      * @stable ICU 2.1\r
1599      */\r
1600     public static StringBuffer delete(StringBuffer target, int offset16) {\r
1601         int count = 1;\r
1602         switch (bounds(target, offset16)) {\r
1603         case LEAD_SURROGATE_BOUNDARY:\r
1604             count++;\r
1605             break;\r
1606         case TRAIL_SURROGATE_BOUNDARY:\r
1607             count++;\r
1608             offset16--;\r
1609             break;\r
1610         }\r
1611         target.delete(offset16, offset16 + count);\r
1612         return target;\r
1613     }\r
1614 \r
1615     /**\r
1616      * Removes the codepoint at the specified position in this target (shortening target by 1\r
1617      * character if the codepoint is a non-supplementary, 2 otherwise).\r
1618      * \r
     * @param target
     *            char array to remove codepoint from
1621      * @param limit\r
1622      *            end index of the char array, limit <= target.length\r
1623      * @param offset16\r
1624      *            offset which the codepoint will be removed\r
1625      * @return a new limit size\r
1626      * @exception IndexOutOfBoundsException\r
1627      *                thrown if offset16 is invalid.\r
1628      * @stable ICU 2.1\r
1629      */\r
1630     public static int delete(char target[], int limit, int offset16) {\r
1631         int count = 1;\r
1632         switch (bounds(target, 0, limit, offset16)) {\r
1633         case LEAD_SURROGATE_BOUNDARY:\r
1634             count++;\r
1635             break;\r
1636         case TRAIL_SURROGATE_BOUNDARY:\r
1637             count++;\r
1638             offset16--;\r
1639             break;\r
1640         }\r
1641         System.arraycopy(target, offset16 + count, target, offset16, limit - (offset16 + count));\r
1642         target[limit - count] = 0;\r
1643         return limit - count;\r
1644     }\r
1645 \r
1646     /**\r
1647      * Returns the index within the argument UTF16 format Unicode string of the first occurrence of\r
1648      * the argument codepoint. I.e., the smallest index <code>i</code> such that\r
1649      * <code>UTF16.charAt(source, i) ==\r
1650      * char32</code> is true.\r
1651      * <p>\r
1652      * If no such character occurs in this string, then -1 is returned.\r
1653      * </p>\r
1654      * <p>\r
1655      * Examples:<br>\r
1656      * UTF16.indexOf("abc", 'a') returns 0<br>\r
1657      * UTF16.indexOf("abc\ud800\udc00", 0x10000) returns 3<br>\r
1658      * UTF16.indexOf("abc\ud800\udc00", 0xd800) returns -1<br>\r
1659      * </p>\r
1660      * Note this method is provided as support to jdk 1.3, which does not support supplementary\r
1661      * characters to its fullest.\r
1662      * \r
1663      * @param source\r
1664      *            UTF16 format Unicode string that will be searched\r
1665      * @param char32\r
1666      *            codepoint to search for\r
1667      * @return the index of the first occurrence of the codepoint in the argument Unicode string, or\r
1668      *         -1 if the codepoint does not occur.\r
1669      * @stable ICU 2.6\r
1670      */\r
1671     public static int indexOf(String source, int char32) {\r
1672         if (char32 < CODEPOINT_MIN_VALUE || char32 > CODEPOINT_MAX_VALUE) {\r
1673             throw new IllegalArgumentException("Argument char32 is not a valid codepoint");\r
1674         }\r
1675         // non-surrogate bmp\r
1676         if (char32 < LEAD_SURROGATE_MIN_VALUE\r
1677                 || (char32 > TRAIL_SURROGATE_MAX_VALUE && char32 < SUPPLEMENTARY_MIN_VALUE)) {\r
1678             return source.indexOf((char) char32);\r
1679         }\r
1680         // surrogate\r
1681         if (char32 < SUPPLEMENTARY_MIN_VALUE) {\r
1682             int result = source.indexOf((char) char32);\r
1683             if (result >= 0) {\r
1684                 if (isLeadSurrogate((char) char32) && (result < source.length() - 1)\r
1685                         && isTrailSurrogate(source.charAt(result + 1))) {\r
1686                     return indexOf(source, char32, result + 1);\r
1687                 }\r
1688                 // trail surrogate\r
1689                 if (result > 0 && isLeadSurrogate(source.charAt(result - 1))) {\r
1690                     return indexOf(source, char32, result + 1);\r
1691                 }\r
1692             }\r
1693             return result;\r
1694         }\r
1695         // supplementary\r
1696         String char32str = toString(char32);\r
1697         return source.indexOf(char32str);\r
1698     }\r
1699 \r
1700     /**\r
1701      * Returns the index within the argument UTF16 format Unicode string of the first occurrence of\r
1702      * the argument string str. This method is implemented based on codepoints, hence a "lead\r
     * surrogate character + trail surrogate character" is treated as one entity. Hence if str
     * starts with a trail surrogate character, an occurrence of str in source that is immediately
     * preceded by a lead surrogate character is not a valid match. Likewise, if str ends with a
     * lead surrogate character, an occurrence immediately followed by a trail surrogate character
     * is not a valid match. See example below.
1707      * <p>\r
1708      * If no such string str occurs in this source, then -1 is returned.\r
1709      * </p>\r
1710      * <p>\r
1711      * Examples:<br>\r
1712      * UTF16.indexOf("abc", "ab") returns 0<br>\r
1713      * UTF16.indexOf("abc\ud800\udc00", "\ud800\udc00") returns 3<br>\r
1714      * UTF16.indexOf("abc\ud800\udc00", "\ud800") returns -1<br>\r
1715      * </p>\r
1716      * Note this method is provided as support to jdk 1.3, which does not support supplementary\r
1717      * characters to its fullest.\r
1718      * \r
1719      * @param source\r
1720      *            UTF16 format Unicode string that will be searched\r
1721      * @param str\r
1722      *            UTF16 format Unicode string to search for\r
1723      * @return the index of the first occurrence of the codepoint in the argument Unicode string, or\r
1724      *         -1 if the codepoint does not occur.\r
1725      * @stable ICU 2.6\r
1726      */\r
1727     public static int indexOf(String source, String str) {\r
1728         int strLength = str.length();\r
1729         // non-surrogate ends\r
1730         if (!isTrailSurrogate(str.charAt(0)) && !isLeadSurrogate(str.charAt(strLength - 1))) {\r
1731             return source.indexOf(str);\r
1732         }\r
1733 \r
1734         int result = source.indexOf(str);\r
1735         int resultEnd = result + strLength;\r
1736         if (result >= 0) {\r
1737             // check last character\r
1738             if (isLeadSurrogate(str.charAt(strLength - 1)) && (result < source.length() - 1)\r
1739                     && isTrailSurrogate(source.charAt(resultEnd + 1))) {\r
1740                 return indexOf(source, str, resultEnd + 1);\r
1741             }\r
1742             // check first character which is a trail surrogate\r
1743             if (isTrailSurrogate(str.charAt(0)) && result > 0\r
1744                     && isLeadSurrogate(source.charAt(result - 1))) {\r
1745                 return indexOf(source, str, resultEnd + 1);\r
1746             }\r
1747         }\r
1748         return result;\r
1749     }\r
1750 \r
1751     /**\r
1752      * Returns the index within the argument UTF16 format Unicode string of the first occurrence of\r
1753      * the argument codepoint. I.e., the smallest index i such that: <br>\r
1754      * (UTF16.charAt(source, i) == char32 && i >= fromIndex) is true.\r
1755      * <p>\r
1756      * If no such character occurs in this string, then -1 is returned.\r
1757      * </p>\r
1758      * <p>\r
1759      * Examples:<br>\r
1760      * UTF16.indexOf("abc", 'a', 1) returns -1<br>\r
1761      * UTF16.indexOf("abc\ud800\udc00", 0x10000, 1) returns 3<br>\r
1762      * UTF16.indexOf("abc\ud800\udc00", 0xd800, 1) returns -1<br>\r
1763      * </p>\r
1764      * Note this method is provided as support to jdk 1.3, which does not support supplementary\r
1765      * characters to its fullest.\r
1766      * \r
1767      * @param source\r
1768      *            UTF16 format Unicode string that will be searched\r
1769      * @param char32\r
1770      *            codepoint to search for\r
1771      * @param fromIndex\r
1772      *            the index to start the search from.\r
1773      * @return the index of the first occurrence of the codepoint in the argument Unicode string at\r
1774      *         or after fromIndex, or -1 if the codepoint does not occur.\r
1775      * @stable ICU 2.6\r
1776      */\r
1777     public static int indexOf(String source, int char32, int fromIndex) {\r
1778         if (char32 < CODEPOINT_MIN_VALUE || char32 > CODEPOINT_MAX_VALUE) {\r
1779             throw new IllegalArgumentException("Argument char32 is not a valid codepoint");\r
1780         }\r
1781         // non-surrogate bmp\r
1782         if (char32 < LEAD_SURROGATE_MIN_VALUE\r
1783                 || (char32 > TRAIL_SURROGATE_MAX_VALUE && char32 < SUPPLEMENTARY_MIN_VALUE)) {\r
1784             return source.indexOf((char) char32, fromIndex);\r
1785         }\r
1786         // surrogate\r
1787         if (char32 < SUPPLEMENTARY_MIN_VALUE) {\r
1788             int result = source.indexOf((char) char32, fromIndex);\r
1789             if (result >= 0) {\r
1790                 if (isLeadSurrogate((char) char32) && (result < source.length() - 1)\r
1791                         && isTrailSurrogate(source.charAt(result + 1))) {\r
1792                     return indexOf(source, char32, result + 1);\r
1793                 }\r
1794                 // trail surrogate\r
1795                 if (result > 0 && isLeadSurrogate(source.charAt(result - 1))) {\r
1796                     return indexOf(source, char32, result + 1);\r
1797                 }\r
1798             }\r
1799             return result;\r
1800         }\r
1801         // supplementary\r
1802         String char32str = toString(char32);\r
1803         return source.indexOf(char32str, fromIndex);\r
1804     }\r
1805 \r
1806     /**\r
1807      * Returns the index within the argument UTF16 format Unicode string of the first occurrence of\r
1808      * the argument string str. This method is implemented based on codepoints, hence a "lead\r
     * surrogate character + trail surrogate character" is treated as one entity. Hence if str
     * starts with a trail surrogate character, an occurrence of str in source that is immediately
     * preceded by a lead surrogate character is not a valid match. Likewise, if str ends with a
     * lead surrogate character, an occurrence immediately followed by a trail surrogate character
     * is not a valid match. See example below.
1813      * <p>\r
1814      * If no such string str occurs in this source, then -1 is returned.\r
1815      * </p>\r
1816      * <p>\r
1817      * Examples:<br>\r
1818      * UTF16.indexOf("abc", "ab", 0) returns 0<br>\r
1819      * UTF16.indexOf("abc\ud800\udc00", "\ud800\udc00", 0) returns 3<br>\r
1820      * UTF16.indexOf("abc\ud800\udc00", "\ud800\udc00", 2) returns 3<br>\r
1821      * UTF16.indexOf("abc\ud800\udc00", "\ud800", 0) returns -1<br>\r
1822      * </p>\r
1823      * Note this method is provided as support to jdk 1.3, which does not support supplementary\r
1824      * characters to its fullest.\r
1825      * \r
1826      * @param source\r
1827      *            UTF16 format Unicode string that will be searched\r
1828      * @param str\r
1829      *            UTF16 format Unicode string to search for\r
1830      * @param fromIndex\r
1831      *            the index to start the search from.\r
1832      * @return the index of the first occurrence of the codepoint in the argument Unicode string, or\r
1833      *         -1 if the codepoint does not occur.\r
1834      * @stable ICU 2.6\r
1835      */\r
1836     public static int indexOf(String source, String str, int fromIndex) {\r
1837         int strLength = str.length();\r
1838         // non-surrogate ends\r
1839         if (!isTrailSurrogate(str.charAt(0)) && !isLeadSurrogate(str.charAt(strLength - 1))) {\r
1840             return source.indexOf(str, fromIndex);\r
1841         }\r
1842 \r
1843         int result = source.indexOf(str, fromIndex);\r
1844         int resultEnd = result + strLength;\r
1845         if (result >= 0) {\r
1846             // check last character\r
1847             if (isLeadSurrogate(str.charAt(strLength - 1)) && (result < source.length() - 1)\r
1848                     && isTrailSurrogate(source.charAt(resultEnd))) {\r
1849                 return indexOf(source, str, resultEnd + 1);\r
1850             }\r
1851             // check first character which is a trail surrogate\r
1852             if (isTrailSurrogate(str.charAt(0)) && result > 0\r
1853                     && isLeadSurrogate(source.charAt(result - 1))) {\r
1854                 return indexOf(source, str, resultEnd + 1);\r
1855             }\r
1856         }\r
1857         return result;\r
1858     }\r
1859 \r
1860     /**\r
1861      * Returns the index within the argument UTF16 format Unicode string of the last occurrence of\r
1862      * the argument codepoint. I.e., the index returned is the largest value i such that:\r
1863      * UTF16.charAt(source, i) == char32 is true.\r
1864      * <p>\r
1865      * Examples:<br>\r
1866      * UTF16.lastIndexOf("abc", 'a') returns 0<br>\r
1867      * UTF16.lastIndexOf("abc\ud800\udc00", 0x10000) returns 3<br>\r
1868      * UTF16.lastIndexOf("abc\ud800\udc00", 0xd800) returns -1<br>\r
1869      * </p>\r
1870      * <p>\r
1871      * source is searched backwards starting at the last character.\r
1872      * </p>\r
1873      * Note this method is provided as support to jdk 1.3, which does not support supplementary\r
1874      * characters to its fullest.\r
1875      * \r
1876      * @param source\r
1877      *            UTF16 format Unicode string that will be searched\r
1878      * @param char32\r
1879      *            codepoint to search for\r
1880      * @return the index of the last occurrence of the codepoint in source, or -1 if the codepoint\r
1881      *         does not occur.\r
1882      * @stable ICU 2.6\r
1883      */\r
1884     public static int lastIndexOf(String source, int char32) {\r
1885         if (char32 < CODEPOINT_MIN_VALUE || char32 > CODEPOINT_MAX_VALUE) {\r
1886             throw new IllegalArgumentException("Argument char32 is not a valid codepoint");\r
1887         }\r
1888         // non-surrogate bmp\r
1889         if (char32 < LEAD_SURROGATE_MIN_VALUE\r
1890                 || (char32 > TRAIL_SURROGATE_MAX_VALUE && char32 < SUPPLEMENTARY_MIN_VALUE)) {\r
1891             return source.lastIndexOf((char) char32);\r
1892         }\r
1893         // surrogate\r
1894         if (char32 < SUPPLEMENTARY_MIN_VALUE) {\r
1895             int result = source.lastIndexOf((char) char32);\r
1896             if (result >= 0) {\r
1897                 if (isLeadSurrogate((char) char32) && (result < source.length() - 1)\r
1898                         && isTrailSurrogate(source.charAt(result + 1))) {\r
1899                     return lastIndexOf(source, char32, result - 1);\r
1900                 }\r
1901                 // trail surrogate\r
1902                 if (result > 0 && isLeadSurrogate(source.charAt(result - 1))) {\r
1903                     return lastIndexOf(source, char32, result - 1);\r
1904                 }\r
1905             }\r
1906             return result;\r
1907         }\r
1908         // supplementary\r
1909         String char32str = toString(char32);\r
1910         return source.lastIndexOf(char32str);\r
1911     }\r
1912 \r
1913     /**\r
1914      * Returns the index within the argument UTF16 format Unicode string of the last occurrence of\r
1915      * the argument string str. This method is implemented based on codepoints, hence a "lead\r
     * surrogate character + trail surrogate character" is treated as one entity. Hence if str
     * starts with a trail surrogate character, an occurrence of str in source that is immediately
     * preceded by a lead surrogate character is not a valid match. Likewise, if str ends with a
     * lead surrogate character, an occurrence immediately followed by a trail surrogate character
     * is not a valid match. See example below.
1920      * <p>\r
1921      * Examples:<br>\r
1922      * UTF16.lastIndexOf("abc", "a") returns 0<br>\r
1923      * UTF16.lastIndexOf("abc\ud800\udc00", "\ud800\udc00") returns 3<br>\r
1924      * UTF16.lastIndexOf("abc\ud800\udc00", "\ud800") returns -1<br>\r
1925      * </p>\r
1926      * <p>\r
1927      * source is searched backwards starting at the last character.\r
1928      * </p>\r
1929      * Note this method is provided as support to jdk 1.3, which does not support supplementary\r
1930      * characters to its fullest.\r
1931      * \r
1932      * @param source\r
1933      *            UTF16 format Unicode string that will be searched\r
1934      * @param str\r
1935      *            UTF16 format Unicode string to search for\r
1936      * @return the index of the last occurrence of the codepoint in source, or -1 if the codepoint\r
1937      *         does not occur.\r
1938      * @stable ICU 2.6\r
1939      */\r
1940     public static int lastIndexOf(String source, String str) {\r
1941         int strLength = str.length();\r
1942         // non-surrogate ends\r
1943         if (!isTrailSurrogate(str.charAt(0)) && !isLeadSurrogate(str.charAt(strLength - 1))) {\r
1944             return source.lastIndexOf(str);\r
1945         }\r
1946 \r
1947         int result = source.lastIndexOf(str);\r
1948         if (result >= 0) {\r
1949             // check last character\r
1950             if (isLeadSurrogate(str.charAt(strLength - 1)) && (result < source.length() - 1)\r
1951                     && isTrailSurrogate(source.charAt(result + strLength + 1))) {\r
1952                 return lastIndexOf(source, str, result - 1);\r
1953             }\r
1954             // check first character which is a trail surrogate\r
1955             if (isTrailSurrogate(str.charAt(0)) && result > 0\r
1956                     && isLeadSurrogate(source.charAt(result - 1))) {\r
1957                 return lastIndexOf(source, str, result - 1);\r
1958             }\r
1959         }\r
1960         return result;\r
1961     }\r
1962 \r
1963     /**\r
1964      * <p>\r
1965      * Returns the index within the argument UTF16 format Unicode string of the last occurrence of\r
1966      * the argument codepoint, where the result is less than or equals to fromIndex.\r
1967      * </p>\r
1968      * <p>\r
1969      * This method is implemented based on codepoints, hence a single surrogate character will not\r
1970      * match a supplementary character.\r
1971      * </p>\r
1972      * <p>\r
1973      * source is searched backwards starting at the last character starting at the specified index.\r
1974      * </p>\r
1975      * <p>\r
1976      * Examples:<br>\r
1977      * UTF16.lastIndexOf("abc", 'c', 2) returns 2<br>\r
1978      * UTF16.lastIndexOf("abc", 'c', 1) returns -1<br>\r
1979      * UTF16.lastIndexOf("abc\ud800\udc00", 0x10000, 5) returns 3<br>\r
1980      * UTF16.lastIndexOf("abc\ud800\udc00", 0x10000, 3) returns 3<br>\r
     * UTF16.lastIndexOf("abc\ud800\udc00", 0xd800, 4) returns -1<br>
1982      * </p>\r
1983      * Note this method is provided as support to jdk 1.3, which does not support supplementary\r
1984      * characters to its fullest.\r
1985      * \r
1986      * @param source\r
1987      *            UTF16 format Unicode string that will be searched\r
1988      * @param char32\r
1989      *            codepoint to search for\r
1990      * @param fromIndex\r
1991      *            the index to start the search from. There is no restriction on the value of\r
1992      *            fromIndex. If it is greater than or equal to the length of this string, it has the\r
1993      *            same effect as if it were equal to one less than the length of this string: this\r
1994      *            entire string may be searched. If it is negative, it has the same effect as if it\r
1995      *            were -1: -1 is returned.\r
1996      * @return the index of the last occurrence of the codepoint in source, or -1 if the codepoint\r
1997      *         does not occur.\r
1998      * @stable ICU 2.6\r
1999      */\r
2000     public static int lastIndexOf(String source, int char32, int fromIndex) {\r
2001         if (char32 < CODEPOINT_MIN_VALUE || char32 > CODEPOINT_MAX_VALUE) {\r
2002             throw new IllegalArgumentException("Argument char32 is not a valid codepoint");\r
2003         }\r
2004         // non-surrogate bmp\r
2005         if (char32 < LEAD_SURROGATE_MIN_VALUE\r
2006                 || (char32 > TRAIL_SURROGATE_MAX_VALUE && char32 < SUPPLEMENTARY_MIN_VALUE)) {\r
2007             return source.lastIndexOf((char) char32, fromIndex);\r
2008         }\r
2009         // surrogate\r
2010         if (char32 < SUPPLEMENTARY_MIN_VALUE) {\r
2011             int result = source.lastIndexOf((char) char32, fromIndex);\r
2012             if (result >= 0) {\r
2013                 if (isLeadSurrogate((char) char32) && (result < source.length() - 1)\r
2014                         && isTrailSurrogate(source.charAt(result + 1))) {\r
2015                     return lastIndexOf(source, char32, result - 1);\r
2016                 }\r
2017                 // trail surrogate\r
2018                 if (result > 0 && isLeadSurrogate(source.charAt(result - 1))) {\r
2019                     return lastIndexOf(source, char32, result - 1);\r
2020                 }\r
2021             }\r
2022             return result;\r
2023         }\r
2024         // supplementary\r
2025         String char32str = toString(char32);\r
2026         return source.lastIndexOf(char32str, fromIndex);\r
2027     }\r
2028 \r
2029     /**\r
2030      * <p>\r
2031      * Returns the index within the argument UTF16 format Unicode string of the last occurrence of\r
2032      * the argument string str, where the result is less than or equals to fromIndex.\r
2033      * </p>\r
2034      * <p>\r
2035      * This method is implemented based on codepoints, hence a "lead surrogate character + trail\r
2036      * surrogate character" is treated as one entity. Hence if the str starts with trail surrogate\r
2037      * character at index 0, a source with a leading a surrogate character before str found at in\r
2038      * source will not have a valid match. Vice versa for lead surrogates that ends str.\r
2039      * </p>\r
2040      * See example below.\r
2041      * <p>\r
2042      * Examples:<br>\r
2043      * UTF16.lastIndexOf("abc", "c", 2) returns 2<br>\r
2044      * UTF16.lastIndexOf("abc", "c", 1) returns -1<br>\r
2045      * UTF16.lastIndexOf("abc\ud800\udc00", "\ud800\udc00", 5) returns 3<br>\r
2046      * UTF16.lastIndexOf("abc\ud800\udc00", "\ud800\udc00", 3) returns 3<br>\r
2047      * UTF16.lastIndexOf("abc\ud800\udc00", "\ud800", 4) returns -1<br>\r
2048      * </p>\r
2049      * <p>\r
2050      * source is searched backwards starting at the last character.\r
2051      * </p>\r
2052      * Note this method is provided as support to jdk 1.3, which does not support supplementary\r
2053      * characters to its fullest.\r
2054      * \r
2055      * @param source\r
2056      *            UTF16 format Unicode string that will be searched\r
2057      * @param str\r
2058      *            UTF16 format Unicode string to search for\r
2059      * @param fromIndex\r
2060      *            the index to start the search from. There is no restriction on the value of\r
2061      *            fromIndex. If it is greater than or equal to the length of this string, it has the\r
2062      *            same effect as if it were equal to one less than the length of this string: this\r
2063      *            entire string may be searched. If it is negative, it has the same effect as if it\r
2064      *            were -1: -1 is returned.\r
2065      * @return the index of the last occurrence of the codepoint in source, or -1 if the codepoint\r
2066      *         does not occur.\r
2067      * @stable ICU 2.6\r
2068      */\r
2069     public static int lastIndexOf(String source, String str, int fromIndex) {\r
2070         int strLength = str.length();\r
2071         // non-surrogate ends\r
2072         if (!isTrailSurrogate(str.charAt(0)) && !isLeadSurrogate(str.charAt(strLength - 1))) {\r
2073             return source.lastIndexOf(str, fromIndex);\r
2074         }\r
2075 \r
2076         int result = source.lastIndexOf(str, fromIndex);\r
2077         if (result >= 0) {\r
2078             // check last character\r
2079             if (isLeadSurrogate(str.charAt(strLength - 1)) && (result < source.length() - 1)\r
2080                     && isTrailSurrogate(source.charAt(result + strLength))) {\r
2081                 return lastIndexOf(source, str, result - 1);\r
2082             }\r
2083             // check first character which is a trail surrogate\r
2084             if (isTrailSurrogate(str.charAt(0)) && result > 0\r
2085                     && isLeadSurrogate(source.charAt(result - 1))) {\r
2086                 return lastIndexOf(source, str, result - 1);\r
2087             }\r
2088         }\r
2089         return result;\r
2090     }\r
2091 \r
2092     /**\r
2093      * Returns a new UTF16 format Unicode string resulting from replacing all occurrences of\r
2094      * oldChar32 in source with newChar32. If the character oldChar32 does not occur in the UTF16\r
2095      * format Unicode string source, then source will be returned. Otherwise, a new String object is\r
2096      * created that represents a codepoint sequence identical to the codepoint sequence represented\r
2097      * by source, except that every occurrence of oldChar32 is replaced by an occurrence of\r
2098      * newChar32.\r
2099      * <p>\r
2100      * Examples: <br>\r
2101      * UTF16.replace("mesquite in your cellar", 'e', 'o');<br>\r
2102      * returns "mosquito in your collar"<br>\r
2103      * UTF16.replace("JonL", 'q', 'x');<br>\r
2104      * returns "JonL" (no change)<br>\r
2105      * UTF16.replace("Supplementary character \ud800\udc00", 0x10000, '!'); <br>\r
2106      * returns "Supplementary character !"<br>\r
2107      * UTF16.replace("Supplementary character \ud800\udc00", 0xd800, '!'); <br>\r
2108      * returns "Supplementary character \ud800\udc00"<br>\r
2109      * </p>\r
2110      * Note this method is provided as support to jdk 1.3, which does not support supplementary\r
2111      * characters to its fullest.\r
2112      * \r
2113      * @param source\r
2114      *            UTF16 format Unicode string which the codepoint replacements will be based on.\r
2115      * @param oldChar32\r
2116      *            non-zero old codepoint to be replaced.\r
2117      * @param newChar32\r
2118      *            the new codepoint to replace oldChar32\r
2119      * @return new String derived from source by replacing every occurrence of oldChar32 with\r
2120      *         newChar32, unless when no oldChar32 is found in source then source will be returned.\r
2121      * @stable ICU 2.6\r
2122      */\r
2123     public static String replace(String source, int oldChar32, int newChar32) {\r
2124         if (oldChar32 <= 0 || oldChar32 > CODEPOINT_MAX_VALUE) {\r
2125             throw new IllegalArgumentException("Argument oldChar32 is not a valid codepoint");\r
2126         }\r
2127         if (newChar32 <= 0 || newChar32 > CODEPOINT_MAX_VALUE) {\r
2128             throw new IllegalArgumentException("Argument newChar32 is not a valid codepoint");\r
2129         }\r
2130 \r
2131         int index = indexOf(source, oldChar32);\r
2132         if (index == -1) {\r
2133             return source;\r
2134         }\r
2135         String newChar32Str = toString(newChar32);\r
2136         int oldChar32Size = 1;\r
2137         int newChar32Size = newChar32Str.length();\r
2138         StringBuffer result = new StringBuffer(source);\r
2139         int resultIndex = index;\r
2140 \r
2141         if (oldChar32 >= SUPPLEMENTARY_MIN_VALUE) {\r
2142             oldChar32Size = 2;\r
2143         }\r
2144 \r
2145         while (index != -1) {\r
2146             int endResultIndex = resultIndex + oldChar32Size;\r
2147             result.replace(resultIndex, endResultIndex, newChar32Str);\r
2148             int lastEndIndex = index + oldChar32Size;\r
2149             index = indexOf(source, oldChar32, lastEndIndex);\r
2150             resultIndex += newChar32Size + index - lastEndIndex;\r
2151         }\r
2152         return result.toString();\r
2153     }\r
2154 \r
2155     /**\r
2156      * Returns a new UTF16 format Unicode string resulting from replacing all occurrences of oldStr\r
2157      * in source with newStr. If the string oldStr does not occur in the UTF16 format Unicode string\r
2158      * source, then source will be returned. Otherwise, a new String object is created that\r
2159      * represents a codepoint sequence identical to the codepoint sequence represented by source,\r
2160      * except that every occurrence of oldStr is replaced by an occurrence of newStr.\r
2161      * <p>\r
2162      * Examples: <br>\r
2163      * UTF16.replace("mesquite in your cellar", "e", "o");<br>\r
2164      * returns "mosquito in your collar"<br>\r
2165      * UTF16.replace("mesquite in your cellar", "mesquite", "cat");<br>\r
2166      * returns "cat in your cellar"<br>\r
2167      * UTF16.replace("JonL", "q", "x");<br>\r
2168      * returns "JonL" (no change)<br>\r
2169      * UTF16.replace("Supplementary character \ud800\udc00", "\ud800\udc00", '!'); <br>\r
2170      * returns "Supplementary character !"<br>\r
2171      * UTF16.replace("Supplementary character \ud800\udc00", "\ud800", '!'); <br>\r
2172      * returns "Supplementary character \ud800\udc00"<br>\r
2173      * </p>\r
2174      * Note this method is provided as support to jdk 1.3, which does not support supplementary\r
2175      * characters to its fullest.\r
2176      * \r
2177      * @param source\r
2178      *            UTF16 format Unicode string which the replacements will be based on.\r
2179      * @param oldStr\r
2180      *            non-zero-length string to be replaced.\r
2181      * @param newStr\r
2182      *            the new string to replace oldStr\r
2183      * @return new String derived from source by replacing every occurrence of oldStr with newStr.\r
2184      *         When no oldStr is found in source, then source will be returned.\r
2185      * @stable ICU 2.6\r
2186      */\r
2187     public static String replace(String source, String oldStr, String newStr) {\r
2188         int index = indexOf(source, oldStr);\r
2189         if (index == -1) {\r
2190             return source;\r
2191         }\r
2192         int oldStrSize = oldStr.length();\r
2193         int newStrSize = newStr.length();\r
2194         StringBuffer result = new StringBuffer(source);\r
2195         int resultIndex = index;\r
2196 \r
2197         while (index != -1) {\r
2198             int endResultIndex = resultIndex + oldStrSize;\r
2199             result.replace(resultIndex, endResultIndex, newStr);\r
2200             int lastEndIndex = index + oldStrSize;\r
2201             index = indexOf(source, oldStr, lastEndIndex);\r
2202             resultIndex += newStrSize + index - lastEndIndex;\r
2203         }\r
2204         return result.toString();\r
2205     }\r
2206 \r
2207     /**\r
2208      * Reverses a UTF16 format Unicode string and replaces source's content with it. This method\r
2209      * will reverse surrogate characters correctly, instead of blindly reversing every character.\r
2210      * <p>\r
2211      * Examples:<br>\r
2212      * UTF16.reverse(new StringBuffer( "Supplementary characters \ud800\udc00\ud801\udc01"))<br>\r
2213      * returns "\ud801\udc01\ud800\udc00 sretcarahc yratnemelppuS".\r
2214      * \r
2215      * @param source\r
2216      *            the source StringBuffer that contains UTF16 format Unicode string to be reversed\r
2217      * @return a modified source with reversed UTF16 format Unicode string.\r
2218      * @stable ICU 2.6\r
2219      */\r
2220     public static StringBuffer reverse(StringBuffer source) {\r
2221         int length = source.length();\r
2222         StringBuffer result = new StringBuffer(length);\r
2223         for (int i = length; i-- > 0;) {\r
2224             char ch = source.charAt(i);\r
2225             if (isTrailSurrogate(ch) && i > 0) {\r
2226                 char ch2 = source.charAt(i - 1);\r
2227                 if (isLeadSurrogate(ch2)) {\r
2228                     result.append(ch2);\r
2229                     result.append(ch);\r
2230                     --i;\r
2231                     continue;\r
2232                 }\r
2233             }\r
2234             result.append(ch);\r
2235         }\r
2236         return result;\r
2237     }\r
2238 \r
2239     /**\r
2240      * Check if the string contains more Unicode code points than a certain number. This is more\r
2241      * efficient than counting all code points in the entire string and comparing that number with a\r
2242      * threshold. This function may not need to scan the string at all if the length is within a\r
2243      * certain range, and never needs to count more than 'number + 1' code points. Logically\r
2244      * equivalent to (countCodePoint(s) > number). A Unicode code point may occupy either one or two\r
2245      * code units.\r
2246      * \r
2247      * @param source\r
2248      *            The input string.\r
2249      * @param number\r
2250      *            The number of code points in the string is compared against the 'number'\r
2251      *            parameter.\r
2252      * @return boolean value for whether the string contains more Unicode code points than 'number'.\r
2253      * @stable ICU 2.4\r
2254      */\r
2255     public static boolean hasMoreCodePointsThan(String source, int number) {\r
2256         if (number < 0) {\r
2257             return true;\r
2258         }\r
2259         if (source == null) {\r
2260             return false;\r
2261         }\r
2262         int length = source.length();\r
2263 \r
2264         // length >= 0 known\r
2265         // source contains at least (length + 1) / 2 code points: <= 2\r
2266         // chars per cp\r
2267         if (((length + 1) >> 1) > number) {\r
2268             return true;\r
2269         }\r
2270 \r
2271         // check if source does not even contain enough chars\r
2272         int maxsupplementary = length - number;\r
2273         if (maxsupplementary <= 0) {\r
2274             return false;\r
2275         }\r
2276 \r
2277         // there are maxsupplementary = length - number more chars than\r
2278         // asked-for code points\r
2279 \r
2280         // count code points until they exceed and also check that there are\r
2281         // no more than maxsupplementary supplementary code points (char pairs)\r
2282         int start = 0;\r
2283         while (true) {\r
2284             if (length == 0) {\r
2285                 return false;\r
2286             }\r
2287             if (number == 0) {\r
2288                 return true;\r
2289             }\r
2290             if (isLeadSurrogate(source.charAt(start++)) && start != length\r
2291                     && isTrailSurrogate(source.charAt(start))) {\r
2292                 start++;\r
2293                 if (--maxsupplementary <= 0) {\r
2294                     // too many pairs - too few code points\r
2295                     return false;\r
2296                 }\r
2297             }\r
2298             --number;\r
2299         }\r
2300     }\r
2301 \r
2302     /**\r
2303      * Check if the sub-range of char array, from argument start to limit, contains more Unicode\r
2304      * code points than a certain number. This is more efficient than counting all code points in\r
2305      * the entire char array range and comparing that number with a threshold. This function may not\r
2306      * need to scan the char array at all if start and limit is within a certain range, and never\r
2307      * needs to count more than 'number + 1' code points. Logically equivalent to\r
2308      * (countCodePoint(source, start, limit) > number). A Unicode code point may occupy either one\r
2309      * or two code units.\r
2310      * \r
2311      * @param source\r
2312      *            array of UTF-16 chars\r
2313      * @param start\r
2314      *            offset to substring in the source array for analyzing\r
2315      * @param limit\r
2316      *            offset to substring in the source array for analyzing\r
2317      * @param number\r
2318      *            The number of code points in the string is compared against the 'number'\r
2319      *            parameter.\r
2320      * @return boolean value for whether the string contains more Unicode code points than 'number'.\r
2321      * @exception IndexOutOfBoundsException\r
2322      *                thrown when limit &lt; start\r
2323      * @stable ICU 2.4\r
2324      */\r
2325     public static boolean hasMoreCodePointsThan(char source[], int start, int limit, int number) {\r
2326         int length = limit - start;\r
2327         if (length < 0 || start < 0 || limit < 0) {\r
2328             throw new IndexOutOfBoundsException(\r
2329                     "Start and limit indexes should be non-negative and start <= limit");\r
2330         }\r
2331         if (number < 0) {\r
2332             return true;\r
2333         }\r
2334         if (source == null) {\r
2335             return false;\r
2336         }\r
2337 \r
2338         // length >= 0 known\r
2339         // source contains at least (length + 1) / 2 code points: <= 2\r
2340         // chars per cp\r
2341         if (((length + 1) >> 1) > number) {\r
2342             return true;\r
2343         }\r
2344 \r
2345         // check if source does not even contain enough chars\r
2346         int maxsupplementary = length - number;\r
2347         if (maxsupplementary <= 0) {\r
2348             return false;\r
2349         }\r
2350 \r
2351         // there are maxsupplementary = length - number more chars than\r
2352         // asked-for code points\r
2353 \r
2354         // count code points until they exceed and also check that there are\r
2355         // no more than maxsupplementary supplementary code points (char pairs)\r
2356         while (true) {\r
2357             if (length == 0) {\r
2358                 return false;\r
2359             }\r
2360             if (number == 0) {\r
2361                 return true;\r
2362             }\r
2363             if (isLeadSurrogate(source[start++]) && start != limit\r
2364                     && isTrailSurrogate(source[start])) {\r
2365                 start++;\r
2366                 if (--maxsupplementary <= 0) {\r
2367                     // too many pairs - too few code points\r
2368                     return false;\r
2369                 }\r
2370             }\r
2371             --number;\r
2372         }\r
2373     }\r
2374 \r
2375     /**\r
2376      * Check if the string buffer contains more Unicode code points than a certain number. This is\r
2377      * more efficient than counting all code points in the entire string buffer and comparing that\r
2378      * number with a threshold. This function may not need to scan the string buffer at all if the\r
2379      * length is within a certain range, and never needs to count more than 'number + 1' code\r
2380      * points. Logically equivalent to (countCodePoint(s) > number). A Unicode code point may occupy\r
2381      * either one or two code units.\r
2382      * \r
2383      * @param source\r
2384      *            The input string buffer.\r
2385      * @param number\r
2386      *            The number of code points in the string buffer is compared against the 'number'\r
2387      *            parameter.\r
2388      * @return boolean value for whether the string buffer contains more Unicode code points than\r
2389      *         'number'.\r
2390      * @stable ICU 2.4\r
2391      */\r
2392     public static boolean hasMoreCodePointsThan(StringBuffer source, int number) {\r
2393         if (number < 0) {\r
2394             return true;\r
2395         }\r
2396         if (source == null) {\r
2397             return false;\r
2398         }\r
2399         int length = source.length();\r
2400 \r
2401         // length >= 0 known\r
2402         // source contains at least (length + 1) / 2 code points: <= 2\r
2403         // chars per cp\r
2404         if (((length + 1) >> 1) > number) {\r
2405             return true;\r
2406         }\r
2407 \r
2408         // check if source does not even contain enough chars\r
2409         int maxsupplementary = length - number;\r
2410         if (maxsupplementary <= 0) {\r
2411             return false;\r
2412         }\r
2413 \r
2414         // there are maxsupplementary = length - number more chars than\r
2415         // asked-for code points\r
2416 \r
2417         // count code points until they exceed and also check that there are\r
2418         // no more than maxsupplementary supplementary code points (char pairs)\r
2419         int start = 0;\r
2420         while (true) {\r
2421             if (length == 0) {\r
2422                 return false;\r
2423             }\r
2424             if (number == 0) {\r
2425                 return true;\r
2426             }\r
2427             if (isLeadSurrogate(source.charAt(start++)) && start != length\r
2428                     && isTrailSurrogate(source.charAt(start))) {\r
2429                 start++;\r
2430                 if (--maxsupplementary <= 0) {\r
2431                     // too many pairs - too few code points\r
2432                     return false;\r
2433                 }\r
2434             }\r
2435             --number;\r
2436         }\r
2437     }\r
2438 \r
2439     /**\r
2440      * Cover JDK 1.5 API. Create a String from an array of codePoints.\r
2441      * \r
2442      * @param codePoints\r
2443      *            the code array\r
2444      * @param offset\r
2445      *            the start of the text in the code point array\r
2446      * @param count\r
2447      *            the number of code points\r
2448      * @return a String representing the code points between offset and count\r
2449      * @throws IllegalArgumentException\r
2450      *             if an invalid code point is encountered\r
2451      * @throws IndexOutOfBoundsException\r
2452      *             if the offset or count are out of bounds.\r
2453      * @stable ICU 3.0\r
2454      */\r
2455     public static String newString(int[] codePoints, int offset, int count) {\r
2456         if (count < 0) {\r
2457             throw new IllegalArgumentException();\r
2458         }\r
2459         char[] chars = new char[count];\r
2460         int w = 0;\r
2461         for (int r = offset, e = offset + count; r < e; ++r) {\r
2462             int cp = codePoints[r];\r
2463             if (cp < 0 || cp > 0x10ffff) {\r
2464                 throw new IllegalArgumentException();\r
2465             }\r
2466             while (true) {\r
2467                 try {\r
2468                     if (cp < 0x010000) {\r
2469                         chars[w] = (char) cp;\r
2470                         w++;\r
2471                     } else {\r
2472                         chars[w] = (char) (LEAD_SURROGATE_OFFSET_ + (cp >> LEAD_SURROGATE_SHIFT_));\r
2473                         chars[w + 1] = (char) (TRAIL_SURROGATE_MIN_VALUE + (cp & TRAIL_SURROGATE_MASK_));\r
2474                         w += 2;\r
2475                     }\r
2476                     break;\r
2477                 } catch (IndexOutOfBoundsException ex) {\r
2478                     int newlen = (int) (Math.ceil((double) codePoints.length * (w + 2)\r
2479                             / (r - offset + 1)));\r
2480                     char[] temp = new char[newlen];\r
2481                     System.arraycopy(chars, 0, temp, 0, w);\r
2482                     chars = temp;\r
2483                 }\r
2484             }\r
2485         }\r
2486         return new String(chars, 0, w);\r
2487     }\r
2488 \r
2489     /**\r
2490      * <p>\r
2491      * UTF16 string comparator class. Allows UTF16 string comparison to be done with the various\r
2492      * modes\r
2493      * </p>\r
2494      * <ul>\r
2495      * <li> Code point comparison or code unit comparison\r
2496      * <li> Case sensitive comparison, case insensitive comparison or case insensitive comparison\r
2497      * with special handling for character 'i'.\r
2498      * </ul>\r
2499      * <p>\r
2500      * The code unit or code point comparison differ only when comparing supplementary code points\r
2501      * (&#92;u10000..&#92;u10ffff) to BMP code points near the end of the BMP (i.e.,\r
2502      * &#92;ue000..&#92;uffff). In code unit comparison, high BMP code points sort after\r
2503      * supplementary code points because they are stored as pairs of surrogates which are at\r
2504      * &#92;ud800..&#92;udfff.\r
2505      * </p>\r
2506      * \r
2507      * @see #FOLD_CASE_DEFAULT\r
2508      * @see #FOLD_CASE_EXCLUDE_SPECIAL_I\r
2509      * @stable ICU 2.1\r
2510      */\r
2511     public static final class StringComparator implements java.util.Comparator {\r
2512         // public constructor ------------------------------------------------\r
2513 \r
2514         /**\r
2515          * Default constructor that does code unit comparison and case sensitive comparison.\r
2516          * \r
2517          * @stable ICU 2.1\r
2518          */\r
2519         public StringComparator() {\r
2520             this(false, false, FOLD_CASE_DEFAULT);\r
2521         }\r
2522 \r
2523         /**\r
2524          * Constructor that does comparison based on the argument options.\r
2525          * \r
2526          * @param codepointcompare\r
2527          *            flag to indicate true for code point comparison or false for code unit\r
2528          *            comparison.\r
2529          * @param ignorecase\r
2530          *            false for case sensitive comparison, true for case-insensitive comparison\r
2531          * @param foldcaseoption\r
2532          *            FOLD_CASE_DEFAULT or FOLD_CASE_EXCLUDE_SPECIAL_I. This option is used only\r
2533          *            when ignorecase is set to true. If ignorecase is false, this option is\r
2534          *            ignored.\r
2535          * @see #FOLD_CASE_DEFAULT\r
2536          * @see #FOLD_CASE_EXCLUDE_SPECIAL_I\r
2537          * @throws IllegalArgumentException\r
2538          *             if foldcaseoption is out of range\r
2539          * @stable ICU 2.4\r
2540          */\r
2541         public StringComparator(boolean codepointcompare, boolean ignorecase, int foldcaseoption) {\r
2542             setCodePointCompare(codepointcompare);\r
2543             m_ignoreCase_ = ignorecase;\r
2544             if (foldcaseoption < FOLD_CASE_DEFAULT || foldcaseoption > FOLD_CASE_EXCLUDE_SPECIAL_I) {\r
2545                 throw new IllegalArgumentException("Invalid fold case option");\r
2546             }\r
2547             m_foldCase_ = foldcaseoption;\r
2548         }\r
2549 \r
2550         // public data member ------------------------------------------------\r
2551 \r
2552         /**\r
2553          * <p>\r
2554          * Option value for case folding comparison:\r
2555          * </p>\r
2556          * <p>\r
2557          * Comparison is case insensitive, strings are folded using default mappings defined in\r
2558          * Unicode data file CaseFolding.txt, before comparison.\r
2559          * </p>\r
2560          * \r
2561          * @stable ICU 2.4\r
2562          */\r
2563         public static final int FOLD_CASE_DEFAULT = 0;\r
2564 \r
2565         /**\r
2566          * <p>\r
2567          * Option value for case folding comparison:\r
2568          * </p>\r
2569          * <p>\r
2570          * Comparison is case insensitive, strings are folded using modified mappings defined in\r
2571          * Unicode data file CaseFolding.txt, before comparison.\r
2572          * </p>\r
2573          * <p>\r
2574          * The modified set of mappings is provided in a Unicode data file CaseFolding.txt to handle\r
2575          * dotted I and dotless i appropriately for Turkic languages (tr, az).\r
2576          * </p>\r
2577          * <p>\r
2578          * Before Unicode 3.2, CaseFolding.txt contains mappings marked with 'I' that are to be\r
2579          * included for default mappings and excluded for the Turkic-specific mappings.\r
2580          * </p>\r
2581          * <p>\r
2582          * Unicode 3.2 CaseFolding.txt instead contains mappings marked with 'T' that are to be\r
2583          * excluded for default mappings and included for the Turkic-specific mappings.\r
2584          * </p>\r
2585          * \r
2586          * @stable ICU 2.4\r
2587          */\r
2588         public static final int FOLD_CASE_EXCLUDE_SPECIAL_I = 1;\r
2589 \r
2590         // public methods ----------------------------------------------------\r
2591 \r
2592         // public setters ----------------------------------------------------\r
2593 \r
2594         /**\r
2595          * Sets the comparison mode to code point compare if flag is true. Otherwise comparison mode\r
2596          * is set to code unit compare\r
2597          * \r
2598          * @param flag\r
2599          *            true for code point compare, false for code unit compare\r
2600          * @stable ICU 2.4\r
2601          */\r
2602         public void setCodePointCompare(boolean flag) {\r
2603             if (flag) {\r
2604                 m_codePointCompare_ = Normalizer.COMPARE_CODE_POINT_ORDER;\r
2605             } else {\r
2606                 m_codePointCompare_ = 0;\r
2607             }\r
2608         }\r
2609 \r
2610         /**\r
2611          * Sets the Comparator to case-insensitive comparison mode if argument is true, otherwise\r
2612          * case sensitive comparison mode if set to false.\r
2613          * \r
2614          * @param ignorecase\r
2615          *            true for case-insitive comparison, false for case sensitive comparison\r
2616          * @param foldcaseoption\r
2617          *            FOLD_CASE_DEFAULT or FOLD_CASE_EXCLUDE_SPECIAL_I. This option is used only\r
2618          *            when ignorecase is set to true. If ignorecase is false, this option is\r
2619          *            ignored.\r
2620          * @see #FOLD_CASE_DEFAULT\r
2621          * @see #FOLD_CASE_EXCLUDE_SPECIAL_I\r
2622          * @stable ICU 2.4\r
2623          */\r
2624         public void setIgnoreCase(boolean ignorecase, int foldcaseoption) {\r
2625             m_ignoreCase_ = ignorecase;\r
2626             if (foldcaseoption < FOLD_CASE_DEFAULT || foldcaseoption > FOLD_CASE_EXCLUDE_SPECIAL_I) {\r
2627                 throw new IllegalArgumentException("Invalid fold case option");\r
2628             }\r
2629             m_foldCase_ = foldcaseoption;\r
2630         }\r
2631 \r
2632         // public getters ----------------------------------------------------\r
2633 \r
2634         /**\r
2635          * Checks if the comparison mode is code point compare.\r
2636          * \r
2637          * @return true for code point compare, false for code unit compare\r
2638          * @stable ICU 2.4\r
2639          */\r
2640         public boolean getCodePointCompare() {\r
2641             return m_codePointCompare_ == Normalizer.COMPARE_CODE_POINT_ORDER;\r
2642         }\r
2643 \r
2644         /**\r
2645          * Checks if Comparator is in the case insensitive mode.\r
2646          * \r
2647          * @return true if Comparator performs case insensitive comparison, false otherwise\r
2648          * @stable ICU 2.4\r
2649          */\r
2650         public boolean getIgnoreCase() {\r
2651             return m_ignoreCase_;\r
2652         }\r
2653 \r
2654         /**\r
2655          * Gets the fold case options set in Comparator to be used with case insensitive comparison.\r
2656          * \r
2657          * @return either FOLD_CASE_DEFAULT or FOLD_CASE_EXCLUDE_SPECIAL_I\r
2658          * @see #FOLD_CASE_DEFAULT\r
2659          * @see #FOLD_CASE_EXCLUDE_SPECIAL_I\r
2660          * @stable ICU 2.4\r
2661          */\r
2662         public int getIgnoreCaseOption() {\r
2663             return m_foldCase_;\r
2664         }\r
2665 \r
2666         // public other methods ----------------------------------------------\r
2667 \r
2668         /**\r
2669          * Compare two strings depending on the options selected during construction.\r
2670          * \r
2671          * @param a\r
2672          *            first source string.\r
2673          * @param b\r
2674          *            second source string.\r
2675          * @return 0 returned if a == b. If a < b, a negative value is returned. Otherwise if a > b,\r
2676          *         a positive value is returned.\r
2677          * @exception ClassCastException\r
2678          *                thrown when either a or b is not a String object\r
2679          * @stable ICU 2.4\r
2680          */\r
2681         public int compare(Object a, Object b) {\r
2682             String str1 = (String) a;\r
2683             String str2 = (String) b;\r
2684 \r
2685             if (str1 == str2) {\r
2686                 return 0;\r
2687             }\r
2688             if (str1 == null) {\r
2689                 return -1;\r
2690             }\r
2691             if (str2 == null) {\r
2692                 return 1;\r
2693             }\r
2694 \r
2695             if (m_ignoreCase_) {\r
2696                 return compareCaseInsensitive(str1, str2);\r
2697             }\r
2698             return compareCaseSensitive(str1, str2);\r
2699         }\r
2700 \r
2701         // private data member ----------------------------------------------\r
2702 \r
2703         /**\r
2704          * Code unit comparison flag. True if code unit comparison is required. False if code point\r
2705          * comparison is required.\r
2706          */\r
2707         private int m_codePointCompare_;\r
2708 \r
2709         /**\r
2710          * Fold case comparison option.\r
2711          */\r
2712         private int m_foldCase_;\r
2713 \r
2714         /**\r
2715          * Flag indicator if ignore case is to be used during comparison\r
2716          */\r
2717         private boolean m_ignoreCase_;\r
2718 \r
2719         /**\r
2720          * Code point order offset for surrogate characters\r
2721          */\r
2722         private static final int CODE_POINT_COMPARE_SURROGATE_OFFSET_ = 0x2800;\r
2723 \r
2724         // private method ---------------------------------------------------\r
2725 \r
2726         /**\r
2727          * Compares case insensitive. This is a direct port of ICU4C, to make maintainence life\r
2728          * easier.\r
2729          * \r
2730          * @param s1\r
2731          *            first string to compare\r
2732          * @param s2\r
2733          *            second string to compare\r
2734          * @return -1 is s1 &lt; s2, 0 if equals,\r
2735          */\r
2736         private int compareCaseInsensitive(String s1, String s2) {\r
2737             return NormalizerImpl.cmpEquivFold(s1, s2, m_foldCase_ | m_codePointCompare_\r
2738                     | Normalizer.COMPARE_IGNORE_CASE);\r
2739         }\r
2740 \r
2741         /**\r
2742          * Compares case sensitive. This is a direct port of ICU4C, to make maintainence life\r
2743          * easier.\r
2744          * \r
2745          * @param s1\r
2746          *            first string to compare\r
2747          * @param s2\r
2748          *            second string to compare\r
2749          * @return -1 is s1 &lt; s2, 0 if equals,\r
2750          */\r
2751         private int compareCaseSensitive(String s1, String s2) {\r
2752             // compare identical prefixes - they do not need to be fixed up\r
2753             // limit1 = start1 + min(lenght1, length2)\r
2754             int length1 = s1.length();\r
2755             int length2 = s2.length();\r
2756             int minlength = length1;\r
2757             int result = 0;\r
2758             if (length1 < length2) {\r
2759                 result = -1;\r
2760             } else if (length1 > length2) {\r
2761                 result = 1;\r
2762                 minlength = length2;\r
2763             }\r
2764 \r
2765             char c1 = 0;\r
2766             char c2 = 0;\r
2767             int index = 0;\r
2768             for (; index < minlength; index++) {\r
2769                 c1 = s1.charAt(index);\r
2770                 c2 = s2.charAt(index);\r
2771                 // check pseudo-limit\r
2772                 if (c1 != c2) {\r
2773                     break;\r
2774                 }\r
2775             }\r
2776 \r
2777             if (index == minlength) {\r
2778                 return result;\r
2779             }\r
2780 \r
2781             boolean codepointcompare = m_codePointCompare_ == Normalizer.COMPARE_CODE_POINT_ORDER;\r
2782             // if both values are in or above the surrogate range, fix them up\r
2783             if (c1 >= LEAD_SURROGATE_MIN_VALUE && c2 >= LEAD_SURROGATE_MIN_VALUE\r
2784                     && codepointcompare) {\r
2785                 // subtract 0x2800 from BMP code points to make them smaller\r
2786                 // than supplementary ones\r
2787                 if ((c1 <= LEAD_SURROGATE_MAX_VALUE && (index + 1) != length1 && isTrailSurrogate(s1.charAt(index + 1)))\r
2788                         || (isTrailSurrogate(c1) && index != 0 && isLeadSurrogate(s1.charAt(index - 1)))) {\r
2789                     // part of a surrogate pair, leave >=d800\r
2790                 } else {\r
2791                     // BMP code point - may be surrogate code point - make\r
2792                     // < d800\r
2793                     c1 -= CODE_POINT_COMPARE_SURROGATE_OFFSET_;\r
2794                 }\r
2795 \r
2796                 if ((c2 <= LEAD_SURROGATE_MAX_VALUE && (index + 1) != length2 && isTrailSurrogate(s2.charAt(index + 1)))\r
2797                         || (isTrailSurrogate(c2) && index != 0 && isLeadSurrogate(s2.charAt(index - 1)))) {\r
2798                     // part of a surrogate pair, leave >=d800\r
2799                 } else {\r
2800                     // BMP code point - may be surrogate code point - make <d800\r
2801                     c2 -= CODE_POINT_COMPARE_SURROGATE_OFFSET_;\r
2802                 }\r
2803             }\r
2804 \r
2805             // now c1 and c2 are in UTF-32-compatible order\r
2806             return c1 - c2;\r
2807         }\r
2808     }\r
2809 \r
2810     // private data members -------------------------------------------------\r
2811 \r
    /**
     * Shift value for lead surrogate to form a supplementary character.
     */
    private static final int LEAD_SURROGATE_SHIFT_ = 10;

    /**
     * Mask to retrieve the significant value from a trail surrogate (the low 10 bits).
     */
    private static final int TRAIL_SURROGATE_MASK_ = 0x3FF;

    /**
     * Base offset for lead surrogates: the minimum lead surrogate value minus the minimum
     * supplementary code point shifted right by LEAD_SURROGATE_SHIFT_. Presumably added to a
     * shifted supplementary code point to obtain its lead surrogate - confirm against
     * getLeadSurrogate.
     */
    private static final int LEAD_SURROGATE_OFFSET_ = LEAD_SURROGATE_MIN_VALUE
            - (SUPPLEMENTARY_MIN_VALUE >> LEAD_SURROGATE_SHIFT_);
2827 \r
2828     // private methods ------------------------------------------------------\r
2829 \r
2830     /**\r
2831      * <p>\r
2832      * Converts argument code point and returns a String object representing the code point's value\r
2833      * in UTF16 format.\r
2834      * </p>\r
2835      * <p>\r
2836      * This method does not check for the validity of the codepoint, the results are not guaranteed\r
2837      * if a invalid codepoint is passed as argument.\r
2838      * </p>\r
2839      * <p>\r
2840      * The result is a string whose length is 1 for non-supplementary code points, 2 otherwise.\r
2841      * </p>\r
2842      * \r
2843      * @param ch\r
2844      *            code point\r
2845      * @return string representation of the code point\r
2846      */\r
2847     private static String toString(int ch) {\r
2848         if (ch < SUPPLEMENTARY_MIN_VALUE) {\r
2849             return String.valueOf((char) ch);\r
2850         }\r
2851 \r
2852         StringBuffer result = new StringBuffer();\r
2853         result.append(getLeadSurrogate(ch));\r
2854         result.append(getTrailSurrogate(ch));\r
2855         return result.toString();\r
2856     }\r
2857 }\r
2858 // eof\r