jars/icu4j-52_1/main/tests/core/src/com/ibm/icu/dev/test/lang/UCharacterCaseTest.java

   1 /**
   2 *******************************************************************************
   3 * Copyright (C) 1996-2010, International Business Machines Corporation and
   4 * others. All Rights Reserved.
   5 *******************************************************************************
   6 */
   7
   8
   9 package com.ibm.icu.dev.test.lang;
  10
  11
  12 import java.io.BufferedReader;
  13 import java.util.ArrayList;
  14 import java.util.List;
  15 import java.util.Locale;
  16
  17 import com.ibm.icu.dev.test.TestFmwk;
  18 import com.ibm.icu.dev.test.TestUtil;
  19 import com.ibm.icu.impl.Utility;
  20 import com.ibm.icu.lang.UCharacter;
  21 import com.ibm.icu.lang.UProperty;
  22 import com.ibm.icu.text.BreakIterator;
  23 import com.ibm.icu.text.RuleBasedBreakIterator;
  24 import com.ibm.icu.text.UTF16;
  25 import com.ibm.icu.util.ULocale;
  26
  27
  28 /**
  29 * <p>Testing character casing</p>
  30 * <p>Mostly following the test cases in strcase.cpp for ICU</p>
  31 * @author Syn Wee Quek
  32 * @since march 14 2002
  33 */
  34 public final class UCharacterCaseTest extends TestFmwk
  35 {
  36     // constructor -----------------------------------------------------------
  37
  38     /**
  39      * Constructor
  40      */
  41     public UCharacterCaseTest()
  42     {
  43     }
  44
  45     // public methods --------------------------------------------------------
  46
  47     public static void main(String[] arg)
  48     {
  49         try
  50         {
  51             UCharacterCaseTest test = new UCharacterCaseTest();
  52             test.run(arg);
  53         }
  54         catch (Exception e)
  55         {
  56             e.printStackTrace();
  57         }
  58     }
  59
  60     /**
  61      * Testing the uppercase and lowercase function of UCharacter
  62      */
  63     public void TestCharacter()
  64     {
  65         for (int i = 0; i < CHARACTER_LOWER_.length; i ++) {
  66             if (UCharacter.isLetter(CHARACTER_LOWER_[i]) &&
  67                 !UCharacter.isLowerCase(CHARACTER_LOWER_[i])) {
  68                 errln("FAIL isLowerCase test for \\u" +
  69                       hex(CHARACTER_LOWER_[i]));
  70                 break;
  71             }
  72             if (UCharacter.isLetter(CHARACTER_UPPER_[i]) &&
  73                 !(UCharacter.isUpperCase(CHARACTER_UPPER_[i]) ||
  74                   UCharacter.isTitleCase(CHARACTER_UPPER_[i]))) {
  75                 errln("FAIL isUpperCase test for \\u" +
  76                       hex(CHARACTER_UPPER_[i]));
  77                 break;
  78             }
  79             if (CHARACTER_LOWER_[i] !=
  80                 UCharacter.toLowerCase(CHARACTER_UPPER_[i]) ||
  81                 (CHARACTER_UPPER_[i] !=
  82                 UCharacter.toUpperCase(CHARACTER_LOWER_[i]) &&
  83                 CHARACTER_UPPER_[i] !=
  84                 UCharacter.toTitleCase(CHARACTER_LOWER_[i]))) {
  85                 errln("FAIL case conversion test for \\u" +
  86                       hex(CHARACTER_UPPER_[i]) +
  87                       " to \\u" + hex(CHARACTER_LOWER_[i]));
  88                 break;
  89             }
  90             if (CHARACTER_LOWER_[i] !=
  91                 UCharacter.toLowerCase(CHARACTER_LOWER_[i])) {
  92                 errln("FAIL lower case conversion test for \\u" +
  93                       hex(CHARACTER_LOWER_[i]));
  94                 break;
  95             }
  96             if (CHARACTER_UPPER_[i] !=
  97                 UCharacter.toUpperCase(CHARACTER_UPPER_[i]) &&
  98                 CHARACTER_UPPER_[i] !=
  99                 UCharacter.toTitleCase(CHARACTER_UPPER_[i])) {
 100                 errln("FAIL upper case conversion test for \\u" +
 101                       hex(CHARACTER_UPPER_[i]));
 102                 break;
 103             }
 104             logln("Ok    \\u" + hex(CHARACTER_UPPER_[i]) + " and \\u" +
 105                   hex(CHARACTER_LOWER_[i]));
 106         }
 107     }
 108
 109     public void TestFolding()
 110     {
 111         // test simple case folding
 112         for (int i = 0; i < FOLDING_SIMPLE_.length; i += 3) {
 113             if (UCharacter.foldCase(FOLDING_SIMPLE_[i], true) !=
 114                 FOLDING_SIMPLE_[i + 1]) {
 115                 errln("FAIL: foldCase(\\u" + hex(FOLDING_SIMPLE_[i]) +
 116                       ", true) should be \\u" + hex(FOLDING_SIMPLE_[i + 1]));
 117             }
 118             if (UCharacter.foldCase(FOLDING_SIMPLE_[i],
 119                                     UCharacter.FOLD_CASE_DEFAULT) !=
 120                                                       FOLDING_SIMPLE_[i + 1]) {
 121                 errln("FAIL: foldCase(\\u" + hex(FOLDING_SIMPLE_[i]) +
 122                       ", UCharacter.FOLD_CASE_DEFAULT) should be \\u"
 123                       + hex(FOLDING_SIMPLE_[i + 1]));
 124             }
 125             if (UCharacter.foldCase(FOLDING_SIMPLE_[i], false) !=
 126                 FOLDING_SIMPLE_[i + 2]) {
 127                 errln("FAIL: foldCase(\\u" + hex(FOLDING_SIMPLE_[i]) +
 128                       ", false) should be \\u" + hex(FOLDING_SIMPLE_[i + 2]));
 129             }
 130             if (UCharacter.foldCase(FOLDING_SIMPLE_[i],
 131                                     UCharacter.FOLD_CASE_EXCLUDE_SPECIAL_I) !=
 132                                     FOLDING_SIMPLE_[i + 2]) {
 133                 errln("FAIL: foldCase(\\u" + hex(FOLDING_SIMPLE_[i]) +
 134                       ", UCharacter.FOLD_CASE_EXCLUDE_SPECIAL_I) should be \\u"
 135                       + hex(FOLDING_SIMPLE_[i + 2]));
 136             }
 137         }
 138
 139         // Test full string case folding with default option and separate
 140         // buffers
 141         if (!FOLDING_DEFAULT_[0].equals(UCharacter.foldCase(FOLDING_MIXED_[0], true))) {
 142             errln("FAIL: foldCase(" + prettify(FOLDING_MIXED_[0]) +
 143                   ", true)=" + prettify(UCharacter.foldCase(FOLDING_MIXED_[0], true)) +
 144                   " should be " + prettify(FOLDING_DEFAULT_[0]));
 145         }
 146
 147         if (!FOLDING_DEFAULT_[0].equals(UCharacter.foldCase(FOLDING_MIXED_[0], UCharacter.FOLD_CASE_DEFAULT))) {
 148                     errln("FAIL: foldCase(" + prettify(FOLDING_MIXED_[0]) +
 149                           ", UCharacter.FOLD_CASE_DEFAULT)=" + prettify(UCharacter.foldCase(FOLDING_MIXED_[0], UCharacter.FOLD_CASE_DEFAULT))
 150                           + " should be " + prettify(FOLDING_DEFAULT_[0]));
 151                 }
 152
 153         if (!FOLDING_EXCLUDE_SPECIAL_I_[0].equals(
 154                             UCharacter.foldCase(FOLDING_MIXED_[0], false))) {
 155             errln("FAIL: foldCase(" + prettify(FOLDING_MIXED_[0]) +
 156                   ", false)=" + prettify(UCharacter.foldCase(FOLDING_MIXED_[0], false))
 157                   + " should be " + prettify(FOLDING_EXCLUDE_SPECIAL_I_[0]));
 158         }
 159
 160         if (!FOLDING_EXCLUDE_SPECIAL_I_[0].equals(
 161                                     UCharacter.foldCase(FOLDING_MIXED_[0], UCharacter.FOLD_CASE_EXCLUDE_SPECIAL_I))) {
 162             errln("FAIL: foldCase(" + prettify(FOLDING_MIXED_[0]) +
 163                   ", UCharacter.FOLD_CASE_EXCLUDE_SPECIAL_I)=" + prettify(UCharacter.foldCase(FOLDING_MIXED_[0], UCharacter.FOLD_CASE_EXCLUDE_SPECIAL_I))
 164                   + " should be " + prettify(FOLDING_EXCLUDE_SPECIAL_I_[0]));
 165         }
 166
 167         if (!FOLDING_DEFAULT_[1].equals(UCharacter.foldCase(FOLDING_MIXED_[1], true))) {
 168            errln("FAIL: foldCase(" + prettify(FOLDING_MIXED_[1]) +
 169                  ", true)=" + prettify(UCharacter.foldCase(FOLDING_MIXED_[1], true))
 170                  + " should be " + prettify(FOLDING_DEFAULT_[1]));
 171         }
 172
 173         if (!FOLDING_DEFAULT_[1].equals(UCharacter.foldCase(FOLDING_MIXED_[1], UCharacter.FOLD_CASE_DEFAULT))) {
 174             errln("FAIL: foldCase(" + prettify(FOLDING_MIXED_[1]) +
 175                          ", UCharacter.FOLD_CASE_DEFAULT)=" + prettify(UCharacter.foldCase(FOLDING_MIXED_[1], UCharacter.FOLD_CASE_DEFAULT))
 176                          + " should be " + prettify(FOLDING_DEFAULT_[1]));
 177         }
 178
 179         // alternate handling for dotted I/dotless i (U+0130, U+0131)
 180         if (!FOLDING_EXCLUDE_SPECIAL_I_[1].equals(
 181                         UCharacter.foldCase(FOLDING_MIXED_[1], false))) {
 182             errln("FAIL: foldCase(" + prettify(FOLDING_MIXED_[1]) +
 183                   ", false)=" + prettify(UCharacter.foldCase(FOLDING_MIXED_[1], false))
 184                   + " should be " + prettify(FOLDING_EXCLUDE_SPECIAL_I_[1]));
 185         }
 186
 187         if (!FOLDING_EXCLUDE_SPECIAL_I_[1].equals(
 188                                 UCharacter.foldCase(FOLDING_MIXED_[1], UCharacter.FOLD_CASE_EXCLUDE_SPECIAL_I))) {
 189             errln("FAIL: foldCase(" + prettify(FOLDING_MIXED_[1]) +
 190                   ", UCharacter.FOLD_CASE_EXCLUDE_SPECIAL_I)=" + prettify(UCharacter.foldCase(FOLDING_MIXED_[1], UCharacter.FOLD_CASE_EXCLUDE_SPECIAL_I))
 191                   + " should be "
 192                   + prettify(FOLDING_EXCLUDE_SPECIAL_I_[1]));
 193         }
 194     }
 195
 196     /**
 197      * Testing the strings case mapping methods
 198      */
 199     public void TestUpper()
 200     {
 201         // uppercase with root locale and in the same buffer
 202         if (!UPPER_ROOT_.equals(UCharacter.toUpperCase(UPPER_BEFORE_))) {
 203             errln("Fail " + UPPER_BEFORE_ + " after uppercase should be " +
 204                   UPPER_ROOT_ + " instead got " +
 205                   UCharacter.toUpperCase(UPPER_BEFORE_));
 206         }
 207
 208         // uppercase with turkish locale and separate buffers
 209         if (!UPPER_TURKISH_.equals(UCharacter.toUpperCase(TURKISH_LOCALE_,
 210                                                          UPPER_BEFORE_))) {
 211             errln("Fail " + UPPER_BEFORE_ +
 212                   " after turkish-sensitive uppercase should be " +
 213                   UPPER_TURKISH_ + " instead of " +
 214                   UCharacter.toUpperCase(TURKISH_LOCALE_, UPPER_BEFORE_));
 215         }
 216
 217         // uppercase a short string with root locale
 218         if (!UPPER_MINI_UPPER_.equals(UCharacter.toUpperCase(UPPER_MINI_))) {
 219             errln("error in toUpper(root locale)=\"" + UPPER_MINI_ +
 220                   "\" expected \"" + UPPER_MINI_UPPER_ + "\"");
 221         }
 222
 223         if (!SHARED_UPPERCASE_TOPKAP_.equals(
 224                        UCharacter.toUpperCase(SHARED_LOWERCASE_TOPKAP_))) {
 225             errln("toUpper failed: expected \"" +
 226                   SHARED_UPPERCASE_TOPKAP_ + "\", got \"" +
 227                   UCharacter.toUpperCase(SHARED_LOWERCASE_TOPKAP_) + "\".");
 228         }
 229
 230         if (!SHARED_UPPERCASE_TURKISH_.equals(
 231                   UCharacter.toUpperCase(TURKISH_LOCALE_,
 232                                          SHARED_LOWERCASE_TOPKAP_))) {
 233             errln("toUpper failed: expected \"" +
 234                   SHARED_UPPERCASE_TURKISH_ + "\", got \"" +
 235                   UCharacter.toUpperCase(TURKISH_LOCALE_,
 236                                      SHARED_LOWERCASE_TOPKAP_) + "\".");
 237         }
 238
 239         if (!SHARED_UPPERCASE_GERMAN_.equals(
 240                 UCharacter.toUpperCase(GERMAN_LOCALE_,
 241                                        SHARED_LOWERCASE_GERMAN_))) {
 242             errln("toUpper failed: expected \"" + SHARED_UPPERCASE_GERMAN_
 243                   + "\", got \"" + UCharacter.toUpperCase(GERMAN_LOCALE_,
 244                                         SHARED_LOWERCASE_GERMAN_) + "\".");
 245         }
 246
 247         if (!SHARED_UPPERCASE_GREEK_.equals(
 248                 UCharacter.toUpperCase(SHARED_LOWERCASE_GREEK_))) {
 249             errln("toLower failed: expected \"" + SHARED_UPPERCASE_GREEK_ +
 250                   "\", got \"" + UCharacter.toUpperCase(
 251                                         SHARED_LOWERCASE_GREEK_) + "\".");
 252         }
 253     }
 254
 255     public void TestLower()
 256     {
 257         if (!LOWER_ROOT_.equals(UCharacter.toLowerCase(LOWER_BEFORE_))) {
 258             errln("Fail " + LOWER_BEFORE_ + " after lowercase should be " +
 259                   LOWER_ROOT_ + " instead of " +
 260                   UCharacter.toLowerCase(LOWER_BEFORE_));
 261         }
 262
 263         // lowercase with turkish locale
 264         if (!LOWER_TURKISH_.equals(UCharacter.toLowerCase(TURKISH_LOCALE_,
 265                                                           LOWER_BEFORE_))) {
 266             errln("Fail " + LOWER_BEFORE_ +
 267                   " after turkish-sensitive lowercase should be " +
 268                   LOWER_TURKISH_ + " instead of " +
 269                   UCharacter.toLowerCase(TURKISH_LOCALE_, LOWER_BEFORE_));
 270         }
 271         if (!SHARED_LOWERCASE_ISTANBUL_.equals(
 272                      UCharacter.toLowerCase(SHARED_UPPERCASE_ISTANBUL_))) {
 273             errln("1. toLower failed: expected \"" +
 274                   SHARED_LOWERCASE_ISTANBUL_ + "\", got \"" +
 275               UCharacter.toLowerCase(SHARED_UPPERCASE_ISTANBUL_) + "\".");
 276         }
 277
 278         if (!SHARED_LOWERCASE_TURKISH_.equals(
 279                 UCharacter.toLowerCase(TURKISH_LOCALE_,
 280                                        SHARED_UPPERCASE_ISTANBUL_))) {
 281             errln("2. toLower failed: expected \"" +
 282                   SHARED_LOWERCASE_TURKISH_ + "\", got \"" +
 283                   UCharacter.toLowerCase(TURKISH_LOCALE_,
 284                                 SHARED_UPPERCASE_ISTANBUL_) + "\".");
 285         }
 286         if (!SHARED_LOWERCASE_GREEK_.equals(
 287                 UCharacter.toLowerCase(GREEK_LOCALE_,
 288                                        SHARED_UPPERCASE_GREEK_))) {
 289             errln("toLower failed: expected \"" + SHARED_LOWERCASE_GREEK_ +
 290                   "\", got \"" + UCharacter.toLowerCase(GREEK_LOCALE_,
 291                                         SHARED_UPPERCASE_GREEK_) + "\".");
 292         }
 293     }
 294
 295     public void TestTitleRegression() throws java.io.IOException {
 296         boolean isIgnorable = UCharacter.hasBinaryProperty('\'', UProperty.CASE_IGNORABLE);
 297         assertTrue("Case Ignorable check of ASCII apostrophe", isIgnorable);
 298         assertEquals("Titlecase check",
 299                 "The Quick Brown Fox Can't Jump Over The Lazy Dogs.",
 300                 UCharacter.toTitleCase(ULocale.ENGLISH, "THE QUICK BROWN FOX CAN'T JUMP OVER THE LAZY DOGS.", null));
 301     }
 302
 303     public void TestTitle()
 304     {
 305          try{
 306             for (int i = 0; i < TITLE_DATA_.length;) {
 307                 String test = TITLE_DATA_[i++];
 308                 String expected = TITLE_DATA_[i++];
 309                 ULocale locale = new ULocale(TITLE_DATA_[i++]);
 310                 int breakType = Integer.parseInt(TITLE_DATA_[i++]);
 311                 String optionsString = TITLE_DATA_[i++];
 312                 BreakIterator iter =
 313                     breakType >= 0 ?
 314                         BreakIterator.getBreakInstance(locale, breakType) :
 315                         breakType == -2 ?
 316                             // Open a trivial break iterator that only delivers { 0, length }
 317                             // or even just { 0 } as boundaries.
 318                             new RuleBasedBreakIterator(".*;") :
 319                             null;
 320                 int options = 0;
 321                 if (optionsString.indexOf('L') >= 0) {
 322                     options |= UCharacter.TITLECASE_NO_LOWERCASE;
 323                 }
 324                 if (optionsString.indexOf('A') >= 0) {
 325                     options |= UCharacter.TITLECASE_NO_BREAK_ADJUSTMENT;
 326                 }
 327                 String result = UCharacter.toTitleCase(locale, test, iter, options);
 328                 if (!expected.equals(result)) {
 329                     errln("titlecasing for " + prettify(test) + " (options " + options + ") should be " +
 330                           prettify(expected) + " but got " +
 331                           prettify(result));
 332                 }
 333                 if (options == 0) {
 334                     result = UCharacter.toTitleCase(locale, test, iter);
 335                     if (!expected.equals(result)) {
 336                         errln("titlecasing for " + prettify(test) + " should be " +
 337                               prettify(expected) + " but got " +
 338                               prettify(result));
 339                     }
 340                 }
 341             }
 342          }catch(Exception ex){
 343             warnln("Could not find data for BreakIterators");
 344          }
 345     }
 346
 347     public void TestDutchTitle() {
 348         ULocale LOC_DUTCH = new ULocale("nl");
 349         int options = 0;
 350         options |= UCharacter.TITLECASE_NO_LOWERCASE;
 351         BreakIterator iter = BreakIterator.getWordInstance(LOC_DUTCH);
 352
 353         assertEquals("Dutch titlecase check in English",
 354                 "Ijssel Igloo Ijmuiden",
 355                 UCharacter.toTitleCase(ULocale.ENGLISH, "ijssel igloo IJMUIDEN", null));
 356
 357         assertEquals("Dutch titlecase check in Dutch",
 358                 "IJssel Igloo IJmuiden",
 359                 UCharacter.toTitleCase(LOC_DUTCH, "ijssel igloo IJMUIDEN", null));
 360
 361         iter.setText("ijssel igloo IjMUIdEN iPoD ijenough");
 362         assertEquals("Dutch titlecase check in Dutch with nolowercase option",
 363                 "IJssel Igloo IJMUIdEN IPoD IJenough",
 364                 UCharacter.toTitleCase(LOC_DUTCH, "ijssel igloo IjMUIdEN iPoD ijenough", iter, options));
 365     }
 366
 367     public void TestSpecial()
 368     {
 369         for (int i = 0; i < SPECIAL_LOCALES_.length; i ++) {
 370             int    j      = i * 3;
 371             Locale locale = SPECIAL_LOCALES_[i];
 372             String str    = SPECIAL_DATA_[j];
 373             if (locale != null) {
 374                 if (!SPECIAL_DATA_[j + 1].equals(
 375                      UCharacter.toLowerCase(locale, str))) {
 376                     errln("error lowercasing special characters " +
 377                         hex(str) + " expected " + hex(SPECIAL_DATA_[j + 1])
 378                         + " for locale " + locale.toString() + " but got " +
 379                         hex(UCharacter.toLowerCase(locale, str)));
 380                 }
 381                 if (!SPECIAL_DATA_[j + 2].equals(
 382                      UCharacter.toUpperCase(locale, str))) {
 383                     errln("error uppercasing special characters " +
 384                         hex(str) + " expected " + SPECIAL_DATA_[j + 2]
 385                         + " for locale " + locale.toString() + " but got " +
 386                         hex(UCharacter.toUpperCase(locale, str)));
 387                 }
 388             }
 389             else {
 390                 if (!SPECIAL_DATA_[j + 1].equals(
 391                      UCharacter.toLowerCase(str))) {
 392                     errln("error lowercasing special characters " +
 393                         hex(str) + " expected " + SPECIAL_DATA_[j + 1] +
 394                         " but got " +
 395                         hex(UCharacter.toLowerCase(locale, str)));
 396                 }
 397                 if (!SPECIAL_DATA_[j + 2].equals(
 398                      UCharacter.toUpperCase(locale, str))) {
 399                     errln("error uppercasing special characters " +
 400                         hex(str) + " expected " + SPECIAL_DATA_[j + 2] +
 401                         " but got " +
 402                         hex(UCharacter.toUpperCase(locale, str)));
 403                 }
 404             }
 405         }
 406
 407         // turkish & azerbaijani dotless i & dotted I
 408         // remove dot above if there was a capital I before and there are no
 409         // more accents above
 410         if (!SPECIAL_DOTTED_LOWER_TURKISH_.equals(UCharacter.toLowerCase(
 411                                         TURKISH_LOCALE_, SPECIAL_DOTTED_))) {
 412             errln("error in dots.toLower(tr)=\"" + SPECIAL_DOTTED_ +
 413                   "\" expected \"" + SPECIAL_DOTTED_LOWER_TURKISH_ +
 414                   "\" but got " + UCharacter.toLowerCase(TURKISH_LOCALE_,
 415                                                          SPECIAL_DOTTED_));
 416         }
 417         if (!SPECIAL_DOTTED_LOWER_GERMAN_.equals(UCharacter.toLowerCase(
 418                                              GERMAN_LOCALE_, SPECIAL_DOTTED_))) {
 419             errln("error in dots.toLower(de)=\"" + SPECIAL_DOTTED_ +
 420                   "\" expected \"" + SPECIAL_DOTTED_LOWER_GERMAN_ +
 421                   "\" but got " + UCharacter.toLowerCase(GERMAN_LOCALE_,
 422                                                          SPECIAL_DOTTED_));
 423         }
 424
 425         // lithuanian dot above in uppercasing
 426         if (!SPECIAL_DOT_ABOVE_UPPER_LITHUANIAN_.equals(
 427              UCharacter.toUpperCase(LITHUANIAN_LOCALE_, SPECIAL_DOT_ABOVE_))) {
 428             errln("error in dots.toUpper(lt)=\"" + SPECIAL_DOT_ABOVE_ +
 429                   "\" expected \"" + SPECIAL_DOT_ABOVE_UPPER_LITHUANIAN_ +
 430                   "\" but got " + UCharacter.toUpperCase(LITHUANIAN_LOCALE_,
 431                                                          SPECIAL_DOT_ABOVE_));
 432         }
 433         if (!SPECIAL_DOT_ABOVE_UPPER_GERMAN_.equals(UCharacter.toUpperCase(
 434                                         GERMAN_LOCALE_, SPECIAL_DOT_ABOVE_))) {
 435             errln("error in dots.toUpper(de)=\"" + SPECIAL_DOT_ABOVE_ +
 436                   "\" expected \"" + SPECIAL_DOT_ABOVE_UPPER_GERMAN_ +
 437                   "\" but got " + UCharacter.toUpperCase(GERMAN_LOCALE_,
 438                                                          SPECIAL_DOT_ABOVE_));
 439         }
 440
 441         // lithuanian adds dot above to i in lowercasing if there are more
 442         // above accents
 443         if (!SPECIAL_DOT_ABOVE_LOWER_LITHUANIAN_.equals(
 444             UCharacter.toLowerCase(LITHUANIAN_LOCALE_,
 445                                    SPECIAL_DOT_ABOVE_UPPER_))) {
 446             errln("error in dots.toLower(lt)=\"" + SPECIAL_DOT_ABOVE_UPPER_ +
 447                   "\" expected \"" + SPECIAL_DOT_ABOVE_LOWER_LITHUANIAN_ +
 448                   "\" but got " + UCharacter.toLowerCase(LITHUANIAN_LOCALE_,
 449                                                    SPECIAL_DOT_ABOVE_UPPER_));
 450         }
 451         if (!SPECIAL_DOT_ABOVE_LOWER_GERMAN_.equals(
 452             UCharacter.toLowerCase(GERMAN_LOCALE_,
 453                                    SPECIAL_DOT_ABOVE_UPPER_))) {
 454             errln("error in dots.toLower(de)=\"" + SPECIAL_DOT_ABOVE_UPPER_ +
 455                   "\" expected \"" + SPECIAL_DOT_ABOVE_LOWER_GERMAN_ +
 456                   "\" but got " + UCharacter.toLowerCase(GERMAN_LOCALE_,
 457                                                    SPECIAL_DOT_ABOVE_UPPER_));
 458         }
 459     }
 460
 461     /**
 462      * Tests for case mapping in the file SpecialCasing.txt
 463      * This method reads in SpecialCasing.txt file for testing purposes.
 464      * A default path is provided relative to the src path, however the user
 465      * could set a system property to change the directory path.<br>
 466      * e.g. java -DUnicodeData="data_dir_path" com.ibm.dev.test.lang.UCharacterTest
 467      */
 468     public void TestSpecialCasingTxt()
 469     {
 470         try
 471         {
 472             // reading in the SpecialCasing file
 473             BufferedReader input = TestUtil.getDataReader(
 474                                                   "unicode/SpecialCasing.txt");
 475             while (true)
 476             {
 477                 String s = input.readLine();
 478                 if (s == null) {
 479                     break;
 480                 }
 481                 if (s.length() == 0 || s.charAt(0) == '#') {
 482                     continue;
 483                 }
 484
 485                 String chstr[] = getUnicodeStrings(s);
 486                 StringBuffer strbuffer   = new StringBuffer(chstr[0]);
 487                 StringBuffer lowerbuffer = new StringBuffer(chstr[1]);
 488                 StringBuffer upperbuffer = new StringBuffer(chstr[3]);
 489                 Locale locale = null;
 490                 for (int i = 4; i < chstr.length; i ++) {
 491                     String condition = chstr[i];
 492                     if (Character.isLowerCase(chstr[i].charAt(0))) {
 493                         // specified locale
 494                         locale = new Locale(chstr[i], "");
 495                     }
 496                     else if (condition.compareToIgnoreCase("Not_Before_Dot")
 497                                                       == 0) {
 498                         // turns I into dotless i
 499                     }
 500                     else if (condition.compareToIgnoreCase(
 501                                                       "More_Above") == 0) {
 502                             strbuffer.append((char)0x300);
 503                             lowerbuffer.append((char)0x300);
 504                             upperbuffer.append((char)0x300);
 505                     }
 506                     else if (condition.compareToIgnoreCase(
 507                                                 "After_Soft_Dotted") == 0) {
 508                             strbuffer.insert(0, 'i');
 509                             lowerbuffer.insert(0, 'i');
 510                             String lang = "";
 511                             if (locale != null) {
 512                                 lang = locale.getLanguage();
 513                             }
 514                             if (lang.equals("tr") || lang.equals("az")) {
 515                                 // this is to be removed when 4.0 data comes out
 516                                 // and upperbuffer.insert uncommented
 517                                 // see jitterbug 2344
 518                                 chstr[i] = "After_I";
 519                                 strbuffer.deleteCharAt(0);
 520                                 lowerbuffer.deleteCharAt(0);
 521                                 i --;
 522                                 continue;
 523                                 // upperbuffer.insert(0, '\u0130');
 524                             }
 525                             else {
 526                                 upperbuffer.insert(0, 'I');
 527                             }
 528                     }
 529                     else if (condition.compareToIgnoreCase(
 530                                                       "Final_Sigma") == 0) {
 531                             strbuffer.insert(0, 'c');
 532                             lowerbuffer.insert(0, 'c');
 533                             upperbuffer.insert(0, 'C');
 534                     }
 535                     else if (condition.compareToIgnoreCase("After_I") == 0) {
 536                             strbuffer.insert(0, 'I');
 537                             lowerbuffer.insert(0, 'i');
 538                             String lang = "";
 539                             if (locale != null) {
 540                                 lang = locale.getLanguage();
 541                             }
 542                             if (lang.equals("tr") || lang.equals("az")) {
 543                                 upperbuffer.insert(0, 'I');
 544                             }
 545                     }
 546                 }
 547                 chstr[0] = strbuffer.toString();
 548                 chstr[1] = lowerbuffer.toString();
 549                 chstr[3] = upperbuffer.toString();
 550                 if (locale == null) {
 551                     if (!UCharacter.toLowerCase(chstr[0]).equals(chstr[1])) {
 552                         errln(s);
 553                         errln("Fail: toLowerCase for character " +
 554                               Utility.escape(chstr[0]) + ", expected "
 555                               + Utility.escape(chstr[1]) + " but resulted in " +
 556                               Utility.escape(UCharacter.toLowerCase(chstr[0])));
 557                     }
 558                     if (!UCharacter.toUpperCase(chstr[0]).equals(chstr[3])) {
 559                         errln(s);
 560                         errln("Fail: toUpperCase for character " +
 561                               Utility.escape(chstr[0]) + ", expected "
 562                               + Utility.escape(chstr[3]) + " but resulted in " +
 563                               Utility.escape(UCharacter.toUpperCase(chstr[0])));
 564                     }
 565                 }
 566                 else {
 567                     if (!UCharacter.toLowerCase(locale, chstr[0]).equals(
 568                                                                    chstr[1])) {
 569                         errln(s);
 570                         errln("Fail: toLowerCase for character " +
 571                               Utility.escape(chstr[0]) + ", expected "
 572                               + Utility.escape(chstr[1]) + " but resulted in " +
 573                               Utility.escape(UCharacter.toLowerCase(locale,
 574                                                                     chstr[0])));
 575                     }
 576                     if (!UCharacter.toUpperCase(locale, chstr[0]).equals(
 577                                                                    chstr[3])) {
 578                         errln(s);
 579                         errln("Fail: toUpperCase for character " +
 580                               Utility.escape(chstr[0]) + ", expected "
 581                               + Utility.escape(chstr[3]) + " but resulted in " +
 582                               Utility.escape(UCharacter.toUpperCase(locale,
 583                                                                     chstr[0])));
 584                     }
 585                 }
 586             }
 587             input.close();
 588         }
 589         catch (Exception e)
 590         {
 591           e.printStackTrace();
 592         }
 593     }
 594
 595     public void TestUpperLower()
 596     {
 597         int upper[] = {0x0041, 0x0042, 0x00b2, 0x01c4, 0x01c6, 0x01c9, 0x01c8,
 598                         0x01c9, 0x000c};
 599         int lower[] = {0x0061, 0x0062, 0x00b2, 0x01c6, 0x01c6, 0x01c9, 0x01c9,
 600                         0x01c9, 0x000c};
 601         String upperTest = "abcdefg123hij.?:klmno";
 602         String lowerTest = "ABCDEFG123HIJ.?:KLMNO";
 603
 604         // Checks LetterLike Symbols which were previously a source of
 605         // confusion [Bertrand A. D. 02/04/98]
 606         for (int i = 0x2100; i < 0x2138; i ++) {
 607             /* Unicode 5.0 adds lowercase U+214E (TURNED SMALL F) to U+2132 (TURNED CAPITAL F) */
 608             if (i != 0x2126 && i != 0x212a && i != 0x212b && i!=0x2132) {
 609                 if (i != UCharacter.toLowerCase(i)) { // itself
 610                     errln("Failed case conversion with itself: \\u"
 611                             + Utility.hex(i, 4));
 612                 }
 613                 if (i != UCharacter.toUpperCase(i)) {
 614                     errln("Failed case conversion with itself: \\u"
 615                             + Utility.hex(i, 4));
 616                 }
 617             }
 618         }
 619         for (int i = 0; i < upper.length; i ++) {
 620             if (UCharacter.toLowerCase(upper[i]) != lower[i]) {
 621                 errln("FAILED UCharacter.tolower() for \\u"
 622                         + Utility.hex(upper[i], 4)
 623                         + " Expected \\u" + Utility.hex(lower[i], 4)
 624                         + " Got \\u"
 625                         + Utility.hex(UCharacter.toLowerCase(upper[i]), 4));
 626             }
 627         }
 628         logln("testing upper lower");
 629         for (int i = 0; i < upperTest.length(); i ++) {
 630             logln("testing to upper to lower");
 631             if (UCharacter.isLetter(upperTest.charAt(i)) &&
 632                 !UCharacter.isLowerCase(upperTest.charAt(i))) {
 633                 errln("Failed isLowerCase test at \\u"
 634                         + Utility.hex(upperTest.charAt(i), 4));
 635             }
 636             else if (UCharacter.isLetter(lowerTest.charAt(i))
 637                      && !UCharacter.isUpperCase(lowerTest.charAt(i))) {
 638                 errln("Failed isUpperCase test at \\u"
 639                       + Utility.hex(lowerTest.charAt(i), 4));
 640             }
 641             else if (upperTest.charAt(i)
 642                             != UCharacter.toLowerCase(lowerTest.charAt(i))) {
 643                 errln("Failed case conversion from \\u"
 644                         + Utility.hex(lowerTest.charAt(i), 4) + " To \\u"
 645                         + Utility.hex(upperTest.charAt(i), 4));
 646             }
 647             else if (lowerTest.charAt(i)
 648                     != UCharacter.toUpperCase(upperTest.charAt(i))) {
 649                 errln("Failed case conversion : \\u"
 650                         + Utility.hex(upperTest.charAt(i), 4) + " To \\u"
 651                         + Utility.hex(lowerTest.charAt(i), 4));
 652             }
 653             else if (upperTest.charAt(i)
 654                     != UCharacter.toLowerCase(upperTest.charAt(i))) {
 655                 errln("Failed case conversion with itself: \\u"
 656                         + Utility.hex(upperTest.charAt(i)));
 657             }
 658             else if (lowerTest.charAt(i)
 659                     != UCharacter.toUpperCase(lowerTest.charAt(i))) {
 660                 errln("Failed case conversion with itself: \\u"
 661                         + Utility.hex(lowerTest.charAt(i)));
 662             }
 663         }
 664         logln("done testing upper Lower");
 665     }
 666
 667     // private data members - test data --------------------------------------
 668
 669     private static final Locale TURKISH_LOCALE_ = new Locale("tr", "TR");
 670     private static final Locale GERMAN_LOCALE_ = new Locale("de", "DE");
 671     private static final Locale GREEK_LOCALE_ = new Locale("el", "GR");
 672     private static final Locale ENGLISH_LOCALE_ = new Locale("en", "US");
 673     private static final Locale LITHUANIAN_LOCALE_ = new Locale("lt", "LT");
 674
 675     private static final int CHARACTER_UPPER_[] =
 676                       {0x41, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047,
 677                        0x00b1, 0x00b2, 0xb3, 0x0048, 0x0049, 0x004a, 0x002e,
 678                        0x003f, 0x003a, 0x004b, 0x004c, 0x4d, 0x004e, 0x004f,
 679                        0x01c4, 0x01c8, 0x000c, 0x0000};
 680     private static final int CHARACTER_LOWER_[] =
 681                       {0x61, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067,
 682                        0x00b1, 0x00b2, 0xb3, 0x0068, 0x0069, 0x006a, 0x002e,
 683                        0x003f, 0x003a, 0x006b, 0x006c, 0x6d, 0x006e, 0x006f,
 684                        0x01c6, 0x01c9, 0x000c, 0x0000};
 685
 686     /*
 687      * CaseFolding.txt says about i and its cousins:
 688      *   0049; C; 0069; # LATIN CAPITAL LETTER I
 689      *   0049; T; 0131; # LATIN CAPITAL LETTER I
 690      *
 691      *   0130; F; 0069 0307; # LATIN CAPITAL LETTER I WITH DOT ABOVE
 692      *   0130; T; 0069; # LATIN CAPITAL LETTER I WITH DOT ABOVE
 693      * That's all.
 694      * See CaseFolding.txt and the Unicode Standard for how to apply the case foldings.
 695      */
 696     private static final int FOLDING_SIMPLE_[] = {
 697         // input, default, exclude special i
 698         0x61,   0x61,  0x61,
 699         0x49,   0x69,  0x131,
 700         0x130,  0x130, 0x69,
 701         0x131,  0x131, 0x131,
 702         0xdf,   0xdf,  0xdf,
 703         0xfb03, 0xfb03, 0xfb03,
 704         0x1040e,0x10436,0x10436,
 705         0x5ffff,0x5ffff,0x5ffff
 706     };
 707     private static final String FOLDING_MIXED_[] =
 708                           {"\u0061\u0042\u0130\u0049\u0131\u03d0\u00df\ufb03\ud93f\udfff",
 709                            "A\u00df\u00b5\ufb03\uD801\uDC0C\u0130\u0131"};
 710     private static final String FOLDING_DEFAULT_[] =
 711          {"\u0061\u0062\u0069\u0307\u0069\u0131\u03b2\u0073\u0073\u0066\u0066\u0069\ud93f\udfff",
 712           "ass\u03bcffi\uD801\uDC34i\u0307\u0131"};
 713     private static final String FOLDING_EXCLUDE_SPECIAL_I_[] =
 714          {"\u0061\u0062\u0069\u0131\u0131\u03b2\u0073\u0073\u0066\u0066\u0069\ud93f\udfff",
 715           "ass\u03bcffi\uD801\uDC34i\u0131"};
 716     /**
 717      * "IESUS CHRISTOS"
 718      */
 719     private static final String SHARED_UPPERCASE_GREEK_ =
 720         "\u0399\u0395\u03a3\u03a5\u03a3\u0020\u03a7\u03a1\u0399\u03a3\u03a4\u039f\u03a3";
 721     /**
 722      * "iesus christos"
 723      */
 724     private static final String SHARED_LOWERCASE_GREEK_ =
 725         "\u03b9\u03b5\u03c3\u03c5\u03c2\u0020\u03c7\u03c1\u03b9\u03c3\u03c4\u03bf\u03c2";
 726     private static final String SHARED_LOWERCASE_TURKISH_ =
 727         "\u0069\u0073\u0074\u0061\u006e\u0062\u0075\u006c\u002c\u0020\u006e\u006f\u0074\u0020\u0063\u006f\u006e\u0073\u0074\u0061\u006e\u0074\u0131\u006e\u006f\u0070\u006c\u0065\u0021";
 728     private static final String SHARED_UPPERCASE_TURKISH_ =
 729         "\u0054\u004f\u0050\u004b\u0041\u0050\u0049\u0020\u0050\u0041\u004c\u0041\u0043\u0045\u002c\u0020\u0130\u0053\u0054\u0041\u004e\u0042\u0055\u004c";
 730     private static final String SHARED_UPPERCASE_ISTANBUL_ =
 731                                           "\u0130STANBUL, NOT CONSTANTINOPLE!";
 732     private static final String SHARED_LOWERCASE_ISTANBUL_ =
 733                                           "i\u0307stanbul, not constantinople!";
 734     private static final String SHARED_LOWERCASE_TOPKAP_ =
 735                                           "topkap\u0131 palace, istanbul";
 736     private static final String SHARED_UPPERCASE_TOPKAP_ =
 737                                           "TOPKAPI PALACE, ISTANBUL";
 738     private static final String SHARED_LOWERCASE_GERMAN_ =
 739                                           "S\u00FC\u00DFmayrstra\u00DFe";
 740     private static final String SHARED_UPPERCASE_GERMAN_ =
 741                                           "S\u00DCSSMAYRSTRASSE";
 742
 743     private static final String UPPER_BEFORE_ =
 744          "\u0061\u0042\u0069\u03c2\u00df\u03c3\u002f\ufb03\ufb03\ufb03\ud93f\udfff";
 745     private static final String UPPER_ROOT_ =
 746          "\u0041\u0042\u0049\u03a3\u0053\u0053\u03a3\u002f\u0046\u0046\u0049\u0046\u0046\u0049\u0046\u0046\u0049\ud93f\udfff";
 747     private static final String UPPER_TURKISH_ =
 748          "\u0041\u0042\u0130\u03a3\u0053\u0053\u03a3\u002f\u0046\u0046\u0049\u0046\u0046\u0049\u0046\u0046\u0049\ud93f\udfff";
 749     private static final String UPPER_MINI_ = "\u00df\u0061";
 750     private static final String UPPER_MINI_UPPER_ = "\u0053\u0053\u0041";
 751
 752     private static final String LOWER_BEFORE_ =
 753                       "\u0061\u0042\u0049\u03a3\u00df\u03a3\u002f\ud93f\udfff";
 754     private static final String LOWER_ROOT_ =
 755                       "\u0061\u0062\u0069\u03c3\u00df\u03c2\u002f\ud93f\udfff";
 756     private static final String LOWER_TURKISH_ =
 757                       "\u0061\u0062\u0131\u03c3\u00df\u03c2\u002f\ud93f\udfff";
 758
 759     /**
 760      * each item is an array with input string, result string, locale ID, break iterator, options
 761      * the break iterator is specified as an int, same as in BreakIterator.KIND_*:
 762      * 0=KIND_CHARACTER  1=KIND_WORD  2=KIND_LINE  3=KIND_SENTENCE  4=KIND_TITLE  -1=default (NULL=words)  -2=no breaks (.*)
 763      * options: T=U_FOLD_CASE_EXCLUDE_SPECIAL_I  L=U_TITLECASE_NO_LOWERCASE  A=U_TITLECASE_NO_BREAK_ADJUSTMENT
 764      * see ICU4C source/test/testdata/casing.txt
 765      */
 766     private static final String TITLE_DATA_[] = {
 767         "\u0061\u0042\u0020\u0069\u03c2\u0020\u00df\u03c3\u002f\ufb03\ud93f\udfff",
 768         "\u0041\u0042\u0020\u0049\u03a3\u0020\u0053\u0073\u03a3\u002f\u0046\u0066\u0069\ud93f\udfff",
 769         "",
 770         "0",
 771         "",
 772
 773         "\u0061\u0042\u0020\u0069\u03c2\u0020\u00df\u03c3\u002f\ufb03\ud93f\udfff",
 774         "\u0041\u0062\u0020\u0049\u03c2\u0020\u0053\u0073\u03c3\u002f\u0046\u0066\u0069\ud93f\udfff",
 775         "",
 776         "1",
 777         "",
 778
 779         "\u02bbaMeLikA huI P\u016b \u02bb\u02bb\u02bbiA", "\u02bbAmelika Hui P\u016b \u02bb\u02bb\u02bbIa", // titlecase first _cased_ letter, j4933
 780         "",
 781         "-1",
 782         "",
 783
 784         " tHe QUIcK bRoWn", " The Quick Brown",
 785         "",
 786         "4",
 787         "",
 788
 789         "\u01c4\u01c5\u01c6\u01c7\u01c8\u01c9\u01ca\u01cb\u01cc",
 790         "\u01c5\u01c5\u01c5\u01c8\u01c8\u01c8\u01cb\u01cb\u01cb", // UBRK_CHARACTER
 791         "",
 792         "0",
 793         "",
 794
 795         "\u01c9ubav ljubav", "\u01c8ubav Ljubav", // Lj vs. L+j
 796         "",
 797         "-1",
 798         "",
 799
 800         "'oH dOn'T tItLeCaSe AfTeR lEtTeR+'",  "'Oh Don't Titlecase After Letter+'",
 801         "",
 802         "-1",
 803         "",
 804
 805         "a \u02bbCaT. A \u02bbdOg! \u02bbeTc.",
 806         "A \u02bbCat. A \u02bbDog! \u02bbEtc.",
 807         "",
 808         "-1",
 809         "", // default
 810
 811         "a \u02bbCaT. A \u02bbdOg! \u02bbeTc.",
 812         "A \u02bbcat. A \u02bbdog! \u02bbetc.",
 813         "",
 814         "-1",
 815         "A", // U_TITLECASE_NO_BREAK_ADJUSTMENT
 816
 817         "a \u02bbCaT. A \u02bbdOg! \u02bbeTc.",
 818         "A \u02bbCaT. A \u02bbdOg! \u02bbETc.",
 819         "",
 820         "3",
 821         "L", // UBRK_SENTENCE and U_TITLECASE_NO_LOWERCASE
 822
 823
 824         "\u02bbcAt! \u02bbeTc.",
 825         "\u02bbCat! \u02bbetc.",
 826         "",
 827         "-2",
 828         "", // -2=Trivial break iterator
 829
 830         "\u02bbcAt! \u02bbeTc.",
 831         "\u02bbcat! \u02bbetc.",
 832         "",
 833         "-2",
 834         "A", // U_TITLECASE_NO_BREAK_ADJUSTMENT
 835
 836         "\u02bbcAt! \u02bbeTc.",
 837         "\u02bbCAt! \u02bbeTc.",
 838         "",
 839         "-2",
 840         "L", // U_TITLECASE_NO_LOWERCASE
 841
 842         "\u02bbcAt! \u02bbeTc.",
 843         "\u02bbcAt! \u02bbeTc.",
 844         "",
 845         "-2",
 846         "AL", // Both options
 847
 848         // Test case for ticket #7251: UCharacter.toTitleCase() throws OutOfMemoryError
 849         // when TITLECASE_NO_LOWERCASE encounters a single-letter word
 850         "a b c",
 851         "A B C",
 852         "",
 853         "1",
 854         "L" // U_TITLECASE_NO_LOWERCASE
 855     };
 856
 857
 858     /**
 859      * <p>basic string, lower string, upper string, title string</p>
 860      */
 861     private static final String SPECIAL_DATA_[] = {
 862         UTF16.valueOf(0x1043C) + UTF16.valueOf(0x10414),
 863         UTF16.valueOf(0x1043C) + UTF16.valueOf(0x1043C),
 864         UTF16.valueOf(0x10414) + UTF16.valueOf(0x10414),
 865         "ab'cD \uFB00i\u0131I\u0130 \u01C7\u01C8\u01C9 " +
 866                          UTF16.valueOf(0x1043C) + UTF16.valueOf(0x10414),
 867         "ab'cd \uFB00i\u0131ii\u0307 \u01C9\u01C9\u01C9 " +
 868                               UTF16.valueOf(0x1043C) + UTF16.valueOf(0x1043C),
 869         "AB'CD FFIII\u0130 \u01C7\u01C7\u01C7 " +
 870                               UTF16.valueOf(0x10414) + UTF16.valueOf(0x10414),
 871         // sigmas followed/preceded by cased letters
 872         "i\u0307\u03a3\u0308j \u0307\u03a3\u0308j i\u00ad\u03a3\u0308 \u0307\u03a3\u0308 ",
 873         "i\u0307\u03c3\u0308j \u0307\u03c3\u0308j i\u00ad\u03c2\u0308 \u0307\u03c3\u0308 ",
 874         "I\u0307\u03a3\u0308J \u0307\u03a3\u0308J I\u00ad\u03a3\u0308 \u0307\u03a3\u0308 "
 875     };
 876     private static final Locale SPECIAL_LOCALES_[] = {
 877         null,
 878         ENGLISH_LOCALE_,
 879         null,
 880     };
 881
 882     private static final String SPECIAL_DOTTED_ =
 883             "I \u0130 I\u0307 I\u0327\u0307 I\u0301\u0307 I\u0327\u0307\u0301";
 884     private static final String SPECIAL_DOTTED_LOWER_TURKISH_ =
 885             "\u0131 i i i\u0327 \u0131\u0301\u0307 i\u0327\u0301";
 886     private static final String SPECIAL_DOTTED_LOWER_GERMAN_ =
 887             "i i\u0307 i\u0307 i\u0327\u0307 i\u0301\u0307 i\u0327\u0307\u0301";
 888     private static final String SPECIAL_DOT_ABOVE_ =
 889             "a\u0307 \u0307 i\u0307 j\u0327\u0307 j\u0301\u0307";
 890     private static final String SPECIAL_DOT_ABOVE_UPPER_LITHUANIAN_ =
 891             "A\u0307 \u0307 I J\u0327 J\u0301\u0307";
 892     private static final String SPECIAL_DOT_ABOVE_UPPER_GERMAN_ =
 893             "A\u0307 \u0307 I\u0307 J\u0327\u0307 J\u0301\u0307";
 894     private static final String SPECIAL_DOT_ABOVE_UPPER_ =
 895             "I I\u0301 J J\u0301 \u012e \u012e\u0301 \u00cc\u00cd\u0128";
 896     private static final String SPECIAL_DOT_ABOVE_LOWER_LITHUANIAN_ =
 897             "i i\u0307\u0301 j j\u0307\u0301 \u012f \u012f\u0307\u0301 i\u0307\u0300i\u0307\u0301i\u0307\u0303";
 898     private static final String SPECIAL_DOT_ABOVE_LOWER_GERMAN_ =
 899             "i i\u0301 j j\u0301 \u012f \u012f\u0301 \u00ec\u00ed\u0129";
 900
 901     // private methods -------------------------------------------------------
 902
 903     /**
 904      * Converting the hex numbers represented betwee                             n ';' to Unicode strings
 905      * @param str string to break up into Unicode strings
 906      * @return array of Unicode strings ending with a null
 907      */
 908     private String[] getUnicodeStrings(String str)
 909     {
 910         List<String> v = new ArrayList<String>(10);
 911         int start = 0;
 912         for (int casecount = 4; casecount > 0; casecount --) {
 913             int end = str.indexOf("; ", start);
 914             String casestr = str.substring(start, end);
 915             StringBuffer buffer = new StringBuffer();
 916             int spaceoffset = 0;
 917             while (spaceoffset < casestr.length()) {
 918                 int nextspace = casestr.indexOf(' ', spaceoffset);
 919                 if (nextspace == -1) {
 920                     nextspace = casestr.length();
 921                 }
 922                 buffer.append((char)Integer.parseInt(
 923                                      casestr.substring(spaceoffset, nextspace),
 924                                                       16));
 925                 spaceoffset = nextspace + 1;
 926             }
 927             start = end + 2;
 928             v.add(buffer.toString());
 929         }
 930         int comments = str.indexOf(" #", start);
 931         if (comments != -1 && comments != start) {
 932             if (str.charAt(comments - 1) == ';') {
 933                 comments --;
 934             }
 935             String conditions = str.substring(start, comments);
 936             int offset = 0;
 937             while (offset < conditions.length()) {
 938                 int spaceoffset = conditions.indexOf(' ', offset);
 939                 if (spaceoffset == -1) {
 940                     spaceoffset = conditions.length();
 941                 }
 942                 v.add(conditions.substring(offset, spaceoffset));
 943                 offset = spaceoffset + 1;
 944             }
 945         }
 946         int size = v.size();
 947         String result[] = new String[size];
 948         for (int i = 0; i < size; i ++) {
 949             result[i] = v.get(i);
 950         }
 951         return result;
 952     }
 953 }