]> gitweb.fperrin.net Git - Dictionary.git/blob - jars/icu4j-4_2_1-src/src/com/ibm/icu/dev/test/lang/UCharacterCaseTest.java
icu4jsrc
[Dictionary.git] / jars / icu4j-4_2_1-src / src / com / ibm / icu / dev / test / lang / UCharacterCaseTest.java
1 /**\r
2 *******************************************************************************\r
3 * Copyright (C) 1996-2008, International Business Machines Corporation and    *\r
4 * others. All Rights Reserved.                                                *\r
5 *******************************************************************************\r
6 */\r
7 \r
8 \r
9 package com.ibm.icu.dev.test.lang;\r
10 \r
11 \r
12 import com.ibm.icu.dev.test.TestFmwk;\r
13 import com.ibm.icu.dev.test.TestUtil;\r
14 import com.ibm.icu.lang.UCharacter;\r
15 import com.ibm.icu.text.UTF16;\r
16 import com.ibm.icu.text.BreakIterator;\r
17 import com.ibm.icu.text.RuleBasedBreakIterator;\r
18 import com.ibm.icu.text.UnicodeSet;\r
19 import com.ibm.icu.util.ULocale;\r
20 import com.ibm.icu.impl.UCaseProps;\r
21 import com.ibm.icu.impl.Utility;\r
22 import java.util.Locale;\r
23 import java.io.BufferedReader;\r
24 import java.util.Vector;\r
25 \r
26 \r
27 /**\r
28 * <p>Testing character casing</p>\r
29 * <p>Mostly following the test cases in strcase.cpp for ICU</p>\r
30 * @author Syn Wee Quek\r
31 * @since march 14 2002\r
32 */\r
33 public final class UCharacterCaseTest extends TestFmwk\r
34 {\r
35     // constructor -----------------------------------------------------------\r
36 \r
/**
 * Default constructor. It only chains to the superclass constructor and
 * sets up no state of its own.
 */
public UCharacterCaseTest()
{
    super();
}
43 \r
44     // public methods --------------------------------------------------------\r
45 \r
46     public static void main(String[] arg)\r
47     {\r
48         try\r
49         {\r
50             UCharacterCaseTest test = new UCharacterCaseTest();\r
51             test.run(arg);\r
52         }\r
53         catch (Exception e)\r
54         {\r
55             e.printStackTrace();\r
56         }\r
57     }\r
58 \r
59     /**\r
60      * Testing the uppercase and lowercase function of UCharacter\r
61      */\r
62     public void TestCharacter()\r
63     {\r
64         for (int i = 0; i < CHARACTER_LOWER_.length; i ++) {\r
65             if (UCharacter.isLetter(CHARACTER_LOWER_[i]) &&\r
66                 !UCharacter.isLowerCase(CHARACTER_LOWER_[i])) {\r
67                 errln("FAIL isLowerCase test for \\u" +\r
68                       hex(CHARACTER_LOWER_[i]));\r
69                 break;\r
70             }\r
71             if (UCharacter.isLetter(CHARACTER_UPPER_[i]) &&\r
72                 !(UCharacter.isUpperCase(CHARACTER_UPPER_[i]) ||\r
73                   UCharacter.isTitleCase(CHARACTER_UPPER_[i]))) {\r
74                 errln("FAIL isUpperCase test for \\u" +\r
75                       hex(CHARACTER_UPPER_[i]));\r
76                 break;\r
77             }\r
78             if (CHARACTER_LOWER_[i] !=\r
79                 UCharacter.toLowerCase(CHARACTER_UPPER_[i]) ||\r
80                 (CHARACTER_UPPER_[i] !=\r
81                 UCharacter.toUpperCase(CHARACTER_LOWER_[i]) &&\r
82                 CHARACTER_UPPER_[i] !=\r
83                 UCharacter.toTitleCase(CHARACTER_LOWER_[i]))) {\r
84                 errln("FAIL case conversion test for \\u" +\r
85                       hex(CHARACTER_UPPER_[i]) +\r
86                       " to \\u" + hex(CHARACTER_LOWER_[i]));\r
87                 break;\r
88             }\r
89             if (CHARACTER_LOWER_[i] !=\r
90                 UCharacter.toLowerCase(CHARACTER_LOWER_[i])) {\r
91                 errln("FAIL lower case conversion test for \\u" +\r
92                       hex(CHARACTER_LOWER_[i]));\r
93                 break;\r
94             }\r
95             if (CHARACTER_UPPER_[i] !=\r
96                 UCharacter.toUpperCase(CHARACTER_UPPER_[i]) &&\r
97                 CHARACTER_UPPER_[i] !=\r
98                 UCharacter.toTitleCase(CHARACTER_UPPER_[i])) {\r
99                 errln("FAIL upper case conversion test for \\u" +\r
100                       hex(CHARACTER_UPPER_[i]));\r
101                 break;\r
102             }\r
103             logln("Ok    \\u" + hex(CHARACTER_UPPER_[i]) + " and \\u" +\r
104                   hex(CHARACTER_LOWER_[i]));\r
105         }\r
106     }\r
107 \r
108     public void TestFolding()\r
109     {\r
110         // test simple case folding\r
111         for (int i = 0; i < FOLDING_SIMPLE_.length; i += 3) {\r
112             if (UCharacter.foldCase(FOLDING_SIMPLE_[i], true) !=\r
113                 FOLDING_SIMPLE_[i + 1]) {\r
114                 errln("FAIL: foldCase(\\u" + hex(FOLDING_SIMPLE_[i]) +\r
115                       ", true) should be \\u" + hex(FOLDING_SIMPLE_[i + 1]));\r
116             }\r
117             if (UCharacter.foldCase(FOLDING_SIMPLE_[i], \r
118                                     UCharacter.FOLD_CASE_DEFAULT) !=\r
119                                                       FOLDING_SIMPLE_[i + 1]) {\r
120                 errln("FAIL: foldCase(\\u" + hex(FOLDING_SIMPLE_[i]) +\r
121                       ", UCharacter.FOLD_CASE_DEFAULT) should be \\u" \r
122                       + hex(FOLDING_SIMPLE_[i + 1]));\r
123             }\r
124             if (UCharacter.foldCase(FOLDING_SIMPLE_[i], false) !=\r
125                 FOLDING_SIMPLE_[i + 2]) {\r
126                 errln("FAIL: foldCase(\\u" + hex(FOLDING_SIMPLE_[i]) +\r
127                       ", false) should be \\u" + hex(FOLDING_SIMPLE_[i + 2]));\r
128             }\r
129             if (UCharacter.foldCase(FOLDING_SIMPLE_[i], \r
130                                     UCharacter.FOLD_CASE_EXCLUDE_SPECIAL_I) !=\r
131                                     FOLDING_SIMPLE_[i + 2]) {\r
132                 errln("FAIL: foldCase(\\u" + hex(FOLDING_SIMPLE_[i]) +\r
133                       ", UCharacter.FOLD_CASE_EXCLUDE_SPECIAL_I) should be \\u" \r
134                       + hex(FOLDING_SIMPLE_[i + 2]));\r
135             }\r
136         }\r
137 \r
138         // Test full string case folding with default option and separate\r
139         // buffers\r
140         if (!FOLDING_DEFAULT_[0].equals(UCharacter.foldCase(FOLDING_MIXED_[0], true))) {\r
141             errln("FAIL: foldCase(" + prettify(FOLDING_MIXED_[0]) +\r
142                   ", true)=" + prettify(UCharacter.foldCase(FOLDING_MIXED_[0], true)) +\r
143                   " should be " + prettify(FOLDING_DEFAULT_[0]));\r
144         }\r
145         \r
146         if (!FOLDING_DEFAULT_[0].equals(UCharacter.foldCase(FOLDING_MIXED_[0], UCharacter.FOLD_CASE_DEFAULT))) {\r
147                     errln("FAIL: foldCase(" + prettify(FOLDING_MIXED_[0]) +\r
148                           ", UCharacter.FOLD_CASE_DEFAULT)=" + prettify(UCharacter.foldCase(FOLDING_MIXED_[0], UCharacter.FOLD_CASE_DEFAULT))\r
149                           + " should be " + prettify(FOLDING_DEFAULT_[0]));\r
150                 }\r
151 \r
152         if (!FOLDING_EXCLUDE_SPECIAL_I_[0].equals(\r
153                             UCharacter.foldCase(FOLDING_MIXED_[0], false))) {\r
154             errln("FAIL: foldCase(" + prettify(FOLDING_MIXED_[0]) +\r
155                   ", false)=" + prettify(UCharacter.foldCase(FOLDING_MIXED_[0], false))\r
156                   + " should be " + prettify(FOLDING_EXCLUDE_SPECIAL_I_[0]));\r
157         }\r
158         \r
159         if (!FOLDING_EXCLUDE_SPECIAL_I_[0].equals(\r
160                                     UCharacter.foldCase(FOLDING_MIXED_[0], UCharacter.FOLD_CASE_EXCLUDE_SPECIAL_I))) {\r
161             errln("FAIL: foldCase(" + prettify(FOLDING_MIXED_[0]) +\r
162                   ", UCharacter.FOLD_CASE_EXCLUDE_SPECIAL_I)=" + prettify(UCharacter.foldCase(FOLDING_MIXED_[0], UCharacter.FOLD_CASE_EXCLUDE_SPECIAL_I))\r
163                   + " should be " + prettify(FOLDING_EXCLUDE_SPECIAL_I_[0]));\r
164         }\r
165 \r
166         if (!FOLDING_DEFAULT_[1].equals(UCharacter.foldCase(FOLDING_MIXED_[1], true))) {\r
167            errln("FAIL: foldCase(" + prettify(FOLDING_MIXED_[1]) +\r
168                  ", true)=" + prettify(UCharacter.foldCase(FOLDING_MIXED_[1], true))\r
169                  + " should be " + prettify(FOLDING_DEFAULT_[1]));\r
170         }\r
171 \r
172         if (!FOLDING_DEFAULT_[1].equals(UCharacter.foldCase(FOLDING_MIXED_[1], UCharacter.FOLD_CASE_DEFAULT))) {\r
173             errln("FAIL: foldCase(" + prettify(FOLDING_MIXED_[1]) +\r
174                          ", UCharacter.FOLD_CASE_DEFAULT)=" + prettify(UCharacter.foldCase(FOLDING_MIXED_[1], UCharacter.FOLD_CASE_DEFAULT))\r
175                          + " should be " + prettify(FOLDING_DEFAULT_[1]));\r
176         }\r
177         \r
178         // alternate handling for dotted I/dotless i (U+0130, U+0131)\r
179         if (!FOLDING_EXCLUDE_SPECIAL_I_[1].equals(\r
180                         UCharacter.foldCase(FOLDING_MIXED_[1], false))) {\r
181             errln("FAIL: foldCase(" + prettify(FOLDING_MIXED_[1]) +\r
182                   ", false)=" + prettify(UCharacter.foldCase(FOLDING_MIXED_[1], false))\r
183                   + " should be " + prettify(FOLDING_EXCLUDE_SPECIAL_I_[1]));\r
184         }\r
185         \r
186         if (!FOLDING_EXCLUDE_SPECIAL_I_[1].equals(\r
187                                 UCharacter.foldCase(FOLDING_MIXED_[1], UCharacter.FOLD_CASE_EXCLUDE_SPECIAL_I))) {\r
188             errln("FAIL: foldCase(" + prettify(FOLDING_MIXED_[1]) +\r
189                   ", UCharacter.FOLD_CASE_EXCLUDE_SPECIAL_I)=" + prettify(UCharacter.foldCase(FOLDING_MIXED_[1], UCharacter.FOLD_CASE_EXCLUDE_SPECIAL_I))\r
190                   + " should be "\r
191                   + prettify(FOLDING_EXCLUDE_SPECIAL_I_[1]));\r
192         }\r
193     }\r
194 \r
195     /**\r
196      * Testing the strings case mapping methods\r
197      */\r
198     public void TestUpper()\r
199     {\r
200         // uppercase with root locale and in the same buffer\r
201         if (!UPPER_ROOT_.equals(UCharacter.toUpperCase(UPPER_BEFORE_))) {\r
202             errln("Fail " + UPPER_BEFORE_ + " after uppercase should be " +\r
203                   UPPER_ROOT_ + " instead got " +\r
204                   UCharacter.toUpperCase(UPPER_BEFORE_));\r
205         }\r
206 \r
207         // uppercase with turkish locale and separate buffers\r
208         if (!UPPER_TURKISH_.equals(UCharacter.toUpperCase(TURKISH_LOCALE_,\r
209                                                          UPPER_BEFORE_))) {\r
210             errln("Fail " + UPPER_BEFORE_ +\r
211                   " after turkish-sensitive uppercase should be " +\r
212                   UPPER_TURKISH_ + " instead of " +\r
213                   UCharacter.toUpperCase(TURKISH_LOCALE_, UPPER_BEFORE_));\r
214         }\r
215 \r
216         // uppercase a short string with root locale\r
217         if (!UPPER_MINI_UPPER_.equals(UCharacter.toUpperCase(UPPER_MINI_))) {\r
218             errln("error in toUpper(root locale)=\"" + UPPER_MINI_ +\r
219                   "\" expected \"" + UPPER_MINI_UPPER_ + "\"");\r
220         }\r
221 \r
222         if (!SHARED_UPPERCASE_TOPKAP_.equals(\r
223                        UCharacter.toUpperCase(SHARED_LOWERCASE_TOPKAP_))) {\r
224             errln("toUpper failed: expected \"" +\r
225                   SHARED_UPPERCASE_TOPKAP_ + "\", got \"" +\r
226                   UCharacter.toUpperCase(SHARED_LOWERCASE_TOPKAP_) + "\".");\r
227         }\r
228 \r
229         if (!SHARED_UPPERCASE_TURKISH_.equals(\r
230                   UCharacter.toUpperCase(TURKISH_LOCALE_,\r
231                                          SHARED_LOWERCASE_TOPKAP_))) {\r
232             errln("toUpper failed: expected \"" +\r
233                   SHARED_UPPERCASE_TURKISH_ + "\", got \"" +\r
234                   UCharacter.toUpperCase(TURKISH_LOCALE_,\r
235                                      SHARED_LOWERCASE_TOPKAP_) + "\".");\r
236         }\r
237 \r
238         if (!SHARED_UPPERCASE_GERMAN_.equals(\r
239                 UCharacter.toUpperCase(GERMAN_LOCALE_,\r
240                                        SHARED_LOWERCASE_GERMAN_))) {\r
241             errln("toUpper failed: expected \"" + SHARED_UPPERCASE_GERMAN_\r
242                   + "\", got \"" + UCharacter.toUpperCase(GERMAN_LOCALE_,\r
243                                         SHARED_LOWERCASE_GERMAN_) + "\".");\r
244         }\r
245 \r
246         if (!SHARED_UPPERCASE_GREEK_.equals(\r
247                 UCharacter.toUpperCase(SHARED_LOWERCASE_GREEK_))) {\r
248             errln("toLower failed: expected \"" + SHARED_UPPERCASE_GREEK_ +\r
249                   "\", got \"" + UCharacter.toUpperCase(\r
250                                         SHARED_LOWERCASE_GREEK_) + "\".");\r
251         }\r
252     }\r
253 \r
254     public void TestLower()\r
255     {\r
256         if (!LOWER_ROOT_.equals(UCharacter.toLowerCase(LOWER_BEFORE_))) {\r
257             errln("Fail " + LOWER_BEFORE_ + " after lowercase should be " +\r
258                   LOWER_ROOT_ + " instead of " +\r
259                   UCharacter.toLowerCase(LOWER_BEFORE_));\r
260         }\r
261 \r
262         // lowercase with turkish locale\r
263         if (!LOWER_TURKISH_.equals(UCharacter.toLowerCase(TURKISH_LOCALE_,\r
264                                                           LOWER_BEFORE_))) {\r
265             errln("Fail " + LOWER_BEFORE_ +\r
266                   " after turkish-sensitive lowercase should be " +\r
267                   LOWER_TURKISH_ + " instead of " +\r
268                   UCharacter.toLowerCase(TURKISH_LOCALE_, LOWER_BEFORE_));\r
269         }\r
270         if (!SHARED_LOWERCASE_ISTANBUL_.equals(\r
271                      UCharacter.toLowerCase(SHARED_UPPERCASE_ISTANBUL_))) {\r
272             errln("1. toLower failed: expected \"" +\r
273                   SHARED_LOWERCASE_ISTANBUL_ + "\", got \"" +\r
274               UCharacter.toLowerCase(SHARED_UPPERCASE_ISTANBUL_) + "\".");\r
275         }\r
276 \r
277         if (!SHARED_LOWERCASE_TURKISH_.equals(\r
278                 UCharacter.toLowerCase(TURKISH_LOCALE_,\r
279                                        SHARED_UPPERCASE_ISTANBUL_))) {\r
280             errln("2. toLower failed: expected \"" +\r
281                   SHARED_LOWERCASE_TURKISH_ + "\", got \"" +\r
282                   UCharacter.toLowerCase(TURKISH_LOCALE_,\r
283                                 SHARED_UPPERCASE_ISTANBUL_) + "\".");\r
284         }\r
285         if (!SHARED_LOWERCASE_GREEK_.equals(\r
286                 UCharacter.toLowerCase(GREEK_LOCALE_,\r
287                                        SHARED_UPPERCASE_GREEK_))) {\r
288             errln("toLower failed: expected \"" + SHARED_LOWERCASE_GREEK_ +\r
289                   "\", got \"" + UCharacter.toLowerCase(GREEK_LOCALE_,\r
290                                         SHARED_UPPERCASE_GREEK_) + "\".");\r
291         }\r
292     }\r
293 \r
294     public void TestTitleRegression() throws java.io.IOException {\r
295         UCaseProps props = new UCaseProps();\r
296         int type = props.getTypeOrIgnorable('\'');\r
297         assertEquals("Case Ignorable check", -1, type); // should be case-ignorable (-1)\r
298         UnicodeSet allCaseIgnorables = new UnicodeSet();\r
299         for (int cp = 0; cp <= 0x10FFFF; ++cp) {\r
300             if (props.getTypeOrIgnorable(cp) < 0) {\r
301                 allCaseIgnorables.add(cp);\r
302             }\r
303         }\r
304         logln(allCaseIgnorables.toString());\r
305         assertEquals("Titlecase check",\r
306                 "The Quick Brown Fox Can't Jump Over The Lazy Dogs.",\r
307                 UCharacter.toTitleCase(ULocale.ENGLISH, "THE QUICK BROWN FOX CAN'T JUMP OVER THE LAZY DOGS.", null));\r
308     }\r
309 \r
310     public void TestTitle()\r
311     {\r
312          try{ \r
313             for (int i = 0; i < TITLE_DATA_.length;) {\r
314                 String test = TITLE_DATA_[i++];\r
315                 String expected = TITLE_DATA_[i++];\r
316                 ULocale locale = new ULocale(TITLE_DATA_[i++]);\r
317                 int breakType = Integer.parseInt(TITLE_DATA_[i++]);\r
318                 String optionsString = TITLE_DATA_[i++];\r
319                 BreakIterator iter =\r
320                     breakType >= 0 ?\r
321                         BreakIterator.getBreakInstance(locale, breakType) :\r
322                         breakType == -2 ?\r
323                             // Open a trivial break iterator that only delivers { 0, length }\r
324                             // or even just { 0 } as boundaries.\r
325                             new RuleBasedBreakIterator(".*;") :\r
326                             null;\r
327                 int options = 0;\r
328                 if (optionsString.indexOf('L') >= 0) {\r
329                     options |= UCharacter.TITLECASE_NO_LOWERCASE;\r
330                 }\r
331                 if (optionsString.indexOf('A') >= 0) {\r
332                     options |= UCharacter.TITLECASE_NO_BREAK_ADJUSTMENT;\r
333                 }\r
334                 String result = UCharacter.toTitleCase(locale, test, iter, options);\r
335                 if (!expected.equals(result)) {\r
336                     errln("titlecasing for " + prettify(test) + " (options " + options + ") should be " +\r
337                           prettify(expected) + " but got " +\r
338                           prettify(result));\r
339                 }\r
340                 if (options == 0) {\r
341                     result = UCharacter.toTitleCase(locale, test, iter);\r
342                     if (!expected.equals(result)) {\r
343                         errln("titlecasing for " + prettify(test) + " should be " +\r
344                               prettify(expected) + " but got " +\r
345                               prettify(result));\r
346                     }\r
347                 }\r
348             }\r
349          }catch(Exception ex){\r
350             warnln("Could not find data for BreakIterators");\r
351          }\r
352     }\r
353 \r
354     public void TestDutchTitle() {\r
355         ULocale LOC_DUTCH = new ULocale("nl");\r
356         int options = 0;\r
357         options |= UCharacter.TITLECASE_NO_LOWERCASE;\r
358         BreakIterator iter = BreakIterator.getWordInstance(LOC_DUTCH);\r
359 \r
360         assertEquals("Dutch titlecase check in English",\r
361                 "Ijssel Igloo Ijmuiden",\r
362                 UCharacter.toTitleCase(ULocale.ENGLISH, "ijssel igloo IJMUIDEN", null));\r
363 \r
364         assertEquals("Dutch titlecase check in Dutch",\r
365                 "IJssel Igloo IJmuiden",\r
366                 UCharacter.toTitleCase(LOC_DUTCH, "ijssel igloo IJMUIDEN", null));\r
367 \r
368         iter.setText("ijssel igloo IjMUIdEN iPoD ijenough");\r
369         assertEquals("Dutch titlecase check in Dutch with nolowercase option",\r
370                 "IJssel Igloo IJMUIdEN IPoD IJenough",\r
371                 UCharacter.toTitleCase(LOC_DUTCH, "ijssel igloo IjMUIdEN iPoD ijenough", iter, options));\r
372     }\r
373 \r
374     public void TestSpecial()\r
375     {\r
376         for (int i = 0; i < SPECIAL_LOCALES_.length; i ++) {\r
377             int    j      = i * 3;\r
378             Locale locale = SPECIAL_LOCALES_[i];\r
379             String str    = SPECIAL_DATA_[j];\r
380             if (locale != null) {\r
381                 if (!SPECIAL_DATA_[j + 1].equals(\r
382                      UCharacter.toLowerCase(locale, str))) {\r
383                     errln("error lowercasing special characters " +\r
384                         hex(str) + " expected " + hex(SPECIAL_DATA_[j + 1])\r
385                         + " for locale " + locale.toString() + " but got " +\r
386                         hex(UCharacter.toLowerCase(locale, str)));\r
387                 }\r
388                 if (!SPECIAL_DATA_[j + 2].equals(\r
389                      UCharacter.toUpperCase(locale, str))) {\r
390                     errln("error uppercasing special characters " +\r
391                         hex(str) + " expected " + SPECIAL_DATA_[j + 2]\r
392                         + " for locale " + locale.toString() + " but got " +\r
393                         hex(UCharacter.toUpperCase(locale, str)));\r
394                 }\r
395             }\r
396             else {\r
397                 if (!SPECIAL_DATA_[j + 1].equals(\r
398                      UCharacter.toLowerCase(str))) {\r
399                     errln("error lowercasing special characters " +\r
400                         hex(str) + " expected " + SPECIAL_DATA_[j + 1] +\r
401                         " but got " +\r
402                         hex(UCharacter.toLowerCase(locale, str)));\r
403                 }\r
404                 if (!SPECIAL_DATA_[j + 2].equals(\r
405                      UCharacter.toUpperCase(locale, str))) {\r
406                     errln("error uppercasing special characters " +\r
407                         hex(str) + " expected " + SPECIAL_DATA_[j + 2] +\r
408                         " but got " +\r
409                         hex(UCharacter.toUpperCase(locale, str)));\r
410                 }\r
411             }\r
412         }\r
413 \r
414         // turkish & azerbaijani dotless i & dotted I\r
415         // remove dot above if there was a capital I before and there are no\r
416         // more accents above\r
417         if (!SPECIAL_DOTTED_LOWER_TURKISH_.equals(UCharacter.toLowerCase(\r
418                                         TURKISH_LOCALE_, SPECIAL_DOTTED_))) {\r
419             errln("error in dots.toLower(tr)=\"" + SPECIAL_DOTTED_ +\r
420                   "\" expected \"" + SPECIAL_DOTTED_LOWER_TURKISH_ +\r
421                   "\" but got " + UCharacter.toLowerCase(TURKISH_LOCALE_,\r
422                                                          SPECIAL_DOTTED_));\r
423         }\r
424         if (!SPECIAL_DOTTED_LOWER_GERMAN_.equals(UCharacter.toLowerCase(\r
425                                              GERMAN_LOCALE_, SPECIAL_DOTTED_))) {\r
426             errln("error in dots.toLower(de)=\"" + SPECIAL_DOTTED_ +\r
427                   "\" expected \"" + SPECIAL_DOTTED_LOWER_GERMAN_ +\r
428                   "\" but got " + UCharacter.toLowerCase(GERMAN_LOCALE_,\r
429                                                          SPECIAL_DOTTED_));\r
430         }\r
431 \r
432         // lithuanian dot above in uppercasing\r
433         if (!SPECIAL_DOT_ABOVE_UPPER_LITHUANIAN_.equals(\r
434              UCharacter.toUpperCase(LITHUANIAN_LOCALE_, SPECIAL_DOT_ABOVE_))) {\r
435             errln("error in dots.toUpper(lt)=\"" + SPECIAL_DOT_ABOVE_ +\r
436                   "\" expected \"" + SPECIAL_DOT_ABOVE_UPPER_LITHUANIAN_ +\r
437                   "\" but got " + UCharacter.toUpperCase(LITHUANIAN_LOCALE_,\r
438                                                          SPECIAL_DOT_ABOVE_));\r
439         }\r
440         if (!SPECIAL_DOT_ABOVE_UPPER_GERMAN_.equals(UCharacter.toUpperCase(\r
441                                         GERMAN_LOCALE_, SPECIAL_DOT_ABOVE_))) {\r
442             errln("error in dots.toUpper(de)=\"" + SPECIAL_DOT_ABOVE_ +\r
443                   "\" expected \"" + SPECIAL_DOT_ABOVE_UPPER_GERMAN_ +\r
444                   "\" but got " + UCharacter.toUpperCase(GERMAN_LOCALE_,\r
445                                                          SPECIAL_DOT_ABOVE_));\r
446         }\r
447 \r
448         // lithuanian adds dot above to i in lowercasing if there are more\r
449         // above accents\r
450         if (!SPECIAL_DOT_ABOVE_LOWER_LITHUANIAN_.equals(\r
451             UCharacter.toLowerCase(LITHUANIAN_LOCALE_,\r
452                                    SPECIAL_DOT_ABOVE_UPPER_))) {\r
453             errln("error in dots.toLower(lt)=\"" + SPECIAL_DOT_ABOVE_UPPER_ +\r
454                   "\" expected \"" + SPECIAL_DOT_ABOVE_LOWER_LITHUANIAN_ +\r
455                   "\" but got " + UCharacter.toLowerCase(LITHUANIAN_LOCALE_,\r
456                                                    SPECIAL_DOT_ABOVE_UPPER_));\r
457         }\r
458         if (!SPECIAL_DOT_ABOVE_LOWER_GERMAN_.equals(\r
459             UCharacter.toLowerCase(GERMAN_LOCALE_,\r
460                                    SPECIAL_DOT_ABOVE_UPPER_))) {\r
461             errln("error in dots.toLower(de)=\"" + SPECIAL_DOT_ABOVE_UPPER_ +\r
462                   "\" expected \"" + SPECIAL_DOT_ABOVE_LOWER_GERMAN_ +\r
463                   "\" but got " + UCharacter.toLowerCase(GERMAN_LOCALE_,\r
464                                                    SPECIAL_DOT_ABOVE_UPPER_));\r
465         }\r
466     }\r
467 \r
468     /**\r
469      * Tests for case mapping in the file SpecialCasing.txt\r
470      * This method reads in SpecialCasing.txt file for testing purposes.\r
471      * A default path is provided relative to the src path, however the user\r
472      * could set a system property to change the directory path.<br>\r
473      * e.g. java -DUnicodeData="data_dir_path" com.ibm.dev.test.lang.UCharacterTest\r
474      */\r
475     public void TestSpecialCasingTxt()\r
476     {\r
477         try\r
478         {\r
479             // reading in the SpecialCasing file\r
480             BufferedReader input = TestUtil.getDataReader(\r
481                                                   "unicode/SpecialCasing.txt");\r
482             while (true)\r
483             {\r
484                 String s = input.readLine();\r
485                 if (s == null) {\r
486                     break;\r
487                 }\r
488                 if (s.length() == 0 || s.charAt(0) == '#') {\r
489                     continue;\r
490                 }\r
491 \r
492                 String chstr[] = getUnicodeStrings(s);\r
493                 StringBuffer strbuffer   = new StringBuffer(chstr[0]);\r
494                 StringBuffer lowerbuffer = new StringBuffer(chstr[1]);\r
495                 StringBuffer upperbuffer = new StringBuffer(chstr[3]);\r
496                 Locale locale = null;\r
497                 for (int i = 4; i < chstr.length; i ++) {\r
498                     String condition = chstr[i];\r
499                     if (Character.isLowerCase(chstr[i].charAt(0))) {\r
500                         // specified locale\r
501                         locale = new Locale(chstr[i], "");\r
502                     }\r
503                     else if (condition.compareToIgnoreCase("Not_Before_Dot")\r
504                                                       == 0) {\r
505                         // turns I into dotless i\r
506                     }\r
507                     else if (condition.compareToIgnoreCase(\r
508                                                       "More_Above") == 0) {\r
509                             strbuffer.append((char)0x300);\r
510                             lowerbuffer.append((char)0x300);\r
511                             upperbuffer.append((char)0x300);\r
512                     }\r
513                     else if (condition.compareToIgnoreCase(\r
514                                                 "After_Soft_Dotted") == 0) {\r
515                             strbuffer.insert(0, 'i');\r
516                             lowerbuffer.insert(0, 'i');\r
517                             String lang = "";\r
518                             if (locale != null) {\r
519                                 lang = locale.getLanguage();\r
520                             }\r
521                             if (lang.equals("tr") || lang.equals("az")) {\r
522                                 // this is to be removed when 4.0 data comes out\r
523                                 // and upperbuffer.insert uncommented\r
524                                 // see jitterbug 2344\r
525                                 chstr[i] = "After_I";\r
526                                 strbuffer.deleteCharAt(0);\r
527                                 lowerbuffer.deleteCharAt(0);\r
528                                 i --;\r
529                                 continue;\r
530                                 // upperbuffer.insert(0, '\u0130');\r
531                             }\r
532                             else {\r
533                                 upperbuffer.insert(0, 'I');\r
534                             }\r
535                     }\r
536                     else if (condition.compareToIgnoreCase(\r
537                                                       "Final_Sigma") == 0) {\r
538                             strbuffer.insert(0, 'c');\r
539                             lowerbuffer.insert(0, 'c');\r
540                             upperbuffer.insert(0, 'C');\r
541                     }\r
542                     else if (condition.compareToIgnoreCase("After_I") == 0) {\r
543                             strbuffer.insert(0, 'I');\r
544                             lowerbuffer.insert(0, 'i');\r
545                             String lang = "";\r
546                             if (locale != null) {\r
547                                 lang = locale.getLanguage();\r
548                             }\r
549                             if (lang.equals("tr") || lang.equals("az")) {\r
550                                 upperbuffer.insert(0, 'I');\r
551                             }\r
552                     }\r
553                 }\r
554                 chstr[0] = strbuffer.toString();\r
555                 chstr[1] = lowerbuffer.toString();\r
556                 chstr[3] = upperbuffer.toString();\r
557                 if (locale == null) {\r
558                     if (!UCharacter.toLowerCase(chstr[0]).equals(chstr[1])) {\r
559                         errln(s);\r
560                         errln("Fail: toLowerCase for character " +\r
561                               Utility.escape(chstr[0]) + ", expected "\r
562                               + Utility.escape(chstr[1]) + " but resulted in " +\r
563                               Utility.escape(UCharacter.toLowerCase(chstr[0])));\r
564                     }\r
565                     if (!UCharacter.toUpperCase(chstr[0]).equals(chstr[3])) {\r
566                         errln(s);\r
567                         errln("Fail: toUpperCase for character " +\r
568                               Utility.escape(chstr[0]) + ", expected "\r
569                               + Utility.escape(chstr[3]) + " but resulted in " +\r
570                               Utility.escape(UCharacter.toUpperCase(chstr[0])));\r
571                     }\r
572                 }\r
573                 else {\r
574                     if (!UCharacter.toLowerCase(locale, chstr[0]).equals(\r
575                                                                    chstr[1])) {\r
576                         errln(s);\r
577                         errln("Fail: toLowerCase for character " +\r
578                               Utility.escape(chstr[0]) + ", expected "\r
579                               + Utility.escape(chstr[1]) + " but resulted in " +\r
580                               Utility.escape(UCharacter.toLowerCase(locale,\r
581                                                                     chstr[0])));\r
582                     }\r
583                     if (!UCharacter.toUpperCase(locale, chstr[0]).equals(\r
584                                                                    chstr[3])) {\r
585                         errln(s);\r
586                         errln("Fail: toUpperCase for character " +\r
587                               Utility.escape(chstr[0]) + ", expected "\r
588                               + Utility.escape(chstr[3]) + " but resulted in " +\r
589                               Utility.escape(UCharacter.toUpperCase(locale,\r
590                                                                     chstr[0])));\r
591                     }\r
592                 }\r
593             }\r
594             input.close();\r
595         }\r
596         catch (Exception e)\r
597         {\r
598           e.printStackTrace();\r
599         }\r
600     }\r
601 \r
602     public void TestUpperLower()\r
603     {\r
604         int upper[] = {0x0041, 0x0042, 0x00b2, 0x01c4, 0x01c6, 0x01c9, 0x01c8,\r
605                         0x01c9, 0x000c};\r
606         int lower[] = {0x0061, 0x0062, 0x00b2, 0x01c6, 0x01c6, 0x01c9, 0x01c9,\r
607                         0x01c9, 0x000c};\r
608         String upperTest = "abcdefg123hij.?:klmno";\r
609         String lowerTest = "ABCDEFG123HIJ.?:KLMNO";\r
610 \r
611         // Checks LetterLike Symbols which were previously a source of\r
612         // confusion [Bertrand A. D. 02/04/98]\r
613         for (int i = 0x2100; i < 0x2138; i ++) {\r
614             /* Unicode 5.0 adds lowercase U+214E (TURNED SMALL F) to U+2132 (TURNED CAPITAL F) */\r
615             if (i != 0x2126 && i != 0x212a && i != 0x212b && i!=0x2132) {\r
616                 if (i != UCharacter.toLowerCase(i)) { // itself\r
617                     errln("Failed case conversion with itself: \\u"\r
618                             + Utility.hex(i, 4));\r
619                 }\r
620                 if (i != UCharacter.toUpperCase(i)) {\r
621                     errln("Failed case conversion with itself: \\u"\r
622                             + Utility.hex(i, 4));\r
623                 }\r
624             }\r
625         }\r
626         for (int i = 0; i < upper.length; i ++) {\r
627             if (UCharacter.toLowerCase(upper[i]) != lower[i]) {\r
628                 errln("FAILED UCharacter.tolower() for \\u"\r
629                         + Utility.hex(upper[i], 4)\r
630                         + " Expected \\u" + Utility.hex(lower[i], 4)\r
631                         + " Got \\u"\r
632                         + Utility.hex(UCharacter.toLowerCase(upper[i]), 4));\r
633             }\r
634         }\r
635         logln("testing upper lower");\r
636         for (int i = 0; i < upperTest.length(); i ++) {\r
637             logln("testing to upper to lower");\r
638             if (UCharacter.isLetter(upperTest.charAt(i)) &&\r
639                 !UCharacter.isLowerCase(upperTest.charAt(i))) {\r
640                 errln("Failed isLowerCase test at \\u"\r
641                         + Utility.hex(upperTest.charAt(i), 4));\r
642             }\r
643             else if (UCharacter.isLetter(lowerTest.charAt(i))\r
644                      && !UCharacter.isUpperCase(lowerTest.charAt(i))) {\r
645                 errln("Failed isUpperCase test at \\u"\r
646                       + Utility.hex(lowerTest.charAt(i), 4));\r
647             }\r
648             else if (upperTest.charAt(i)\r
649                             != UCharacter.toLowerCase(lowerTest.charAt(i))) {\r
650                 errln("Failed case conversion from \\u"\r
651                         + Utility.hex(lowerTest.charAt(i), 4) + " To \\u"\r
652                         + Utility.hex(upperTest.charAt(i), 4));\r
653             }\r
654             else if (lowerTest.charAt(i)\r
655                     != UCharacter.toUpperCase(upperTest.charAt(i))) {\r
656                 errln("Failed case conversion : \\u"\r
657                         + Utility.hex(upperTest.charAt(i), 4) + " To \\u"\r
658                         + Utility.hex(lowerTest.charAt(i), 4));\r
659             }\r
660             else if (upperTest.charAt(i)\r
661                     != UCharacter.toLowerCase(upperTest.charAt(i))) {\r
662                 errln("Failed case conversion with itself: \\u"\r
663                         + Utility.hex(upperTest.charAt(i)));\r
664             }\r
665             else if (lowerTest.charAt(i)\r
666                     != UCharacter.toUpperCase(lowerTest.charAt(i))) {\r
667                 errln("Failed case conversion with itself: \\u"\r
668                         + Utility.hex(lowerTest.charAt(i)));\r
669             }\r
670         }\r
671         logln("done testing upper Lower");\r
672     }\r
673 \r
674     // private data members - test data --------------------------------------\r
675 \r
// Fixed java.util.Locale instances (language code, country code) private to
// this test class.
676     private static final Locale TURKISH_LOCALE_ = new Locale("tr", "TR");\r
677     private static final Locale GERMAN_LOCALE_ = new Locale("de", "DE");\r
678     private static final Locale GREEK_LOCALE_ = new Locale("el", "GR");\r
679     private static final Locale ENGLISH_LOCALE_ = new Locale("en", "US");\r
680     private static final Locale LITHUANIAN_LOCALE_ = new Locale("lt", "LT");\r
681 \r
// Two code point tables of equal length (25 entries each).
// NOTE(review): presumably entry i of CHARACTER_LOWER_ is the expected
// lowercase of entry i of CHARACTER_UPPER_ (and vice versa) - confirm
// against the test that consumes them. Caseless entries (e.g. 0x00b1,
// 0x002e, 0x000c, 0x0000) are identical in both tables.
682     private static final int CHARACTER_UPPER_[] =\r
683                       {0x41, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047,\r
684                        0x00b1, 0x00b2, 0xb3, 0x0048, 0x0049, 0x004a, 0x002e,\r
685                        0x003f, 0x003a, 0x004b, 0x004c, 0x4d, 0x004e, 0x004f,\r
686                        0x01c4, 0x01c8, 0x000c, 0x0000};\r
687     private static final int CHARACTER_LOWER_[] =\r
688                       {0x61, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067,\r
689                        0x00b1, 0x00b2, 0xb3, 0x0068, 0x0069, 0x006a, 0x002e,\r
690                        0x003f, 0x003a, 0x006b, 0x006c, 0x6d, 0x006e, 0x006f,\r
691                        0x01c6, 0x01c9, 0x000c, 0x0000};\r
692 \r
693     /*\r
694      * CaseFolding.txt says about i and its cousins:\r
695      *   0049; C; 0069; # LATIN CAPITAL LETTER I\r
696      *   0049; T; 0131; # LATIN CAPITAL LETTER I\r
697      *\r
698      *   0130; F; 0069 0307; # LATIN CAPITAL LETTER I WITH DOT ABOVE\r
699      *   0130; T; 0069; # LATIN CAPITAL LETTER I WITH DOT ABOVE\r
700      * That's all.\r
701      * See CaseFolding.txt and the Unicode Standard for how to apply the case foldings.\r
702      */\r
// Flat table of code point triples (8 rows of 3); the meaning of the three
// columns is given by the comment on the first row.
703     private static final int FOLDING_SIMPLE_[] = {\r
704         // input, default, exclude special i\r
705         0x61,   0x61,  0x61,\r
706         0x49,   0x69,  0x131,\r
707         0x130,  0x130, 0x69,\r
708         0x131,  0x131, 0x131,\r
709         0xdf,   0xdf,  0xdf,\r
710         0xfb03, 0xfb03, 0xfb03,\r
711         0x1040e,0x10436,0x10436,\r
712         0x5ffff,0x5ffff,0x5ffff\r
713     };\r
// String folding data: two mixed-case inputs.
// NOTE(review): FOLDING_DEFAULT_[i] and FOLDING_EXCLUDE_SPECIAL_I_[i] are
// presumably the expected foldings of FOLDING_MIXED_[i] with the default
// option and with the "exclude special i" option respectively - confirm
// against the consuming test. The two result tables differ only in how
// U+0049 and U+0130 are folded, matching the CaseFolding.txt excerpt above.
714     private static final String FOLDING_MIXED_[] =\r
715                           {"\u0061\u0042\u0130\u0049\u0131\u03d0\u00df\ufb03\ud93f\udfff",\r
716                            "A\u00df\u00b5\ufb03\uD801\uDC0C\u0130\u0131"};\r
717     private static final String FOLDING_DEFAULT_[] =\r
718          {"\u0061\u0062\u0069\u0307\u0069\u0131\u03b2\u0073\u0073\u0066\u0066\u0069\ud93f\udfff",\r
719           "ass\u03bcffi\uD801\uDC34i\u0307\u0131"};\r
720     private static final String FOLDING_EXCLUDE_SPECIAL_I_[] =\r
721          {"\u0061\u0062\u0069\u0131\u0131\u03b2\u0073\u0073\u0066\u0066\u0069\ud93f\udfff",\r
722           "ass\u03bcffi\uD801\uDC34i\u0131"};\r
723     /**\r
724      * "IESUS CHRISTOS"\r
725      */\r
726     private static final String SHARED_UPPERCASE_GREEK_ =\r
727         "\u0399\u0395\u03a3\u03a5\u03a3\u0020\u03a7\u03a1\u0399\u03a3\u03a4\u039f\u03a3";\r
728     /**\r
729      * "iesus christos"\r
730      */\r
731     private static final String SHARED_LOWERCASE_GREEK_ =\r
732         "\u03b9\u03b5\u03c3\u03c5\u03c2\u0020\u03c7\u03c1\u03b9\u03c3\u03c4\u03bf\u03c2";\r
// Decodes to "istanbul, not constant\u0131nople!" (note the dotless i, U+0131).
733     private static final String SHARED_LOWERCASE_TURKISH_ =\r
734         "\u0069\u0073\u0074\u0061\u006e\u0062\u0075\u006c\u002c\u0020\u006e\u006f\u0074\u0020\u0063\u006f\u006e\u0073\u0074\u0061\u006e\u0074\u0131\u006e\u006f\u0070\u006c\u0065\u0021";\r
// Decodes to "TOPKAPI PALACE, \u0130STANBUL" (capital I with dot above, U+0130).
735     private static final String SHARED_UPPERCASE_TURKISH_ =\r
736         "\u0054\u004f\u0050\u004b\u0041\u0050\u0049\u0020\u0050\u0041\u004c\u0041\u0043\u0045\u002c\u0020\u0130\u0053\u0054\u0041\u004e\u0042\u0055\u004c";\r
737     private static final String SHARED_UPPERCASE_ISTANBUL_ =\r
738                                           "\u0130STANBUL, NOT CONSTANTINOPLE!";\r
739     private static final String SHARED_LOWERCASE_ISTANBUL_ =\r
740                                           "i\u0307stanbul, not constantinople!";\r
741     private static final String SHARED_LOWERCASE_TOPKAP_ =\r
742                                           "topkap\u0131 palace, istanbul";\r
743     private static final String SHARED_UPPERCASE_TOPKAP_ =\r
744                                           "TOPKAPI PALACE, ISTANBUL";\r
// Despite its name this value starts with a capital 'S'; it is the
// mixed-case form whose all-uppercase counterpart follows.
745     private static final String SHARED_LOWERCASE_GERMAN_ =\r
746                                           "S\u00FC\u00DFmayrstra\u00DFe";\r
747     private static final String SHARED_UPPERCASE_GERMAN_ =\r
748                                           "S\u00DCSSMAYRSTRASSE";\r
749 \r
// NOTE(review): UPPER_BEFORE_ is presumably the input whose expected
// uppercase is UPPER_ROOT_ (root locale) or UPPER_TURKISH_ (Turkish); the
// two results differ only in the third character (U+0049 vs U+0130).
// UPPER_MINI_/UPPER_MINI_UPPER_ look like a short input/expected pair.
// Confirm against the consuming test.
750     private static final String UPPER_BEFORE_ =\r
751          "\u0061\u0042\u0069\u03c2\u00df\u03c3\u002f\ufb03\ufb03\ufb03\ud93f\udfff";\r
752     private static final String UPPER_ROOT_ =\r
753          "\u0041\u0042\u0049\u03a3\u0053\u0053\u03a3\u002f\u0046\u0046\u0049\u0046\u0046\u0049\u0046\u0046\u0049\ud93f\udfff";\r
754     private static final String UPPER_TURKISH_ =\r
755          "\u0041\u0042\u0130\u03a3\u0053\u0053\u03a3\u002f\u0046\u0046\u0049\u0046\u0046\u0049\u0046\u0046\u0049\ud93f\udfff";\r
756     private static final String UPPER_MINI_ = "\u00df\u0061";\r
757     private static final String UPPER_MINI_UPPER_ = "\u0053\u0053\u0041";\r
758 \r
// NOTE(review): same layout for lowercasing - LOWER_BEFORE_ is presumably
// the input, LOWER_ROOT_/LOWER_TURKISH_ the expected results; they differ
// only in the third character (U+0069 vs U+0131). Confirm against the
// consuming test.
759     private static final String LOWER_BEFORE_ =\r
760                       "\u0061\u0042\u0049\u03a3\u00df\u03a3\u002f\ud93f\udfff";\r
761     private static final String LOWER_ROOT_ =\r
762                       "\u0061\u0062\u0069\u03c3\u00df\u03c2\u002f\ud93f\udfff";\r
763     private static final String LOWER_TURKISH_ = \r
764                       "\u0061\u0062\u0131\u03c3\u00df\u03c2\u002f\ud93f\udfff";\r
765 \r
// Title casing test data. The array is flat: every five consecutive strings
// form one test case, in the field order described by the Javadoc below
// (14 cases, 70 strings in total). Some cases put the input and the result
// string on the same source line.
766     /**\r
767      * each item is an array with input string, result string, locale ID, break iterator, options\r
768      * the break iterator is specified as an int, same as in BreakIterator.KIND_*:\r
769      * 0=KIND_CHARACTER  1=KIND_WORD  2=KIND_LINE  3=KIND_SENTENCE  4=KIND_TITLE  -1=default (NULL=words)  -2=no breaks (.*)\r
770      * options: T=U_FOLD_CASE_EXCLUDE_SPECIAL_I  L=U_TITLECASE_NO_LOWERCASE  A=U_TITLECASE_NO_BREAK_ADJUSTMENT\r
771      * see ICU4C source/test/testdata/casing.txt\r
772      */\r
773     private static final String TITLE_DATA_[] = {\r
774         "\u0061\u0042\u0020\u0069\u03c2\u0020\u00df\u03c3\u002f\ufb03\ud93f\udfff",\r
775         "\u0041\u0042\u0020\u0049\u03a3\u0020\u0053\u0073\u03a3\u002f\u0046\u0066\u0069\ud93f\udfff",\r
776         "",\r
777         "0",\r
778         "",\r
779 \r
780         "\u0061\u0042\u0020\u0069\u03c2\u0020\u00df\u03c3\u002f\ufb03\ud93f\udfff",\r
781         "\u0041\u0062\u0020\u0049\u03c2\u0020\u0053\u0073\u03c3\u002f\u0046\u0066\u0069\ud93f\udfff",\r
782         "",\r
783         "1",\r
784         "",\r
785 \r
786         "\u02bbaMeLikA huI P\u016b \u02bb\u02bb\u02bbiA", "\u02bbAmelika Hui P\u016b \u02bb\u02bb\u02bbIa", // titlecase first _cased_ letter, j4933\r
787         "",\r
788         "-1",\r
789         "",\r
790 \r
791         " tHe QUIcK bRoWn", " The Quick Brown",\r
792         "",\r
793         "4",\r
794         "",\r
795 \r
796         "\u01c4\u01c5\u01c6\u01c7\u01c8\u01c9\u01ca\u01cb\u01cc", \r
797         "\u01c5\u01c5\u01c5\u01c8\u01c8\u01c8\u01cb\u01cb\u01cb", // UBRK_CHARACTER\r
798         "",\r
799         "0",\r
800         "",\r
801 \r
802         "\u01c9ubav ljubav", "\u01c8ubav Ljubav", // Lj vs. L+j\r
803         "",\r
804         "-1",\r
805         "",\r
806 \r
807         "'oH dOn'T tItLeCaSe AfTeR lEtTeR+'",  "'Oh Don't Titlecase After Letter+'",\r
808         "",\r
809         "-1",\r
810         "",\r
811 \r
812         "a \u02bbCaT. A \u02bbdOg! \u02bbeTc.",\r
813         "A \u02bbCat. A \u02bbDog! \u02bbEtc.",\r
814         "",\r
815         "-1",\r
816         "", // default\r
817 \r
818         "a \u02bbCaT. A \u02bbdOg! \u02bbeTc.",\r
819         "A \u02bbcat. A \u02bbdog! \u02bbetc.",\r
820         "",\r
821         "-1",\r
822         "A", // U_TITLECASE_NO_BREAK_ADJUSTMENT\r
823 \r
824         "a \u02bbCaT. A \u02bbdOg! \u02bbeTc.",\r
825         "A \u02bbCaT. A \u02bbdOg! \u02bbETc.",\r
826         "",\r
827         "3",\r
828         "L", // UBRK_SENTENCE and U_TITLECASE_NO_LOWERCASE\r
829 \r
830 \r
831         "\u02bbcAt! \u02bbeTc.",\r
832         "\u02bbCat! \u02bbetc.",\r
833         "",\r
834         "-2",\r
835         "", // -2=Trivial break iterator\r
836 \r
837         "\u02bbcAt! \u02bbeTc.",\r
838         "\u02bbcat! \u02bbetc.",\r
839         "",\r
840         "-2",\r
841         "A", // U_TITLECASE_NO_BREAK_ADJUSTMENT\r
842 \r
843         "\u02bbcAt! \u02bbeTc.",\r
844         "\u02bbCAt! \u02bbeTc.",\r
845         "",\r
846         "-2",\r
847         "L", // U_TITLECASE_NO_LOWERCASE\r
848 \r
849         "\u02bbcAt! \u02bbeTc.",\r
850         "\u02bbcAt! \u02bbeTc.",\r
851         "",\r
852         "-2",\r
853         "AL" // Both options\r
854     };\r
855 \r
856 \r
// Flat table of 9 strings, i.e. three groups of three.
// NOTE(review): the Javadoc below lists four columns including a title
// string, but each group here only holds three strings (in Javadoc order:
// basic, lower, upper) - confirm with the consuming test and correct the
// Javadoc or the data accordingly.
857     /**\r
858      * <p>basic string, lower string, upper string, title string</p>\r
859      */\r
860     private static final String SPECIAL_DATA_[] = {\r
861         UTF16.valueOf(0x1043C) + UTF16.valueOf(0x10414),\r
862         UTF16.valueOf(0x1043C) + UTF16.valueOf(0x1043C),\r
863         UTF16.valueOf(0x10414) + UTF16.valueOf(0x10414),\r
864         "ab'cD \uFB00i\u0131I\u0130 \u01C7\u01C8\u01C9 " +\r
865                          UTF16.valueOf(0x1043C) + UTF16.valueOf(0x10414),\r
866         "ab'cd \uFB00i\u0131ii\u0307 \u01C9\u01C9\u01C9 " +\r
867                               UTF16.valueOf(0x1043C) + UTF16.valueOf(0x1043C),\r
868         "AB'CD FFIII\u0130 \u01C7\u01C7\u01C7 " +\r
869                               UTF16.valueOf(0x10414) + UTF16.valueOf(0x10414),\r
870         // sigmas followed/preceded by cased letters\r
871         "i\u0307\u03a3\u0308j \u0307\u03a3\u0308j i\u00ad\u03a3\u0308 \u0307\u03a3\u0308 ",\r
872         "i\u0307\u03c3\u0308j \u0307\u03c3\u0308j i\u00ad\u03c2\u0308 \u0307\u03c3\u0308 ",\r
873         "I\u0307\u03a3\u0308J \u0307\u03a3\u0308J I\u00ad\u03a3\u0308 \u0307\u03a3\u0308 "\r
874     };\r
// Three entries, the same count as the groups in SPECIAL_DATA_.
// NOTE(review): presumably entry i is the locale for group i, with null
// meaning "no explicit locale" - confirm against the consuming test.
875     private static final Locale SPECIAL_LOCALES_[] = {\r
876         null,\r
877         ENGLISH_LOCALE_,\r
878         null,\r
879     };\r
880 \r
// Strings built around I/i/J/j, dotless i (U+0131), capital I with dot
// above (U+0130) and COMBINING DOT ABOVE (U+0307), optionally with other
// combining marks.
// NOTE(review): judging by the names, SPECIAL_DOTTED_ is an input with
// per-locale expected lowercase forms (Turkish, German), SPECIAL_DOT_ABOVE_
// an input with expected uppercase forms (Lithuanian, German), and
// SPECIAL_DOT_ABOVE_UPPER_ an input with expected lowercase forms
// (Lithuanian, German) - confirm against the consuming test.
881     private static final String SPECIAL_DOTTED_ =\r
882             "I \u0130 I\u0307 I\u0327\u0307 I\u0301\u0307 I\u0327\u0307\u0301";\r
883     private static final String SPECIAL_DOTTED_LOWER_TURKISH_ =\r
884             "\u0131 i i i\u0327 \u0131\u0301\u0307 i\u0327\u0301";\r
885     private static final String SPECIAL_DOTTED_LOWER_GERMAN_ =\r
886             "i i\u0307 i\u0307 i\u0327\u0307 i\u0301\u0307 i\u0327\u0307\u0301";\r
887     private static final String SPECIAL_DOT_ABOVE_ =\r
888             "a\u0307 \u0307 i\u0307 j\u0327\u0307 j\u0301\u0307";\r
889     private static final String SPECIAL_DOT_ABOVE_UPPER_LITHUANIAN_ =\r
890             "A\u0307 \u0307 I J\u0327 J\u0301\u0307";\r
891     private static final String SPECIAL_DOT_ABOVE_UPPER_GERMAN_ =\r
892             "A\u0307 \u0307 I\u0307 J\u0327\u0307 J\u0301\u0307";\r
893     private static final String SPECIAL_DOT_ABOVE_UPPER_ =\r
894             "I I\u0301 J J\u0301 \u012e \u012e\u0301 \u00cc\u00cd\u0128";\r
895     private static final String SPECIAL_DOT_ABOVE_LOWER_LITHUANIAN_ =\r
896             "i i\u0307\u0301 j j\u0307\u0301 \u012f \u012f\u0307\u0301 i\u0307\u0300i\u0307\u0301i\u0307\u0303";\r
897     private static final String SPECIAL_DOT_ABOVE_LOWER_GERMAN_ =\r
898             "i i\u0301 j j\u0301 \u012f \u012f\u0301 \u00ec\u00ed\u0129";\r
899 \r
900     // private methods -------------------------------------------------------\r
901 \r
902     /**\r
903      * Converting the hex numbers represented betwee                             n ';' to Unicode strings\r
904      * @param str string to break up into Unicode strings\r
905      * @return array of Unicode strings ending with a null\r
906      */\r
907     private String[] getUnicodeStrings(String str)\r
908     {\r
909         Vector v = new Vector(10);\r
910         int start = 0;\r
911         for (int casecount = 4; casecount > 0; casecount --) {\r
912             int end = str.indexOf("; ", start);\r
913             String casestr = str.substring(start, end);\r
914             StringBuffer buffer = new StringBuffer();\r
915             int spaceoffset = 0;\r
916             while (spaceoffset < casestr.length()) {\r
917                 int nextspace = casestr.indexOf(' ', spaceoffset);\r
918                 if (nextspace == -1) {\r
919                     nextspace = casestr.length();\r
920                 }\r
921                 buffer.append((char)Integer.parseInt(\r
922                                      casestr.substring(spaceoffset, nextspace),\r
923                                                       16));\r
924                 spaceoffset = nextspace + 1;\r
925             }\r
926             start = end + 2;\r
927             v.add(buffer.toString());\r
928         }\r
929         int comments = str.indexOf(" #", start);\r
930         if (comments != -1 && comments != start) {\r
931             if (str.charAt(comments - 1) == ';') {\r
932                 comments --;\r
933             }\r
934             String conditions = str.substring(start, comments);\r
935             int offset = 0;\r
936             while (offset < conditions.length()) {\r
937                 int spaceoffset = conditions.indexOf(' ', offset);\r
938                 if (spaceoffset == -1) {\r
939                     spaceoffset = conditions.length();\r
940                 }\r
941                 v.add(conditions.substring(offset, spaceoffset));\r
942                 offset = spaceoffset + 1;\r
943             }\r
944         }\r
945         int size = v.size();\r
946         String result[] = new String[size];\r
947         for (int i = 0; i < size; i ++) {\r
948             result[i] = (String)v.elementAt(i);\r
949         }\r
950         return result;\r
951     }\r
952 }\r