]> gitweb.fperrin.net Git - Dictionary.git/blob - jars/icu4j-4_4_2-src/main/tests/core/src/com/ibm/icu/dev/test/lang/UCharacterCaseTest.java
go
[Dictionary.git] / jars / icu4j-4_4_2-src / main / tests / core / src / com / ibm / icu / dev / test / lang / UCharacterCaseTest.java
1 /**\r
2 *******************************************************************************\r
3 * Copyright (C) 1996-2010, International Business Machines Corporation and\r
4 * others. All Rights Reserved.\r
5 *******************************************************************************\r
6 */\r
7 \r
8 \r
9 package com.ibm.icu.dev.test.lang;\r
10 \r
11 \r
12 import java.io.BufferedReader;\r
13 import java.util.Locale;\r
14 import java.util.Vector;\r
15 \r
16 import com.ibm.icu.dev.test.TestFmwk;\r
17 import com.ibm.icu.dev.test.TestUtil;\r
18 import com.ibm.icu.impl.Utility;\r
19 import com.ibm.icu.lang.UCharacter;\r
20 import com.ibm.icu.lang.UProperty;\r
21 import com.ibm.icu.text.BreakIterator;\r
22 import com.ibm.icu.text.RuleBasedBreakIterator;\r
23 import com.ibm.icu.text.UTF16;\r
24 import com.ibm.icu.util.ULocale;\r
25 \r
26 \r
27 /**\r
28 * <p>Testing character casing</p>\r
29 * <p>Mostly following the test cases in strcase.cpp for ICU</p>\r
30 * @author Syn Wee Quek\r
31 * @since march 14 2002\r
32 */\r
33 public final class UCharacterCaseTest extends TestFmwk\r
34 {\r
35     // constructor -----------------------------------------------------------\r
36 \r
37     /**\r
38      * Constructor\r
39      */\r
40     public UCharacterCaseTest()\r
41     {\r
42     }\r
43 \r
44     // public methods --------------------------------------------------------\r
45 \r
46     public static void main(String[] arg)\r
47     {\r
48         try\r
49         {\r
50             UCharacterCaseTest test = new UCharacterCaseTest();\r
51             test.run(arg);\r
52         }\r
53         catch (Exception e)\r
54         {\r
55             e.printStackTrace();\r
56         }\r
57     }\r
58 \r
59     /**\r
60      * Testing the uppercase and lowercase function of UCharacter\r
61      */\r
62     public void TestCharacter()\r
63     {\r
64         for (int i = 0; i < CHARACTER_LOWER_.length; i ++) {\r
65             if (UCharacter.isLetter(CHARACTER_LOWER_[i]) &&\r
66                 !UCharacter.isLowerCase(CHARACTER_LOWER_[i])) {\r
67                 errln("FAIL isLowerCase test for \\u" +\r
68                       hex(CHARACTER_LOWER_[i]));\r
69                 break;\r
70             }\r
71             if (UCharacter.isLetter(CHARACTER_UPPER_[i]) &&\r
72                 !(UCharacter.isUpperCase(CHARACTER_UPPER_[i]) ||\r
73                   UCharacter.isTitleCase(CHARACTER_UPPER_[i]))) {\r
74                 errln("FAIL isUpperCase test for \\u" +\r
75                       hex(CHARACTER_UPPER_[i]));\r
76                 break;\r
77             }\r
78             if (CHARACTER_LOWER_[i] !=\r
79                 UCharacter.toLowerCase(CHARACTER_UPPER_[i]) ||\r
80                 (CHARACTER_UPPER_[i] !=\r
81                 UCharacter.toUpperCase(CHARACTER_LOWER_[i]) &&\r
82                 CHARACTER_UPPER_[i] !=\r
83                 UCharacter.toTitleCase(CHARACTER_LOWER_[i]))) {\r
84                 errln("FAIL case conversion test for \\u" +\r
85                       hex(CHARACTER_UPPER_[i]) +\r
86                       " to \\u" + hex(CHARACTER_LOWER_[i]));\r
87                 break;\r
88             }\r
89             if (CHARACTER_LOWER_[i] !=\r
90                 UCharacter.toLowerCase(CHARACTER_LOWER_[i])) {\r
91                 errln("FAIL lower case conversion test for \\u" +\r
92                       hex(CHARACTER_LOWER_[i]));\r
93                 break;\r
94             }\r
95             if (CHARACTER_UPPER_[i] !=\r
96                 UCharacter.toUpperCase(CHARACTER_UPPER_[i]) &&\r
97                 CHARACTER_UPPER_[i] !=\r
98                 UCharacter.toTitleCase(CHARACTER_UPPER_[i])) {\r
99                 errln("FAIL upper case conversion test for \\u" +\r
100                       hex(CHARACTER_UPPER_[i]));\r
101                 break;\r
102             }\r
103             logln("Ok    \\u" + hex(CHARACTER_UPPER_[i]) + " and \\u" +\r
104                   hex(CHARACTER_LOWER_[i]));\r
105         }\r
106     }\r
107 \r
108     public void TestFolding()\r
109     {\r
110         // test simple case folding\r
111         for (int i = 0; i < FOLDING_SIMPLE_.length; i += 3) {\r
112             if (UCharacter.foldCase(FOLDING_SIMPLE_[i], true) !=\r
113                 FOLDING_SIMPLE_[i + 1]) {\r
114                 errln("FAIL: foldCase(\\u" + hex(FOLDING_SIMPLE_[i]) +\r
115                       ", true) should be \\u" + hex(FOLDING_SIMPLE_[i + 1]));\r
116             }\r
117             if (UCharacter.foldCase(FOLDING_SIMPLE_[i], \r
118                                     UCharacter.FOLD_CASE_DEFAULT) !=\r
119                                                       FOLDING_SIMPLE_[i + 1]) {\r
120                 errln("FAIL: foldCase(\\u" + hex(FOLDING_SIMPLE_[i]) +\r
121                       ", UCharacter.FOLD_CASE_DEFAULT) should be \\u" \r
122                       + hex(FOLDING_SIMPLE_[i + 1]));\r
123             }\r
124             if (UCharacter.foldCase(FOLDING_SIMPLE_[i], false) !=\r
125                 FOLDING_SIMPLE_[i + 2]) {\r
126                 errln("FAIL: foldCase(\\u" + hex(FOLDING_SIMPLE_[i]) +\r
127                       ", false) should be \\u" + hex(FOLDING_SIMPLE_[i + 2]));\r
128             }\r
129             if (UCharacter.foldCase(FOLDING_SIMPLE_[i], \r
130                                     UCharacter.FOLD_CASE_EXCLUDE_SPECIAL_I) !=\r
131                                     FOLDING_SIMPLE_[i + 2]) {\r
132                 errln("FAIL: foldCase(\\u" + hex(FOLDING_SIMPLE_[i]) +\r
133                       ", UCharacter.FOLD_CASE_EXCLUDE_SPECIAL_I) should be \\u" \r
134                       + hex(FOLDING_SIMPLE_[i + 2]));\r
135             }\r
136         }\r
137 \r
138         // Test full string case folding with default option and separate\r
139         // buffers\r
140         if (!FOLDING_DEFAULT_[0].equals(UCharacter.foldCase(FOLDING_MIXED_[0], true))) {\r
141             errln("FAIL: foldCase(" + prettify(FOLDING_MIXED_[0]) +\r
142                   ", true)=" + prettify(UCharacter.foldCase(FOLDING_MIXED_[0], true)) +\r
143                   " should be " + prettify(FOLDING_DEFAULT_[0]));\r
144         }\r
145         \r
146         if (!FOLDING_DEFAULT_[0].equals(UCharacter.foldCase(FOLDING_MIXED_[0], UCharacter.FOLD_CASE_DEFAULT))) {\r
147                     errln("FAIL: foldCase(" + prettify(FOLDING_MIXED_[0]) +\r
148                           ", UCharacter.FOLD_CASE_DEFAULT)=" + prettify(UCharacter.foldCase(FOLDING_MIXED_[0], UCharacter.FOLD_CASE_DEFAULT))\r
149                           + " should be " + prettify(FOLDING_DEFAULT_[0]));\r
150                 }\r
151 \r
152         if (!FOLDING_EXCLUDE_SPECIAL_I_[0].equals(\r
153                             UCharacter.foldCase(FOLDING_MIXED_[0], false))) {\r
154             errln("FAIL: foldCase(" + prettify(FOLDING_MIXED_[0]) +\r
155                   ", false)=" + prettify(UCharacter.foldCase(FOLDING_MIXED_[0], false))\r
156                   + " should be " + prettify(FOLDING_EXCLUDE_SPECIAL_I_[0]));\r
157         }\r
158         \r
159         if (!FOLDING_EXCLUDE_SPECIAL_I_[0].equals(\r
160                                     UCharacter.foldCase(FOLDING_MIXED_[0], UCharacter.FOLD_CASE_EXCLUDE_SPECIAL_I))) {\r
161             errln("FAIL: foldCase(" + prettify(FOLDING_MIXED_[0]) +\r
162                   ", UCharacter.FOLD_CASE_EXCLUDE_SPECIAL_I)=" + prettify(UCharacter.foldCase(FOLDING_MIXED_[0], UCharacter.FOLD_CASE_EXCLUDE_SPECIAL_I))\r
163                   + " should be " + prettify(FOLDING_EXCLUDE_SPECIAL_I_[0]));\r
164         }\r
165 \r
166         if (!FOLDING_DEFAULT_[1].equals(UCharacter.foldCase(FOLDING_MIXED_[1], true))) {\r
167            errln("FAIL: foldCase(" + prettify(FOLDING_MIXED_[1]) +\r
168                  ", true)=" + prettify(UCharacter.foldCase(FOLDING_MIXED_[1], true))\r
169                  + " should be " + prettify(FOLDING_DEFAULT_[1]));\r
170         }\r
171 \r
172         if (!FOLDING_DEFAULT_[1].equals(UCharacter.foldCase(FOLDING_MIXED_[1], UCharacter.FOLD_CASE_DEFAULT))) {\r
173             errln("FAIL: foldCase(" + prettify(FOLDING_MIXED_[1]) +\r
174                          ", UCharacter.FOLD_CASE_DEFAULT)=" + prettify(UCharacter.foldCase(FOLDING_MIXED_[1], UCharacter.FOLD_CASE_DEFAULT))\r
175                          + " should be " + prettify(FOLDING_DEFAULT_[1]));\r
176         }\r
177         \r
178         // alternate handling for dotted I/dotless i (U+0130, U+0131)\r
179         if (!FOLDING_EXCLUDE_SPECIAL_I_[1].equals(\r
180                         UCharacter.foldCase(FOLDING_MIXED_[1], false))) {\r
181             errln("FAIL: foldCase(" + prettify(FOLDING_MIXED_[1]) +\r
182                   ", false)=" + prettify(UCharacter.foldCase(FOLDING_MIXED_[1], false))\r
183                   + " should be " + prettify(FOLDING_EXCLUDE_SPECIAL_I_[1]));\r
184         }\r
185         \r
186         if (!FOLDING_EXCLUDE_SPECIAL_I_[1].equals(\r
187                                 UCharacter.foldCase(FOLDING_MIXED_[1], UCharacter.FOLD_CASE_EXCLUDE_SPECIAL_I))) {\r
188             errln("FAIL: foldCase(" + prettify(FOLDING_MIXED_[1]) +\r
189                   ", UCharacter.FOLD_CASE_EXCLUDE_SPECIAL_I)=" + prettify(UCharacter.foldCase(FOLDING_MIXED_[1], UCharacter.FOLD_CASE_EXCLUDE_SPECIAL_I))\r
190                   + " should be "\r
191                   + prettify(FOLDING_EXCLUDE_SPECIAL_I_[1]));\r
192         }\r
193     }\r
194 \r
195     /**\r
196      * Testing the strings case mapping methods\r
197      */\r
198     public void TestUpper()\r
199     {\r
200         // uppercase with root locale and in the same buffer\r
201         if (!UPPER_ROOT_.equals(UCharacter.toUpperCase(UPPER_BEFORE_))) {\r
202             errln("Fail " + UPPER_BEFORE_ + " after uppercase should be " +\r
203                   UPPER_ROOT_ + " instead got " +\r
204                   UCharacter.toUpperCase(UPPER_BEFORE_));\r
205         }\r
206 \r
207         // uppercase with turkish locale and separate buffers\r
208         if (!UPPER_TURKISH_.equals(UCharacter.toUpperCase(TURKISH_LOCALE_,\r
209                                                          UPPER_BEFORE_))) {\r
210             errln("Fail " + UPPER_BEFORE_ +\r
211                   " after turkish-sensitive uppercase should be " +\r
212                   UPPER_TURKISH_ + " instead of " +\r
213                   UCharacter.toUpperCase(TURKISH_LOCALE_, UPPER_BEFORE_));\r
214         }\r
215 \r
216         // uppercase a short string with root locale\r
217         if (!UPPER_MINI_UPPER_.equals(UCharacter.toUpperCase(UPPER_MINI_))) {\r
218             errln("error in toUpper(root locale)=\"" + UPPER_MINI_ +\r
219                   "\" expected \"" + UPPER_MINI_UPPER_ + "\"");\r
220         }\r
221 \r
222         if (!SHARED_UPPERCASE_TOPKAP_.equals(\r
223                        UCharacter.toUpperCase(SHARED_LOWERCASE_TOPKAP_))) {\r
224             errln("toUpper failed: expected \"" +\r
225                   SHARED_UPPERCASE_TOPKAP_ + "\", got \"" +\r
226                   UCharacter.toUpperCase(SHARED_LOWERCASE_TOPKAP_) + "\".");\r
227         }\r
228 \r
229         if (!SHARED_UPPERCASE_TURKISH_.equals(\r
230                   UCharacter.toUpperCase(TURKISH_LOCALE_,\r
231                                          SHARED_LOWERCASE_TOPKAP_))) {\r
232             errln("toUpper failed: expected \"" +\r
233                   SHARED_UPPERCASE_TURKISH_ + "\", got \"" +\r
234                   UCharacter.toUpperCase(TURKISH_LOCALE_,\r
235                                      SHARED_LOWERCASE_TOPKAP_) + "\".");\r
236         }\r
237 \r
238         if (!SHARED_UPPERCASE_GERMAN_.equals(\r
239                 UCharacter.toUpperCase(GERMAN_LOCALE_,\r
240                                        SHARED_LOWERCASE_GERMAN_))) {\r
241             errln("toUpper failed: expected \"" + SHARED_UPPERCASE_GERMAN_\r
242                   + "\", got \"" + UCharacter.toUpperCase(GERMAN_LOCALE_,\r
243                                         SHARED_LOWERCASE_GERMAN_) + "\".");\r
244         }\r
245 \r
246         if (!SHARED_UPPERCASE_GREEK_.equals(\r
247                 UCharacter.toUpperCase(SHARED_LOWERCASE_GREEK_))) {\r
248             errln("toLower failed: expected \"" + SHARED_UPPERCASE_GREEK_ +\r
249                   "\", got \"" + UCharacter.toUpperCase(\r
250                                         SHARED_LOWERCASE_GREEK_) + "\".");\r
251         }\r
252     }\r
253 \r
254     public void TestLower()\r
255     {\r
256         if (!LOWER_ROOT_.equals(UCharacter.toLowerCase(LOWER_BEFORE_))) {\r
257             errln("Fail " + LOWER_BEFORE_ + " after lowercase should be " +\r
258                   LOWER_ROOT_ + " instead of " +\r
259                   UCharacter.toLowerCase(LOWER_BEFORE_));\r
260         }\r
261 \r
262         // lowercase with turkish locale\r
263         if (!LOWER_TURKISH_.equals(UCharacter.toLowerCase(TURKISH_LOCALE_,\r
264                                                           LOWER_BEFORE_))) {\r
265             errln("Fail " + LOWER_BEFORE_ +\r
266                   " after turkish-sensitive lowercase should be " +\r
267                   LOWER_TURKISH_ + " instead of " +\r
268                   UCharacter.toLowerCase(TURKISH_LOCALE_, LOWER_BEFORE_));\r
269         }\r
270         if (!SHARED_LOWERCASE_ISTANBUL_.equals(\r
271                      UCharacter.toLowerCase(SHARED_UPPERCASE_ISTANBUL_))) {\r
272             errln("1. toLower failed: expected \"" +\r
273                   SHARED_LOWERCASE_ISTANBUL_ + "\", got \"" +\r
274               UCharacter.toLowerCase(SHARED_UPPERCASE_ISTANBUL_) + "\".");\r
275         }\r
276 \r
277         if (!SHARED_LOWERCASE_TURKISH_.equals(\r
278                 UCharacter.toLowerCase(TURKISH_LOCALE_,\r
279                                        SHARED_UPPERCASE_ISTANBUL_))) {\r
280             errln("2. toLower failed: expected \"" +\r
281                   SHARED_LOWERCASE_TURKISH_ + "\", got \"" +\r
282                   UCharacter.toLowerCase(TURKISH_LOCALE_,\r
283                                 SHARED_UPPERCASE_ISTANBUL_) + "\".");\r
284         }\r
285         if (!SHARED_LOWERCASE_GREEK_.equals(\r
286                 UCharacter.toLowerCase(GREEK_LOCALE_,\r
287                                        SHARED_UPPERCASE_GREEK_))) {\r
288             errln("toLower failed: expected \"" + SHARED_LOWERCASE_GREEK_ +\r
289                   "\", got \"" + UCharacter.toLowerCase(GREEK_LOCALE_,\r
290                                         SHARED_UPPERCASE_GREEK_) + "\".");\r
291         }\r
292     }\r
293 \r
294     public void TestTitleRegression() throws java.io.IOException {\r
295         boolean isIgnorable = UCharacter.hasBinaryProperty('\'', UProperty.CASE_IGNORABLE);\r
296         assertTrue("Case Ignorable check of ASCII apostrophe", isIgnorable);\r
297         assertEquals("Titlecase check",\r
298                 "The Quick Brown Fox Can't Jump Over The Lazy Dogs.",\r
299                 UCharacter.toTitleCase(ULocale.ENGLISH, "THE QUICK BROWN FOX CAN'T JUMP OVER THE LAZY DOGS.", null));\r
300     }\r
301 \r
302     public void TestTitle()\r
303     {\r
304          try{ \r
305             for (int i = 0; i < TITLE_DATA_.length;) {\r
306                 String test = TITLE_DATA_[i++];\r
307                 String expected = TITLE_DATA_[i++];\r
308                 ULocale locale = new ULocale(TITLE_DATA_[i++]);\r
309                 int breakType = Integer.parseInt(TITLE_DATA_[i++]);\r
310                 String optionsString = TITLE_DATA_[i++];\r
311                 BreakIterator iter =\r
312                     breakType >= 0 ?\r
313                         BreakIterator.getBreakInstance(locale, breakType) :\r
314                         breakType == -2 ?\r
315                             // Open a trivial break iterator that only delivers { 0, length }\r
316                             // or even just { 0 } as boundaries.\r
317                             new RuleBasedBreakIterator(".*;") :\r
318                             null;\r
319                 int options = 0;\r
320                 if (optionsString.indexOf('L') >= 0) {\r
321                     options |= UCharacter.TITLECASE_NO_LOWERCASE;\r
322                 }\r
323                 if (optionsString.indexOf('A') >= 0) {\r
324                     options |= UCharacter.TITLECASE_NO_BREAK_ADJUSTMENT;\r
325                 }\r
326                 String result = UCharacter.toTitleCase(locale, test, iter, options);\r
327                 if (!expected.equals(result)) {\r
328                     errln("titlecasing for " + prettify(test) + " (options " + options + ") should be " +\r
329                           prettify(expected) + " but got " +\r
330                           prettify(result));\r
331                 }\r
332                 if (options == 0) {\r
333                     result = UCharacter.toTitleCase(locale, test, iter);\r
334                     if (!expected.equals(result)) {\r
335                         errln("titlecasing for " + prettify(test) + " should be " +\r
336                               prettify(expected) + " but got " +\r
337                               prettify(result));\r
338                     }\r
339                 }\r
340             }\r
341          }catch(Exception ex){\r
342             warnln("Could not find data for BreakIterators");\r
343          }\r
344     }\r
345 \r
346     public void TestDutchTitle() {\r
347         ULocale LOC_DUTCH = new ULocale("nl");\r
348         int options = 0;\r
349         options |= UCharacter.TITLECASE_NO_LOWERCASE;\r
350         BreakIterator iter = BreakIterator.getWordInstance(LOC_DUTCH);\r
351 \r
352         assertEquals("Dutch titlecase check in English",\r
353                 "Ijssel Igloo Ijmuiden",\r
354                 UCharacter.toTitleCase(ULocale.ENGLISH, "ijssel igloo IJMUIDEN", null));\r
355 \r
356         assertEquals("Dutch titlecase check in Dutch",\r
357                 "IJssel Igloo IJmuiden",\r
358                 UCharacter.toTitleCase(LOC_DUTCH, "ijssel igloo IJMUIDEN", null));\r
359 \r
360         iter.setText("ijssel igloo IjMUIdEN iPoD ijenough");\r
361         assertEquals("Dutch titlecase check in Dutch with nolowercase option",\r
362                 "IJssel Igloo IJMUIdEN IPoD IJenough",\r
363                 UCharacter.toTitleCase(LOC_DUTCH, "ijssel igloo IjMUIdEN iPoD ijenough", iter, options));\r
364     }\r
365 \r
366     public void TestSpecial()\r
367     {\r
368         for (int i = 0; i < SPECIAL_LOCALES_.length; i ++) {\r
369             int    j      = i * 3;\r
370             Locale locale = SPECIAL_LOCALES_[i];\r
371             String str    = SPECIAL_DATA_[j];\r
372             if (locale != null) {\r
373                 if (!SPECIAL_DATA_[j + 1].equals(\r
374                      UCharacter.toLowerCase(locale, str))) {\r
375                     errln("error lowercasing special characters " +\r
376                         hex(str) + " expected " + hex(SPECIAL_DATA_[j + 1])\r
377                         + " for locale " + locale.toString() + " but got " +\r
378                         hex(UCharacter.toLowerCase(locale, str)));\r
379                 }\r
380                 if (!SPECIAL_DATA_[j + 2].equals(\r
381                      UCharacter.toUpperCase(locale, str))) {\r
382                     errln("error uppercasing special characters " +\r
383                         hex(str) + " expected " + SPECIAL_DATA_[j + 2]\r
384                         + " for locale " + locale.toString() + " but got " +\r
385                         hex(UCharacter.toUpperCase(locale, str)));\r
386                 }\r
387             }\r
388             else {\r
389                 if (!SPECIAL_DATA_[j + 1].equals(\r
390                      UCharacter.toLowerCase(str))) {\r
391                     errln("error lowercasing special characters " +\r
392                         hex(str) + " expected " + SPECIAL_DATA_[j + 1] +\r
393                         " but got " +\r
394                         hex(UCharacter.toLowerCase(locale, str)));\r
395                 }\r
396                 if (!SPECIAL_DATA_[j + 2].equals(\r
397                      UCharacter.toUpperCase(locale, str))) {\r
398                     errln("error uppercasing special characters " +\r
399                         hex(str) + " expected " + SPECIAL_DATA_[j + 2] +\r
400                         " but got " +\r
401                         hex(UCharacter.toUpperCase(locale, str)));\r
402                 }\r
403             }\r
404         }\r
405 \r
406         // turkish & azerbaijani dotless i & dotted I\r
407         // remove dot above if there was a capital I before and there are no\r
408         // more accents above\r
409         if (!SPECIAL_DOTTED_LOWER_TURKISH_.equals(UCharacter.toLowerCase(\r
410                                         TURKISH_LOCALE_, SPECIAL_DOTTED_))) {\r
411             errln("error in dots.toLower(tr)=\"" + SPECIAL_DOTTED_ +\r
412                   "\" expected \"" + SPECIAL_DOTTED_LOWER_TURKISH_ +\r
413                   "\" but got " + UCharacter.toLowerCase(TURKISH_LOCALE_,\r
414                                                          SPECIAL_DOTTED_));\r
415         }\r
416         if (!SPECIAL_DOTTED_LOWER_GERMAN_.equals(UCharacter.toLowerCase(\r
417                                              GERMAN_LOCALE_, SPECIAL_DOTTED_))) {\r
418             errln("error in dots.toLower(de)=\"" + SPECIAL_DOTTED_ +\r
419                   "\" expected \"" + SPECIAL_DOTTED_LOWER_GERMAN_ +\r
420                   "\" but got " + UCharacter.toLowerCase(GERMAN_LOCALE_,\r
421                                                          SPECIAL_DOTTED_));\r
422         }\r
423 \r
424         // lithuanian dot above in uppercasing\r
425         if (!SPECIAL_DOT_ABOVE_UPPER_LITHUANIAN_.equals(\r
426              UCharacter.toUpperCase(LITHUANIAN_LOCALE_, SPECIAL_DOT_ABOVE_))) {\r
427             errln("error in dots.toUpper(lt)=\"" + SPECIAL_DOT_ABOVE_ +\r
428                   "\" expected \"" + SPECIAL_DOT_ABOVE_UPPER_LITHUANIAN_ +\r
429                   "\" but got " + UCharacter.toUpperCase(LITHUANIAN_LOCALE_,\r
430                                                          SPECIAL_DOT_ABOVE_));\r
431         }\r
432         if (!SPECIAL_DOT_ABOVE_UPPER_GERMAN_.equals(UCharacter.toUpperCase(\r
433                                         GERMAN_LOCALE_, SPECIAL_DOT_ABOVE_))) {\r
434             errln("error in dots.toUpper(de)=\"" + SPECIAL_DOT_ABOVE_ +\r
435                   "\" expected \"" + SPECIAL_DOT_ABOVE_UPPER_GERMAN_ +\r
436                   "\" but got " + UCharacter.toUpperCase(GERMAN_LOCALE_,\r
437                                                          SPECIAL_DOT_ABOVE_));\r
438         }\r
439 \r
440         // lithuanian adds dot above to i in lowercasing if there are more\r
441         // above accents\r
442         if (!SPECIAL_DOT_ABOVE_LOWER_LITHUANIAN_.equals(\r
443             UCharacter.toLowerCase(LITHUANIAN_LOCALE_,\r
444                                    SPECIAL_DOT_ABOVE_UPPER_))) {\r
445             errln("error in dots.toLower(lt)=\"" + SPECIAL_DOT_ABOVE_UPPER_ +\r
446                   "\" expected \"" + SPECIAL_DOT_ABOVE_LOWER_LITHUANIAN_ +\r
447                   "\" but got " + UCharacter.toLowerCase(LITHUANIAN_LOCALE_,\r
448                                                    SPECIAL_DOT_ABOVE_UPPER_));\r
449         }\r
450         if (!SPECIAL_DOT_ABOVE_LOWER_GERMAN_.equals(\r
451             UCharacter.toLowerCase(GERMAN_LOCALE_,\r
452                                    SPECIAL_DOT_ABOVE_UPPER_))) {\r
453             errln("error in dots.toLower(de)=\"" + SPECIAL_DOT_ABOVE_UPPER_ +\r
454                   "\" expected \"" + SPECIAL_DOT_ABOVE_LOWER_GERMAN_ +\r
455                   "\" but got " + UCharacter.toLowerCase(GERMAN_LOCALE_,\r
456                                                    SPECIAL_DOT_ABOVE_UPPER_));\r
457         }\r
458     }\r
459 \r
460     /**\r
461      * Tests for case mapping in the file SpecialCasing.txt\r
462      * This method reads in SpecialCasing.txt file for testing purposes.\r
463      * A default path is provided relative to the src path, however the user\r
464      * could set a system property to change the directory path.<br>\r
465      * e.g. java -DUnicodeData="data_dir_path" com.ibm.dev.test.lang.UCharacterTest\r
466      */\r
467     public void TestSpecialCasingTxt()\r
468     {\r
469         try\r
470         {\r
471             // reading in the SpecialCasing file\r
472             BufferedReader input = TestUtil.getDataReader(\r
473                                                   "unicode/SpecialCasing.txt");\r
474             while (true)\r
475             {\r
476                 String s = input.readLine();\r
477                 if (s == null) {\r
478                     break;\r
479                 }\r
480                 if (s.length() == 0 || s.charAt(0) == '#') {\r
481                     continue;\r
482                 }\r
483 \r
484                 String chstr[] = getUnicodeStrings(s);\r
485                 StringBuffer strbuffer   = new StringBuffer(chstr[0]);\r
486                 StringBuffer lowerbuffer = new StringBuffer(chstr[1]);\r
487                 StringBuffer upperbuffer = new StringBuffer(chstr[3]);\r
488                 Locale locale = null;\r
489                 for (int i = 4; i < chstr.length; i ++) {\r
490                     String condition = chstr[i];\r
491                     if (Character.isLowerCase(chstr[i].charAt(0))) {\r
492                         // specified locale\r
493                         locale = new Locale(chstr[i], "");\r
494                     }\r
495                     else if (condition.compareToIgnoreCase("Not_Before_Dot")\r
496                                                       == 0) {\r
497                         // turns I into dotless i\r
498                     }\r
499                     else if (condition.compareToIgnoreCase(\r
500                                                       "More_Above") == 0) {\r
501                             strbuffer.append((char)0x300);\r
502                             lowerbuffer.append((char)0x300);\r
503                             upperbuffer.append((char)0x300);\r
504                     }\r
505                     else if (condition.compareToIgnoreCase(\r
506                                                 "After_Soft_Dotted") == 0) {\r
507                             strbuffer.insert(0, 'i');\r
508                             lowerbuffer.insert(0, 'i');\r
509                             String lang = "";\r
510                             if (locale != null) {\r
511                                 lang = locale.getLanguage();\r
512                             }\r
513                             if (lang.equals("tr") || lang.equals("az")) {\r
514                                 // this is to be removed when 4.0 data comes out\r
515                                 // and upperbuffer.insert uncommented\r
516                                 // see jitterbug 2344\r
517                                 chstr[i] = "After_I";\r
518                                 strbuffer.deleteCharAt(0);\r
519                                 lowerbuffer.deleteCharAt(0);\r
520                                 i --;\r
521                                 continue;\r
522                                 // upperbuffer.insert(0, '\u0130');\r
523                             }\r
524                             else {\r
525                                 upperbuffer.insert(0, 'I');\r
526                             }\r
527                     }\r
528                     else if (condition.compareToIgnoreCase(\r
529                                                       "Final_Sigma") == 0) {\r
530                             strbuffer.insert(0, 'c');\r
531                             lowerbuffer.insert(0, 'c');\r
532                             upperbuffer.insert(0, 'C');\r
533                     }\r
534                     else if (condition.compareToIgnoreCase("After_I") == 0) {\r
535                             strbuffer.insert(0, 'I');\r
536                             lowerbuffer.insert(0, 'i');\r
537                             String lang = "";\r
538                             if (locale != null) {\r
539                                 lang = locale.getLanguage();\r
540                             }\r
541                             if (lang.equals("tr") || lang.equals("az")) {\r
542                                 upperbuffer.insert(0, 'I');\r
543                             }\r
544                     }\r
545                 }\r
546                 chstr[0] = strbuffer.toString();\r
547                 chstr[1] = lowerbuffer.toString();\r
548                 chstr[3] = upperbuffer.toString();\r
549                 if (locale == null) {\r
550                     if (!UCharacter.toLowerCase(chstr[0]).equals(chstr[1])) {\r
551                         errln(s);\r
552                         errln("Fail: toLowerCase for character " +\r
553                               Utility.escape(chstr[0]) + ", expected "\r
554                               + Utility.escape(chstr[1]) + " but resulted in " +\r
555                               Utility.escape(UCharacter.toLowerCase(chstr[0])));\r
556                     }\r
557                     if (!UCharacter.toUpperCase(chstr[0]).equals(chstr[3])) {\r
558                         errln(s);\r
559                         errln("Fail: toUpperCase for character " +\r
560                               Utility.escape(chstr[0]) + ", expected "\r
561                               + Utility.escape(chstr[3]) + " but resulted in " +\r
562                               Utility.escape(UCharacter.toUpperCase(chstr[0])));\r
563                     }\r
564                 }\r
565                 else {\r
566                     if (!UCharacter.toLowerCase(locale, chstr[0]).equals(\r
567                                                                    chstr[1])) {\r
568                         errln(s);\r
569                         errln("Fail: toLowerCase for character " +\r
570                               Utility.escape(chstr[0]) + ", expected "\r
571                               + Utility.escape(chstr[1]) + " but resulted in " +\r
572                               Utility.escape(UCharacter.toLowerCase(locale,\r
573                                                                     chstr[0])));\r
574                     }\r
575                     if (!UCharacter.toUpperCase(locale, chstr[0]).equals(\r
576                                                                    chstr[3])) {\r
577                         errln(s);\r
578                         errln("Fail: toUpperCase for character " +\r
579                               Utility.escape(chstr[0]) + ", expected "\r
580                               + Utility.escape(chstr[3]) + " but resulted in " +\r
581                               Utility.escape(UCharacter.toUpperCase(locale,\r
582                                                                     chstr[0])));\r
583                     }\r
584                 }\r
585             }\r
586             input.close();\r
587         }\r
588         catch (Exception e)\r
589         {\r
590           e.printStackTrace();\r
591         }\r
592     }\r
593 \r
594     public void TestUpperLower()\r
595     {\r
596         int upper[] = {0x0041, 0x0042, 0x00b2, 0x01c4, 0x01c6, 0x01c9, 0x01c8,\r
597                         0x01c9, 0x000c};\r
598         int lower[] = {0x0061, 0x0062, 0x00b2, 0x01c6, 0x01c6, 0x01c9, 0x01c9,\r
599                         0x01c9, 0x000c};\r
600         String upperTest = "abcdefg123hij.?:klmno";\r
601         String lowerTest = "ABCDEFG123HIJ.?:KLMNO";\r
602 \r
603         // Checks LetterLike Symbols which were previously a source of\r
604         // confusion [Bertrand A. D. 02/04/98]\r
605         for (int i = 0x2100; i < 0x2138; i ++) {\r
606             /* Unicode 5.0 adds lowercase U+214E (TURNED SMALL F) to U+2132 (TURNED CAPITAL F) */\r
607             if (i != 0x2126 && i != 0x212a && i != 0x212b && i!=0x2132) {\r
608                 if (i != UCharacter.toLowerCase(i)) { // itself\r
609                     errln("Failed case conversion with itself: \\u"\r
610                             + Utility.hex(i, 4));\r
611                 }\r
612                 if (i != UCharacter.toUpperCase(i)) {\r
613                     errln("Failed case conversion with itself: \\u"\r
614                             + Utility.hex(i, 4));\r
615                 }\r
616             }\r
617         }\r
618         for (int i = 0; i < upper.length; i ++) {\r
619             if (UCharacter.toLowerCase(upper[i]) != lower[i]) {\r
620                 errln("FAILED UCharacter.tolower() for \\u"\r
621                         + Utility.hex(upper[i], 4)\r
622                         + " Expected \\u" + Utility.hex(lower[i], 4)\r
623                         + " Got \\u"\r
624                         + Utility.hex(UCharacter.toLowerCase(upper[i]), 4));\r
625             }\r
626         }\r
627         logln("testing upper lower");\r
628         for (int i = 0; i < upperTest.length(); i ++) {\r
629             logln("testing to upper to lower");\r
630             if (UCharacter.isLetter(upperTest.charAt(i)) &&\r
631                 !UCharacter.isLowerCase(upperTest.charAt(i))) {\r
632                 errln("Failed isLowerCase test at \\u"\r
633                         + Utility.hex(upperTest.charAt(i), 4));\r
634             }\r
635             else if (UCharacter.isLetter(lowerTest.charAt(i))\r
636                      && !UCharacter.isUpperCase(lowerTest.charAt(i))) {\r
637                 errln("Failed isUpperCase test at \\u"\r
638                       + Utility.hex(lowerTest.charAt(i), 4));\r
639             }\r
640             else if (upperTest.charAt(i)\r
641                             != UCharacter.toLowerCase(lowerTest.charAt(i))) {\r
642                 errln("Failed case conversion from \\u"\r
643                         + Utility.hex(lowerTest.charAt(i), 4) + " To \\u"\r
644                         + Utility.hex(upperTest.charAt(i), 4));\r
645             }\r
646             else if (lowerTest.charAt(i)\r
647                     != UCharacter.toUpperCase(upperTest.charAt(i))) {\r
648                 errln("Failed case conversion : \\u"\r
649                         + Utility.hex(upperTest.charAt(i), 4) + " To \\u"\r
650                         + Utility.hex(lowerTest.charAt(i), 4));\r
651             }\r
652             else if (upperTest.charAt(i)\r
653                     != UCharacter.toLowerCase(upperTest.charAt(i))) {\r
654                 errln("Failed case conversion with itself: \\u"\r
655                         + Utility.hex(upperTest.charAt(i)));\r
656             }\r
657             else if (lowerTest.charAt(i)\r
658                     != UCharacter.toUpperCase(lowerTest.charAt(i))) {\r
659                 errln("Failed case conversion with itself: \\u"\r
660                         + Utility.hex(lowerTest.charAt(i)));\r
661             }\r
662         }\r
663         logln("done testing upper Lower");\r
664     }\r
665 \r
666     // private data members - test data --------------------------------------\r
667 \r
668     private static final Locale TURKISH_LOCALE_ = new Locale("tr", "TR");\r
669     private static final Locale GERMAN_LOCALE_ = new Locale("de", "DE");\r
670     private static final Locale GREEK_LOCALE_ = new Locale("el", "GR");\r
671     private static final Locale ENGLISH_LOCALE_ = new Locale("en", "US");\r
672     private static final Locale LITHUANIAN_LOCALE_ = new Locale("lt", "LT");\r
673 \r
674     private static final int CHARACTER_UPPER_[] =\r
675                       {0x41, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047,\r
676                        0x00b1, 0x00b2, 0xb3, 0x0048, 0x0049, 0x004a, 0x002e,\r
677                        0x003f, 0x003a, 0x004b, 0x004c, 0x4d, 0x004e, 0x004f,\r
678                        0x01c4, 0x01c8, 0x000c, 0x0000};\r
679     private static final int CHARACTER_LOWER_[] =\r
680                       {0x61, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067,\r
681                        0x00b1, 0x00b2, 0xb3, 0x0068, 0x0069, 0x006a, 0x002e,\r
682                        0x003f, 0x003a, 0x006b, 0x006c, 0x6d, 0x006e, 0x006f,\r
683                        0x01c6, 0x01c9, 0x000c, 0x0000};\r
684 \r
685     /*\r
686      * CaseFolding.txt says about i and its cousins:\r
687      *   0049; C; 0069; # LATIN CAPITAL LETTER I\r
688      *   0049; T; 0131; # LATIN CAPITAL LETTER I\r
689      *\r
690      *   0130; F; 0069 0307; # LATIN CAPITAL LETTER I WITH DOT ABOVE\r
691      *   0130; T; 0069; # LATIN CAPITAL LETTER I WITH DOT ABOVE\r
692      * That's all.\r
693      * See CaseFolding.txt and the Unicode Standard for how to apply the case foldings.\r
694      */\r
695     private static final int FOLDING_SIMPLE_[] = {\r
696         // input, default, exclude special i\r
697         0x61,   0x61,  0x61,\r
698         0x49,   0x69,  0x131,\r
699         0x130,  0x130, 0x69,\r
700         0x131,  0x131, 0x131,\r
701         0xdf,   0xdf,  0xdf,\r
702         0xfb03, 0xfb03, 0xfb03,\r
703         0x1040e,0x10436,0x10436,\r
704         0x5ffff,0x5ffff,0x5ffff\r
705     };\r
706     private static final String FOLDING_MIXED_[] =\r
707                           {"\u0061\u0042\u0130\u0049\u0131\u03d0\u00df\ufb03\ud93f\udfff",\r
708                            "A\u00df\u00b5\ufb03\uD801\uDC0C\u0130\u0131"};\r
709     private static final String FOLDING_DEFAULT_[] =\r
710          {"\u0061\u0062\u0069\u0307\u0069\u0131\u03b2\u0073\u0073\u0066\u0066\u0069\ud93f\udfff",\r
711           "ass\u03bcffi\uD801\uDC34i\u0307\u0131"};\r
712     private static final String FOLDING_EXCLUDE_SPECIAL_I_[] =\r
713          {"\u0061\u0062\u0069\u0131\u0131\u03b2\u0073\u0073\u0066\u0066\u0069\ud93f\udfff",\r
714           "ass\u03bcffi\uD801\uDC34i\u0131"};\r
715     /**\r
716      * "IESUS CHRISTOS"\r
717      */\r
718     private static final String SHARED_UPPERCASE_GREEK_ =\r
719         "\u0399\u0395\u03a3\u03a5\u03a3\u0020\u03a7\u03a1\u0399\u03a3\u03a4\u039f\u03a3";\r
720     /**\r
721      * "iesus christos"\r
722      */\r
723     private static final String SHARED_LOWERCASE_GREEK_ =\r
724         "\u03b9\u03b5\u03c3\u03c5\u03c2\u0020\u03c7\u03c1\u03b9\u03c3\u03c4\u03bf\u03c2";\r
725     private static final String SHARED_LOWERCASE_TURKISH_ =\r
726         "\u0069\u0073\u0074\u0061\u006e\u0062\u0075\u006c\u002c\u0020\u006e\u006f\u0074\u0020\u0063\u006f\u006e\u0073\u0074\u0061\u006e\u0074\u0131\u006e\u006f\u0070\u006c\u0065\u0021";\r
727     private static final String SHARED_UPPERCASE_TURKISH_ =\r
728         "\u0054\u004f\u0050\u004b\u0041\u0050\u0049\u0020\u0050\u0041\u004c\u0041\u0043\u0045\u002c\u0020\u0130\u0053\u0054\u0041\u004e\u0042\u0055\u004c";\r
729     private static final String SHARED_UPPERCASE_ISTANBUL_ =\r
730                                           "\u0130STANBUL, NOT CONSTANTINOPLE!";\r
731     private static final String SHARED_LOWERCASE_ISTANBUL_ =\r
732                                           "i\u0307stanbul, not constantinople!";\r
733     private static final String SHARED_LOWERCASE_TOPKAP_ =\r
734                                           "topkap\u0131 palace, istanbul";\r
735     private static final String SHARED_UPPERCASE_TOPKAP_ =\r
736                                           "TOPKAPI PALACE, ISTANBUL";\r
737     private static final String SHARED_LOWERCASE_GERMAN_ =\r
738                                           "S\u00FC\u00DFmayrstra\u00DFe";\r
739     private static final String SHARED_UPPERCASE_GERMAN_ =\r
740                                           "S\u00DCSSMAYRSTRASSE";\r
741 \r
742     private static final String UPPER_BEFORE_ =\r
743          "\u0061\u0042\u0069\u03c2\u00df\u03c3\u002f\ufb03\ufb03\ufb03\ud93f\udfff";\r
744     private static final String UPPER_ROOT_ =\r
745          "\u0041\u0042\u0049\u03a3\u0053\u0053\u03a3\u002f\u0046\u0046\u0049\u0046\u0046\u0049\u0046\u0046\u0049\ud93f\udfff";\r
746     private static final String UPPER_TURKISH_ =\r
747          "\u0041\u0042\u0130\u03a3\u0053\u0053\u03a3\u002f\u0046\u0046\u0049\u0046\u0046\u0049\u0046\u0046\u0049\ud93f\udfff";\r
748     private static final String UPPER_MINI_ = "\u00df\u0061";\r
749     private static final String UPPER_MINI_UPPER_ = "\u0053\u0053\u0041";\r
750 \r
751     private static final String LOWER_BEFORE_ =\r
752                       "\u0061\u0042\u0049\u03a3\u00df\u03a3\u002f\ud93f\udfff";\r
753     private static final String LOWER_ROOT_ =\r
754                       "\u0061\u0062\u0069\u03c3\u00df\u03c2\u002f\ud93f\udfff";\r
755     private static final String LOWER_TURKISH_ = \r
756                       "\u0061\u0062\u0131\u03c3\u00df\u03c2\u002f\ud93f\udfff";\r
757 \r
758     /**\r
759      * each item is an array with input string, result string, locale ID, break iterator, options\r
760      * the break iterator is specified as an int, same as in BreakIterator.KIND_*:\r
761      * 0=KIND_CHARACTER  1=KIND_WORD  2=KIND_LINE  3=KIND_SENTENCE  4=KIND_TITLE  -1=default (NULL=words)  -2=no breaks (.*)\r
762      * options: T=U_FOLD_CASE_EXCLUDE_SPECIAL_I  L=U_TITLECASE_NO_LOWERCASE  A=U_TITLECASE_NO_BREAK_ADJUSTMENT\r
763      * see ICU4C source/test/testdata/casing.txt\r
764      */\r
765     private static final String TITLE_DATA_[] = {\r
766         "\u0061\u0042\u0020\u0069\u03c2\u0020\u00df\u03c3\u002f\ufb03\ud93f\udfff",\r
767         "\u0041\u0042\u0020\u0049\u03a3\u0020\u0053\u0073\u03a3\u002f\u0046\u0066\u0069\ud93f\udfff",\r
768         "",\r
769         "0",\r
770         "",\r
771 \r
772         "\u0061\u0042\u0020\u0069\u03c2\u0020\u00df\u03c3\u002f\ufb03\ud93f\udfff",\r
773         "\u0041\u0062\u0020\u0049\u03c2\u0020\u0053\u0073\u03c3\u002f\u0046\u0066\u0069\ud93f\udfff",\r
774         "",\r
775         "1",\r
776         "",\r
777 \r
778         "\u02bbaMeLikA huI P\u016b \u02bb\u02bb\u02bbiA", "\u02bbAmelika Hui P\u016b \u02bb\u02bb\u02bbIa", // titlecase first _cased_ letter, j4933\r
779         "",\r
780         "-1",\r
781         "",\r
782 \r
783         " tHe QUIcK bRoWn", " The Quick Brown",\r
784         "",\r
785         "4",\r
786         "",\r
787 \r
788         "\u01c4\u01c5\u01c6\u01c7\u01c8\u01c9\u01ca\u01cb\u01cc", \r
789         "\u01c5\u01c5\u01c5\u01c8\u01c8\u01c8\u01cb\u01cb\u01cb", // UBRK_CHARACTER\r
790         "",\r
791         "0",\r
792         "",\r
793 \r
794         "\u01c9ubav ljubav", "\u01c8ubav Ljubav", // Lj vs. L+j\r
795         "",\r
796         "-1",\r
797         "",\r
798 \r
799         "'oH dOn'T tItLeCaSe AfTeR lEtTeR+'",  "'Oh Don't Titlecase After Letter+'",\r
800         "",\r
801         "-1",\r
802         "",\r
803 \r
804         "a \u02bbCaT. A \u02bbdOg! \u02bbeTc.",\r
805         "A \u02bbCat. A \u02bbDog! \u02bbEtc.",\r
806         "",\r
807         "-1",\r
808         "", // default\r
809 \r
810         "a \u02bbCaT. A \u02bbdOg! \u02bbeTc.",\r
811         "A \u02bbcat. A \u02bbdog! \u02bbetc.",\r
812         "",\r
813         "-1",\r
814         "A", // U_TITLECASE_NO_BREAK_ADJUSTMENT\r
815 \r
816         "a \u02bbCaT. A \u02bbdOg! \u02bbeTc.",\r
817         "A \u02bbCaT. A \u02bbdOg! \u02bbETc.",\r
818         "",\r
819         "3",\r
820         "L", // UBRK_SENTENCE and U_TITLECASE_NO_LOWERCASE\r
821 \r
822 \r
823         "\u02bbcAt! \u02bbeTc.",\r
824         "\u02bbCat! \u02bbetc.",\r
825         "",\r
826         "-2",\r
827         "", // -2=Trivial break iterator\r
828 \r
829         "\u02bbcAt! \u02bbeTc.",\r
830         "\u02bbcat! \u02bbetc.",\r
831         "",\r
832         "-2",\r
833         "A", // U_TITLECASE_NO_BREAK_ADJUSTMENT\r
834 \r
835         "\u02bbcAt! \u02bbeTc.",\r
836         "\u02bbCAt! \u02bbeTc.",\r
837         "",\r
838         "-2",\r
839         "L", // U_TITLECASE_NO_LOWERCASE\r
840 \r
841         "\u02bbcAt! \u02bbeTc.",\r
842         "\u02bbcAt! \u02bbeTc.",\r
843         "",\r
844         "-2",\r
845         "AL", // Both options\r
846 \r
847         // Test case for ticket #7251: UCharacter.toTitleCase() throws OutOfMemoryError\r
848         // when TITLECASE_NO_LOWERCASE encounters a single-letter word\r
849         "a b c",\r
850         "A B C",\r
851         "",\r
852         "1",\r
853         "L" // U_TITLECASE_NO_LOWERCASE\r
854     };\r
855 \r
856 \r
857     /**\r
858      * <p>basic string, lower string, upper string, title string</p>\r
859      */\r
860     private static final String SPECIAL_DATA_[] = {\r
861         UTF16.valueOf(0x1043C) + UTF16.valueOf(0x10414),\r
862         UTF16.valueOf(0x1043C) + UTF16.valueOf(0x1043C),\r
863         UTF16.valueOf(0x10414) + UTF16.valueOf(0x10414),\r
864         "ab'cD \uFB00i\u0131I\u0130 \u01C7\u01C8\u01C9 " +\r
865                          UTF16.valueOf(0x1043C) + UTF16.valueOf(0x10414),\r
866         "ab'cd \uFB00i\u0131ii\u0307 \u01C9\u01C9\u01C9 " +\r
867                               UTF16.valueOf(0x1043C) + UTF16.valueOf(0x1043C),\r
868         "AB'CD FFIII\u0130 \u01C7\u01C7\u01C7 " +\r
869                               UTF16.valueOf(0x10414) + UTF16.valueOf(0x10414),\r
870         // sigmas followed/preceded by cased letters\r
871         "i\u0307\u03a3\u0308j \u0307\u03a3\u0308j i\u00ad\u03a3\u0308 \u0307\u03a3\u0308 ",\r
872         "i\u0307\u03c3\u0308j \u0307\u03c3\u0308j i\u00ad\u03c2\u0308 \u0307\u03c3\u0308 ",\r
873         "I\u0307\u03a3\u0308J \u0307\u03a3\u0308J I\u00ad\u03a3\u0308 \u0307\u03a3\u0308 "\r
874     };\r
875     private static final Locale SPECIAL_LOCALES_[] = {\r
876         null,\r
877         ENGLISH_LOCALE_,\r
878         null,\r
879     };\r
880 \r
881     private static final String SPECIAL_DOTTED_ =\r
882             "I \u0130 I\u0307 I\u0327\u0307 I\u0301\u0307 I\u0327\u0307\u0301";\r
883     private static final String SPECIAL_DOTTED_LOWER_TURKISH_ =\r
884             "\u0131 i i i\u0327 \u0131\u0301\u0307 i\u0327\u0301";\r
885     private static final String SPECIAL_DOTTED_LOWER_GERMAN_ =\r
886             "i i\u0307 i\u0307 i\u0327\u0307 i\u0301\u0307 i\u0327\u0307\u0301";\r
887     private static final String SPECIAL_DOT_ABOVE_ =\r
888             "a\u0307 \u0307 i\u0307 j\u0327\u0307 j\u0301\u0307";\r
889     private static final String SPECIAL_DOT_ABOVE_UPPER_LITHUANIAN_ =\r
890             "A\u0307 \u0307 I J\u0327 J\u0301\u0307";\r
891     private static final String SPECIAL_DOT_ABOVE_UPPER_GERMAN_ =\r
892             "A\u0307 \u0307 I\u0307 J\u0327\u0307 J\u0301\u0307";\r
893     private static final String SPECIAL_DOT_ABOVE_UPPER_ =\r
894             "I I\u0301 J J\u0301 \u012e \u012e\u0301 \u00cc\u00cd\u0128";\r
895     private static final String SPECIAL_DOT_ABOVE_LOWER_LITHUANIAN_ =\r
896             "i i\u0307\u0301 j j\u0307\u0301 \u012f \u012f\u0307\u0301 i\u0307\u0300i\u0307\u0301i\u0307\u0303";\r
897     private static final String SPECIAL_DOT_ABOVE_LOWER_GERMAN_ =\r
898             "i i\u0301 j j\u0301 \u012f \u012f\u0301 \u00ec\u00ed\u0129";\r
899 \r
900     // private methods -------------------------------------------------------\r
901 \r
902     /**\r
903      * Converting the hex numbers represented betwee                             n ';' to Unicode strings\r
904      * @param str string to break up into Unicode strings\r
905      * @return array of Unicode strings ending with a null\r
906      */\r
907     private String[] getUnicodeStrings(String str)\r
908     {\r
909         Vector v = new Vector(10);\r
910         int start = 0;\r
911         for (int casecount = 4; casecount > 0; casecount --) {\r
912             int end = str.indexOf("; ", start);\r
913             String casestr = str.substring(start, end);\r
914             StringBuffer buffer = new StringBuffer();\r
915             int spaceoffset = 0;\r
916             while (spaceoffset < casestr.length()) {\r
917                 int nextspace = casestr.indexOf(' ', spaceoffset);\r
918                 if (nextspace == -1) {\r
919                     nextspace = casestr.length();\r
920                 }\r
921                 buffer.append((char)Integer.parseInt(\r
922                                      casestr.substring(spaceoffset, nextspace),\r
923                                                       16));\r
924                 spaceoffset = nextspace + 1;\r
925             }\r
926             start = end + 2;\r
927             v.add(buffer.toString());\r
928         }\r
929         int comments = str.indexOf(" #", start);\r
930         if (comments != -1 && comments != start) {\r
931             if (str.charAt(comments - 1) == ';') {\r
932                 comments --;\r
933             }\r
934             String conditions = str.substring(start, comments);\r
935             int offset = 0;\r
936             while (offset < conditions.length()) {\r
937                 int spaceoffset = conditions.indexOf(' ', offset);\r
938                 if (spaceoffset == -1) {\r
939                     spaceoffset = conditions.length();\r
940                 }\r
941                 v.add(conditions.substring(offset, spaceoffset));\r
942                 offset = spaceoffset + 1;\r
943             }\r
944         }\r
945         int size = v.size();\r
946         String result[] = new String[size];\r
947         for (int i = 0; i < size; i ++) {\r
948             result[i] = (String)v.elementAt(i);\r
949         }\r
950         return result;\r
951     }\r
952 }\r