2 *******************************************************************************
\r
3 * Copyright (C) 1996-2010, International Business Machines Corporation and
\r
4 * others. All Rights Reserved.
\r
5 *******************************************************************************
\r
9 package com.ibm.icu.dev.test.lang;
\r
12 import java.io.BufferedReader;
\r
13 import java.util.Locale;
\r
14 import java.util.Vector;
\r
16 import com.ibm.icu.dev.test.TestFmwk;
\r
17 import com.ibm.icu.dev.test.TestUtil;
\r
18 import com.ibm.icu.impl.Utility;
\r
19 import com.ibm.icu.lang.UCharacter;
\r
20 import com.ibm.icu.lang.UProperty;
\r
21 import com.ibm.icu.text.BreakIterator;
\r
22 import com.ibm.icu.text.RuleBasedBreakIterator;
\r
23 import com.ibm.icu.text.UTF16;
\r
24 import com.ibm.icu.util.ULocale;
\r
28 * <p>Testing character casing</p>
\r
29 * <p>Mostly following the test cases in strcase.cpp for ICU</p>
\r
30 * @author Syn Wee Quek
\r
31 * @since March 14 2002
\r
33 public final class UCharacterCaseTest extends TestFmwk
\r
35 // constructor -----------------------------------------------------------
\r
40 public UCharacterCaseTest()
\r
44 // public methods --------------------------------------------------------
\r
46 public static void main(String[] arg)
\r
50 UCharacterCaseTest test = new UCharacterCaseTest();
\r
55 e.printStackTrace();
\r
60 * Testing the uppercase and lowercase functions of UCharacter
\r
62 public void TestCharacter()
\r
64 for (int i = 0; i < CHARACTER_LOWER_.length; i ++) {
\r
65 if (UCharacter.isLetter(CHARACTER_LOWER_[i]) &&
\r
66 !UCharacter.isLowerCase(CHARACTER_LOWER_[i])) {
\r
67 errln("FAIL isLowerCase test for \\u" +
\r
68 hex(CHARACTER_LOWER_[i]));
\r
71 if (UCharacter.isLetter(CHARACTER_UPPER_[i]) &&
\r
72 !(UCharacter.isUpperCase(CHARACTER_UPPER_[i]) ||
\r
73 UCharacter.isTitleCase(CHARACTER_UPPER_[i]))) {
\r
74 errln("FAIL isUpperCase test for \\u" +
\r
75 hex(CHARACTER_UPPER_[i]));
\r
78 if (CHARACTER_LOWER_[i] !=
\r
79 UCharacter.toLowerCase(CHARACTER_UPPER_[i]) ||
\r
80 (CHARACTER_UPPER_[i] !=
\r
81 UCharacter.toUpperCase(CHARACTER_LOWER_[i]) &&
\r
82 CHARACTER_UPPER_[i] !=
\r
83 UCharacter.toTitleCase(CHARACTER_LOWER_[i]))) {
\r
84 errln("FAIL case conversion test for \\u" +
\r
85 hex(CHARACTER_UPPER_[i]) +
\r
86 " to \\u" + hex(CHARACTER_LOWER_[i]));
\r
89 if (CHARACTER_LOWER_[i] !=
\r
90 UCharacter.toLowerCase(CHARACTER_LOWER_[i])) {
\r
91 errln("FAIL lower case conversion test for \\u" +
\r
92 hex(CHARACTER_LOWER_[i]));
\r
95 if (CHARACTER_UPPER_[i] !=
\r
96 UCharacter.toUpperCase(CHARACTER_UPPER_[i]) &&
\r
97 CHARACTER_UPPER_[i] !=
\r
98 UCharacter.toTitleCase(CHARACTER_UPPER_[i])) {
\r
99 errln("FAIL upper case conversion test for \\u" +
\r
100 hex(CHARACTER_UPPER_[i]));
\r
103 logln("Ok \\u" + hex(CHARACTER_UPPER_[i]) + " and \\u" +
\r
104 hex(CHARACTER_LOWER_[i]));
\r
108 public void TestFolding()
\r
110 // test simple case folding
\r
111 for (int i = 0; i < FOLDING_SIMPLE_.length; i += 3) {
\r
112 if (UCharacter.foldCase(FOLDING_SIMPLE_[i], true) !=
\r
113 FOLDING_SIMPLE_[i + 1]) {
\r
114 errln("FAIL: foldCase(\\u" + hex(FOLDING_SIMPLE_[i]) +
\r
115 ", true) should be \\u" + hex(FOLDING_SIMPLE_[i + 1]));
\r
117 if (UCharacter.foldCase(FOLDING_SIMPLE_[i],
\r
118 UCharacter.FOLD_CASE_DEFAULT) !=
\r
119 FOLDING_SIMPLE_[i + 1]) {
\r
120 errln("FAIL: foldCase(\\u" + hex(FOLDING_SIMPLE_[i]) +
\r
121 ", UCharacter.FOLD_CASE_DEFAULT) should be \\u"
\r
122 + hex(FOLDING_SIMPLE_[i + 1]));
\r
124 if (UCharacter.foldCase(FOLDING_SIMPLE_[i], false) !=
\r
125 FOLDING_SIMPLE_[i + 2]) {
\r
126 errln("FAIL: foldCase(\\u" + hex(FOLDING_SIMPLE_[i]) +
\r
127 ", false) should be \\u" + hex(FOLDING_SIMPLE_[i + 2]));
\r
129 if (UCharacter.foldCase(FOLDING_SIMPLE_[i],
\r
130 UCharacter.FOLD_CASE_EXCLUDE_SPECIAL_I) !=
\r
131 FOLDING_SIMPLE_[i + 2]) {
\r
132 errln("FAIL: foldCase(\\u" + hex(FOLDING_SIMPLE_[i]) +
\r
133 ", UCharacter.FOLD_CASE_EXCLUDE_SPECIAL_I) should be \\u"
\r
134 + hex(FOLDING_SIMPLE_[i + 2]));
\r
138 // Test full string case folding with default option and separate
\r
140 if (!FOLDING_DEFAULT_[0].equals(UCharacter.foldCase(FOLDING_MIXED_[0], true))) {
\r
141 errln("FAIL: foldCase(" + prettify(FOLDING_MIXED_[0]) +
\r
142 ", true)=" + prettify(UCharacter.foldCase(FOLDING_MIXED_[0], true)) +
\r
143 " should be " + prettify(FOLDING_DEFAULT_[0]));
\r
146 if (!FOLDING_DEFAULT_[0].equals(UCharacter.foldCase(FOLDING_MIXED_[0], UCharacter.FOLD_CASE_DEFAULT))) {
\r
147 errln("FAIL: foldCase(" + prettify(FOLDING_MIXED_[0]) +
\r
148 ", UCharacter.FOLD_CASE_DEFAULT)=" + prettify(UCharacter.foldCase(FOLDING_MIXED_[0], UCharacter.FOLD_CASE_DEFAULT))
\r
149 + " should be " + prettify(FOLDING_DEFAULT_[0]));
\r
152 if (!FOLDING_EXCLUDE_SPECIAL_I_[0].equals(
\r
153 UCharacter.foldCase(FOLDING_MIXED_[0], false))) {
\r
154 errln("FAIL: foldCase(" + prettify(FOLDING_MIXED_[0]) +
\r
155 ", false)=" + prettify(UCharacter.foldCase(FOLDING_MIXED_[0], false))
\r
156 + " should be " + prettify(FOLDING_EXCLUDE_SPECIAL_I_[0]));
\r
159 if (!FOLDING_EXCLUDE_SPECIAL_I_[0].equals(
\r
160 UCharacter.foldCase(FOLDING_MIXED_[0], UCharacter.FOLD_CASE_EXCLUDE_SPECIAL_I))) {
\r
161 errln("FAIL: foldCase(" + prettify(FOLDING_MIXED_[0]) +
\r
162 ", UCharacter.FOLD_CASE_EXCLUDE_SPECIAL_I)=" + prettify(UCharacter.foldCase(FOLDING_MIXED_[0], UCharacter.FOLD_CASE_EXCLUDE_SPECIAL_I))
\r
163 + " should be " + prettify(FOLDING_EXCLUDE_SPECIAL_I_[0]));
\r
166 if (!FOLDING_DEFAULT_[1].equals(UCharacter.foldCase(FOLDING_MIXED_[1], true))) {
\r
167 errln("FAIL: foldCase(" + prettify(FOLDING_MIXED_[1]) +
\r
168 ", true)=" + prettify(UCharacter.foldCase(FOLDING_MIXED_[1], true))
\r
169 + " should be " + prettify(FOLDING_DEFAULT_[1]));
\r
172 if (!FOLDING_DEFAULT_[1].equals(UCharacter.foldCase(FOLDING_MIXED_[1], UCharacter.FOLD_CASE_DEFAULT))) {
\r
173 errln("FAIL: foldCase(" + prettify(FOLDING_MIXED_[1]) +
\r
174 ", UCharacter.FOLD_CASE_DEFAULT)=" + prettify(UCharacter.foldCase(FOLDING_MIXED_[1], UCharacter.FOLD_CASE_DEFAULT))
\r
175 + " should be " + prettify(FOLDING_DEFAULT_[1]));
\r
178 // alternate handling for dotted I/dotless i (U+0130, U+0131)
\r
179 if (!FOLDING_EXCLUDE_SPECIAL_I_[1].equals(
\r
180 UCharacter.foldCase(FOLDING_MIXED_[1], false))) {
\r
181 errln("FAIL: foldCase(" + prettify(FOLDING_MIXED_[1]) +
\r
182 ", false)=" + prettify(UCharacter.foldCase(FOLDING_MIXED_[1], false))
\r
183 + " should be " + prettify(FOLDING_EXCLUDE_SPECIAL_I_[1]));
\r
186 if (!FOLDING_EXCLUDE_SPECIAL_I_[1].equals(
\r
187 UCharacter.foldCase(FOLDING_MIXED_[1], UCharacter.FOLD_CASE_EXCLUDE_SPECIAL_I))) {
\r
188 errln("FAIL: foldCase(" + prettify(FOLDING_MIXED_[1]) +
\r
189 ", UCharacter.FOLD_CASE_EXCLUDE_SPECIAL_I)=" + prettify(UCharacter.foldCase(FOLDING_MIXED_[1], UCharacter.FOLD_CASE_EXCLUDE_SPECIAL_I))
\r
191 + prettify(FOLDING_EXCLUDE_SPECIAL_I_[1]));
\r
196 * Testing the strings case mapping methods
\r
198 public void TestUpper()
\r
200 // uppercase with root locale and in the same buffer
\r
201 if (!UPPER_ROOT_.equals(UCharacter.toUpperCase(UPPER_BEFORE_))) {
\r
202 errln("Fail " + UPPER_BEFORE_ + " after uppercase should be " +
\r
203 UPPER_ROOT_ + " instead got " +
\r
204 UCharacter.toUpperCase(UPPER_BEFORE_));
\r
207 // uppercase with turkish locale and separate buffers
\r
208 if (!UPPER_TURKISH_.equals(UCharacter.toUpperCase(TURKISH_LOCALE_,
\r
210 errln("Fail " + UPPER_BEFORE_ +
\r
211 " after turkish-sensitive uppercase should be " +
\r
212 UPPER_TURKISH_ + " instead of " +
\r
213 UCharacter.toUpperCase(TURKISH_LOCALE_, UPPER_BEFORE_));
\r
216 // uppercase a short string with root locale
\r
217 if (!UPPER_MINI_UPPER_.equals(UCharacter.toUpperCase(UPPER_MINI_))) {
\r
218 errln("error in toUpper(root locale)=\"" + UPPER_MINI_ +
\r
219 "\" expected \"" + UPPER_MINI_UPPER_ + "\"");
\r
222 if (!SHARED_UPPERCASE_TOPKAP_.equals(
\r
223 UCharacter.toUpperCase(SHARED_LOWERCASE_TOPKAP_))) {
\r
224 errln("toUpper failed: expected \"" +
\r
225 SHARED_UPPERCASE_TOPKAP_ + "\", got \"" +
\r
226 UCharacter.toUpperCase(SHARED_LOWERCASE_TOPKAP_) + "\".");
\r
229 if (!SHARED_UPPERCASE_TURKISH_.equals(
\r
230 UCharacter.toUpperCase(TURKISH_LOCALE_,
\r
231 SHARED_LOWERCASE_TOPKAP_))) {
\r
232 errln("toUpper failed: expected \"" +
\r
233 SHARED_UPPERCASE_TURKISH_ + "\", got \"" +
\r
234 UCharacter.toUpperCase(TURKISH_LOCALE_,
\r
235 SHARED_LOWERCASE_TOPKAP_) + "\".");
\r
238 if (!SHARED_UPPERCASE_GERMAN_.equals(
\r
239 UCharacter.toUpperCase(GERMAN_LOCALE_,
\r
240 SHARED_LOWERCASE_GERMAN_))) {
\r
241 errln("toUpper failed: expected \"" + SHARED_UPPERCASE_GERMAN_
\r
242 + "\", got \"" + UCharacter.toUpperCase(GERMAN_LOCALE_,
\r
243 SHARED_LOWERCASE_GERMAN_) + "\".");
\r
246 if (!SHARED_UPPERCASE_GREEK_.equals(
\r
247 UCharacter.toUpperCase(SHARED_LOWERCASE_GREEK_))) {
\r
248 errln("toLower failed: expected \"" + SHARED_UPPERCASE_GREEK_ +
\r
249 "\", got \"" + UCharacter.toUpperCase(
\r
250 SHARED_LOWERCASE_GREEK_) + "\".");
\r
254 public void TestLower()
\r
256 if (!LOWER_ROOT_.equals(UCharacter.toLowerCase(LOWER_BEFORE_))) {
\r
257 errln("Fail " + LOWER_BEFORE_ + " after lowercase should be " +
\r
258 LOWER_ROOT_ + " instead of " +
\r
259 UCharacter.toLowerCase(LOWER_BEFORE_));
\r
262 // lowercase with turkish locale
\r
263 if (!LOWER_TURKISH_.equals(UCharacter.toLowerCase(TURKISH_LOCALE_,
\r
265 errln("Fail " + LOWER_BEFORE_ +
\r
266 " after turkish-sensitive lowercase should be " +
\r
267 LOWER_TURKISH_ + " instead of " +
\r
268 UCharacter.toLowerCase(TURKISH_LOCALE_, LOWER_BEFORE_));
\r
270 if (!SHARED_LOWERCASE_ISTANBUL_.equals(
\r
271 UCharacter.toLowerCase(SHARED_UPPERCASE_ISTANBUL_))) {
\r
272 errln("1. toLower failed: expected \"" +
\r
273 SHARED_LOWERCASE_ISTANBUL_ + "\", got \"" +
\r
274 UCharacter.toLowerCase(SHARED_UPPERCASE_ISTANBUL_) + "\".");
\r
277 if (!SHARED_LOWERCASE_TURKISH_.equals(
\r
278 UCharacter.toLowerCase(TURKISH_LOCALE_,
\r
279 SHARED_UPPERCASE_ISTANBUL_))) {
\r
280 errln("2. toLower failed: expected \"" +
\r
281 SHARED_LOWERCASE_TURKISH_ + "\", got \"" +
\r
282 UCharacter.toLowerCase(TURKISH_LOCALE_,
\r
283 SHARED_UPPERCASE_ISTANBUL_) + "\".");
\r
285 if (!SHARED_LOWERCASE_GREEK_.equals(
\r
286 UCharacter.toLowerCase(GREEK_LOCALE_,
\r
287 SHARED_UPPERCASE_GREEK_))) {
\r
288 errln("toLower failed: expected \"" + SHARED_LOWERCASE_GREEK_ +
\r
289 "\", got \"" + UCharacter.toLowerCase(GREEK_LOCALE_,
\r
290 SHARED_UPPERCASE_GREEK_) + "\".");
\r
294 public void TestTitleRegression() throws java.io.IOException {
\r
295 boolean isIgnorable = UCharacter.hasBinaryProperty('\'', UProperty.CASE_IGNORABLE);
\r
296 assertTrue("Case Ignorable check of ASCII apostrophe", isIgnorable);
\r
297 assertEquals("Titlecase check",
\r
298 "The Quick Brown Fox Can't Jump Over The Lazy Dogs.",
\r
299 UCharacter.toTitleCase(ULocale.ENGLISH, "THE QUICK BROWN FOX CAN'T JUMP OVER THE LAZY DOGS.", null));
\r
302 public void TestTitle()
\r
305 for (int i = 0; i < TITLE_DATA_.length;) {
\r
306 String test = TITLE_DATA_[i++];
\r
307 String expected = TITLE_DATA_[i++];
\r
308 ULocale locale = new ULocale(TITLE_DATA_[i++]);
\r
309 int breakType = Integer.parseInt(TITLE_DATA_[i++]);
\r
310 String optionsString = TITLE_DATA_[i++];
\r
311 BreakIterator iter =
\r
313 BreakIterator.getBreakInstance(locale, breakType) :
\r
315 // Open a trivial break iterator that only delivers { 0, length }
\r
316 // or even just { 0 } as boundaries.
\r
317 new RuleBasedBreakIterator(".*;") :
\r
320 if (optionsString.indexOf('L') >= 0) {
\r
321 options |= UCharacter.TITLECASE_NO_LOWERCASE;
\r
323 if (optionsString.indexOf('A') >= 0) {
\r
324 options |= UCharacter.TITLECASE_NO_BREAK_ADJUSTMENT;
\r
326 String result = UCharacter.toTitleCase(locale, test, iter, options);
\r
327 if (!expected.equals(result)) {
\r
328 errln("titlecasing for " + prettify(test) + " (options " + options + ") should be " +
\r
329 prettify(expected) + " but got " +
\r
332 if (options == 0) {
\r
333 result = UCharacter.toTitleCase(locale, test, iter);
\r
334 if (!expected.equals(result)) {
\r
335 errln("titlecasing for " + prettify(test) + " should be " +
\r
336 prettify(expected) + " but got " +
\r
341 }catch(Exception ex){
\r
342 warnln("Could not find data for BreakIterators");
\r
346 public void TestDutchTitle() {
\r
347 ULocale LOC_DUTCH = new ULocale("nl");
\r
349 options |= UCharacter.TITLECASE_NO_LOWERCASE;
\r
350 BreakIterator iter = BreakIterator.getWordInstance(LOC_DUTCH);
\r
352 assertEquals("Dutch titlecase check in English",
\r
353 "Ijssel Igloo Ijmuiden",
\r
354 UCharacter.toTitleCase(ULocale.ENGLISH, "ijssel igloo IJMUIDEN", null));
\r
356 assertEquals("Dutch titlecase check in Dutch",
\r
357 "IJssel Igloo IJmuiden",
\r
358 UCharacter.toTitleCase(LOC_DUTCH, "ijssel igloo IJMUIDEN", null));
\r
360 iter.setText("ijssel igloo IjMUIdEN iPoD ijenough");
\r
361 assertEquals("Dutch titlecase check in Dutch with nolowercase option",
\r
362 "IJssel Igloo IJMUIdEN IPoD IJenough",
\r
363 UCharacter.toTitleCase(LOC_DUTCH, "ijssel igloo IjMUIdEN iPoD ijenough", iter, options));
\r
366 public void TestSpecial()
\r
368 for (int i = 0; i < SPECIAL_LOCALES_.length; i ++) {
\r
370 Locale locale = SPECIAL_LOCALES_[i];
\r
371 String str = SPECIAL_DATA_[j];
\r
372 if (locale != null) {
\r
373 if (!SPECIAL_DATA_[j + 1].equals(
\r
374 UCharacter.toLowerCase(locale, str))) {
\r
375 errln("error lowercasing special characters " +
\r
376 hex(str) + " expected " + hex(SPECIAL_DATA_[j + 1])
\r
377 + " for locale " + locale.toString() + " but got " +
\r
378 hex(UCharacter.toLowerCase(locale, str)));
\r
380 if (!SPECIAL_DATA_[j + 2].equals(
\r
381 UCharacter.toUpperCase(locale, str))) {
\r
382 errln("error uppercasing special characters " +
\r
383 hex(str) + " expected " + SPECIAL_DATA_[j + 2]
\r
384 + " for locale " + locale.toString() + " but got " +
\r
385 hex(UCharacter.toUpperCase(locale, str)));
\r
389 if (!SPECIAL_DATA_[j + 1].equals(
\r
390 UCharacter.toLowerCase(str))) {
\r
391 errln("error lowercasing special characters " +
\r
392 hex(str) + " expected " + SPECIAL_DATA_[j + 1] +
\r
394 hex(UCharacter.toLowerCase(locale, str)));
\r
396 if (!SPECIAL_DATA_[j + 2].equals(
\r
397 UCharacter.toUpperCase(locale, str))) {
\r
398 errln("error uppercasing special characters " +
\r
399 hex(str) + " expected " + SPECIAL_DATA_[j + 2] +
\r
401 hex(UCharacter.toUpperCase(locale, str)));
\r
406 // turkish & azerbaijani dotless i & dotted I
\r
407 // remove dot above if there was a capital I before and there are no
\r
408 // more accents above
\r
409 if (!SPECIAL_DOTTED_LOWER_TURKISH_.equals(UCharacter.toLowerCase(
\r
410 TURKISH_LOCALE_, SPECIAL_DOTTED_))) {
\r
411 errln("error in dots.toLower(tr)=\"" + SPECIAL_DOTTED_ +
\r
412 "\" expected \"" + SPECIAL_DOTTED_LOWER_TURKISH_ +
\r
413 "\" but got " + UCharacter.toLowerCase(TURKISH_LOCALE_,
\r
416 if (!SPECIAL_DOTTED_LOWER_GERMAN_.equals(UCharacter.toLowerCase(
\r
417 GERMAN_LOCALE_, SPECIAL_DOTTED_))) {
\r
418 errln("error in dots.toLower(de)=\"" + SPECIAL_DOTTED_ +
\r
419 "\" expected \"" + SPECIAL_DOTTED_LOWER_GERMAN_ +
\r
420 "\" but got " + UCharacter.toLowerCase(GERMAN_LOCALE_,
\r
424 // lithuanian dot above in uppercasing
\r
425 if (!SPECIAL_DOT_ABOVE_UPPER_LITHUANIAN_.equals(
\r
426 UCharacter.toUpperCase(LITHUANIAN_LOCALE_, SPECIAL_DOT_ABOVE_))) {
\r
427 errln("error in dots.toUpper(lt)=\"" + SPECIAL_DOT_ABOVE_ +
\r
428 "\" expected \"" + SPECIAL_DOT_ABOVE_UPPER_LITHUANIAN_ +
\r
429 "\" but got " + UCharacter.toUpperCase(LITHUANIAN_LOCALE_,
\r
430 SPECIAL_DOT_ABOVE_));
\r
432 if (!SPECIAL_DOT_ABOVE_UPPER_GERMAN_.equals(UCharacter.toUpperCase(
\r
433 GERMAN_LOCALE_, SPECIAL_DOT_ABOVE_))) {
\r
434 errln("error in dots.toUpper(de)=\"" + SPECIAL_DOT_ABOVE_ +
\r
435 "\" expected \"" + SPECIAL_DOT_ABOVE_UPPER_GERMAN_ +
\r
436 "\" but got " + UCharacter.toUpperCase(GERMAN_LOCALE_,
\r
437 SPECIAL_DOT_ABOVE_));
\r
440 // lithuanian adds dot above to i in lowercasing if there are more
\r
442 if (!SPECIAL_DOT_ABOVE_LOWER_LITHUANIAN_.equals(
\r
443 UCharacter.toLowerCase(LITHUANIAN_LOCALE_,
\r
444 SPECIAL_DOT_ABOVE_UPPER_))) {
\r
445 errln("error in dots.toLower(lt)=\"" + SPECIAL_DOT_ABOVE_UPPER_ +
\r
446 "\" expected \"" + SPECIAL_DOT_ABOVE_LOWER_LITHUANIAN_ +
\r
447 "\" but got " + UCharacter.toLowerCase(LITHUANIAN_LOCALE_,
\r
448 SPECIAL_DOT_ABOVE_UPPER_));
\r
450 if (!SPECIAL_DOT_ABOVE_LOWER_GERMAN_.equals(
\r
451 UCharacter.toLowerCase(GERMAN_LOCALE_,
\r
452 SPECIAL_DOT_ABOVE_UPPER_))) {
\r
453 errln("error in dots.toLower(de)=\"" + SPECIAL_DOT_ABOVE_UPPER_ +
\r
454 "\" expected \"" + SPECIAL_DOT_ABOVE_LOWER_GERMAN_ +
\r
455 "\" but got " + UCharacter.toLowerCase(GERMAN_LOCALE_,
\r
456 SPECIAL_DOT_ABOVE_UPPER_));
\r
461 * Tests for case mapping in the file SpecialCasing.txt
\r
462 * This method reads in SpecialCasing.txt file for testing purposes.
\r
463 * A default path is provided relative to the src path, however the user
\r
464 * could set a system property to change the directory path.<br>
\r
465 * e.g. java -DUnicodeData="data_dir_path" com.ibm.icu.dev.test.lang.UCharacterCaseTest
\r
467 public void TestSpecialCasingTxt()
\r
471 // reading in the SpecialCasing file
\r
472 BufferedReader input = TestUtil.getDataReader(
\r
473 "unicode/SpecialCasing.txt");
\r
476 String s = input.readLine();
\r
480 if (s.length() == 0 || s.charAt(0) == '#') {
\r
484 String chstr[] = getUnicodeStrings(s);
\r
485 StringBuffer strbuffer = new StringBuffer(chstr[0]);
\r
486 StringBuffer lowerbuffer = new StringBuffer(chstr[1]);
\r
487 StringBuffer upperbuffer = new StringBuffer(chstr[3]);
\r
488 Locale locale = null;
\r
489 for (int i = 4; i < chstr.length; i ++) {
\r
490 String condition = chstr[i];
\r
491 if (Character.isLowerCase(chstr[i].charAt(0))) {
\r
492 // specified locale
\r
493 locale = new Locale(chstr[i], "");
\r
495 else if (condition.compareToIgnoreCase("Not_Before_Dot")
\r
497 // turns I into dotless i
\r
499 else if (condition.compareToIgnoreCase(
\r
500 "More_Above") == 0) {
\r
501 strbuffer.append((char)0x300);
\r
502 lowerbuffer.append((char)0x300);
\r
503 upperbuffer.append((char)0x300);
\r
505 else if (condition.compareToIgnoreCase(
\r
506 "After_Soft_Dotted") == 0) {
\r
507 strbuffer.insert(0, 'i');
\r
508 lowerbuffer.insert(0, 'i');
\r
510 if (locale != null) {
\r
511 lang = locale.getLanguage();
\r
513 if (lang.equals("tr") || lang.equals("az")) {
\r
514 // this is to be removed when 4.0 data comes out
\r
515 // and upperbuffer.insert uncommented
\r
516 // see jitterbug 2344
\r
517 chstr[i] = "After_I";
\r
518 strbuffer.deleteCharAt(0);
\r
519 lowerbuffer.deleteCharAt(0);
\r
522 // upperbuffer.insert(0, '\u0130');
\r
525 upperbuffer.insert(0, 'I');
\r
528 else if (condition.compareToIgnoreCase(
\r
529 "Final_Sigma") == 0) {
\r
530 strbuffer.insert(0, 'c');
\r
531 lowerbuffer.insert(0, 'c');
\r
532 upperbuffer.insert(0, 'C');
\r
534 else if (condition.compareToIgnoreCase("After_I") == 0) {
\r
535 strbuffer.insert(0, 'I');
\r
536 lowerbuffer.insert(0, 'i');
\r
538 if (locale != null) {
\r
539 lang = locale.getLanguage();
\r
541 if (lang.equals("tr") || lang.equals("az")) {
\r
542 upperbuffer.insert(0, 'I');
\r
546 chstr[0] = strbuffer.toString();
\r
547 chstr[1] = lowerbuffer.toString();
\r
548 chstr[3] = upperbuffer.toString();
\r
549 if (locale == null) {
\r
550 if (!UCharacter.toLowerCase(chstr[0]).equals(chstr[1])) {
\r
552 errln("Fail: toLowerCase for character " +
\r
553 Utility.escape(chstr[0]) + ", expected "
\r
554 + Utility.escape(chstr[1]) + " but resulted in " +
\r
555 Utility.escape(UCharacter.toLowerCase(chstr[0])));
\r
557 if (!UCharacter.toUpperCase(chstr[0]).equals(chstr[3])) {
\r
559 errln("Fail: toUpperCase for character " +
\r
560 Utility.escape(chstr[0]) + ", expected "
\r
561 + Utility.escape(chstr[3]) + " but resulted in " +
\r
562 Utility.escape(UCharacter.toUpperCase(chstr[0])));
\r
566 if (!UCharacter.toLowerCase(locale, chstr[0]).equals(
\r
569 errln("Fail: toLowerCase for character " +
\r
570 Utility.escape(chstr[0]) + ", expected "
\r
571 + Utility.escape(chstr[1]) + " but resulted in " +
\r
572 Utility.escape(UCharacter.toLowerCase(locale,
\r
575 if (!UCharacter.toUpperCase(locale, chstr[0]).equals(
\r
578 errln("Fail: toUpperCase for character " +
\r
579 Utility.escape(chstr[0]) + ", expected "
\r
580 + Utility.escape(chstr[3]) + " but resulted in " +
\r
581 Utility.escape(UCharacter.toUpperCase(locale,
\r
588 catch (Exception e)
\r
590 e.printStackTrace();
\r
594 public void TestUpperLower()
\r
596 int upper[] = {0x0041, 0x0042, 0x00b2, 0x01c4, 0x01c6, 0x01c9, 0x01c8,
\r
598 int lower[] = {0x0061, 0x0062, 0x00b2, 0x01c6, 0x01c6, 0x01c9, 0x01c9,
\r
600 String upperTest = "abcdefg123hij.?:klmno";
\r
601 String lowerTest = "ABCDEFG123HIJ.?:KLMNO";
\r
603 // Checks LetterLike Symbols which were previously a source of
\r
604 // confusion [Bertrand A. D. 02/04/98]
\r
605 for (int i = 0x2100; i < 0x2138; i ++) {
\r
606 /* Unicode 5.0 adds lowercase U+214E (TURNED SMALL F) to U+2132 (TURNED CAPITAL F) */
\r
607 if (i != 0x2126 && i != 0x212a && i != 0x212b && i!=0x2132) {
\r
608 if (i != UCharacter.toLowerCase(i)) { // itself
\r
609 errln("Failed case conversion with itself: \\u"
\r
610 + Utility.hex(i, 4));
\r
612 if (i != UCharacter.toUpperCase(i)) {
\r
613 errln("Failed case conversion with itself: \\u"
\r
614 + Utility.hex(i, 4));
\r
618 for (int i = 0; i < upper.length; i ++) {
\r
619 if (UCharacter.toLowerCase(upper[i]) != lower[i]) {
\r
620 errln("FAILED UCharacter.tolower() for \\u"
\r
621 + Utility.hex(upper[i], 4)
\r
622 + " Expected \\u" + Utility.hex(lower[i], 4)
\r
624 + Utility.hex(UCharacter.toLowerCase(upper[i]), 4));
\r
627 logln("testing upper lower");
\r
628 for (int i = 0; i < upperTest.length(); i ++) {
\r
629 logln("testing to upper to lower");
\r
630 if (UCharacter.isLetter(upperTest.charAt(i)) &&
\r
631 !UCharacter.isLowerCase(upperTest.charAt(i))) {
\r
632 errln("Failed isLowerCase test at \\u"
\r
633 + Utility.hex(upperTest.charAt(i), 4));
\r
635 else if (UCharacter.isLetter(lowerTest.charAt(i))
\r
636 && !UCharacter.isUpperCase(lowerTest.charAt(i))) {
\r
637 errln("Failed isUpperCase test at \\u"
\r
638 + Utility.hex(lowerTest.charAt(i), 4));
\r
640 else if (upperTest.charAt(i)
\r
641 != UCharacter.toLowerCase(lowerTest.charAt(i))) {
\r
642 errln("Failed case conversion from \\u"
\r
643 + Utility.hex(lowerTest.charAt(i), 4) + " To \\u"
\r
644 + Utility.hex(upperTest.charAt(i), 4));
\r
646 else if (lowerTest.charAt(i)
\r
647 != UCharacter.toUpperCase(upperTest.charAt(i))) {
\r
648 errln("Failed case conversion : \\u"
\r
649 + Utility.hex(upperTest.charAt(i), 4) + " To \\u"
\r
650 + Utility.hex(lowerTest.charAt(i), 4));
\r
652 else if (upperTest.charAt(i)
\r
653 != UCharacter.toLowerCase(upperTest.charAt(i))) {
\r
654 errln("Failed case conversion with itself: \\u"
\r
655 + Utility.hex(upperTest.charAt(i)));
\r
657 else if (lowerTest.charAt(i)
\r
658 != UCharacter.toUpperCase(lowerTest.charAt(i))) {
\r
659 errln("Failed case conversion with itself: \\u"
\r
660 + Utility.hex(lowerTest.charAt(i)));
\r
663 logln("done testing upper Lower");
\r
666 // private data members - test data --------------------------------------
\r
668 private static final Locale TURKISH_LOCALE_ = new Locale("tr", "TR");
\r
669 private static final Locale GERMAN_LOCALE_ = new Locale("de", "DE");
\r
670 private static final Locale GREEK_LOCALE_ = new Locale("el", "GR");
\r
671 private static final Locale ENGLISH_LOCALE_ = new Locale("en", "US");
\r
672 private static final Locale LITHUANIAN_LOCALE_ = new Locale("lt", "LT");
\r
674 private static final int CHARACTER_UPPER_[] =
\r
675 {0x41, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047,
\r
676 0x00b1, 0x00b2, 0xb3, 0x0048, 0x0049, 0x004a, 0x002e,
\r
677 0x003f, 0x003a, 0x004b, 0x004c, 0x4d, 0x004e, 0x004f,
\r
678 0x01c4, 0x01c8, 0x000c, 0x0000};
\r
679 private static final int CHARACTER_LOWER_[] =
\r
680 {0x61, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067,
\r
681 0x00b1, 0x00b2, 0xb3, 0x0068, 0x0069, 0x006a, 0x002e,
\r
682 0x003f, 0x003a, 0x006b, 0x006c, 0x6d, 0x006e, 0x006f,
\r
683 0x01c6, 0x01c9, 0x000c, 0x0000};
\r
686 * CaseFolding.txt says about i and its cousins:
\r
687 * 0049; C; 0069; # LATIN CAPITAL LETTER I
\r
688 * 0049; T; 0131; # LATIN CAPITAL LETTER I
\r
690 * 0130; F; 0069 0307; # LATIN CAPITAL LETTER I WITH DOT ABOVE
\r
691 * 0130; T; 0069; # LATIN CAPITAL LETTER I WITH DOT ABOVE
\r
693 * See CaseFolding.txt and the Unicode Standard for how to apply the case foldings.
\r
695 private static final int FOLDING_SIMPLE_[] = {
\r
696 // input, default, exclude special i
\r
699 0x130, 0x130, 0x69,
\r
700 0x131, 0x131, 0x131,
\r
702 0xfb03, 0xfb03, 0xfb03,
\r
703 0x1040e,0x10436,0x10436,
\r
704 0x5ffff,0x5ffff,0x5ffff
\r
706 private static final String FOLDING_MIXED_[] =
\r
707 {"\u0061\u0042\u0130\u0049\u0131\u03d0\u00df\ufb03\ud93f\udfff",
\r
708 "A\u00df\u00b5\ufb03\uD801\uDC0C\u0130\u0131"};
\r
709 private static final String FOLDING_DEFAULT_[] =
\r
710 {"\u0061\u0062\u0069\u0307\u0069\u0131\u03b2\u0073\u0073\u0066\u0066\u0069\ud93f\udfff",
\r
711 "ass\u03bcffi\uD801\uDC34i\u0307\u0131"};
\r
712 private static final String FOLDING_EXCLUDE_SPECIAL_I_[] =
\r
713 {"\u0061\u0062\u0069\u0131\u0131\u03b2\u0073\u0073\u0066\u0066\u0069\ud93f\udfff",
\r
714 "ass\u03bcffi\uD801\uDC34i\u0131"};
\r
718 private static final String SHARED_UPPERCASE_GREEK_ =
\r
719 "\u0399\u0395\u03a3\u03a5\u03a3\u0020\u03a7\u03a1\u0399\u03a3\u03a4\u039f\u03a3";
\r
723 private static final String SHARED_LOWERCASE_GREEK_ =
\r
724 "\u03b9\u03b5\u03c3\u03c5\u03c2\u0020\u03c7\u03c1\u03b9\u03c3\u03c4\u03bf\u03c2";
\r
725 private static final String SHARED_LOWERCASE_TURKISH_ =
\r
726 "\u0069\u0073\u0074\u0061\u006e\u0062\u0075\u006c\u002c\u0020\u006e\u006f\u0074\u0020\u0063\u006f\u006e\u0073\u0074\u0061\u006e\u0074\u0131\u006e\u006f\u0070\u006c\u0065\u0021";
\r
727 private static final String SHARED_UPPERCASE_TURKISH_ =
\r
728 "\u0054\u004f\u0050\u004b\u0041\u0050\u0049\u0020\u0050\u0041\u004c\u0041\u0043\u0045\u002c\u0020\u0130\u0053\u0054\u0041\u004e\u0042\u0055\u004c";
\r
729 private static final String SHARED_UPPERCASE_ISTANBUL_ =
\r
730 "\u0130STANBUL, NOT CONSTANTINOPLE!";
\r
731 private static final String SHARED_LOWERCASE_ISTANBUL_ =
\r
732 "i\u0307stanbul, not constantinople!";
\r
733 private static final String SHARED_LOWERCASE_TOPKAP_ =
\r
734 "topkap\u0131 palace, istanbul";
\r
735 private static final String SHARED_UPPERCASE_TOPKAP_ =
\r
736 "TOPKAPI PALACE, ISTANBUL";
\r
737 private static final String SHARED_LOWERCASE_GERMAN_ =
\r
738 "S\u00FC\u00DFmayrstra\u00DFe";
\r
739 private static final String SHARED_UPPERCASE_GERMAN_ =
\r
740 "S\u00DCSSMAYRSTRASSE";
\r
742 private static final String UPPER_BEFORE_ =
\r
743 "\u0061\u0042\u0069\u03c2\u00df\u03c3\u002f\ufb03\ufb03\ufb03\ud93f\udfff";
\r
744 private static final String UPPER_ROOT_ =
\r
745 "\u0041\u0042\u0049\u03a3\u0053\u0053\u03a3\u002f\u0046\u0046\u0049\u0046\u0046\u0049\u0046\u0046\u0049\ud93f\udfff";
\r
746 private static final String UPPER_TURKISH_ =
\r
747 "\u0041\u0042\u0130\u03a3\u0053\u0053\u03a3\u002f\u0046\u0046\u0049\u0046\u0046\u0049\u0046\u0046\u0049\ud93f\udfff";
\r
748 private static final String UPPER_MINI_ = "\u00df\u0061";
\r
749 private static final String UPPER_MINI_UPPER_ = "\u0053\u0053\u0041";
\r
751 private static final String LOWER_BEFORE_ =
\r
752 "\u0061\u0042\u0049\u03a3\u00df\u03a3\u002f\ud93f\udfff";
\r
753 private static final String LOWER_ROOT_ =
\r
754 "\u0061\u0062\u0069\u03c3\u00df\u03c2\u002f\ud93f\udfff";
\r
755 private static final String LOWER_TURKISH_ =
\r
756 "\u0061\u0062\u0131\u03c3\u00df\u03c2\u002f\ud93f\udfff";
\r
759 * each item is an array with input string, result string, locale ID, break iterator, options
\r
760 * the break iterator is specified as an int, same as in BreakIterator.KIND_*:
\r
761 * 0=KIND_CHARACTER 1=KIND_WORD 2=KIND_LINE 3=KIND_SENTENCE 4=KIND_TITLE -1=default (NULL=words) -2=no breaks (.*)
\r
762 * options: T=U_FOLD_CASE_EXCLUDE_SPECIAL_I L=U_TITLECASE_NO_LOWERCASE A=U_TITLECASE_NO_BREAK_ADJUSTMENT
\r
763 * see ICU4C source/test/testdata/casing.txt
\r
765 private static final String TITLE_DATA_[] = {
\r
766 "\u0061\u0042\u0020\u0069\u03c2\u0020\u00df\u03c3\u002f\ufb03\ud93f\udfff",
\r
767 "\u0041\u0042\u0020\u0049\u03a3\u0020\u0053\u0073\u03a3\u002f\u0046\u0066\u0069\ud93f\udfff",
\r
772 "\u0061\u0042\u0020\u0069\u03c2\u0020\u00df\u03c3\u002f\ufb03\ud93f\udfff",
\r
773 "\u0041\u0062\u0020\u0049\u03c2\u0020\u0053\u0073\u03c3\u002f\u0046\u0066\u0069\ud93f\udfff",
\r
778 "\u02bbaMeLikA huI P\u016b \u02bb\u02bb\u02bbiA", "\u02bbAmelika Hui P\u016b \u02bb\u02bb\u02bbIa", // titlecase first _cased_ letter, j4933
\r
783 " tHe QUIcK bRoWn", " The Quick Brown",
\r
788 "\u01c4\u01c5\u01c6\u01c7\u01c8\u01c9\u01ca\u01cb\u01cc",
\r
789 "\u01c5\u01c5\u01c5\u01c8\u01c8\u01c8\u01cb\u01cb\u01cb", // UBRK_CHARACTER
\r
794 "\u01c9ubav ljubav", "\u01c8ubav Ljubav", // Lj vs. L+j
\r
799 "'oH dOn'T tItLeCaSe AfTeR lEtTeR+'", "'Oh Don't Titlecase After Letter+'",
\r
804 "a \u02bbCaT. A \u02bbdOg! \u02bbeTc.",
\r
805 "A \u02bbCat. A \u02bbDog! \u02bbEtc.",
\r
810 "a \u02bbCaT. A \u02bbdOg! \u02bbeTc.",
\r
811 "A \u02bbcat. A \u02bbdog! \u02bbetc.",
\r
814 "A", // U_TITLECASE_NO_BREAK_ADJUSTMENT
\r
816 "a \u02bbCaT. A \u02bbdOg! \u02bbeTc.",
\r
817 "A \u02bbCaT. A \u02bbdOg! \u02bbETc.",
\r
820 "L", // UBRK_SENTENCE and U_TITLECASE_NO_LOWERCASE
\r
823 "\u02bbcAt! \u02bbeTc.",
\r
824 "\u02bbCat! \u02bbetc.",
\r
827 "", // -2=Trivial break iterator
\r
829 "\u02bbcAt! \u02bbeTc.",
\r
830 "\u02bbcat! \u02bbetc.",
\r
833 "A", // U_TITLECASE_NO_BREAK_ADJUSTMENT
\r
835 "\u02bbcAt! \u02bbeTc.",
\r
836 "\u02bbCAt! \u02bbeTc.",
\r
839 "L", // U_TITLECASE_NO_LOWERCASE
\r
841 "\u02bbcAt! \u02bbeTc.",
\r
842 "\u02bbcAt! \u02bbeTc.",
\r
845 "AL", // Both options
\r
847 // Test case for ticket #7251: UCharacter.toTitleCase() throws OutOfMemoryError
\r
848 // when TITLECASE_NO_LOWERCASE encounters a single-letter word
\r
853 "L" // U_TITLECASE_NO_LOWERCASE
\r
858 * <p>basic string, lower string, upper string</p>
\r
860 private static final String SPECIAL_DATA_[] = {
\r
861 UTF16.valueOf(0x1043C) + UTF16.valueOf(0x10414),
\r
862 UTF16.valueOf(0x1043C) + UTF16.valueOf(0x1043C),
\r
863 UTF16.valueOf(0x10414) + UTF16.valueOf(0x10414),
\r
864 "ab'cD \uFB00i\u0131I\u0130 \u01C7\u01C8\u01C9 " +
\r
865 UTF16.valueOf(0x1043C) + UTF16.valueOf(0x10414),
\r
866 "ab'cd \uFB00i\u0131ii\u0307 \u01C9\u01C9\u01C9 " +
\r
867 UTF16.valueOf(0x1043C) + UTF16.valueOf(0x1043C),
\r
868 "AB'CD FFIII\u0130 \u01C7\u01C7\u01C7 " +
\r
869 UTF16.valueOf(0x10414) + UTF16.valueOf(0x10414),
\r
870 // sigmas followed/preceded by cased letters
\r
871 "i\u0307\u03a3\u0308j \u0307\u03a3\u0308j i\u00ad\u03a3\u0308 \u0307\u03a3\u0308 ",
\r
872 "i\u0307\u03c3\u0308j \u0307\u03c3\u0308j i\u00ad\u03c2\u0308 \u0307\u03c3\u0308 ",
\r
873 "I\u0307\u03a3\u0308J \u0307\u03a3\u0308J I\u00ad\u03a3\u0308 \u0307\u03a3\u0308 "
\r
875 private static final Locale SPECIAL_LOCALES_[] = {
\r
881 private static final String SPECIAL_DOTTED_ =
\r
882 "I \u0130 I\u0307 I\u0327\u0307 I\u0301\u0307 I\u0327\u0307\u0301";
\r
883 private static final String SPECIAL_DOTTED_LOWER_TURKISH_ =
\r
884 "\u0131 i i i\u0327 \u0131\u0301\u0307 i\u0327\u0301";
\r
885 private static final String SPECIAL_DOTTED_LOWER_GERMAN_ =
\r
886 "i i\u0307 i\u0307 i\u0327\u0307 i\u0301\u0307 i\u0327\u0307\u0301";
\r
887 private static final String SPECIAL_DOT_ABOVE_ =
\r
888 "a\u0307 \u0307 i\u0307 j\u0327\u0307 j\u0301\u0307";
\r
889 private static final String SPECIAL_DOT_ABOVE_UPPER_LITHUANIAN_ =
\r
890 "A\u0307 \u0307 I J\u0327 J\u0301\u0307";
\r
891 private static final String SPECIAL_DOT_ABOVE_UPPER_GERMAN_ =
\r
892 "A\u0307 \u0307 I\u0307 J\u0327\u0307 J\u0301\u0307";
\r
893 private static final String SPECIAL_DOT_ABOVE_UPPER_ =
\r
894 "I I\u0301 J J\u0301 \u012e \u012e\u0301 \u00cc\u00cd\u0128";
\r
895 private static final String SPECIAL_DOT_ABOVE_LOWER_LITHUANIAN_ =
\r
896 "i i\u0307\u0301 j j\u0307\u0301 \u012f \u012f\u0307\u0301 i\u0307\u0300i\u0307\u0301i\u0307\u0303";
\r
897 private static final String SPECIAL_DOT_ABOVE_LOWER_GERMAN_ =
\r
898 "i i\u0301 j j\u0301 \u012f \u012f\u0301 \u00ec\u00ed\u0129";
\r
900 // private methods -------------------------------------------------------
\r
903 * Converting the hex numbers represented between ';' to Unicode strings
\r
904 * @param str string to break up into Unicode strings
\r
905 * @return array holding the four leading ';'-separated fields decoded to strings, followed by any condition tokens
\r
907 private String[] getUnicodeStrings(String str)
\r
909 Vector v = new Vector(10);
\r
911 for (int casecount = 4; casecount > 0; casecount --) {
\r
912 int end = str.indexOf("; ", start);
\r
913 String casestr = str.substring(start, end);
\r
914 StringBuffer buffer = new StringBuffer();
\r
915 int spaceoffset = 0;
\r
916 while (spaceoffset < casestr.length()) {
\r
917 int nextspace = casestr.indexOf(' ', spaceoffset);
\r
918 if (nextspace == -1) {
\r
919 nextspace = casestr.length();
\r
921 buffer.append((char)Integer.parseInt(
\r
922 casestr.substring(spaceoffset, nextspace),
\r
924 spaceoffset = nextspace + 1;
\r
927 v.add(buffer.toString());
\r
929 int comments = str.indexOf(" #", start);
\r
930 if (comments != -1 && comments != start) {
\r
931 if (str.charAt(comments - 1) == ';') {
\r
934 String conditions = str.substring(start, comments);
\r
936 while (offset < conditions.length()) {
\r
937 int spaceoffset = conditions.indexOf(' ', offset);
\r
938 if (spaceoffset == -1) {
\r
939 spaceoffset = conditions.length();
\r
941 v.add(conditions.substring(offset, spaceoffset));
\r
942 offset = spaceoffset + 1;
\r
945 int size = v.size();
\r
946 String result[] = new String[size];
\r
947 for (int i = 0; i < size; i ++) {
\r
948 result[i] = (String)v.elementAt(i);
\r