2 *******************************************************************************
3 * Copyright (C) 1996-2010, International Business Machines Corporation and
4 * others. All Rights Reserved.
5 *******************************************************************************
9 package com.ibm.icu.dev.test.lang;
12 import java.io.BufferedReader;
13 import java.util.ArrayList;
14 import java.util.List;
15 import java.util.Locale;
17 import com.ibm.icu.dev.test.TestFmwk;
18 import com.ibm.icu.dev.test.TestUtil;
19 import com.ibm.icu.impl.Utility;
20 import com.ibm.icu.lang.UCharacter;
21 import com.ibm.icu.lang.UProperty;
22 import com.ibm.icu.text.BreakIterator;
23 import com.ibm.icu.text.RuleBasedBreakIterator;
24 import com.ibm.icu.text.UTF16;
25 import com.ibm.icu.util.ULocale;
29 * <p>Testing character casing</p>
30 * <p>Mostly following the test cases in strcase.cpp for ICU</p>
31 * @author Syn Wee Quek
32 * @since March 14, 2002
34 public final class UCharacterCaseTest extends TestFmwk
36 // constructor -----------------------------------------------------------
// Public no-argument constructor.
41 public UCharacterCaseTest()
45 // public methods --------------------------------------------------------
// Command-line entry point; constructs an instance of this test class.
47 public static void main(String[] arg)
51 UCharacterCaseTest test = new UCharacterCaseTest();
61 * Testing the uppercase and lowercase function of UCharacter
// Checks the single-code-point case predicates and conversions of UCharacter
// against the parallel tables CHARACTER_LOWER_ and CHARACTER_UPPER_: entry i of
// one table is the expected case counterpart of entry i of the other.
// Mismatches are reported through errln; a passing index is logged via logln.
63 public void TestCharacter()
// The loop is bounded by CHARACTER_LOWER_ only but indexes both tables, so the
// two tables must have the same number of entries.
65 for (int i = 0; i < CHARACTER_LOWER_.length; i ++) {
// A letter taken from the lower table must be classified as lowercase.
66 if (UCharacter.isLetter(CHARACTER_LOWER_[i]) &&
67 !UCharacter.isLowerCase(CHARACTER_LOWER_[i])) {
68 errln("FAIL isLowerCase test for \\u" +
69 hex(CHARACTER_LOWER_[i]));
// A letter taken from the upper table must be uppercase or titlecase.
72 if (UCharacter.isLetter(CHARACTER_UPPER_[i]) &&
73 !(UCharacter.isUpperCase(CHARACTER_UPPER_[i]) ||
74 UCharacter.isTitleCase(CHARACTER_UPPER_[i]))) {
75 errln("FAIL isUpperCase test for \\u" +
76 hex(CHARACTER_UPPER_[i]));
// Cross conversion: lowercasing the upper entry must give the lower entry, and
// the upper entry must be reachable from the lower entry through either
// toUpperCase or toTitleCase.
79 if (CHARACTER_LOWER_[i] !=
80 UCharacter.toLowerCase(CHARACTER_UPPER_[i]) ||
81 (CHARACTER_UPPER_[i] !=
82 UCharacter.toUpperCase(CHARACTER_LOWER_[i]) &&
83 CHARACTER_UPPER_[i] !=
84 UCharacter.toTitleCase(CHARACTER_LOWER_[i]))) {
85 errln("FAIL case conversion test for \\u" +
86 hex(CHARACTER_UPPER_[i]) +
87 " to \\u" + hex(CHARACTER_LOWER_[i]));
// Lowercasing a lower-table entry must leave it unchanged.
90 if (CHARACTER_LOWER_[i] !=
91 UCharacter.toLowerCase(CHARACTER_LOWER_[i])) {
92 errln("FAIL lower case conversion test for \\u" +
93 hex(CHARACTER_LOWER_[i]));
// Fails only when neither toUpperCase nor toTitleCase leaves the upper-table
// entry unchanged.
96 if (CHARACTER_UPPER_[i] !=
97 UCharacter.toUpperCase(CHARACTER_UPPER_[i]) &&
98 CHARACTER_UPPER_[i] !=
99 UCharacter.toTitleCase(CHARACTER_UPPER_[i])) {
100 errln("FAIL upper case conversion test for \\u" +
101 hex(CHARACTER_UPPER_[i]));
104 logln("Ok \\u" + hex(CHARACTER_UPPER_[i]) + " and \\u" +
105 hex(CHARACTER_LOWER_[i]));
// Checks UCharacter.foldCase for single code points (table FOLDING_SIMPLE_) and
// for whole strings (FOLDING_MIXED_ as input, FOLDING_DEFAULT_ and
// FOLDING_EXCLUDE_SPECIAL_I_ as expected results). Each expectation is checked
// twice: once through the boolean overload and once through the overload that
// takes a UCharacter.FOLD_CASE_* option. Mismatches are reported through errln.
109 public void TestFolding()
111 // test simple case folding
// FOLDING_SIMPLE_ is read in groups of three: [i] input, [i + 1] expected
// result for default folding, [i + 2] expected result when special i handling
// is excluded.
112 for (int i = 0; i < FOLDING_SIMPLE_.length; i += 3) {
// boolean true and FOLD_CASE_DEFAULT are both compared with column i + 1
113 if (UCharacter.foldCase(FOLDING_SIMPLE_[i], true) !=
114 FOLDING_SIMPLE_[i + 1]) {
115 errln("FAIL: foldCase(\\u" + hex(FOLDING_SIMPLE_[i]) +
116 ", true) should be \\u" + hex(FOLDING_SIMPLE_[i + 1]));
118 if (UCharacter.foldCase(FOLDING_SIMPLE_[i],
119 UCharacter.FOLD_CASE_DEFAULT) !=
120 FOLDING_SIMPLE_[i + 1]) {
121 errln("FAIL: foldCase(\\u" + hex(FOLDING_SIMPLE_[i]) +
122 ", UCharacter.FOLD_CASE_DEFAULT) should be \\u"
123 + hex(FOLDING_SIMPLE_[i + 1]));
// boolean false and FOLD_CASE_EXCLUDE_SPECIAL_I are both compared with column i + 2
125 if (UCharacter.foldCase(FOLDING_SIMPLE_[i], false) !=
126 FOLDING_SIMPLE_[i + 2]) {
127 errln("FAIL: foldCase(\\u" + hex(FOLDING_SIMPLE_[i]) +
128 ", false) should be \\u" + hex(FOLDING_SIMPLE_[i + 2]));
130 if (UCharacter.foldCase(FOLDING_SIMPLE_[i],
131 UCharacter.FOLD_CASE_EXCLUDE_SPECIAL_I) !=
132 FOLDING_SIMPLE_[i + 2]) {
133 errln("FAIL: foldCase(\\u" + hex(FOLDING_SIMPLE_[i]) +
134 ", UCharacter.FOLD_CASE_EXCLUDE_SPECIAL_I) should be \\u"
135 + hex(FOLDING_SIMPLE_[i + 2]));
139 // Test full string case folding with default option and separate
// First mixed string (index 0), default folding, boolean overload.
141 if (!FOLDING_DEFAULT_[0].equals(UCharacter.foldCase(FOLDING_MIXED_[0], true))) {
142 errln("FAIL: foldCase(" + prettify(FOLDING_MIXED_[0]) +
143 ", true)=" + prettify(UCharacter.foldCase(FOLDING_MIXED_[0], true)) +
144 " should be " + prettify(FOLDING_DEFAULT_[0]));
// Same expectation through the option-constant overload.
147 if (!FOLDING_DEFAULT_[0].equals(UCharacter.foldCase(FOLDING_MIXED_[0], UCharacter.FOLD_CASE_DEFAULT))) {
148 errln("FAIL: foldCase(" + prettify(FOLDING_MIXED_[0]) +
149 ", UCharacter.FOLD_CASE_DEFAULT)=" + prettify(UCharacter.foldCase(FOLDING_MIXED_[0], UCharacter.FOLD_CASE_DEFAULT))
150 + " should be " + prettify(FOLDING_DEFAULT_[0]));
// First mixed string with special i handling excluded, both overloads.
153 if (!FOLDING_EXCLUDE_SPECIAL_I_[0].equals(
154 UCharacter.foldCase(FOLDING_MIXED_[0], false))) {
155 errln("FAIL: foldCase(" + prettify(FOLDING_MIXED_[0]) +
156 ", false)=" + prettify(UCharacter.foldCase(FOLDING_MIXED_[0], false))
157 + " should be " + prettify(FOLDING_EXCLUDE_SPECIAL_I_[0]));
160 if (!FOLDING_EXCLUDE_SPECIAL_I_[0].equals(
161 UCharacter.foldCase(FOLDING_MIXED_[0], UCharacter.FOLD_CASE_EXCLUDE_SPECIAL_I))) {
162 errln("FAIL: foldCase(" + prettify(FOLDING_MIXED_[0]) +
163 ", UCharacter.FOLD_CASE_EXCLUDE_SPECIAL_I)=" + prettify(UCharacter.foldCase(FOLDING_MIXED_[0], UCharacter.FOLD_CASE_EXCLUDE_SPECIAL_I))
164 + " should be " + prettify(FOLDING_EXCLUDE_SPECIAL_I_[0]));
// Second mixed string (index 1), default folding, both overloads.
167 if (!FOLDING_DEFAULT_[1].equals(UCharacter.foldCase(FOLDING_MIXED_[1], true))) {
168 errln("FAIL: foldCase(" + prettify(FOLDING_MIXED_[1]) +
169 ", true)=" + prettify(UCharacter.foldCase(FOLDING_MIXED_[1], true))
170 + " should be " + prettify(FOLDING_DEFAULT_[1]));
173 if (!FOLDING_DEFAULT_[1].equals(UCharacter.foldCase(FOLDING_MIXED_[1], UCharacter.FOLD_CASE_DEFAULT))) {
174 errln("FAIL: foldCase(" + prettify(FOLDING_MIXED_[1]) +
175 ", UCharacter.FOLD_CASE_DEFAULT)=" + prettify(UCharacter.foldCase(FOLDING_MIXED_[1], UCharacter.FOLD_CASE_DEFAULT))
176 + " should be " + prettify(FOLDING_DEFAULT_[1]));
179 // alternate handling for dotted I/dotless i (U+0130, U+0131)
180 if (!FOLDING_EXCLUDE_SPECIAL_I_[1].equals(
181 UCharacter.foldCase(FOLDING_MIXED_[1], false))) {
182 errln("FAIL: foldCase(" + prettify(FOLDING_MIXED_[1]) +
183 ", false)=" + prettify(UCharacter.foldCase(FOLDING_MIXED_[1], false))
184 + " should be " + prettify(FOLDING_EXCLUDE_SPECIAL_I_[1]));
187 if (!FOLDING_EXCLUDE_SPECIAL_I_[1].equals(
188 UCharacter.foldCase(FOLDING_MIXED_[1], UCharacter.FOLD_CASE_EXCLUDE_SPECIAL_I))) {
189 errln("FAIL: foldCase(" + prettify(FOLDING_MIXED_[1]) +
190 ", UCharacter.FOLD_CASE_EXCLUDE_SPECIAL_I)=" + prettify(UCharacter.foldCase(FOLDING_MIXED_[1], UCharacter.FOLD_CASE_EXCLUDE_SPECIAL_I))
192 + prettify(FOLDING_EXCLUDE_SPECIAL_I_[1]));
197 * Testing the strings case mapping methods
// Checks string uppercasing through UCharacter.toUpperCase, both the overload
// without a locale argument and the overload taking a java.util.Locale
// (Turkish, German). Mismatches are reported through errln.
199 public void TestUpper()
// NOTE(review): the checks described as "root locale" call the overload that
// takes no locale argument; which locale that overload applies is not visible
// here -- confirm it does not depend on the JVM default locale.
201 // uppercase with root locale and in the same buffer
202 if (!UPPER_ROOT_.equals(UCharacter.toUpperCase(UPPER_BEFORE_))) {
203 errln("Fail " + UPPER_BEFORE_ + " after uppercase should be " +
204 UPPER_ROOT_ + " instead got " +
205 UCharacter.toUpperCase(UPPER_BEFORE_));
208 // uppercase with turkish locale and separate buffers
209 if (!UPPER_TURKISH_.equals(UCharacter.toUpperCase(TURKISH_LOCALE_,
211 errln("Fail " + UPPER_BEFORE_ +
212 " after turkish-sensitive uppercase should be " +
213 UPPER_TURKISH_ + " instead of " +
214 UCharacter.toUpperCase(TURKISH_LOCALE_, UPPER_BEFORE_));
217 // uppercase a short string with root locale
218 if (!UPPER_MINI_UPPER_.equals(UCharacter.toUpperCase(UPPER_MINI_))) {
219 errln("error in toUpper(root locale)=\"" + UPPER_MINI_ +
220 "\" expected \"" + UPPER_MINI_UPPER_ + "\"");
// Shared sample text: the same lowercase input is uppercased without a locale
// and then with the Turkish locale, against two different expected strings.
223 if (!SHARED_UPPERCASE_TOPKAP_.equals(
224 UCharacter.toUpperCase(SHARED_LOWERCASE_TOPKAP_))) {
225 errln("toUpper failed: expected \"" +
226 SHARED_UPPERCASE_TOPKAP_ + "\", got \"" +
227 UCharacter.toUpperCase(SHARED_LOWERCASE_TOPKAP_) + "\".");
230 if (!SHARED_UPPERCASE_TURKISH_.equals(
231 UCharacter.toUpperCase(TURKISH_LOCALE_,
232 SHARED_LOWERCASE_TOPKAP_))) {
233 errln("toUpper failed: expected \"" +
234 SHARED_UPPERCASE_TURKISH_ + "\", got \"" +
235 UCharacter.toUpperCase(TURKISH_LOCALE_,
236 SHARED_LOWERCASE_TOPKAP_) + "\".");
// German sample uppercased with the German locale.
239 if (!SHARED_UPPERCASE_GERMAN_.equals(
240 UCharacter.toUpperCase(GERMAN_LOCALE_,
241 SHARED_LOWERCASE_GERMAN_))) {
242 errln("toUpper failed: expected \"" + SHARED_UPPERCASE_GERMAN_
243 + "\", got \"" + UCharacter.toUpperCase(GERMAN_LOCALE_,
244 SHARED_LOWERCASE_GERMAN_) + "\".");
// Greek sample uppercased without a locale argument.
// NOTE(review): the failure message below says "toLower" although this check
// calls toUpperCase -- looks like a copy/paste slip in the message text.
247 if (!SHARED_UPPERCASE_GREEK_.equals(
248 UCharacter.toUpperCase(SHARED_LOWERCASE_GREEK_))) {
249 errln("toLower failed: expected \"" + SHARED_UPPERCASE_GREEK_ +
250 "\", got \"" + UCharacter.toUpperCase(
251 SHARED_LOWERCASE_GREEK_) + "\".");
// Checks string lowercasing through UCharacter.toLowerCase, both the overload
// without a locale argument and the overload taking a java.util.Locale
// (Turkish, Greek). Mismatches are reported through errln.
255 public void TestLower()
// lowercase without a locale argument
257 if (!LOWER_ROOT_.equals(UCharacter.toLowerCase(LOWER_BEFORE_))) {
258 errln("Fail " + LOWER_BEFORE_ + " after lowercase should be " +
259 LOWER_ROOT_ + " instead of " +
260 UCharacter.toLowerCase(LOWER_BEFORE_));
263 // lowercase with turkish locale
264 if (!LOWER_TURKISH_.equals(UCharacter.toLowerCase(TURKISH_LOCALE_,
266 errln("Fail " + LOWER_BEFORE_ +
267 " after turkish-sensitive lowercase should be " +
268 LOWER_TURKISH_ + " instead of " +
269 UCharacter.toLowerCase(TURKISH_LOCALE_, LOWER_BEFORE_));
// Shared sample text: the same uppercase input is lowercased without a locale
// ("1.") and with the Turkish locale ("2."), against different expected strings.
271 if (!SHARED_LOWERCASE_ISTANBUL_.equals(
272 UCharacter.toLowerCase(SHARED_UPPERCASE_ISTANBUL_))) {
273 errln("1. toLower failed: expected \"" +
274 SHARED_LOWERCASE_ISTANBUL_ + "\", got \"" +
275 UCharacter.toLowerCase(SHARED_UPPERCASE_ISTANBUL_) + "\".");
278 if (!SHARED_LOWERCASE_TURKISH_.equals(
279 UCharacter.toLowerCase(TURKISH_LOCALE_,
280 SHARED_UPPERCASE_ISTANBUL_))) {
281 errln("2. toLower failed: expected \"" +
282 SHARED_LOWERCASE_TURKISH_ + "\", got \"" +
283 UCharacter.toLowerCase(TURKISH_LOCALE_,
284 SHARED_UPPERCASE_ISTANBUL_) + "\".");
// Greek sample lowercased with the Greek locale.
286 if (!SHARED_LOWERCASE_GREEK_.equals(
287 UCharacter.toLowerCase(GREEK_LOCALE_,
288 SHARED_UPPERCASE_GREEK_))) {
289 errln("toLower failed: expected \"" + SHARED_LOWERCASE_GREEK_ +
290 "\", got \"" + UCharacter.toLowerCase(GREEK_LOCALE_,
291 SHARED_UPPERCASE_GREEK_) + "\".");
// Regression check for titlecasing around an ASCII apostrophe: asserts that
// '\'' has the CASE_IGNORABLE binary property, then that English titlecasing of
// an all-caps sentence yields "Can't" (lowercase t after the apostrophe). The
// break iterator argument is passed as null.
// NOTE(review): the method declares java.io.IOException but no statement
// visible here performs I/O -- confirm whether the throws clause is still needed.
295 public void TestTitleRegression() throws java.io.IOException {
296 boolean isIgnorable = UCharacter.hasBinaryProperty('\'', UProperty.CASE_IGNORABLE);
297 assertTrue("Case Ignorable check of ASCII apostrophe", isIgnorable);
298 assertEquals("Titlecase check",
299 "The Quick Brown Fox Can't Jump Over The Lazy Dogs.",
300 UCharacter.toTitleCase(ULocale.ENGLISH, "THE QUICK BROWN FOX CAN'T JUMP OVER THE LAZY DOGS.", null));
// Data-driven titlecasing check. TITLE_DATA_ is consumed five strings at a
// time: input, expected result, locale ID, break iterator kind (parsed as an
// int) and an options string. Mismatches are reported through errln.
303 public void TestTitle()
// The index is advanced by the i++ reads in the body, hence no update clause.
306 for (int i = 0; i < TITLE_DATA_.length;) {
307 String test = TITLE_DATA_[i++];
308 String expected = TITLE_DATA_[i++];
309 ULocale locale = new ULocale(TITLE_DATA_[i++]);
310 int breakType = Integer.parseInt(TITLE_DATA_[i++]);
311 String optionsString = TITLE_DATA_[i++];
314 BreakIterator.getBreakInstance(locale, breakType) :
316 // Open a trivial break iterator that only delivers { 0, length }
317 // or even just { 0 } as boundaries.
318 new RuleBasedBreakIterator(".*;") :
// Option letters: 'L' sets TITLECASE_NO_LOWERCASE, 'A' sets
// TITLECASE_NO_BREAK_ADJUSTMENT.
321 if (optionsString.indexOf('L') >= 0) {
322 options |= UCharacter.TITLECASE_NO_LOWERCASE;
324 if (optionsString.indexOf('A') >= 0) {
325 options |= UCharacter.TITLECASE_NO_BREAK_ADJUSTMENT;
// Overload that takes the options bit set.
327 String result = UCharacter.toTitleCase(locale, test, iter, options);
328 if (!expected.equals(result)) {
329 errln("titlecasing for " + prettify(test) + " (options " + options + ") should be " +
330 prettify(expected) + " but got " +
// Overload without an options argument, compared with the same expected string.
334 result = UCharacter.toTitleCase(locale, test, iter);
335 if (!expected.equals(result)) {
336 errln("titlecasing for " + prettify(test) + " should be " +
337 prettify(expected) + " but got " +
// NOTE(review): this handler catches every Exception and reports it as missing
// break iterator data; unrelated failures (for example a NumberFormatException
// from Integer.parseInt above) would be reported with the same warning.
342 }catch(Exception ex){
343 warnln("Could not find data for BreakIterators");
// Checks titlecasing of words starting with "ij": under ULocale.ENGLISH only
// the first letter is expected to be capitalized ("Ijssel"), under the "nl"
// locale both letters ("IJssel"). A third check repeats the Dutch case with a
// word break iterator and TITLECASE_NO_LOWERCASE, where the expected string
// keeps the original casing of the remaining letters.
347 public void TestDutchTitle() {
348 ULocale LOC_DUTCH = new ULocale("nl");
350 options |= UCharacter.TITLECASE_NO_LOWERCASE;
351 BreakIterator iter = BreakIterator.getWordInstance(LOC_DUTCH);
// null break iterator, English locale
353 assertEquals("Dutch titlecase check in English",
354 "Ijssel Igloo Ijmuiden",
355 UCharacter.toTitleCase(ULocale.ENGLISH, "ijssel igloo IJMUIDEN", null));
// null break iterator, Dutch locale
357 assertEquals("Dutch titlecase check in Dutch",
358 "IJssel Igloo IJmuiden",
359 UCharacter.toTitleCase(LOC_DUTCH, "ijssel igloo IJMUIDEN", null));
// explicit word break iterator plus the no-lowercase option
361 iter.setText("ijssel igloo IjMUIdEN iPoD ijenough");
362 assertEquals("Dutch titlecase check in Dutch with nolowercase option",
363 "IJssel Igloo IJMUIdEN IPoD IJenough",
364 UCharacter.toTitleCase(LOC_DUTCH, "ijssel igloo IjMUIdEN iPoD ijenough", iter, options));
// Checks locale-sensitive and special-case string mappings. The first part
// loops over SPECIAL_LOCALES_ and compares lower/uppercasing of SPECIAL_DATA_
// entries (input at j, expected lowercase at j + 1, expected uppercase at
// j + 2). The second part checks dotted/dotless i handling for Turkish and
// dot-above handling for Lithuanian, each contrasted with German.
// Mismatches are reported through errln.
367 public void TestSpecial()
369 for (int i = 0; i < SPECIAL_LOCALES_.length; i ++) {
371 Locale locale = SPECIAL_LOCALES_[i];
372 String str = SPECIAL_DATA_[j];
// A non-null entry is tested through the overloads that take a Locale.
373 if (locale != null) {
374 if (!SPECIAL_DATA_[j + 1].equals(
375 UCharacter.toLowerCase(locale, str))) {
376 errln("error lowercasing special characters " +
377 hex(str) + " expected " + hex(SPECIAL_DATA_[j + 1])
378 + " for locale " + locale.toString() + " but got " +
379 hex(UCharacter.toLowerCase(locale, str)));
// NOTE(review): the expected value is printed raw in the message below, while
// the lowercase message above wraps it in hex().
381 if (!SPECIAL_DATA_[j + 2].equals(
382 UCharacter.toUpperCase(locale, str))) {
383 errln("error uppercasing special characters " +
384 hex(str) + " expected " + SPECIAL_DATA_[j + 2]
385 + " for locale " + locale.toString() + " but got " +
386 hex(UCharacter.toUpperCase(locale, str)));
// NOTE(review): the next check tests the overload without a locale argument,
// but its failure message calls the overload that takes `locale`, and the
// uppercase check after it passes `locale` in the condition as well. If these
// statements run when locale is null, confirm that this is intended.
390 if (!SPECIAL_DATA_[j + 1].equals(
391 UCharacter.toLowerCase(str))) {
392 errln("error lowercasing special characters " +
393 hex(str) + " expected " + SPECIAL_DATA_[j + 1] +
395 hex(UCharacter.toLowerCase(locale, str)));
397 if (!SPECIAL_DATA_[j + 2].equals(
398 UCharacter.toUpperCase(locale, str))) {
399 errln("error uppercasing special characters " +
400 hex(str) + " expected " + SPECIAL_DATA_[j + 2] +
402 hex(UCharacter.toUpperCase(locale, str)));
407 // turkish & azerbaijani dotless i & dotted I
408 // remove dot above if there was a capital I before and there are no
409 // more accents above
410 if (!SPECIAL_DOTTED_LOWER_TURKISH_.equals(UCharacter.toLowerCase(
411 TURKISH_LOCALE_, SPECIAL_DOTTED_))) {
412 errln("error in dots.toLower(tr)=\"" + SPECIAL_DOTTED_ +
413 "\" expected \"" + SPECIAL_DOTTED_LOWER_TURKISH_ +
414 "\" but got " + UCharacter.toLowerCase(TURKISH_LOCALE_,
// Same input lowercased with the German locale, against a different expected string.
417 if (!SPECIAL_DOTTED_LOWER_GERMAN_.equals(UCharacter.toLowerCase(
418 GERMAN_LOCALE_, SPECIAL_DOTTED_))) {
419 errln("error in dots.toLower(de)=\"" + SPECIAL_DOTTED_ +
420 "\" expected \"" + SPECIAL_DOTTED_LOWER_GERMAN_ +
421 "\" but got " + UCharacter.toLowerCase(GERMAN_LOCALE_,
425 // lithuanian dot above in uppercasing
426 if (!SPECIAL_DOT_ABOVE_UPPER_LITHUANIAN_.equals(
427 UCharacter.toUpperCase(LITHUANIAN_LOCALE_, SPECIAL_DOT_ABOVE_))) {
428 errln("error in dots.toUpper(lt)=\"" + SPECIAL_DOT_ABOVE_ +
429 "\" expected \"" + SPECIAL_DOT_ABOVE_UPPER_LITHUANIAN_ +
430 "\" but got " + UCharacter.toUpperCase(LITHUANIAN_LOCALE_,
431 SPECIAL_DOT_ABOVE_));
// Same input uppercased with the German locale.
433 if (!SPECIAL_DOT_ABOVE_UPPER_GERMAN_.equals(UCharacter.toUpperCase(
434 GERMAN_LOCALE_, SPECIAL_DOT_ABOVE_))) {
435 errln("error in dots.toUpper(de)=\"" + SPECIAL_DOT_ABOVE_ +
436 "\" expected \"" + SPECIAL_DOT_ABOVE_UPPER_GERMAN_ +
437 "\" but got " + UCharacter.toUpperCase(GERMAN_LOCALE_,
438 SPECIAL_DOT_ABOVE_));
441 // lithuanian adds dot above to i in lowercasing if there are more
443 if (!SPECIAL_DOT_ABOVE_LOWER_LITHUANIAN_.equals(
444 UCharacter.toLowerCase(LITHUANIAN_LOCALE_,
445 SPECIAL_DOT_ABOVE_UPPER_))) {
446 errln("error in dots.toLower(lt)=\"" + SPECIAL_DOT_ABOVE_UPPER_ +
447 "\" expected \"" + SPECIAL_DOT_ABOVE_LOWER_LITHUANIAN_ +
448 "\" but got " + UCharacter.toLowerCase(LITHUANIAN_LOCALE_,
449 SPECIAL_DOT_ABOVE_UPPER_));
// Same input lowercased with the German locale.
451 if (!SPECIAL_DOT_ABOVE_LOWER_GERMAN_.equals(
452 UCharacter.toLowerCase(GERMAN_LOCALE_,
453 SPECIAL_DOT_ABOVE_UPPER_))) {
454 errln("error in dots.toLower(de)=\"" + SPECIAL_DOT_ABOVE_UPPER_ +
455 "\" expected \"" + SPECIAL_DOT_ABOVE_LOWER_GERMAN_ +
456 "\" but got " + UCharacter.toLowerCase(GERMAN_LOCALE_,
457 SPECIAL_DOT_ABOVE_UPPER_));
462 * Tests for case mapping in the file SpecialCasing.txt
463 * This method reads in SpecialCasing.txt file for testing purposes.
464 * A default path is provided relative to the src path, however the user
465 * could set a system property to change the directory path.<br>
466 * e.g. java -DUnicodeData="data_dir_path" com.ibm.icu.dev.test.lang.UCharacterCaseTest
// Data-driven check of string case mapping against the SpecialCasing.txt data
// file. Each data line is split by getUnicodeStrings; the source string and
// the expected lowercase/uppercase strings are then padded with context
// characters according to the conditions listed on that line, and the result
// of UCharacter.toLowerCase/toUpperCase is compared with the expectation.
// Mismatches are reported through errln.
// NOTE(review): no close() of the reader is visible in this method -- confirm
// the reader is released.
468 public void TestSpecialCasingTxt()
472 // reading in the SpecialCasing file
473 BufferedReader input = TestUtil.getDataReader(
474 "unicode/SpecialCasing.txt");
477 String s = input.readLine();
// Empty lines and lines starting with '#' are singled out here (presumably
// skipped as blank/comment lines).
481 if (s.length() == 0 || s.charAt(0) == '#') {
// Fields used below: chstr[0] source, chstr[1] expected lowercase, chstr[3]
// expected uppercase; chstr[2] is not read in this method. Entries from index
// 4 on are treated as condition tokens.
485 String chstr[] = getUnicodeStrings(s);
486 StringBuffer strbuffer = new StringBuffer(chstr[0]);
487 StringBuffer lowerbuffer = new StringBuffer(chstr[1]);
488 StringBuffer upperbuffer = new StringBuffer(chstr[3]);
489 Locale locale = null;
490 for (int i = 4; i < chstr.length; i ++) {
491 String condition = chstr[i];
// A token whose first character is lowercase is taken as a language code and
// turned into the Locale used for the locale-sensitive overloads.
492 if (Character.isLowerCase(chstr[i].charAt(0))) {
494 locale = new Locale(chstr[i], "");
496 else if (condition.compareToIgnoreCase("Not_Before_Dot")
498 // turns I into dotless i
// More_Above: append U+0300 to the source and to both expected strings.
500 else if (condition.compareToIgnoreCase(
501 "More_Above") == 0) {
502 strbuffer.append((char)0x300);
503 lowerbuffer.append((char)0x300);
504 upperbuffer.append((char)0x300);
// After_Soft_Dotted: put an 'i' in front of the source and of the expected
// lowercase string.
506 else if (condition.compareToIgnoreCase(
507 "After_Soft_Dotted") == 0) {
508 strbuffer.insert(0, 'i');
509 lowerbuffer.insert(0, 'i');
511 if (locale != null) {
512 lang = locale.getLanguage();
// For tr/az the inserted 'i' is removed again and the token is rewritten to
// "After_I" (see the existing remarks below).
514 if (lang.equals("tr") || lang.equals("az")) {
515 // this is to be removed when 4.0 data comes out
516 // and upperbuffer.insert uncommented
517 // see jitterbug 2344
518 chstr[i] = "After_I";
519 strbuffer.deleteCharAt(0);
520 lowerbuffer.deleteCharAt(0);
523 // upperbuffer.insert(0, '\u0130');
526 upperbuffer.insert(0, 'I');
// Final_Sigma: put a 'c' (expected uppercase: 'C') in front of all three
// strings, presumably so that the sigma is preceded by a cased letter.
529 else if (condition.compareToIgnoreCase(
530 "Final_Sigma") == 0) {
531 strbuffer.insert(0, 'c');
532 lowerbuffer.insert(0, 'c');
533 upperbuffer.insert(0, 'C');
// After_I: put 'I' in front of the source and 'i' in front of the expected
// lowercase string; for tr/az the expected uppercase string gets 'I' as well.
535 else if (condition.compareToIgnoreCase("After_I") == 0) {
536 strbuffer.insert(0, 'I');
537 lowerbuffer.insert(0, 'i');
539 if (locale != null) {
540 lang = locale.getLanguage();
542 if (lang.equals("tr") || lang.equals("az")) {
543 upperbuffer.insert(0, 'I');
// Write the padded strings back so the comparisons and messages below use them.
547 chstr[0] = strbuffer.toString();
548 chstr[1] = lowerbuffer.toString();
549 chstr[3] = upperbuffer.toString();
// No language token on the line: use the overloads without a locale argument.
550 if (locale == null) {
551 if (!UCharacter.toLowerCase(chstr[0]).equals(chstr[1])) {
553 errln("Fail: toLowerCase for character " +
554 Utility.escape(chstr[0]) + ", expected "
555 + Utility.escape(chstr[1]) + " but resulted in " +
556 Utility.escape(UCharacter.toLowerCase(chstr[0])));
558 if (!UCharacter.toUpperCase(chstr[0]).equals(chstr[3])) {
560 errln("Fail: toUpperCase for character " +
561 Utility.escape(chstr[0]) + ", expected "
562 + Utility.escape(chstr[3]) + " but resulted in " +
563 Utility.escape(UCharacter.toUpperCase(chstr[0])));
// Language token present: use the overloads that take the Locale.
567 if (!UCharacter.toLowerCase(locale, chstr[0]).equals(
570 errln("Fail: toLowerCase for character " +
571 Utility.escape(chstr[0]) + ", expected "
572 + Utility.escape(chstr[1]) + " but resulted in " +
573 Utility.escape(UCharacter.toLowerCase(locale,
576 if (!UCharacter.toUpperCase(locale, chstr[0]).equals(
579 errln("Fail: toUpperCase for character " +
580 Utility.escape(chstr[0]) + ", expected "
581 + Utility.escape(chstr[3]) + " but resulted in " +
582 Utility.escape(UCharacter.toUpperCase(locale,
// Checks single-code-point case conversion in three parts: (1) code points
// 0x2100..0x2137, minus a short exclusion list, must map to themselves under
// both toLowerCase and toUpperCase; (2) toLowerCase of each entry of the local
// upper[] table must equal the same-index entry of lower[]; (3) two ASCII
// sample strings are compared character by character.
// Mismatches are reported through errln.
595 public void TestUpperLower()
597 int upper[] = {0x0041, 0x0042, 0x00b2, 0x01c4, 0x01c6, 0x01c9, 0x01c8,
599 int lower[] = {0x0061, 0x0062, 0x00b2, 0x01c6, 0x01c6, 0x01c9, 0x01c9,
// Note the naming: upperTest holds the lowercase text and lowerTest the
// uppercase text; the checks below rely on exactly that.
601 String upperTest = "abcdefg123hij.?:klmno";
602 String lowerTest = "ABCDEFG123HIJ.?:KLMNO";
604 // Checks LetterLike Symbols which were previously a source of
605 // confusion [Bertrand A. D. 02/04/98]
606 for (int i = 0x2100; i < 0x2138; i ++) {
607 /* Unicode 5.0 adds lowercase U+214E (TURNED SMALL F) to U+2132 (TURNED CAPITAL F) */
// 0x2126, 0x212a, 0x212b and 0x2132 are exempt from the maps-to-itself check.
608 if (i != 0x2126 && i != 0x212a && i != 0x212b && i!=0x2132) {
609 if (i != UCharacter.toLowerCase(i)) { // itself
610 errln("Failed case conversion with itself: \\u"
611 + Utility.hex(i, 4));
613 if (i != UCharacter.toUpperCase(i)) {
614 errln("Failed case conversion with itself: \\u"
615 + Utility.hex(i, 4));
// Table check: only the upper -> lower direction is tested here.
619 for (int i = 0; i < upper.length; i ++) {
620 if (UCharacter.toLowerCase(upper[i]) != lower[i]) {
621 errln("FAILED UCharacter.tolower() for \\u"
622 + Utility.hex(upper[i], 4)
623 + " Expected \\u" + Utility.hex(lower[i], 4)
625 + Utility.hex(UCharacter.toLowerCase(upper[i]), 4));
628 logln("testing upper lower");
// The per-character checks form one if / else-if chain, so at most one failure
// is reported per index. The loop is bounded by upperTest.length() and also
// indexes lowerTest, so both strings must have the same length.
629 for (int i = 0; i < upperTest.length(); i ++) {
630 logln("testing to upper to lower");
631 if (UCharacter.isLetter(upperTest.charAt(i)) &&
632 !UCharacter.isLowerCase(upperTest.charAt(i))) {
633 errln("Failed isLowerCase test at \\u"
634 + Utility.hex(upperTest.charAt(i), 4));
636 else if (UCharacter.isLetter(lowerTest.charAt(i))
637 && !UCharacter.isUpperCase(lowerTest.charAt(i))) {
638 errln("Failed isUpperCase test at \\u"
639 + Utility.hex(lowerTest.charAt(i), 4));
641 else if (upperTest.charAt(i)
642 != UCharacter.toLowerCase(lowerTest.charAt(i))) {
643 errln("Failed case conversion from \\u"
644 + Utility.hex(lowerTest.charAt(i), 4) + " To \\u"
645 + Utility.hex(upperTest.charAt(i), 4));
647 else if (lowerTest.charAt(i)
648 != UCharacter.toUpperCase(upperTest.charAt(i))) {
649 errln("Failed case conversion : \\u"
650 + Utility.hex(upperTest.charAt(i), 4) + " To \\u"
651 + Utility.hex(lowerTest.charAt(i), 4));
// NOTE(review): the two messages below call Utility.hex without the width
// argument 4 that every other message in this method passes -- confirm the
// different overload is intended.
653 else if (upperTest.charAt(i)
654 != UCharacter.toLowerCase(upperTest.charAt(i))) {
655 errln("Failed case conversion with itself: \\u"
656 + Utility.hex(upperTest.charAt(i)));
658 else if (lowerTest.charAt(i)
659 != UCharacter.toUpperCase(lowerTest.charAt(i))) {
660 errln("Failed case conversion with itself: \\u"
661 + Utility.hex(lowerTest.charAt(i)));
664 logln("done testing upper Lower");
667 // private data members - test data --------------------------------------
// java.util.Locale constants passed to the locale-taking case mapping overloads.
669 private static final Locale TURKISH_LOCALE_ = new Locale("tr", "TR");
670 private static final Locale GERMAN_LOCALE_ = new Locale("de", "DE");
671 private static final Locale GREEK_LOCALE_ = new Locale("el", "GR");
672 private static final Locale ENGLISH_LOCALE_ = new Locale("en", "US");
673 private static final Locale LITHUANIAN_LOCALE_ = new Locale("lt", "LT");
// Parallel code point tables read by TestCharacter: CHARACTER_UPPER_[i] and
// CHARACTER_LOWER_[i] are expected case counterparts. Several entries (for
// example 0x00b1, 0x002e, 0x0000) are the same value in both tables. Both
// tables must keep the same length.
675 private static final int CHARACTER_UPPER_[] =
676 {0x41, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047,
677 0x00b1, 0x00b2, 0xb3, 0x0048, 0x0049, 0x004a, 0x002e,
678 0x003f, 0x003a, 0x004b, 0x004c, 0x4d, 0x004e, 0x004f,
679 0x01c4, 0x01c8, 0x000c, 0x0000};
680 private static final int CHARACTER_LOWER_[] =
681 {0x61, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067,
682 0x00b1, 0x00b2, 0xb3, 0x0068, 0x0069, 0x006a, 0x002e,
683 0x003f, 0x003a, 0x006b, 0x006c, 0x6d, 0x006e, 0x006f,
684 0x01c6, 0x01c9, 0x000c, 0x0000};
687 * CaseFolding.txt says about i and its cousins:
688 * 0049; C; 0069; # LATIN CAPITAL LETTER I
689 * 0049; T; 0131; # LATIN CAPITAL LETTER I
691 * 0130; F; 0069 0307; # LATIN CAPITAL LETTER I WITH DOT ABOVE
692 * 0130; T; 0069; # LATIN CAPITAL LETTER I WITH DOT ABOVE
694 * See CaseFolding.txt and the Unicode Standard for how to apply the case foldings.
// Single-code-point folding table read by TestFolding in groups of three.
696 private static final int FOLDING_SIMPLE_[] = {
697 // input, default, exclude special i
703 0xfb03, 0xfb03, 0xfb03,
704 0x1040e,0x10436,0x10436,
705 0x5ffff,0x5ffff,0x5ffff
// Full-string folding data read by TestFolding: FOLDING_MIXED_[k] is the input,
// FOLDING_DEFAULT_[k] the expected result of default folding and
// FOLDING_EXCLUDE_SPECIAL_I_[k] the expected result when special i handling is
// excluded.
707 private static final String FOLDING_MIXED_[] =
708 {"\u0061\u0042\u0130\u0049\u0131\u03d0\u00df\ufb03\ud93f\udfff",
709 "A\u00df\u00b5\ufb03\uD801\uDC0C\u0130\u0131"};
710 private static final String FOLDING_DEFAULT_[] =
711 {"\u0061\u0062\u0069\u0307\u0069\u0131\u03b2\u0073\u0073\u0066\u0066\u0069\ud93f\udfff",
712 "ass\u03bcffi\uD801\uDC34i\u0307\u0131"};
713 private static final String FOLDING_EXCLUDE_SPECIAL_I_[] =
714 {"\u0061\u0062\u0069\u0131\u0131\u03b2\u0073\u0073\u0066\u0066\u0069\ud93f\udfff",
715 "ass\u03bcffi\uD801\uDC34i\u0131"};
// Sample strings shared by TestUpper and TestLower. Each SHARED_UPPERCASE_* /
// SHARED_LOWERCASE_* value is either an input or the expected result of one of
// the checks in those methods.
719 private static final String SHARED_UPPERCASE_GREEK_ =
720 "\u0399\u0395\u03a3\u03a5\u03a3\u0020\u03a7\u03a1\u0399\u03a3\u03a4\u039f\u03a3";
724 private static final String SHARED_LOWERCASE_GREEK_ =
725 "\u03b9\u03b5\u03c3\u03c5\u03c2\u0020\u03c7\u03c1\u03b9\u03c3\u03c4\u03bf\u03c2";
// Expected result of lowercasing SHARED_UPPERCASE_ISTANBUL_ with the Turkish locale.
726 private static final String SHARED_LOWERCASE_TURKISH_ =
727 "\u0069\u0073\u0074\u0061\u006e\u0062\u0075\u006c\u002c\u0020\u006e\u006f\u0074\u0020\u0063\u006f\u006e\u0073\u0074\u0061\u006e\u0074\u0131\u006e\u006f\u0070\u006c\u0065\u0021";
// Expected result of uppercasing SHARED_LOWERCASE_TOPKAP_ with the Turkish locale.
728 private static final String SHARED_UPPERCASE_TURKISH_ =
729 "\u0054\u004f\u0050\u004b\u0041\u0050\u0049\u0020\u0050\u0041\u004c\u0041\u0043\u0045\u002c\u0020\u0130\u0053\u0054\u0041\u004e\u0042\u0055\u004c";
730 private static final String SHARED_UPPERCASE_ISTANBUL_ =
731 "\u0130STANBUL, NOT CONSTANTINOPLE!";
// Expected result of lowercasing SHARED_UPPERCASE_ISTANBUL_ without a locale argument.
732 private static final String SHARED_LOWERCASE_ISTANBUL_ =
733 "i\u0307stanbul, not constantinople!";
734 private static final String SHARED_LOWERCASE_TOPKAP_ =
735 "topkap\u0131 palace, istanbul";
// Expected result of uppercasing SHARED_LOWERCASE_TOPKAP_ without a locale argument.
736 private static final String SHARED_UPPERCASE_TOPKAP_ =
737 "TOPKAPI PALACE, ISTANBUL";
// Input for the German uppercasing check; note that it starts with a capital S.
738 private static final String SHARED_LOWERCASE_GERMAN_ =
739 "S\u00FC\u00DFmayrstra\u00DFe";
740 private static final String SHARED_UPPERCASE_GERMAN_ =
741 "S\u00DCSSMAYRSTRASSE";
// TestUpper data: one input and the expected uppercase results without a
// locale argument (UPPER_ROOT_) and with the Turkish locale (UPPER_TURKISH_).
743 private static final String UPPER_BEFORE_ =
744 "\u0061\u0042\u0069\u03c2\u00df\u03c3\u002f\ufb03\ufb03\ufb03\ud93f\udfff";
745 private static final String UPPER_ROOT_ =
746 "\u0041\u0042\u0049\u03a3\u0053\u0053\u03a3\u002f\u0046\u0046\u0049\u0046\u0046\u0049\u0046\u0046\u0049\ud93f\udfff";
747 private static final String UPPER_TURKISH_ =
748 "\u0041\u0042\u0130\u03a3\u0053\u0053\u03a3\u002f\u0046\u0046\u0049\u0046\u0046\u0049\u0046\u0046\u0049\ud93f\udfff";
// Short uppercasing sample: input and expected result.
749 private static final String UPPER_MINI_ = "\u00df\u0061";
750 private static final String UPPER_MINI_UPPER_ = "\u0053\u0053\u0041";
// TestLower data: one input and the expected lowercase results without a
// locale argument (LOWER_ROOT_) and with the Turkish locale (LOWER_TURKISH_).
752 private static final String LOWER_BEFORE_ =
753 "\u0061\u0042\u0049\u03a3\u00df\u03a3\u002f\ud93f\udfff";
754 private static final String LOWER_ROOT_ =
755 "\u0061\u0062\u0069\u03c3\u00df\u03c2\u002f\ud93f\udfff";
756 private static final String LOWER_TURKISH_ =
757 "\u0061\u0062\u0131\u03c3\u00df\u03c2\u002f\ud93f\udfff";
760 * each item is an array with input string, result string, locale ID, break iterator, options
761 * the break iterator is specified as an int, same as in BreakIterator.KIND_*:
762 * 0=KIND_CHARACTER 1=KIND_WORD 2=KIND_LINE 3=KIND_SENTENCE 4=KIND_TITLE -1=default (NULL=words) -2=no breaks (.*)
763 * options: T=U_FOLD_CASE_EXCLUDE_SPECIAL_I L=U_TITLECASE_NO_LOWERCASE A=U_TITLECASE_NO_BREAK_ADJUSTMENT
764 * see ICU4C source/test/testdata/casing.txt
// Titlecasing table consumed by TestTitle in groups of five strings: input,
// expected result, locale ID, break iterator kind (parsed with
// Integer.parseInt) and an options string in which 'L' and 'A' select
// TITLECASE_NO_LOWERCASE and TITLECASE_NO_BREAK_ADJUSTMENT.
766 private static final String TITLE_DATA_[] = {
767 "\u0061\u0042\u0020\u0069\u03c2\u0020\u00df\u03c3\u002f\ufb03\ud93f\udfff",
768 "\u0041\u0042\u0020\u0049\u03a3\u0020\u0053\u0073\u03a3\u002f\u0046\u0066\u0069\ud93f\udfff",
773 "\u0061\u0042\u0020\u0069\u03c2\u0020\u00df\u03c3\u002f\ufb03\ud93f\udfff",
774 "\u0041\u0062\u0020\u0049\u03c2\u0020\u0053\u0073\u03c3\u002f\u0046\u0066\u0069\ud93f\udfff",
779 "\u02bbaMeLikA huI P\u016b \u02bb\u02bb\u02bbiA", "\u02bbAmelika Hui P\u016b \u02bb\u02bb\u02bbIa", // titlecase first _cased_ letter, j4933
784 " tHe QUIcK bRoWn", " The Quick Brown",
789 "\u01c4\u01c5\u01c6\u01c7\u01c8\u01c9\u01ca\u01cb\u01cc",
790 "\u01c5\u01c5\u01c5\u01c8\u01c8\u01c8\u01cb\u01cb\u01cb", // UBRK_CHARACTER
795 "\u01c9ubav ljubav", "\u01c8ubav Ljubav", // Lj vs. L+j
800 "'oH dOn'T tItLeCaSe AfTeR lEtTeR+'", "'Oh Don't Titlecase After Letter+'",
805 "a \u02bbCaT. A \u02bbdOg! \u02bbeTc.",
806 "A \u02bbCat. A \u02bbDog! \u02bbEtc.",
811 "a \u02bbCaT. A \u02bbdOg! \u02bbeTc.",
812 "A \u02bbcat. A \u02bbdog! \u02bbetc.",
815 "A", // U_TITLECASE_NO_BREAK_ADJUSTMENT
817 "a \u02bbCaT. A \u02bbdOg! \u02bbeTc.",
818 "A \u02bbCaT. A \u02bbdOg! \u02bbETc.",
821 "L", // UBRK_SENTENCE and U_TITLECASE_NO_LOWERCASE
824 "\u02bbcAt! \u02bbeTc.",
825 "\u02bbCat! \u02bbetc.",
828 "", // -2=Trivial break iterator
830 "\u02bbcAt! \u02bbeTc.",
831 "\u02bbcat! \u02bbetc.",
834 "A", // U_TITLECASE_NO_BREAK_ADJUSTMENT
836 "\u02bbcAt! \u02bbeTc.",
837 "\u02bbCAt! \u02bbeTc.",
840 "L", // U_TITLECASE_NO_LOWERCASE
842 "\u02bbcAt! \u02bbeTc.",
843 "\u02bbcAt! \u02bbeTc.",
846 "AL", // Both options
848 // Test case for ticket #7251: UCharacter.toTitleCase() throws OutOfMemoryError
849 // when TITLECASE_NO_LOWERCASE encounters a single-letter word
854 "L" // U_TITLECASE_NO_LOWERCASE
859 * <p>basic string, lower string, upper string (entries come in groups of three; no title string is stored)</p>
// Table read by TestSpecial in groups of three strings: input, expected
// lowercase, expected uppercase.
861 private static final String SPECIAL_DATA_[] = {
862 UTF16.valueOf(0x1043C) + UTF16.valueOf(0x10414),
863 UTF16.valueOf(0x1043C) + UTF16.valueOf(0x1043C),
864 UTF16.valueOf(0x10414) + UTF16.valueOf(0x10414),
865 "ab'cD \uFB00i\u0131I\u0130 \u01C7\u01C8\u01C9 " +
866 UTF16.valueOf(0x1043C) + UTF16.valueOf(0x10414),
867 "ab'cd \uFB00i\u0131ii\u0307 \u01C9\u01C9\u01C9 " +
868 UTF16.valueOf(0x1043C) + UTF16.valueOf(0x1043C),
869 "AB'CD FFIII\u0130 \u01C7\u01C7\u01C7 " +
870 UTF16.valueOf(0x10414) + UTF16.valueOf(0x10414),
871 // sigmas followed/preceded by cased letters
872 "i\u0307\u03a3\u0308j \u0307\u03a3\u0308j i\u00ad\u03a3\u0308 \u0307\u03a3\u0308 ",
873 "i\u0307\u03c3\u0308j \u0307\u03c3\u0308j i\u00ad\u03c2\u0308 \u0307\u03c3\u0308 ",
874 "I\u0307\u03a3\u0308J \u0307\u03a3\u0308J I\u00ad\u03a3\u0308 \u0307\u03a3\u0308 "
// Locales iterated by TestSpecial; a null entry selects that method's
// locale == null path.
876 private static final Locale SPECIAL_LOCALES_[] = {
// Dotted / dotless i input and its expected lowercase form for the Turkish and
// German locales (second part of TestSpecial).
882 private static final String SPECIAL_DOTTED_ =
883 "I \u0130 I\u0307 I\u0327\u0307 I\u0301\u0307 I\u0327\u0307\u0301";
884 private static final String SPECIAL_DOTTED_LOWER_TURKISH_ =
885 "\u0131 i i i\u0327 \u0131\u0301\u0307 i\u0327\u0301";
886 private static final String SPECIAL_DOTTED_LOWER_GERMAN_ =
887 "i i\u0307 i\u0307 i\u0327\u0307 i\u0301\u0307 i\u0327\u0307\u0301";
// Dot-above input and its expected uppercase form for the Lithuanian and
// German locales.
888 private static final String SPECIAL_DOT_ABOVE_ =
889 "a\u0307 \u0307 i\u0307 j\u0327\u0307 j\u0301\u0307";
890 private static final String SPECIAL_DOT_ABOVE_UPPER_LITHUANIAN_ =
891 "A\u0307 \u0307 I J\u0327 J\u0301\u0307";
892 private static final String SPECIAL_DOT_ABOVE_UPPER_GERMAN_ =
893 "A\u0307 \u0307 I\u0307 J\u0327\u0307 J\u0301\u0307";
// Uppercase input and its expected lowercase form for the Lithuanian and
// German locales.
894 private static final String SPECIAL_DOT_ABOVE_UPPER_ =
895 "I I\u0301 J J\u0301 \u012e \u012e\u0301 \u00cc\u00cd\u0128";
896 private static final String SPECIAL_DOT_ABOVE_LOWER_LITHUANIAN_ =
897 "i i\u0307\u0301 j j\u0307\u0301 \u012f \u012f\u0307\u0301 i\u0307\u0300i\u0307\u0301i\u0307\u0303";
898 private static final String SPECIAL_DOT_ABOVE_LOWER_GERMAN_ =
899 "i i\u0301 j j\u0301 \u012f \u012f\u0301 \u00ec\u00ed\u0129";
901 // private methods -------------------------------------------------------
904 * Converting the hex numbers represented between ';' to Unicode strings
905 * @param str string to break up into Unicode strings
906 * @return array of Unicode strings ending with a null
908 private String[] getUnicodeStrings(String str)
910 List<String> v = new ArrayList<String>(10);
912 for (int casecount = 4; casecount > 0; casecount --) {
913 int end = str.indexOf("; ", start);
914 String casestr = str.substring(start, end);
915 StringBuffer buffer = new StringBuffer();
917 while (spaceoffset < casestr.length()) {
918 int nextspace = casestr.indexOf(' ', spaceoffset);
919 if (nextspace == -1) {
920 nextspace = casestr.length();
922 buffer.append((char)Integer.parseInt(
923 casestr.substring(spaceoffset, nextspace),
925 spaceoffset = nextspace + 1;
928 v.add(buffer.toString());
930 int comments = str.indexOf(" #", start);
931 if (comments != -1 && comments != start) {
932 if (str.charAt(comments - 1) == ';') {
935 String conditions = str.substring(start, comments);
937 while (offset < conditions.length()) {
938 int spaceoffset = conditions.indexOf(' ', offset);
939 if (spaceoffset == -1) {
940 spaceoffset = conditions.length();
942 v.add(conditions.substring(offset, spaceoffset));
943 offset = spaceoffset + 1;
947 String result[] = new String[size];
948 for (int i = 0; i < size; i ++) {
949 result[i] = v.get(i);