/*
 *******************************************************************************
 * Copyright (C) 2008, International Business Machines Corporation and         *
 * others. All Rights Reserved.                                                *
 *******************************************************************************
 */
\r
7 package com.ibm.icu.dev.test.localespi;
\r
9 import java.text.BreakIterator;
\r
10 import java.util.Locale;
\r
12 import com.ibm.icu.dev.test.TestFmwk;
\r
14 public class BreakIteratorTest extends TestFmwk {
\r
15 public static void main(String[] args) throws Exception {
\r
16 new BreakIteratorTest().run(args);
\r
19 private static final int CHARACTER_BRK = 0;
\r
20 private static final int WORD_BRK = 1;
\r
21 private static final int LINE_BRK = 2;
\r
22 private static final int SENTENCE_BRK = 3;
\r
25 * Check if getInstance returns the ICU implementation.
\r
27 public void TestGetInstance() {
\r
28 for (Locale loc : BreakIterator.getAvailableLocales()) {
\r
29 if (TestUtil.isProblematicIBMLocale(loc)) {
\r
30 logln("Skipped " + loc);
\r
33 checkGetInstance(CHARACTER_BRK, loc);
\r
34 checkGetInstance(WORD_BRK, loc);
\r
35 checkGetInstance(LINE_BRK, loc);
\r
36 checkGetInstance(SENTENCE_BRK, loc);
\r
40 private void checkGetInstance(int type, Locale loc) {
\r
41 BreakIterator brkitr = null;
\r
42 String method = null;
\r
45 brkitr = BreakIterator.getCharacterInstance(loc);
\r
46 method = "getCharacterInstance";
\r
49 brkitr = BreakIterator.getWordInstance(loc);
\r
50 method = "getWordInstance";
\r
53 brkitr = BreakIterator.getLineInstance(loc);
\r
54 method = "getLineInstance";
\r
57 brkitr = BreakIterator.getSentenceInstance(loc);
\r
58 method = "getSentenceInstance";
\r
61 errln("FAIL: Unknown break iterator type");
\r
65 boolean isIcuImpl = (brkitr instanceof com.ibm.icu.impl.jdkadapter.BreakIteratorICU);
\r
67 if (TestUtil.isICUExtendedLocale(loc)) {
\r
69 errln("FAIL: " + method + " returned JDK BreakIterator for locale " + loc);
\r
73 logln("INFO: " + method + " returned ICU BreakIterator for locale " + loc);
\r
75 BreakIterator brkitrIcu = null;
\r
76 Locale iculoc = TestUtil.toICUExtendedLocale(loc);
\r
79 brkitrIcu = BreakIterator.getCharacterInstance(iculoc);
\r
82 brkitrIcu = BreakIterator.getWordInstance(iculoc);
\r
85 brkitrIcu = BreakIterator.getLineInstance(iculoc);
\r
88 brkitrIcu = BreakIterator.getSentenceInstance(iculoc);
\r
92 if (!brkitr.equals(brkitrIcu)) {
\r
93 // BreakIterator.getXXXInstance returns a cached BreakIterator instance.
\r
94 // BreakIterator does not override Object#equals, so the result may not be
\r
96 // logln("INFO: " + method + " returned ICU BreakIterator for locale " + loc
\r
97 // + ", but different from the one for locale " + iculoc);
\r
100 if (!(brkitrIcu instanceof com.ibm.icu.impl.jdkadapter.BreakIteratorICU)) {
\r
101 errln("FAIL: " + method + " returned JDK BreakIterator for locale " + iculoc);
\r
108 * Testing the behavior of text break between ICU instance and its
\r
109 * equivalent created via the Locale SPI framework.
\r
111 public void TestICUEquivalent() {
\r
112 Locale[] TEST_LOCALES = {
\r
113 new Locale("en", "US"),
\r
114 new Locale("fr", "FR"),
\r
115 new Locale("th", "TH"),
\r
116 new Locale("zh", "CN"),
\r
119 String[] TEST_DATA = {
\r
120 "International Components for Unicode (ICU) is an open source project of mature "
\r
121 + "C/C++ and Java libraries for Unicode support, software internationalization and "
\r
122 + "software globalization. ICU is widely portable to many operating systems and "
\r
123 + "environments. It gives applications the same results on all platforms and between "
\r
124 + "C/C++ and Java software. The ICU project is an open source development project "
\r
125 + "that is sponsored, supported and used by IBM and many other companies.",
\r
127 "L'International Components for Unicode (ICU) est un projet open source qui fourni "
\r
128 + "des biblioth\u00e8ques pour les langages informatique C/C++ et Java pour supporter "
\r
129 + "Unicode, l'internationalisation et la mondialisation des logiciels. ICU est largement "
\r
130 + "portable vers beaucoup de syst\u00e8mes d'exploitations et d'environnements. Il "
\r
131 + "donne aux applications les m\u00eames comportements et r\u00e9sultats sur toutes "
\r
132 + "les plateformes et entre les logiciels C/C++ et Java. Le projet ICU est un projet "
\r
133 + "dont les code sources sont disponibles qui est sponsoris\u00e9, support\u00e9 et "
\r
134 + "utilis\u00e9 par IBM et beaucoup d'autres entreprises.",
\r
136 "\u5728IBM\u7b49\u4f01\u696d\u4e2d\uff0c\u56fd\u9645\u5316\u7ecf\u5e38\u7b80\u5199"
\r
137 + "\u4e3aI18N (\u6216i18n\u6216I18n)\uff0c\u5176\u4e2d18\u4ee3\u8868\u4e86\u4e2d\u95f4"
\r
138 + "\u7701\u7565\u768418\u4e2a\u5b57\u6bcd\uff1b\u800c\u201c\u672c\u5730\u5316\u201d"
\r
139 + "\u540c\u53ef\u7b80\u5199\u4e3al10n\u3002\u9019\u4e24\u4e2a\u6982\u5ff5\u6709\u65f6"
\r
140 + "\u5408\u79f0\u5168\u7403\u5316\uff08g11n\uff09\uff0c\u4f46\u662f\u5168\u7403\u5316"
\r
141 + "\u7684\u6db5\u4e49\u66f4\u4e3a\u4e00\u822c\u5316\u3002\u53e6\u5916\u5076\u5c14\u4f1a"
\r
142 + "\u51fa\u73b0\u201cp13n\u201d\uff0c\u4ee3\u8868\u4e2a\u4eba\u5316\uff08personalization"
\r
145 "\u0e01\u0e23\u0e38\u0e07\u0e40\u0e17\u0e1e\u0e21\u0e2b\u0e32\u0e19\u0e04\u0e23"
\r
146 + "\u0e43\u0e19\u0e1b\u0e31\u0e08\u0e08\u0e38\u0e1a\u0e31\u0e19\u0e40\u0e1b\u0e47"
\r
147 + "\u0e19\u0e28\u0e39\u0e19\u0e22\u0e4c\u0e01\u0e25\u0e32\u0e07\u0e01\u0e32\u0e23"
\r
148 + "\u0e1b\u0e01\u0e04\u0e23\u0e2d\u0e07 \u0e01\u0e32\u0e23\u0e28\u0e36\u0e01\u0e29"
\r
149 + "\u0e32 \u0e01\u0e32\u0e23\u0e04\u0e21\u0e19\u0e32\u0e04\u0e21\u0e02\u0e19\u0e2a"
\r
150 + "\u0e48\u0e07 \u0e01\u0e32\u0e23\u0e40\u0e07\u0e34\u0e19\u0e01\u0e32\u0e23\u0e18"
\r
151 + "\u0e19\u0e32\u0e04\u0e32\u0e23 \u0e01\u0e32\u0e23\u0e1e\u0e32\u0e13\u0e34\u0e0a"
\r
152 + "\u0e22\u0e4c \u0e01\u0e32\u0e23\u0e2a\u0e37\u0e48\u0e2d\u0e2a\u0e32\u0e23 \u0e2f"
\r
153 + "\u0e25\u0e2f \u0e42\u0e14\u0e22\u0e21\u0e35\u0e1e\u0e37\u0e49\u0e19\u0e17\u0e35"
\r
154 + "\u0e48\u0e17\u0e31\u0e49\u0e07\u0e2b\u0e21\u0e14 1,562.2 \u0e15\u0e32\u0e23\u0e32"
\r
155 + "\u0e07\u0e01\u0e34\u0e42\u0e25\u0e40\u0e21\u0e15\u0e23 \u0e1e\u0e34\u0e01\u0e31"
\r
156 + "\u0e14\u0e17\u0e32\u0e07\u0e20\u0e39\u0e21\u0e34\u0e28\u0e32\u0e2a\u0e15\u0e23"
\r
157 + "\u0e4c\u0e04\u0e37\u0e2d \u0e25\u0e30\u0e15\u0e34\u0e08\u0e39\u0e14 13\u00b0 45"
\r
158 + "\u2019 \u0e40\u0e2b\u0e19\u0e37\u0e2d \u0e25\u0e2d\u0e07\u0e08\u0e34\u0e08\u0e39"
\r
159 + "\u0e14 100\u00b0 31\u2019 \u0e15\u0e30\u0e27\u0e31\u0e19\u0e2d\u0e2d\u0e01"
\r
162 BreakIterator[] jdkBrkItrs = new BreakIterator[4];
\r
163 com.ibm.icu.text.BreakIterator[] icuBrkItrs = new com.ibm.icu.text.BreakIterator[4];
\r
165 for (Locale loc : TEST_LOCALES) {
\r
166 Locale iculoc = TestUtil.toICUExtendedLocale(loc);
\r
168 jdkBrkItrs[0] = BreakIterator.getCharacterInstance(iculoc);
\r
169 jdkBrkItrs[1] = BreakIterator.getWordInstance(iculoc);
\r
170 jdkBrkItrs[2] = BreakIterator.getLineInstance(iculoc);
\r
171 jdkBrkItrs[3] = BreakIterator.getSentenceInstance(iculoc);
\r
173 icuBrkItrs[0] = com.ibm.icu.text.BreakIterator.getCharacterInstance(iculoc);
\r
174 icuBrkItrs[1] = com.ibm.icu.text.BreakIterator.getWordInstance(iculoc);
\r
175 icuBrkItrs[2] = com.ibm.icu.text.BreakIterator.getLineInstance(iculoc);
\r
176 icuBrkItrs[3] = com.ibm.icu.text.BreakIterator.getSentenceInstance(iculoc);
\r
178 for (String text : TEST_DATA) {
\r
179 for (int i = 0; i < 4; i++) {
\r
180 compareBreaks(text, jdkBrkItrs[i], icuBrkItrs[i]);
\r
186 private void compareBreaks(String text, BreakIterator jdkBrk, com.ibm.icu.text.BreakIterator icuBrk) {
\r
187 jdkBrk.setText(text);
\r
188 icuBrk.setText(text);
\r
191 int jidx = jdkBrk.first();
\r
192 int iidx = icuBrk.first();
\r
193 if (jidx != iidx) {
\r
194 errln("FAIL: Different first boundaries (jdk=" + jidx + ",icu=" + iidx + ") for text:\n" + text);
\r
197 jidx = jdkBrk.next();
\r
198 iidx = icuBrk.next();
\r
199 if (jidx != iidx) {
\r
200 errln("FAIL: Different boundaries (jdk=" + jidx + ",icu=" + iidx + "direction=forward) for text:\n" + text);
\r
202 if (jidx == BreakIterator.DONE) {
\r
208 jidx = jdkBrk.last();
\r
209 iidx = jdkBrk.last();
\r
210 if (jidx != iidx) {
\r
211 errln("FAIL: Different last boundaries (jdk=" + jidx + ",icu=" + iidx + ") for text:\n" + text);
\r
214 jidx = jdkBrk.previous();
\r
215 iidx = icuBrk.previous();
\r
216 if (jidx != iidx) {
\r
217 errln("FAIL: Different boundaries (jdk=" + jidx + ",icu=" + iidx + "direction=backward) for text:\n" + text);
\r
219 if (jidx == BreakIterator.DONE) {
\r