/*
 *******************************************************************************
 * Copyright (C) 1996-2009, International Business Machines Corporation and    *
 * others. All Rights Reserved.                                                *
 *******************************************************************************
 */
\r
7 package com.ibm.icu.dev.test.rbbi;
\r
// Regression testing of RuleBasedBreakIterator
//
// TODO: These tests should be mostly retired.
//       Much of the test data that was originally here was removed when the RBBI rules
//       were updated to match the Unicode boundary TRs, and the data was found to be invalid.
//       Much of the remaining data has been moved into the rbbitst.txt test data file,
//       which is common between ICU4C and ICU4J. The remaining test data should also be moved,
//       or simply retired if it is no longer interesting.
\r
17 import com.ibm.icu.dev.test.*;
\r
18 import com.ibm.icu.text.RuleBasedBreakIterator;
\r
19 import com.ibm.icu.text.BreakIterator;
\r
20 import com.ibm.icu.util.ULocale;
\r
22 import java.util.Vector;
\r
24 public class RBBITest extends TestFmwk
\r
27 public static void main(String[] args) throws Exception {
\r
28 new RBBITest().run(args);
\r
31 public RBBITest() {
\r
34 private static final String halfNA = "\u0928\u094d\u200d"; /*halfform NA = devanigiri NA + virama(supresses inherent vowel)+ zero width joiner */
\r
37 // tests default rules based character iteration.
\r
38 // Builds a new iterator from the source rules in the default (prebuilt) iterator.
\r
40 public void TestDefaultRuleBasedCharacterIteration(){
\r
41 RuleBasedBreakIterator rbbi=(RuleBasedBreakIterator)BreakIterator.getCharacterInstance();
\r
42 logln("Testing the RBBI for character iteration by using default rules");
\r
44 //fetch the rules used to create the above RuleBasedBreakIterator
\r
45 String defaultRules=rbbi.toString();
\r
47 RuleBasedBreakIterator charIterDefault=null;
\r
49 charIterDefault = new RuleBasedBreakIterator(defaultRules);
\r
50 }catch(IllegalArgumentException iae){
\r
51 errln("ERROR: failed construction in TestDefaultRuleBasedCharacterIteration()"+ iae.toString());
\r
54 Vector chardata = new Vector();
\r
55 chardata.addElement("H");
\r
56 chardata.addElement("e");
\r
57 chardata.addElement("l");
\r
58 chardata.addElement("l");
\r
59 chardata.addElement("o");
\r
60 chardata.addElement("e\u0301"); //acuteE
\r
61 chardata.addElement("&");
\r
62 chardata.addElement("e\u0303"); //tildaE
\r
63 //devanagiri characters for Hindi support
\r
64 chardata.addElement("\u0906"); //devanagiri AA
\r
65 //chardata.addElement("\u093e\u0901"); //devanagiri vowelsign AA+ chandrabindhu
\r
66 chardata.addElement("\u0916\u0947"); //devanagiri KHA+vowelsign E
\r
67 chardata.addElement("\u0938\u0941\u0902"); //devanagiri SA+vowelsign U + anusvara(bindu)
\r
68 chardata.addElement("\u0926"); //devanagiri consonant DA
\r
69 chardata.addElement("\u0930"); //devanagiri consonant RA
\r
70 // chardata.addElement("\u0939\u094c"); //devanagiri HA+vowel sign AI
\r
71 chardata.addElement("\u0964"); //devanagiri danda
\r
72 //end hindi characters
\r
73 chardata.addElement("A\u0302"); // circumflexA
\r
74 chardata.addElement("i\u0301"); // acuteBelowI
\r
75 // conjoining jamo...
\r
76 chardata.addElement("\u1109\u1161\u11bc");
\r
77 chardata.addElement("\u1112\u1161\u11bc");
\r
78 chardata.addElement("\n");
\r
79 chardata.addElement("\r\n"); // keep CRLF sequences together
\r
80 chardata.addElement("S\u0300"); //graveS
\r
81 chardata.addElement("i\u0301"); // acuteBelowI
\r
82 chardata.addElement("!");
\r
84 // What follows is a string of Korean characters (I found it in the Yellow Pages
\r
85 // ad for the Korean Presbyterian Church of San Francisco, and I hope I transcribed
\r
86 // it correctly), first as precomposed syllables, and then as conjoining jamo.
\r
87 // Both sequences should be semantically identical and break the same way.
\r
88 // precomposed syllables...
\r
89 chardata.addElement("\uc0c1");
\r
90 chardata.addElement("\ud56d");
\r
91 chardata.addElement(" ");
\r
92 chardata.addElement("\ud55c");
\r
93 chardata.addElement("\uc778");
\r
94 chardata.addElement(" ");
\r
95 chardata.addElement("\uc5f0");
\r
96 chardata.addElement("\ud569");
\r
97 chardata.addElement(" ");
\r
98 chardata.addElement("\uc7a5");
\r
99 chardata.addElement("\ub85c");
\r
100 chardata.addElement("\uad50");
\r
101 chardata.addElement("\ud68c");
\r
102 chardata.addElement(" ");
\r
103 // conjoining jamo...
\r
104 chardata.addElement("\u1109\u1161\u11bc");
\r
105 chardata.addElement("\u1112\u1161\u11bc");
\r
106 chardata.addElement(" ");
\r
107 chardata.addElement("\u1112\u1161\u11ab");
\r
108 chardata.addElement("\u110b\u1175\u11ab");
\r
109 chardata.addElement(" ");
\r
110 chardata.addElement("\u110b\u1167\u11ab");
\r
111 chardata.addElement("\u1112\u1161\u11b8");
\r
112 chardata.addElement(" ");
\r
113 chardata.addElement("\u110c\u1161\u11bc");
\r
114 chardata.addElement("\u1105\u1169");
\r
115 chardata.addElement("\u1100\u116d");
\r
116 chardata.addElement("\u1112\u116c");
\r
119 generalIteratorTest(charIterDefault, chardata);
\r
123 public void TestDefaultRuleBasedWordIteration(){
\r
124 logln("Testing the RBBI for word iteration using default rules");
\r
125 RuleBasedBreakIterator rbbi=(RuleBasedBreakIterator)BreakIterator.getWordInstance();
\r
126 //fetch the rules used to create the above RuleBasedBreakIterator
\r
127 String defaultRules=rbbi.toString();
\r
129 RuleBasedBreakIterator wordIterDefault=null;
\r
131 wordIterDefault = new RuleBasedBreakIterator(defaultRules);
\r
132 }catch(IllegalArgumentException iae){
\r
133 errln("ERROR: failed construction in TestDefaultRuleBasedWordIteration() -- custom rules"+ iae.toString());
\r
136 Vector worddata = new Vector();
\r
137 worddata.addElement ("Write");
\r
138 worddata.addElement (" ");
\r
139 worddata.addElement ("wordrules");
\r
140 worddata.addElement (".");
\r
141 worddata.addElement(" ");
\r
142 //worddata.addElement("alpha-beta-gamma");
\r
143 worddata.addElement(" ");
\r
144 worddata.addElement("\u092f\u0939");
\r
145 worddata.addElement(" ");
\r
146 worddata.addElement("\u0939\u093f" + halfNA + "\u0926\u0940");
\r
147 worddata.addElement(" ");
\r
148 worddata.addElement("\u0939\u0948");
\r
149 // worddata.addElement("\u0964"); //danda followed by a space
\r
150 worddata.addElement(" ");
\r
151 worddata.addElement("\u0905\u093e\u092a");
\r
152 worddata.addElement(" ");
\r
153 worddata.addElement("\u0938\u093f\u0916\u094b\u0917\u0947");
\r
154 worddata.addElement("?");
\r
155 worddata.addElement(" ");
\r
156 worddata.addElement("\r");
\r
157 worddata.addElement("It's");
\r
158 worddata.addElement(" ");
\r
159 // worddata.addElement("$30.10");
\r
160 worddata.addElement(" ");
\r
161 worddata.addElement(" ");
\r
162 worddata.addElement("Badges");
\r
163 worddata.addElement("?");
\r
164 worddata.addElement(" ");
\r
165 worddata.addElement("BADGES");
\r
166 worddata.addElement("!");
\r
167 worddata.addElement("1000,233,456.000");
\r
168 worddata.addElement(" ");
\r
170 generalIteratorTest(wordIterDefault, worddata);
\r
172 // private static final String kParagraphSeparator = "\u2029";
\r
173 private static final String kLineSeparator = "\u2028";
\r
175 public void TestDefaultRuleBasedSentenceIteration(){
\r
176 logln("Testing the RBBI for sentence iteration using default rules");
\r
177 RuleBasedBreakIterator rbbi=(RuleBasedBreakIterator)BreakIterator.getSentenceInstance();
\r
179 //fetch the rules used to create the above RuleBasedBreakIterator
\r
180 String defaultRules=rbbi.toString();
\r
181 RuleBasedBreakIterator sentIterDefault=null;
\r
183 sentIterDefault = new RuleBasedBreakIterator(defaultRules);
\r
184 }catch(IllegalArgumentException iae){
\r
185 errln("ERROR: failed construction in TestDefaultRuleBasedSentenceIteration()" + iae.toString());
\r
188 Vector sentdata = new Vector();
\r
189 sentdata.addElement("(This is it.) ");
\r
190 sentdata.addElement("Testing the sentence iterator. ");
\r
191 sentdata.addElement("\"This isn\'t it.\" ");
\r
192 sentdata.addElement("Hi! ");
\r
193 sentdata.addElement("This is a simple sample sentence. ");
\r
194 sentdata.addElement("(This is it.) ");
\r
195 sentdata.addElement("This is a simple sample sentence. ");
\r
196 sentdata.addElement("\"This isn\'t it.\" ");
\r
197 sentdata.addElement("Hi! ");
\r
198 sentdata.addElement("This is a simple sample sentence. ");
\r
199 sentdata.addElement("It does not have to make any sense as you can see. ");
\r
200 sentdata.addElement("Nel mezzo del cammin di nostra vita, mi ritrovai in una selva oscura. ");
\r
201 sentdata.addElement("Che la dritta via aveo smarrita. ");
\r
202 generalIteratorTest(sentIterDefault, sentdata);
\r
205 public void TestDefaultRuleBasedLineIteration(){
\r
206 logln("Testing the RBBI for line iteration using default rules");
\r
207 RuleBasedBreakIterator rbbi=(RuleBasedBreakIterator)RuleBasedBreakIterator.getLineInstance();
\r
208 //fetch the rules used to create the above RuleBasedBreakIterator
\r
209 String defaultRules=rbbi.toString();
\r
210 RuleBasedBreakIterator lineIterDefault=null;
\r
212 lineIterDefault = new RuleBasedBreakIterator(defaultRules);
\r
213 }catch(IllegalArgumentException iae){
\r
214 errln("ERROR: failed construction in TestDefaultRuleBasedLineIteration()" + iae.toString());
\r
217 Vector linedata = new Vector();
\r
218 linedata.addElement("Multi-");
\r
219 linedata.addElement("Level ");
\r
220 linedata.addElement("example ");
\r
221 linedata.addElement("of ");
\r
222 linedata.addElement("a ");
\r
223 linedata.addElement("semi-");
\r
224 linedata.addElement("idiotic ");
\r
225 linedata.addElement("non-");
\r
226 linedata.addElement("sensical ");
\r
227 linedata.addElement("(non-");
\r
228 linedata.addElement("important) ");
\r
229 linedata.addElement("sentence. ");
\r
231 linedata.addElement("Hi ");
\r
232 linedata.addElement("Hello ");
\r
233 linedata.addElement("How\n");
\r
234 linedata.addElement("are\r");
\r
235 linedata.addElement("you" + kLineSeparator);
\r
236 linedata.addElement("fine.\t");
\r
237 linedata.addElement("good. ");
\r
239 linedata.addElement("Now\r");
\r
240 linedata.addElement("is\n");
\r
241 linedata.addElement("the\r\n");
\r
242 linedata.addElement("time\n");
\r
243 linedata.addElement("\r");
\r
244 linedata.addElement("for\r");
\r
245 linedata.addElement("\r");
\r
246 linedata.addElement("all");
\r
248 generalIteratorTest(lineIterDefault, linedata);
\r
//=========================================================================
// general test subroutines
//=========================================================================
\r
257 private void generalIteratorTest(RuleBasedBreakIterator rbbi, Vector expectedResult){
\r
258 StringBuffer buffer = new StringBuffer();
\r
260 for (int i = 0; i < expectedResult.size(); i++) {
\r
261 text = (String)expectedResult.elementAt(i);
\r
262 buffer.append(text);
\r
264 text = buffer.toString();
\r
265 if (rbbi == null) {
\r
266 errln("null iterator, test skipped.");
\r
270 rbbi.setText(text);
\r
272 Vector nextResults = _testFirstAndNext(rbbi, text);
\r
273 Vector previousResults = _testLastAndPrevious(rbbi, text);
\r
275 logln("comparing forward and backward...");
\r
276 int errs = getErrorCount();
\r
277 compareFragmentLists("forward iteration", "backward iteration", nextResults,
\r
279 if (getErrorCount() == errs) {
\r
280 logln("comparing expected and actual...");
\r
281 compareFragmentLists("expected result", "actual result", expectedResult,
\r
285 int[] boundaries = new int[expectedResult.size() + 3];
\r
286 boundaries[0] = RuleBasedBreakIterator.DONE;
\r
288 for (int i = 0; i < expectedResult.size(); i++)
\r
289 boundaries[i + 2] = boundaries[i + 1] + ((String)expectedResult.elementAt(i)).length();
\r
291 boundaries[boundaries.length - 1] = RuleBasedBreakIterator.DONE;
\r
293 _testFollowing(rbbi, text, boundaries);
\r
294 _testPreceding(rbbi, text, boundaries);
\r
295 _testIsBoundary(rbbi, text, boundaries);
\r
297 doMultipleSelectionTest(rbbi, text);
\r
300 private Vector _testFirstAndNext(RuleBasedBreakIterator rbbi, String text) {
\r
301 int p = rbbi.first();
\r
303 Vector result = new Vector();
\r
306 errln("first() returned " + p + " instead of 0");
\r
307 while (p != RuleBasedBreakIterator.DONE) {
\r
309 if (p != RuleBasedBreakIterator.DONE) {
\r
311 errln("next() failed to move forward: next() on position "
\r
312 + lastP + " yielded " + p);
\r
314 result.addElement(text.substring(lastP, p));
\r
317 if (lastP != text.length())
\r
318 errln("next() returned DONE prematurely: offset was "
\r
319 + lastP + " instead of " + text.length());
\r
326 private Vector _testLastAndPrevious(RuleBasedBreakIterator rbbi, String text) {
\r
327 int p = rbbi.last();
\r
329 Vector result = new Vector();
\r
331 if (p != text.length())
\r
332 errln("last() returned " + p + " instead of " + text.length());
\r
333 while (p != RuleBasedBreakIterator.DONE) {
\r
334 p = rbbi.previous();
\r
335 if (p != RuleBasedBreakIterator.DONE) {
\r
337 errln("previous() failed to move backward: previous() on position "
\r
338 + lastP + " yielded " + p);
\r
340 result.insertElementAt(text.substring(p, lastP), 0);
\r
344 errln("previous() returned DONE prematurely: offset was "
\r
345 + lastP + " instead of 0");
\r
352 private void compareFragmentLists(String f1Name, String f2Name, Vector f1, Vector f2) {
\r
360 while (p1 < f1.size() && p2 < f2.size()) {
\r
361 s1 = (String)f1.elementAt(p1);
\r
362 s2 = (String)f2.elementAt(p2);
\r
366 if (s1.equals(s2)) {
\r
367 debugLogln(" >" + s1 + "<");
\r
377 while (tempT1 != tempT2 && tempP1 < f1.size() && tempP2 < f2.size()) {
\r
378 while (tempT1 < tempT2 && tempP1 < f1.size()) {
\r
379 tempT1 += ((String)f1.elementAt(tempP1)).length();
\r
382 while (tempT2 < tempT1 && tempP2 < f2.size()) {
\r
383 tempT2 += ((String)f2.elementAt(tempP2)).length();
\r
387 logln("*** " + f1Name + " has:");
\r
388 while (p1 <= tempP1 && p1 < f1.size()) {
\r
389 s1 = (String)f1.elementAt(p1);
\r
391 debugLogln(" *** >" + s1 + "<");
\r
394 logln("***** " + f2Name + " has:");
\r
395 while (p2 <= tempP2 && p2 < f2.size()) {
\r
396 s2 = (String)f2.elementAt(p2);
\r
398 debugLogln(" ***** >" + s2 + "<");
\r
401 errln("Discrepancy between " + f1Name + " and " + f2Name);
\r
406 private void _testFollowing(RuleBasedBreakIterator rbbi, String text, int[] boundaries) {
\r
407 logln("testFollowing():");
\r
409 for(int i = 0; i <= text.length(); i++) {
\r
410 if (i == boundaries[p])
\r
412 int b = rbbi.following(i);
\r
413 logln("rbbi.following(" + i + ") -> " + b);
\r
414 if (b != boundaries[p])
\r
415 errln("Wrong result from following() for " + i + ": expected " + boundaries[p]
\r
420 private void _testPreceding(RuleBasedBreakIterator rbbi, String text, int[] boundaries) {
\r
421 logln("testPreceding():");
\r
423 for(int i = 0; i <= text.length(); i++) {
\r
424 int b = rbbi.preceding(i);
\r
425 logln("rbbi.preceding(" + i + ") -> " + b);
\r
426 if (b != boundaries[p])
\r
427 errln("Wrong result from preceding() for " + i + ": expected " + boundaries[p]
\r
429 if (i == boundaries[p + 1])
\r
434 private void _testIsBoundary(RuleBasedBreakIterator rbbi, String text, int[] boundaries) {
\r
435 logln("testIsBoundary():");
\r
438 for(int i = 0; i <= text.length(); i++) {
\r
439 isB = rbbi.isBoundary(i);
\r
440 logln("rbbi.isBoundary(" + i + ") -> " + isB);
\r
441 if(i == boundaries[p]) {
\r
443 errln("Wrong result from isBoundary() for " + i + ": expected true, got false");
\r
448 errln("Wrong result from isBoundary() for " + i + ": expected false, got true");
\r
452 private void doMultipleSelectionTest(RuleBasedBreakIterator iterator, String testText)
\r
454 logln("Multiple selection test...");
\r
455 RuleBasedBreakIterator testIterator = (RuleBasedBreakIterator)iterator.clone();
\r
456 int offset = iterator.first();
\r
461 testOffset = testIterator.first();
\r
462 testOffset = testIterator.next(count);
\r
463 logln("next(" + count + ") -> " + testOffset);
\r
464 if (offset != testOffset)
\r
465 errln("next(n) and next() not returning consistent results: for step " + count + ", next(n) returned " + testOffset + " and next() had " + offset);
\r
467 if (offset != RuleBasedBreakIterator.DONE) {
\r
469 offset = iterator.next();
\r
471 } while (offset != RuleBasedBreakIterator.DONE);
\r
473 // now do it backwards...
\r
474 offset = iterator.last();
\r
478 testOffset = testIterator.last();
\r
479 testOffset = testIterator.next(count);
\r
480 logln("next(" + count + ") -> " + testOffset);
\r
481 if (offset != testOffset)
\r
482 errln("next(n) and next() not returning consistent results: for step " + count + ", next(n) returned " + testOffset + " and next() had " + offset);
\r
484 if (offset != RuleBasedBreakIterator.DONE) {
\r
486 offset = iterator.previous();
\r
488 } while (offset != RuleBasedBreakIterator.DONE);
\r
491 private void debugLogln(String s) {
\r
492 final String zeros = "0000";
\r
494 StringBuffer out = new StringBuffer();
\r
495 for (int i = 0; i < s.length(); i++) {
\r
496 char c = s.charAt(i);
\r
497 if (c >= ' ' && c < '\u007f')
\r
501 temp = Integer.toHexString((int)c);
\r
502 out.append(zeros.substring(0, 4 - temp.length()));
\r
506 logln(out.toString());
\r
509 public void TestThaiDictionaryBreakIterator() {
\r
512 int result[] = { 1, 2, 5, 10, 11, 12, 11, 10, 5, 2, 1, 0 };
\r
515 0x0E01, 0x0E32, 0x0E23, 0x0E17, 0x0E14, 0x0E25, 0x0E2D, 0x0E07,
\r
518 String text = new String(ctext);
\r
520 ULocale locale = ULocale.createCanonical("th");
\r
521 BreakIterator b = BreakIterator.getWordInstance(locale);
\r
526 // Test forward iteration
\r
527 while ((position = b.next())!= BreakIterator.DONE) {
\r
528 if (position != result[index++]) {
\r
529 errln("Error with ThaiDictionaryBreakIterator forward iteration test at " + position + ".\nShould have been " + result[index-1]);
\r
533 // Test backward iteration
\r
534 while ((position = b.previous())!= BreakIterator.DONE) {
\r
535 if (position != result[index++]) {
\r
536 errln("Error with ThaiDictionaryBreakIterator backward iteration test at " + position + ".\nShould have been " + result[index-1]);
\r
540 //Test invalid sequence and spaces
\r
542 0x0E01, 0x0E39, 0x0020, 0x0E01, 0x0E34, 0x0E19, 0x0E01, 0x0E38, 0x0E49, 0x0E07, 0x0020, 0x0E1B,
\r
543 0x0E34, 0x0E49, 0x0E48, 0x0E07, 0x0E2D, 0x0E22, 0x0E39, 0x0E48, 0x0E43, 0x0E19,
\r
544 0x0E16, 0x0E49, 0x0E33
\r
546 int expectedWordResult[] = {
\r
547 2, 3, 6, 10, 11, 15, 17, 20, 22
\r
549 int expectedLineResult[] = {
\r
550 3, 6, 11, 15, 17, 20, 22
\r
552 BreakIterator brk = BreakIterator.getWordInstance(new ULocale("th"));
\r
553 brk.setText(new String(text2));
\r
554 position = index = 0;
\r
555 while ((position = brk.next()) != BreakIterator.DONE && position < text2.length) {
\r
556 if (position != expectedWordResult[index++]) {
\r
557 errln("Incorrect break given by thai word break iterator. Expected: " + expectedWordResult[index-1] + " Got: " + position);
\r
561 brk = BreakIterator.getLineInstance(new ULocale("th"));
\r
562 brk.setText(new String(text2));
\r
563 position = index = 0;
\r
564 while ((position = brk.next()) != BreakIterator.DONE && position < text2.length) {
\r
565 if (position != expectedLineResult[index++]) {
\r
566 errln("Incorrect break given by thai line break iterator. Expected: " + expectedLineResult[index-1] + " Got: " + position);
\r
571 public void TestTailoredBreaks() {
\r
574 private ULocale locale;
\r
575 private String text;
\r
576 private int[] expectOffsets;
\r
577 TBItem(int typ, ULocale loc, String txt, int[] eOffs) {
\r
581 expectOffsets = eOffs;
\r
583 private static final int maxOffsetCount = 128;
\r
584 private boolean offsetsMatchExpected(int[] foundOffsets, int foundOffsetsLength) {
\r
585 if ( foundOffsetsLength != expectOffsets.length ) {
\r
588 for (int i = 0; i < foundOffsetsLength; i++) {
\r
589 if ( foundOffsets[i] != expectOffsets[i] ) {
\r
595 private String formatOffsets(int[] offsets, int length) {
\r
596 StringBuffer buildString = new StringBuffer(4*maxOffsetCount);
\r
597 for (int i = 0; i < length; i++) {
\r
598 buildString.append(" " + offsets[i]);
\r
600 return buildString.toString();
\r
602 public void doTest() {
\r
603 BreakIterator brkIter;
\r
605 case BreakIterator.KIND_CHARACTER: brkIter = BreakIterator.getCharacterInstance(locale); break;
\r
606 case BreakIterator.KIND_WORD: brkIter = BreakIterator.getWordInstance(locale); break;
\r
607 case BreakIterator.KIND_LINE: brkIter = BreakIterator.getLineInstance(locale); break;
\r
608 case BreakIterator.KIND_SENTENCE: brkIter = BreakIterator.getSentenceInstance(locale); break;
\r
609 default: errln("Unsupported break iterator type " + type); return;
\r
611 brkIter.setText(text);
\r
612 int[] foundOffsets = new int[maxOffsetCount];
\r
613 int offset, foundOffsetsCount = 0;
\r
614 // do forwards iteration test
\r
615 while ( foundOffsetsCount < maxOffsetCount && (offset = brkIter.next()) != BreakIterator.DONE ) {
\r
616 foundOffsets[foundOffsetsCount++] = offset;
\r
618 if ( !offsetsMatchExpected(foundOffsets, foundOffsetsCount) ) {
\r
619 // log error for forwards test
\r
620 String textToDisplay = (text.length() <= 16)? text: text.substring(0,16);
\r
621 errln("For type " + type + " " + locale + ", text \"" + textToDisplay + "...\"" +
\r
622 "; expect " + expectOffsets.length + " offsets:" + formatOffsets(expectOffsets, expectOffsets.length) +
\r
623 "; found " + foundOffsetsCount + " offsets fwd:" + formatOffsets(foundOffsets, foundOffsetsCount) );
\r
625 // do backwards iteration test
\r
626 --foundOffsetsCount; // back off one from the end offset
\r
627 while ( foundOffsetsCount > 0 ) {
\r
628 offset = brkIter.previous();
\r
629 if ( offset != foundOffsets[--foundOffsetsCount] ) {
\r
630 // log error for backwards test
\r
631 String textToDisplay = (text.length() <= 16)? text: text.substring(0,16);
\r
632 errln("For type " + type + " " + locale + ", text \"" + textToDisplay + "...\"" +
\r
633 "; expect " + expectOffsets.length + " offsets:" + formatOffsets(expectOffsets, expectOffsets.length) +
\r
634 "; found rev offset " + offset + " where expect " + foundOffsets[foundOffsetsCount] );
\r
641 // KIND_WORD "en_US_POSIX"
\r
642 final String posxWordText = "Can't have breaks in xx:yy or struct.field for CS-types.";
\r
643 final int[] posxWordTOffsets = { 5, 6, 10, 11, 17, 18, 20, 21, 23, 24, 26, 27, 29, 30, 36, 37, 42, 43, 46, 47, 49, 50, 55, 56 };
\r
644 final int[] posxWordROffsets = { 5, 6, 10, 11, 17, 18, 20, 21, 26, 27, 29, 30, 42, 43, 46, 47, 49, 50, 55, 56 };
\r
646 final String jaWordText = "\u79C1\u9054\u306B\u4E00\u3007\u3007\u3007\u306E\u30B3\u30F3\u30D4\u30E5\u30FC\u30BF" +
\r
647 "\u304C\u3042\u308B\u3002\u5948\u3005\u306F\u30EF\u30FC\u30C9\u3067\u3042\u308B\u3002";
\r
648 final int[] jaWordTOffsets = { 2, 3, 7, 8, 14, 17, 18, 20, 21, 24, 27, 28 };
\r
649 final int[] jaWordROffsets = { 1, 2, 3, 4, 5, 6, 7, 8, 14, 15, 16, 17, 18, 19, 20, 21, 24, 25, 26, 27, 28 };
\r
650 // KIND_SENTENCE "el"
\r
651 final String elSentText = "\u0391\u03B2, \u03B3\u03B4; \u0395 \u03B6\u03B7\u037E \u0398 \u03B9\u03BA. " +
\r
652 "\u039B\u03BC \u03BD\u03BE! \u039F\u03C0, \u03A1\u03C2? \u03A3";
\r
653 final int[] elSentTOffsets = { 8, 14, 20, 27, 35, 36 };
\r
654 final int[] elSentROffsets = { 20, 27, 35, 36 };
\r
655 // KIND_CHARACTER "th"
\r
656 final String thCharText = "\u0E01\u0E23\u0E30\u0E17\u0E48\u0E2D\u0E21\u0E23\u0E08\u0E19\u0E32 " +
\r
657 "(\u0E2A\u0E38\u0E0A\u0E32\u0E15\u0E34-\u0E08\u0E38\u0E11\u0E32\u0E21\u0E32\u0E28) " +
\r
658 "\u0E40\u0E14\u0E47\u0E01\u0E21\u0E35\u0E1B\u0E31\u0E0D\u0E2B\u0E32 ";
\r
659 final int[] thCharTOffsets = { 1, 2, 3, 5, 6, 7, 8, 9, 10, 11,
\r
660 12, 13, 15, 16, 17, 19, 20, 22, 23, 24, 25, 26, 27, 28,
\r
661 29, 30, 32, 33, 35, 37, 38, 39, 40, 41 };
\r
662 final int[] thCharROffsets = { 1, 3, 5, 6, 7, 8, 9, 11,
\r
663 12, 13, 15, 17, 19, 20, 22, 24, 26, 27, 28,
\r
664 29, 32, 33, 35, 37, 38, 40, 41 };
\r
666 final TBItem[] tests = {
\r
667 new TBItem( BreakIterator.KIND_WORD, new ULocale("en_US_POSIX"), posxWordText, posxWordTOffsets ),
\r
668 new TBItem( BreakIterator.KIND_WORD, ULocale.ROOT, posxWordText, posxWordROffsets ),
\r
669 new TBItem( BreakIterator.KIND_WORD, new ULocale("ja"), jaWordText, jaWordTOffsets ),
\r
670 new TBItem( BreakIterator.KIND_WORD, ULocale.ROOT, jaWordText, jaWordROffsets ),
\r
671 new TBItem( BreakIterator.KIND_SENTENCE, new ULocale("el"), elSentText, elSentTOffsets ),
\r
672 new TBItem( BreakIterator.KIND_SENTENCE, ULocale.ROOT, elSentText, elSentROffsets ),
\r
673 new TBItem( BreakIterator.KIND_CHARACTER, new ULocale("th"), thCharText, thCharTOffsets ),
\r
674 new TBItem( BreakIterator.KIND_CHARACTER, ULocale.ROOT, thCharText, thCharROffsets ),
\r
676 for (int iTest = 0; iTest < tests.length; iTest++) {
\r
677 tests[iTest].doTest();
\r