2 *******************************************************************************
\r
3 * Copyright (C) 1996-2010, International Business Machines Corporation and *
\r
4 * others. All Rights Reserved. *
\r
5 *******************************************************************************
\r
7 package com.ibm.icu.dev.test.rbbi;
\r
9 //Regression testing of RuleBasedBreakIterator
\r
11 // TODO: These tests should be mostly retired.
\r
12 // Much of the test data that was originally here was removed when the RBBI rules
\r
13 // were updated to match the Unicode boundary TRs, and the data was found to be invalid.
\r
14 // Much of the remaining data has been moved into the rbbitst.txt test data file,
\r
15 // which is common between ICU4C and ICU4J. The remaining test data should also be moved,
\r
16 // or simply retired if it is no longer interesting.
\r
17 import java.text.CharacterIterator;
\r
18 import java.util.Vector;
\r
20 import com.ibm.icu.dev.test.TestFmwk;
\r
21 import com.ibm.icu.text.BreakIterator;
\r
22 import com.ibm.icu.text.DictionaryBasedBreakIterator;
\r
23 import com.ibm.icu.text.RuleBasedBreakIterator;
\r
24 import com.ibm.icu.util.ULocale;
\r
26 public class RBBITest extends TestFmwk
\r
29 public static void main(String[] args) throws Exception {
\r
30 new RBBITest().run(args);
\r
// No-argument constructor; main() uses it to create the instance it runs.
// NOTE(review): the constructor body is not visible in this listing - presumably empty, confirm.
33 public RBBITest() {
\r
36 private static final String halfNA = "\u0928\u094d\u200d"; /*halfform NA = devanigiri NA + virama(supresses inherent vowel)+ zero width joiner */
\r
39 // tests default rules based character iteration.
\r
40 // Builds a new iterator from the source rules in the default (prebuilt) iterator.
\r
42 public void TestDefaultRuleBasedCharacterIteration(){
\r
43 RuleBasedBreakIterator rbbi=(RuleBasedBreakIterator)BreakIterator.getCharacterInstance();
\r
44 logln("Testing the RBBI for character iteration by using default rules");
\r
46 //fetch the rules used to create the above RuleBasedBreakIterator
\r
47 String defaultRules=rbbi.toString();
\r
49 RuleBasedBreakIterator charIterDefault=null;
\r
51 charIterDefault = new RuleBasedBreakIterator(defaultRules);
\r
52 }catch(IllegalArgumentException iae){
\r
53 errln("ERROR: failed construction in TestDefaultRuleBasedCharacterIteration()"+ iae.toString());
\r
56 Vector chardata = new Vector();
\r
57 chardata.addElement("H");
\r
58 chardata.addElement("e");
\r
59 chardata.addElement("l");
\r
60 chardata.addElement("l");
\r
61 chardata.addElement("o");
\r
62 chardata.addElement("e\u0301"); //acuteE
\r
63 chardata.addElement("&");
\r
64 chardata.addElement("e\u0303"); //tildaE
\r
65 //devanagiri characters for Hindi support
\r
66 chardata.addElement("\u0906"); //devanagiri AA
\r
67 //chardata.addElement("\u093e\u0901"); //devanagiri vowelsign AA+ chandrabindhu
\r
68 chardata.addElement("\u0916\u0947"); //devanagiri KHA+vowelsign E
\r
69 chardata.addElement("\u0938\u0941\u0902"); //devanagiri SA+vowelsign U + anusvara(bindu)
\r
70 chardata.addElement("\u0926"); //devanagiri consonant DA
\r
71 chardata.addElement("\u0930"); //devanagiri consonant RA
\r
72 // chardata.addElement("\u0939\u094c"); //devanagiri HA+vowel sign AI
\r
73 chardata.addElement("\u0964"); //devanagiri danda
\r
74 //end hindi characters
\r
75 chardata.addElement("A\u0302"); // circumflexA
\r
76 chardata.addElement("i\u0301"); // acuteBelowI
\r
77 // conjoining jamo...
\r
78 chardata.addElement("\u1109\u1161\u11bc");
\r
79 chardata.addElement("\u1112\u1161\u11bc");
\r
80 chardata.addElement("\n");
\r
81 chardata.addElement("\r\n"); // keep CRLF sequences together
\r
82 chardata.addElement("S\u0300"); //graveS
\r
83 chardata.addElement("i\u0301"); // acuteBelowI
\r
84 chardata.addElement("!");
\r
86 // What follows is a string of Korean characters (I found it in the Yellow Pages
\r
87 // ad for the Korean Presbyterian Church of San Francisco, and I hope I transcribed
\r
88 // it correctly), first as precomposed syllables, and then as conjoining jamo.
\r
89 // Both sequences should be semantically identical and break the same way.
\r
90 // precomposed syllables...
\r
91 chardata.addElement("\uc0c1");
\r
92 chardata.addElement("\ud56d");
\r
93 chardata.addElement(" ");
\r
94 chardata.addElement("\ud55c");
\r
95 chardata.addElement("\uc778");
\r
96 chardata.addElement(" ");
\r
97 chardata.addElement("\uc5f0");
\r
98 chardata.addElement("\ud569");
\r
99 chardata.addElement(" ");
\r
100 chardata.addElement("\uc7a5");
\r
101 chardata.addElement("\ub85c");
\r
102 chardata.addElement("\uad50");
\r
103 chardata.addElement("\ud68c");
\r
104 chardata.addElement(" ");
\r
105 // conjoining jamo...
\r
106 chardata.addElement("\u1109\u1161\u11bc");
\r
107 chardata.addElement("\u1112\u1161\u11bc");
\r
108 chardata.addElement(" ");
\r
109 chardata.addElement("\u1112\u1161\u11ab");
\r
110 chardata.addElement("\u110b\u1175\u11ab");
\r
111 chardata.addElement(" ");
\r
112 chardata.addElement("\u110b\u1167\u11ab");
\r
113 chardata.addElement("\u1112\u1161\u11b8");
\r
114 chardata.addElement(" ");
\r
115 chardata.addElement("\u110c\u1161\u11bc");
\r
116 chardata.addElement("\u1105\u1169");
\r
117 chardata.addElement("\u1100\u116d");
\r
118 chardata.addElement("\u1112\u116c");
\r
121 generalIteratorTest(charIterDefault, chardata);
\r
125 public void TestDefaultRuleBasedWordIteration(){
\r
126 logln("Testing the RBBI for word iteration using default rules");
\r
127 RuleBasedBreakIterator rbbi=(RuleBasedBreakIterator)BreakIterator.getWordInstance();
\r
128 //fetch the rules used to create the above RuleBasedBreakIterator
\r
129 String defaultRules=rbbi.toString();
\r
131 RuleBasedBreakIterator wordIterDefault=null;
\r
133 wordIterDefault = new RuleBasedBreakIterator(defaultRules);
\r
134 }catch(IllegalArgumentException iae){
\r
135 errln("ERROR: failed construction in TestDefaultRuleBasedWordIteration() -- custom rules"+ iae.toString());
\r
138 Vector worddata = new Vector();
\r
139 worddata.addElement ("Write");
\r
140 worddata.addElement (" ");
\r
141 worddata.addElement ("wordrules");
\r
142 worddata.addElement (".");
\r
143 worddata.addElement(" ");
\r
144 //worddata.addElement("alpha-beta-gamma");
\r
145 worddata.addElement(" ");
\r
146 worddata.addElement("\u092f\u0939");
\r
147 worddata.addElement(" ");
\r
148 worddata.addElement("\u0939\u093f" + halfNA + "\u0926\u0940");
\r
149 worddata.addElement(" ");
\r
150 worddata.addElement("\u0939\u0948");
\r
151 // worddata.addElement("\u0964"); //danda followed by a space
\r
152 worddata.addElement(" ");
\r
153 worddata.addElement("\u0905\u093e\u092a");
\r
154 worddata.addElement(" ");
\r
155 worddata.addElement("\u0938\u093f\u0916\u094b\u0917\u0947");
\r
156 worddata.addElement("?");
\r
157 worddata.addElement(" ");
\r
158 worddata.addElement("\r");
\r
159 worddata.addElement("It's");
\r
160 worddata.addElement(" ");
\r
161 // worddata.addElement("$30.10");
\r
162 worddata.addElement(" ");
\r
163 worddata.addElement(" ");
\r
164 worddata.addElement("Badges");
\r
165 worddata.addElement("?");
\r
166 worddata.addElement(" ");
\r
167 worddata.addElement("BADGES");
\r
168 worddata.addElement("!");
\r
169 worddata.addElement("1000,233,456.000");
\r
170 worddata.addElement(" ");
\r
172 generalIteratorTest(wordIterDefault, worddata);
\r
174 // private static final String kParagraphSeparator = "\u2029";
\r
175 private static final String kLineSeparator = "\u2028";
\r
177 public void TestDefaultRuleBasedSentenceIteration(){
\r
178 logln("Testing the RBBI for sentence iteration using default rules");
\r
179 RuleBasedBreakIterator rbbi=(RuleBasedBreakIterator)BreakIterator.getSentenceInstance();
\r
181 //fetch the rules used to create the above RuleBasedBreakIterator
\r
182 String defaultRules=rbbi.toString();
\r
183 RuleBasedBreakIterator sentIterDefault=null;
\r
185 sentIterDefault = new RuleBasedBreakIterator(defaultRules);
\r
186 }catch(IllegalArgumentException iae){
\r
187 errln("ERROR: failed construction in TestDefaultRuleBasedSentenceIteration()" + iae.toString());
\r
190 Vector sentdata = new Vector();
\r
191 sentdata.addElement("(This is it.) ");
\r
192 sentdata.addElement("Testing the sentence iterator. ");
\r
193 sentdata.addElement("\"This isn\'t it.\" ");
\r
194 sentdata.addElement("Hi! ");
\r
195 sentdata.addElement("This is a simple sample sentence. ");
\r
196 sentdata.addElement("(This is it.) ");
\r
197 sentdata.addElement("This is a simple sample sentence. ");
\r
198 sentdata.addElement("\"This isn\'t it.\" ");
\r
199 sentdata.addElement("Hi! ");
\r
200 sentdata.addElement("This is a simple sample sentence. ");
\r
201 sentdata.addElement("It does not have to make any sense as you can see. ");
\r
202 sentdata.addElement("Nel mezzo del cammin di nostra vita, mi ritrovai in una selva oscura. ");
\r
203 sentdata.addElement("Che la dritta via aveo smarrita. ");
\r
204 generalIteratorTest(sentIterDefault, sentdata);
\r
207 public void TestDefaultRuleBasedLineIteration(){
\r
208 logln("Testing the RBBI for line iteration using default rules");
\r
209 RuleBasedBreakIterator rbbi=(RuleBasedBreakIterator)RuleBasedBreakIterator.getLineInstance();
\r
210 //fetch the rules used to create the above RuleBasedBreakIterator
\r
211 String defaultRules=rbbi.toString();
\r
212 RuleBasedBreakIterator lineIterDefault=null;
\r
214 lineIterDefault = new RuleBasedBreakIterator(defaultRules);
\r
215 }catch(IllegalArgumentException iae){
\r
216 errln("ERROR: failed construction in TestDefaultRuleBasedLineIteration()" + iae.toString());
\r
219 Vector linedata = new Vector();
\r
220 linedata.addElement("Multi-");
\r
221 linedata.addElement("Level ");
\r
222 linedata.addElement("example ");
\r
223 linedata.addElement("of ");
\r
224 linedata.addElement("a ");
\r
225 linedata.addElement("semi-");
\r
226 linedata.addElement("idiotic ");
\r
227 linedata.addElement("non-");
\r
228 linedata.addElement("sensical ");
\r
229 linedata.addElement("(non-");
\r
230 linedata.addElement("important) ");
\r
231 linedata.addElement("sentence. ");
\r
233 linedata.addElement("Hi ");
\r
234 linedata.addElement("Hello ");
\r
235 linedata.addElement("How\n");
\r
236 linedata.addElement("are\r");
\r
237 linedata.addElement("you" + kLineSeparator);
\r
238 linedata.addElement("fine.\t");
\r
239 linedata.addElement("good. ");
\r
241 linedata.addElement("Now\r");
\r
242 linedata.addElement("is\n");
\r
243 linedata.addElement("the\r\n");
\r
244 linedata.addElement("time\n");
\r
245 linedata.addElement("\r");
\r
246 linedata.addElement("for\r");
\r
247 linedata.addElement("\r");
\r
248 linedata.addElement("all");
\r
250 generalIteratorTest(lineIterDefault, linedata);
\r
255 //=========================================================================
\r
256 // general test subroutines
\r
257 //=========================================================================
\r
// Shared driver for the TestDefaultRuleBased* tests.
// Concatenates the strings in expectedResult into one text, attaches it to rbbi, and then
// checks the iterator by calling, in order: _testFirstAndNext, _testLastAndPrevious,
// compareFragmentLists (forward vs. backward, then expected vs. actual), _testFollowing,
// _testPreceding, _testIsBoundary and doMultipleSelectionTest.
// A null rbbi is reported through errln("null iterator, test skipped.").
// NOTE(review): this listing is missing several short lines (e.g. the declaration of `text`,
// whatever follows the null-iterator errln, and the trailing argument of both
// compareFragmentLists calls) - consult the full source before editing.
259 private void generalIteratorTest(RuleBasedBreakIterator rbbi, Vector expectedResult){
\r
260 StringBuffer buffer = new StringBuffer();
\r
262 for (int i = 0; i < expectedResult.size(); i++) {
\r
263 text = (String)expectedResult.elementAt(i);
\r
264 buffer.append(text);
\r
266 text = buffer.toString();
\r
267 if (rbbi == null) {
\r
268 errln("null iterator, test skipped.");
\r
272 rbbi.setText(text);
\r
274 Vector nextResults = _testFirstAndNext(rbbi, text);
\r
275 Vector previousResults = _testLastAndPrevious(rbbi, text);
\r
// The expected-vs-actual comparison below only runs when the forward/backward comparison
// logged no new errors (error count unchanged).
277 logln("comparing forward and backward...");
\r
278 int errs = getErrorCount();
\r
279 compareFragmentLists("forward iteration", "backward iteration", nextResults,
\r
281 if (getErrorCount() == errs) {
\r
282 logln("comparing expected and actual...");
\r
283 compareFragmentLists("expected result", "actual result", expectedResult,
\r
// Boundary table with size() + 3 slots: slot 0 and the last slot hold DONE, and slot i + 2
// is slot i + 1 plus the length of fragment i (a running total of fragment lengths).
// No visible line assigns slot 1; Java zero-initialises int arrays, so it is 0 unless a
// missing line sets it otherwise.
287 int[] boundaries = new int[expectedResult.size() + 3];
\r
288 boundaries[0] = RuleBasedBreakIterator.DONE;
\r
290 for (int i = 0; i < expectedResult.size(); i++)
\r
291 boundaries[i + 2] = boundaries[i + 1] + ((String)expectedResult.elementAt(i)).length();
\r
293 boundaries[boundaries.length - 1] = RuleBasedBreakIterator.DONE;
\r
295 _testFollowing(rbbi, text, boundaries);
\r
296 _testPreceding(rbbi, text, boundaries);
\r
297 _testIsBoundary(rbbi, text, boundaries);
\r
299 doMultipleSelectionTest(rbbi, text);
\r
// Forward-iteration check: starts at rbbi.first() and collects text.substring(lastP, p) for
// each pair of successive positions into `result`.
// Reports through errln when first() is not 0, when next() fails to move forward, and when
// DONE is reached before the last position equals text.length().
// Declared to return a Vector; generalIteratorTest uses the returned value as the list of
// forward fragments.
// NOTE(review): this listing is missing short lines (the declaration and updates of `lastP`,
// the call that advances `p`, the conditions guarding two of the errln calls, and the
// return statement) - consult the full source before editing.
302 private Vector _testFirstAndNext(RuleBasedBreakIterator rbbi, String text) {
\r
303 int p = rbbi.first();
\r
305 Vector result = new Vector();
\r
308 errln("first() returned " + p + " instead of 0");
\r
309 while (p != RuleBasedBreakIterator.DONE) {
\r
311 if (p != RuleBasedBreakIterator.DONE) {
\r
313 errln("next() failed to move forward: next() on position "
\r
314 + lastP + " yielded " + p);
\r
316 result.addElement(text.substring(lastP, p));
\r
319 if (lastP != text.length())
\r
320 errln("next() returned DONE prematurely: offset was "
\r
321 + lastP + " instead of " + text.length());
\r
// Backward-iteration check: starts at rbbi.last() and steps with previous(), inserting
// text.substring(p, lastP) at index 0 of `result` so the fragments end up in text order.
// Reports through errln when last() is not text.length(), when previous() fails to move
// backward, and when DONE is returned prematurely (expected final offset is 0).
// Declared to return a Vector; generalIteratorTest uses the returned value as the list of
// backward fragments.
// NOTE(review): this listing is missing short lines (the declaration and updates of `lastP`,
// the conditions guarding two of the errln calls, and the return statement) - consult the
// full source before editing.
328 private Vector _testLastAndPrevious(RuleBasedBreakIterator rbbi, String text) {
\r
329 int p = rbbi.last();
\r
331 Vector result = new Vector();
\r
333 if (p != text.length())
\r
334 errln("last() returned " + p + " instead of " + text.length());
\r
335 while (p != RuleBasedBreakIterator.DONE) {
\r
336 p = rbbi.previous();
\r
337 if (p != RuleBasedBreakIterator.DONE) {
\r
339 errln("previous() failed to move backward: previous() on position "
\r
340 + lastP + " yielded " + p);
\r
342 result.insertElementAt(text.substring(p, lastP), 0);
\r
346 errln("previous() returned DONE prematurely: offset was "
\r
347 + lastP + " instead of 0");
\r
// Compares two lists of String fragments (f1, f2) element by element; f1Name and f2Name are
// only used as labels in the log output.
// Matching fragments are logged through debugLogln. On a mismatch, the inner loops add up
// fragment lengths from each list (tempT1 / tempT2) until the two totals are equal or a list
// is exhausted, the fragments walked on each side are logged, and errln reports the
// discrepancy.
// NOTE(review): this listing is missing short lines (declarations and increments of p1, p2,
// tempP1, tempP2, tempT1, tempT2, and the else branch header) - consult the full source
// before editing.
354 private void compareFragmentLists(String f1Name, String f2Name, Vector f1, Vector f2) {
\r
362 while (p1 < f1.size() && p2 < f2.size()) {
\r
363 s1 = (String)f1.elementAt(p1);
\r
364 s2 = (String)f2.elementAt(p2);
\r
368 if (s1.equals(s2)) {
\r
369 debugLogln(" >" + s1 + "<");
\r
// Re-synchronisation: advance whichever side has the smaller running length total.
379 while (tempT1 != tempT2 && tempP1 < f1.size() && tempP2 < f2.size()) {
\r
380 while (tempT1 < tempT2 && tempP1 < f1.size()) {
\r
381 tempT1 += ((String)f1.elementAt(tempP1)).length();
\r
384 while (tempT2 < tempT1 && tempP2 < f2.size()) {
\r
385 tempT2 += ((String)f2.elementAt(tempP2)).length();
\r
389 logln("*** " + f1Name + " has:");
\r
390 while (p1 <= tempP1 && p1 < f1.size()) {
\r
391 s1 = (String)f1.elementAt(p1);
\r
393 debugLogln(" *** >" + s1 + "<");
\r
396 logln("***** " + f2Name + " has:");
\r
397 while (p2 <= tempP2 && p2 < f2.size()) {
\r
398 s2 = (String)f2.elementAt(p2);
\r
400 debugLogln(" ***** >" + s2 + "<");
\r
403 errln("Discrepancy between " + f1Name + " and " + f2Name);
\r
// Calls rbbi.following(i) for every offset i from 0 through text.length() and reports through
// errln whenever the result differs from boundaries[p].
// `boundaries` is the table built by generalIteratorTest (DONE sentinels at both ends).
// NOTE(review): the declaration of `p`, the statement guarded by `if (i == boundaries[p])`
// (presumably advancing p) and the tail of the errln call are not visible in this listing.
408 private void _testFollowing(RuleBasedBreakIterator rbbi, String text, int[] boundaries) {
\r
409 logln("testFollowing():");
\r
411 for(int i = 0; i <= text.length(); i++) {
\r
412 if (i == boundaries[p])
\r
414 int b = rbbi.following(i);
\r
415 logln("rbbi.following(" + i + ") -> " + b);
\r
416 if (b != boundaries[p])
\r
417 errln("Wrong result from following() for " + i + ": expected " + boundaries[p]
\r
// Calls rbbi.preceding(i) for every offset i from 0 through text.length() and reports through
// errln whenever the result differs from boundaries[p].
// Unlike _testFollowing, the boundary comparison `i == boundaries[p + 1]` comes after the
// preceding() check in each iteration.
// NOTE(review): the declaration of `p`, the statement guarded by `if (i == boundaries[p + 1])`
// (presumably advancing p) and the tail of the errln call are not visible in this listing.
422 private void _testPreceding(RuleBasedBreakIterator rbbi, String text, int[] boundaries) {
\r
423 logln("testPreceding():");
\r
425 for(int i = 0; i <= text.length(); i++) {
\r
426 int b = rbbi.preceding(i);
\r
427 logln("rbbi.preceding(" + i + ") -> " + b);
\r
428 if (b != boundaries[p])
\r
429 errln("Wrong result from preceding() for " + i + ": expected " + boundaries[p]
\r
431 if (i == boundaries[p + 1])
\r
// Calls rbbi.isBoundary(i) for every offset i from 0 through text.length(). Offsets equal to
// boundaries[p] are expected to be boundaries; the second errln covers offsets reported as
// boundaries when false was expected.
// NOTE(review): the declarations of `p` and `isB`, the conditions directly guarding the two
// errln calls, and the update of `p` are not visible in this listing.
436 private void _testIsBoundary(RuleBasedBreakIterator rbbi, String text, int[] boundaries) {
\r
437 logln("testIsBoundary():");
\r
440 for(int i = 0; i <= text.length(); i++) {
\r
441 isB = rbbi.isBoundary(i);
\r
442 logln("rbbi.isBoundary(" + i + ") -> " + isB);
\r
443 if(i == boundaries[p]) {
\r
445 errln("Wrong result from isBoundary() for " + i + ": expected true, got false");
\r
450 errln("Wrong result from isBoundary() for " + i + ": expected false, got true");
\r
// Checks that next(n) on a clone agrees with stepping the original one boundary at a time.
// Forward pass: the clone is reset with first() and moved with next(count); its offset is
// compared with the offset the original reached via next().
// Backward pass: the clone is reset with last() and moved with next(count); its offset is
// compared with the offset the original reached via previous().
// `testText` is not referenced in any visible line.
// NOTE(review): the declarations and updates of `count` and `testOffset` and the `do {`
// headers are not visible in this listing. Also note the backward-pass error message still
// reads "next(n) and next()" although the original is stepped with previous().
454 private void doMultipleSelectionTest(RuleBasedBreakIterator iterator, String testText)
\r
456 logln("Multiple selection test...");
\r
457 RuleBasedBreakIterator testIterator = (RuleBasedBreakIterator)iterator.clone();
\r
458 int offset = iterator.first();
\r
463 testOffset = testIterator.first();
\r
464 testOffset = testIterator.next(count);
\r
465 logln("next(" + count + ") -> " + testOffset);
\r
466 if (offset != testOffset)
\r
467 errln("next(n) and next() not returning consistent results: for step " + count + ", next(n) returned " + testOffset + " and next() had " + offset);
\r
469 if (offset != RuleBasedBreakIterator.DONE) {
\r
471 offset = iterator.next();
\r
473 } while (offset != RuleBasedBreakIterator.DONE);
\r
475 // now do it backwards...
\r
476 offset = iterator.last();
\r
480 testOffset = testIterator.last();
\r
481 testOffset = testIterator.next(count);
\r
482 logln("next(" + count + ") -> " + testOffset);
\r
483 if (offset != testOffset)
\r
484 errln("next(n) and next() not returning consistent results: for step " + count + ", next(n) returned " + testOffset + " and next() had " + offset);
\r
486 if (offset != RuleBasedBreakIterator.DONE) {
\r
488 offset = iterator.previous();
\r
490 } while (offset != RuleBasedBreakIterator.DONE);
\r
// Logs `s` through logln after rebuilding it character by character in `out`.
// Characters from ' ' up to (but excluding) 0x7f take the first branch; for the others the
// code point is converted to hex and left-padded with zeros to four digits.
// NOTE(review): the statements that append to `out` for the ASCII branch, the prefix and hex
// digits of the non-ASCII branch, and the declaration of `temp` are not visible in this
// listing - presumably non-ASCII characters are written as Unicode escapes; confirm.
493 private void debugLogln(String s) {
\r
494 final String zeros = "0000";
\r
496 StringBuffer out = new StringBuffer();
\r
497 for (int i = 0; i < s.length(); i++) {
\r
498 char c = s.charAt(i);
\r
499 if (c >= ' ' && c < '\u007f')
\r
503 temp = Integer.toHexString((int)c);
\r
504 out.append(zeros.substring(0, 4 - temp.length()));
\r
508 logln(out.toString());
\r
// Exercises the Thai ("th") dictionary-based break iterators in three parts:
//  1. a Thai word iterator is walked forward with next() and then backward with previous(),
//     each position being compared with the next entry of result[];
//  2. a second text (text2, containing spaces) is run through the Thai word iterator and
//     the Thai line iterator and compared with expectedWordResult / expectedLineResult;
//  3. preceding(), following() and getRuleStatusVec() are called on the line iterator for
//     code coverage.
// NOTE(review): this listing is missing short lines (declarations of `position`, `index`,
// `ctext` and `text2`, the array openers/closers, and whatever attaches `text` to `b`) -
// consult the full source before editing. result[] appears to hold the forward positions
// followed by the backward ones, since no visible line resets `index` between the two
// loops - TODO confirm.
511 public void TestThaiDictionaryBreakIterator() {
\r
514 int result[] = { 1, 2, 5, 10, 11, 12, 11, 10, 5, 2, 1, 0 };
\r
517 0x0E01, 0x0E32, 0x0E23, 0x0E17, 0x0E14, 0x0E25, 0x0E2D, 0x0E07,
\r
520 String text = new String(ctext);
\r
522 ULocale locale = ULocale.createCanonical("th");
\r
523 BreakIterator b = BreakIterator.getWordInstance(locale);
\r
528 // Test forward iteration
\r
529 while ((position = b.next())!= BreakIterator.DONE) {
\r
530 if (position != result[index++]) {
\r
531 errln("Error with ThaiDictionaryBreakIterator forward iteration test at " + position + ".\nShould have been " + result[index-1]);
\r
535 // Test backward iteration
\r
536 while ((position = b.previous())!= BreakIterator.DONE) {
\r
537 if (position != result[index++]) {
\r
538 errln("Error with ThaiDictionaryBreakIterator backward iteration test at " + position + ".\nShould have been " + result[index-1]);
\r
542 //Test invalid sequence and spaces
\r
544 0x0E01, 0x0E39, 0x0020, 0x0E01, 0x0E34, 0x0E19, 0x0E01, 0x0E38, 0x0E49, 0x0E07, 0x0020, 0x0E1B,
\r
545 0x0E34, 0x0E49, 0x0E48, 0x0E07, 0x0E2D, 0x0E22, 0x0E39, 0x0E48, 0x0E43, 0x0E19,
\r
546 0x0E16, 0x0E49, 0x0E33
\r
548 int expectedWordResult[] = {
\r
549 2, 3, 6, 10, 11, 15, 17, 20, 22
\r
551 int expectedLineResult[] = {
\r
552 3, 6, 11, 15, 17, 20, 22
\r
// Both loops below stop before the end of text2 (position < text2.length), so the final
// boundary is not part of the expected arrays.
554 BreakIterator brk = BreakIterator.getWordInstance(new ULocale("th"));
\r
555 brk.setText(new String(text2));
\r
556 position = index = 0;
\r
557 while ((position = brk.next()) != BreakIterator.DONE && position < text2.length) {
\r
558 if (position != expectedWordResult[index++]) {
\r
559 errln("Incorrect break given by thai word break iterator. Expected: " + expectedWordResult[index-1] + " Got: " + position);
\r
563 brk = BreakIterator.getLineInstance(new ULocale("th"));
\r
564 brk.setText(new String(text2));
\r
565 position = index = 0;
\r
566 while ((position = brk.next()) != BreakIterator.DONE && position < text2.length) {
\r
567 if (position != expectedLineResult[index++]) {
\r
568 errln("Incorrect break given by thai line break iterator. Expected: " + expectedLineResult[index-1] + " Got: " + position);
\r
571 // Improve code coverage
\r
572 if (brk.preceding(expectedLineResult[1]) != expectedLineResult[0]) {
\r
573 errln("Incorrect preceding position.");
\r
575 if (brk.following(expectedLineResult[1]) != expectedLineResult[2]) {
\r
576 errln("Incorrect following position.");
\r
578 int []fillInArray = new int[2];
\r
579 if (((DictionaryBasedBreakIterator)brk).getRuleStatusVec(fillInArray) != 1 || fillInArray[0] != 0) {
\r
580 errln("Error: Since getRuleStatusVec is not supported in DictionaryBasedBreakIterator, it should return 1 and fillInArray[0] == 0.");
\r
// Table-driven test of locale-tailored break iterators. Each TBItem pairs a break kind, a
// locale, a text and the offsets expected from forward iteration; most texts appear twice,
// once with a tailored locale ("en_US_POSIX", "ja", "el", "th") and once with ULocale.ROOT,
// each with its own expected offsets.
// TBItem.doTest() builds the iterator for the item's kind and locale, collects up to
// maxOffsetCount offsets with next(), compares them with the expected offsets, and then
// walks back with previous(), checking each offset against the ones found going forward.
// NOTE(review): this listing is missing short lines (the header of the local TBItem class,
// its `type` field and most constructor assignments, the `switch` header, return statements
// and the comment naming the "ja" word data) - consult the full source before editing.
584 public void TestTailoredBreaks() {
\r
587 private ULocale locale;
\r
588 private String text;
\r
589 private int[] expectOffsets;
\r
590 TBItem(int typ, ULocale loc, String txt, int[] eOffs) {
\r
594 expectOffsets = eOffs;
\r
596 private static final int maxOffsetCount = 128;
\r
// Compares the first foundOffsetsLength entries of foundOffsets with expectOffsets; the
// lengths are compared first, then the entries one by one.
597 private boolean offsetsMatchExpected(int[] foundOffsets, int foundOffsetsLength) {
\r
598 if ( foundOffsetsLength != expectOffsets.length ) {
\r
601 for (int i = 0; i < foundOffsetsLength; i++) {
\r
602 if ( foundOffsets[i] != expectOffsets[i] ) {
\r
// Formats the first `length` offsets as a string, each preceded by one space.
608 private String formatOffsets(int[] offsets, int length) {
\r
609 StringBuffer buildString = new StringBuffer(4*maxOffsetCount);
\r
610 for (int i = 0; i < length; i++) {
\r
611 buildString.append(" " + offsets[i]);
\r
613 return buildString.toString();
\r
615 public void doTest() {
\r
616 BreakIterator brkIter;
\r
618 case BreakIterator.KIND_CHARACTER: brkIter = BreakIterator.getCharacterInstance(locale); break;
\r
619 case BreakIterator.KIND_WORD: brkIter = BreakIterator.getWordInstance(locale); break;
\r
620 case BreakIterator.KIND_LINE: brkIter = BreakIterator.getLineInstance(locale); break;
\r
621 case BreakIterator.KIND_SENTENCE: brkIter = BreakIterator.getSentenceInstance(locale); break;
\r
622 default: errln("Unsupported break iterator type " + type); return;
\r
624 brkIter.setText(text);
\r
625 int[] foundOffsets = new int[maxOffsetCount];
\r
626 int offset, foundOffsetsCount = 0;
\r
627 // do forwards iteration test
\r
628 while ( foundOffsetsCount < maxOffsetCount && (offset = brkIter.next()) != BreakIterator.DONE ) {
\r
629 foundOffsets[foundOffsetsCount++] = offset;
\r
631 if ( !offsetsMatchExpected(foundOffsets, foundOffsetsCount) ) {
\r
632 // log error for forwards test
\r
// Error messages show at most the first 16 characters of the text.
633 String textToDisplay = (text.length() <= 16)? text: text.substring(0,16);
\r
634 errln("For type " + type + " " + locale + ", text \"" + textToDisplay + "...\"" +
\r
635 "; expect " + expectOffsets.length + " offsets:" + formatOffsets(expectOffsets, expectOffsets.length) +
\r
636 "; found " + foundOffsetsCount + " offsets fwd:" + formatOffsets(foundOffsets, foundOffsetsCount) );
\r
638 // do backwards iteration test
\r
639 --foundOffsetsCount; // back off one from the end offset
\r
640 while ( foundOffsetsCount > 0 ) {
\r
641 offset = brkIter.previous();
\r
642 if ( offset != foundOffsets[--foundOffsetsCount] ) {
\r
643 // log error for backwards test
\r
644 String textToDisplay = (text.length() <= 16)? text: text.substring(0,16);
\r
645 errln("For type " + type + " " + locale + ", text \"" + textToDisplay + "...\"" +
\r
646 "; expect " + expectOffsets.length + " offsets:" + formatOffsets(expectOffsets, expectOffsets.length) +
\r
647 "; found rev offset " + offset + " where expect " + foundOffsets[foundOffsetsCount] );
\r
// Naming below: ...TOffsets are paired with the tailored locale and ...ROffsets with
// ULocale.ROOT in the `tests` table.
654 // KIND_WORD "en_US_POSIX"
\r
655 final String posxWordText = "Can't have breaks in xx:yy or struct.field for CS-types.";
\r
656 final int[] posxWordTOffsets = { 5, 6, 10, 11, 17, 18, 20, 21, 23, 24, 26, 27, 29, 30, 36, 37, 42, 43, 46, 47, 49, 50, 55, 56 };
\r
657 final int[] posxWordROffsets = { 5, 6, 10, 11, 17, 18, 20, 21, 26, 27, 29, 30, 42, 43, 46, 47, 49, 50, 55, 56 };
\r
659 final String jaWordText = "\u79C1\u9054\u306B\u4E00\u3007\u3007\u3007\u306E\u30B3\u30F3\u30D4\u30E5\u30FC\u30BF" +
\r
660 "\u304C\u3042\u308B\u3002\u5948\u3005\u306F\u30EF\u30FC\u30C9\u3067\u3042\u308B\u3002";
\r
661 final int[] jaWordTOffsets = { 2, 3, 7, 8, 14, 17, 18, 20, 21, 24, 27, 28 };
\r
662 final int[] jaWordROffsets = { 1, 2, 3, 4, 5, 6, 7, 8, 14, 15, 16, 17, 18, 19, 20, 21, 24, 25, 26, 27, 28 };
\r
663 // KIND_SENTENCE "el"
\r
664 final String elSentText = "\u0391\u03B2, \u03B3\u03B4; \u0395 \u03B6\u03B7\u037E \u0398 \u03B9\u03BA. " +
\r
665 "\u039B\u03BC \u03BD\u03BE! \u039F\u03C0, \u03A1\u03C2? \u03A3";
\r
666 final int[] elSentTOffsets = { 8, 14, 20, 27, 35, 36 };
\r
667 final int[] elSentROffsets = { 20, 27, 35, 36 };
\r
668 // KIND_CHARACTER "th"
\r
669 final String thCharText = "\u0E01\u0E23\u0E30\u0E17\u0E48\u0E2D\u0E21\u0E23\u0E08\u0E19\u0E32 " +
\r
670 "(\u0E2A\u0E38\u0E0A\u0E32\u0E15\u0E34-\u0E08\u0E38\u0E11\u0E32\u0E21\u0E32\u0E28) " +
\r
671 "\u0E40\u0E14\u0E47\u0E01\u0E21\u0E35\u0E1B\u0E31\u0E0D\u0E2B\u0E32 ";
\r
672 final int[] thCharTOffsets = { 1, 2, 3, 5, 6, 7, 8, 9, 10, 11,
\r
673 12, 13, 15, 16, 17, 19, 20, 22, 23, 24, 25, 26, 27, 28,
\r
674 29, 30, 32, 33, 35, 37, 38, 39, 40, 41 };
\r
675 final int[] thCharROffsets = { 1, 3, 5, 6, 7, 8, 9, 11,
\r
676 12, 13, 15, 17, 19, 20, 22, 24, 26, 27, 28,
\r
677 29, 32, 33, 35, 37, 38, 40, 41 };
\r
679 final TBItem[] tests = {
\r
680 new TBItem( BreakIterator.KIND_WORD, new ULocale("en_US_POSIX"), posxWordText, posxWordTOffsets ),
\r
681 new TBItem( BreakIterator.KIND_WORD, ULocale.ROOT, posxWordText, posxWordROffsets ),
\r
682 new TBItem( BreakIterator.KIND_WORD, new ULocale("ja"), jaWordText, jaWordTOffsets ),
\r
683 new TBItem( BreakIterator.KIND_WORD, ULocale.ROOT, jaWordText, jaWordROffsets ),
\r
684 new TBItem( BreakIterator.KIND_SENTENCE, new ULocale("el"), elSentText, elSentTOffsets ),
\r
685 new TBItem( BreakIterator.KIND_SENTENCE, ULocale.ROOT, elSentText, elSentROffsets ),
\r
686 new TBItem( BreakIterator.KIND_CHARACTER, new ULocale("th"), thCharText, thCharTOffsets ),
\r
687 new TBItem( BreakIterator.KIND_CHARACTER, ULocale.ROOT, thCharText, thCharROffsets ),
\r
689 for (int iTest = 0; iTest < tests.length; iTest++) {
\r
690 tests[iTest].doTest();
\r
694 /* Tests the method public Object clone() */
\r
695 public void TestClone() {
\r
696 RuleBasedBreakIterator rbbi = new RuleBasedBreakIterator();
\r
698 rbbi.setText((CharacterIterator) null);
\r
699 if (((RuleBasedBreakIterator) rbbi.clone()).getText() != null)
\r
700 errln("RuleBasedBreakIterator.clone() was suppose to return "
\r
701 + "the same object because fText is set to null.");
\r
702 } catch (Exception e) {
\r
703 errln("RuleBasedBreakIterator.clone() was not suppose to return " + "an exception.");
\r
708 * Tests the method public boolean equals(Object that)
\r
710 public void TestEquals() {
\r
711 RuleBasedBreakIterator rbbi = new RuleBasedBreakIterator("");
\r
712 RuleBasedBreakIterator rbbi1 = new RuleBasedBreakIterator("");
\r
714 // TODO: Tests when "if (fRData != other.fRData && (fRData == null || other.fRData == null))" is true
\r
716 // Tests when "if (fText == null || other.fText == null)" is true
\r
717 rbbi.setText((CharacterIterator) null);
\r
718 if (rbbi.equals(rbbi1)) {
\r
719 errln("RuleBasedBreakIterator.equals(Object) was not suppose to return "
\r
720 + "true when the other object has a null fText.");
\r
723 // Tests when "if (fText == null && other.fText == null)" is true
\r
724 rbbi1.setText((CharacterIterator) null);
\r
725 if (!rbbi.equals(rbbi1)) {
\r
726 errln("RuleBasedBreakIterator.equals(Object) was not suppose to return "
\r
727 + "false when both objects has a null fText.");
\r
730 // Tests when an exception occurs
\r
731 if (rbbi.equals(0)) {
\r
732 errln("RuleBasedBreakIterator.equals(Object) was suppose to return " + "false when comparing to integer 0.");
\r
734 if (rbbi.equals(0.0)) {
\r
735 errln("RuleBasedBreakIterator.equals(Object) was suppose to return " + "false when comparing to float 0.0.");
\r
737 if (rbbi.equals("0")) {
\r
738 errln("RuleBasedBreakIterator.equals(Object) was suppose to return "
\r
739 + "false when comparing to string '0'.");
\r
744 * Tests the method public void dump()
\r
// Builds a RuleBasedBreakIterator with the no-argument constructor and, per the failure
// message, expects dump() on that blank object to raise an exception; any Exception is
// caught by the catch clause.
// NOTE(review): the lines between the constructor call and the errln (presumably `try {`
// and the dump() call itself) and the catch body are not visible in this listing - confirm
// against the full source.
746 public void TestDump() {
\r
747 RuleBasedBreakIterator rbbi = new RuleBasedBreakIterator();
\r
750 errln("RuleBasedBreakIterator.dump() was suppose to return "
\r
751 + "an exception for a blank RuleBasedBreakIterator object.");
\r
752 } catch (Exception e) {
\r
757 * Tests the method public int first()
\r
759 public void TestFirst() {
\r
760 RuleBasedBreakIterator rbbi = new RuleBasedBreakIterator("");
\r
761 // Tests when "if (fText == null)" is true
\r
762 rbbi.setText((CharacterIterator) null);
\r
763 if (rbbi.first() != BreakIterator.DONE) {
\r
764 errln("RuleBasedBreakIterator.first() was suppose to return "
\r
765 + "BreakIterator.DONE when the object has a null fText.");
\r
770 * Tests the method public int last()
\r
772 public void TestLast() {
\r
773 RuleBasedBreakIterator rbbi = new RuleBasedBreakIterator("");
\r
774 // Tests when "if (fText == null)" is true
\r
775 rbbi.setText((CharacterIterator) null);
\r
776 if (rbbi.last() != BreakIterator.DONE) {
\r
777 errln("RuleBasedBreakIterator.last() was suppose to return "
\r
778 + "BreakIterator.DONE when the object has a null fText.");
\r
783 * Tests the method public int following(int offset)
\r
785 public void TestFollowing() {
\r
786 RuleBasedBreakIterator rbbi = new RuleBasedBreakIterator("");
\r
787 // Tests when "else if (offset < fText.getBeginIndex())" is true
\r
788 rbbi.setText("dummy");
\r
789 if (rbbi.following(-1) != 0) {
\r
790 errln("RuleBasedBreakIterator.following(-1) was suppose to return "
\r
791 + "0 when the object has a fText of dummy.");
\r
796 * Tests the method public int preceding(int offset)
\r
798 public void TestPreceding() {
\r
799 RuleBasedBreakIterator rbbi = new RuleBasedBreakIterator("");
\r
800 // Tests when "if (fText == null || offset > fText.getEndIndex())" is true
\r
801 rbbi.setText((CharacterIterator)null);
\r
802 if (rbbi.preceding(-1) != BreakIterator.DONE) {
\r
803 errln("RuleBasedBreakIterator.preceding(-1) was suppose to return "
\r
804 + "0 when the object has a fText of null.");
\r
807 // Tests when "else if (offset < fText.getBeginIndex())" is true
\r
808 rbbi.setText("dummy");
\r
809 if (rbbi.preceding(-1) != 0) {
\r
810 errln("RuleBasedBreakIterator.preceding(-1) was suppose to return "
\r
811 + "0 when the object has a fText of dummy.");
\r
815 /* Tests the method public int current() */
\r
816 public void TestCurrent(){
\r
817 RuleBasedBreakIterator rbbi = new RuleBasedBreakIterator("");
\r
818 // Tests when "(fText != null) ? fText.getIndex() : BreakIterator.DONE" is true and false
\r
819 rbbi.setText((CharacterIterator)null);
\r
820 if(rbbi.current() != BreakIterator.DONE){
\r
821 errln("RuleBasedBreakIterator.current() was suppose to return "
\r
822 + "BreakIterator.DONE when the object has a fText of null.");
\r
824 rbbi.setText("dummy");
\r
825 if(rbbi.current() != 0){
\r
826 errln("RuleBasedBreakIterator.current() was suppose to return "
\r
827 + "0 when the object has a fText of dummy.");
\r