2 *******************************************************************************
\r
3 * Copyright (C) 1996-2010, International Business Machines Corporation and *
\r
4 * others. All Rights Reserved. *
\r
5 *******************************************************************************
\r
7 package com.ibm.icu.dev.test.rbbi;
\r
9 import java.io.DataInputStream;
\r
10 import java.io.File;
\r
11 import java.io.FileInputStream;
\r
12 import java.io.IOException;
\r
13 import java.io.InputStream;
\r
14 import java.text.StringCharacterIterator;
\r
15 import java.util.Locale;
\r
16 import java.util.Vector;
\r
18 import com.ibm.icu.dev.test.TestFmwk;
\r
19 import com.ibm.icu.text.BreakIterator;
\r
20 import com.ibm.icu.text.DictionaryBasedBreakIterator;
\r
22 public class BreakIteratorTest extends TestFmwk
\r
// Shared iterators, one per break type exercised by the tests in this class.
// All five are assigned in init().
24 private BreakIterator characterBreak;
\r
25 private BreakIterator wordBreak;
\r
26 private BreakIterator lineBreak;
\r
27 private BreakIterator sentenceBreak;
\r
28 private BreakIterator titleBreak;
\r
// Command-line entry point: constructs a BreakIteratorTest and passes the
// command-line arguments to its run(args) method.
30 public static void main(String[] args) throws Exception {
\r
31 new BreakIteratorTest().run(args);
\r
// No-argument constructor.
// NOTE(review): the constructor body is not present in this listing.
33 public BreakIteratorTest()
\r
// Creates the five shared iterators (character, word, line, sentence, title)
// through the matching BreakIterator.getXxxInstance() factory methods.
// NOTE(review): presumably a TestFmwk set-up hook that runs before the tests;
// confirm against TestFmwk.
37 protected void init(){
\r
38 characterBreak = BreakIterator.getCharacterInstance();
\r
39 wordBreak = BreakIterator.getWordInstance();
\r
40 lineBreak = BreakIterator.getLineInstance();
\r
41 //logln("Creating sentence iterator...");
\r
42 sentenceBreak = BreakIterator.getSentenceInstance();
\r
43 //logln("Finished creating sentence iterator...");
\r
44 titleBreak = BreakIterator.getTitleInstance();
\r
46 //=========================================================================
\r
47 // general test subroutines
\r
48 //=========================================================================
\r
// Common driver used by the data-driven tests in this class.
//
//   bi             - the iterator under test
//   expectedResult - Vector of String fragments; their concatenation is the
//                    test text and each element is one expected segment
//
// Steps visible here:
//   1. Concatenate all expected fragments into a single text string.
//   2. Collect the fragments produced by forward iteration
//      (_testFirstAndNext) and backward iteration (_testLastAndPrevious) and
//      compare the two lists.
//   3. Only if that comparison added no errors (getErrorCount() unchanged),
//      compare the expected fragments against an actual list.
//   4. Build a boundaries array with BreakIterator.DONE in the first and last
//      slots and a running sum over the expected fragments in between, then
//      feed it to _testFollowing, _testPreceding and _testIsBoundary.
//   5. Run doMultipleSelectionTest on the same text.
//
// NOTE(review): this listing omits several lines of the method (for example
// the declaration of `text`, the trailing arguments of both
// compareFragmentLists calls, and the end of the running-sum expression,
// which presumably adds each fragment's length()). Confirm against the
// complete file before relying on the details above.
50 private void generalIteratorTest(BreakIterator bi, Vector expectedResult) {
\r
51 StringBuffer buffer = new StringBuffer();
\r
53 for (int i = 0; i < expectedResult.size(); i++) {
\r
54 text = (String)expectedResult.elementAt(i);
\r
55 buffer.append(text);
\r
57 text = buffer.toString();
\r
61 Vector nextResults = _testFirstAndNext(bi, text);
\r
62 Vector previousResults = _testLastAndPrevious(bi, text);
\r
64 logln("comparing forward and backward...");
\r
65 int errs = getErrorCount();
\r
66 compareFragmentLists("forward iteration", "backward iteration", nextResults,
\r
68 if (getErrorCount() == errs) {
\r
69 logln("comparing expected and actual...");
\r
70 compareFragmentLists("expected result", "actual result", expectedResult,
\r
74 int[] boundaries = new int[expectedResult.size() + 3];
\r
75 boundaries[0] = BreakIterator.DONE;
\r
77 for (int i = 0; i < expectedResult.size(); i++)
\r
78 boundaries[i + 2] = boundaries[i + 1] + ((String)expectedResult.elementAt(i)).
\r
80 boundaries[boundaries.length - 1] = BreakIterator.DONE;
\r
82 _testFollowing(bi, text, boundaries);
\r
83 _testPreceding(bi, text, boundaries);
\r
84 _testIsBoundary(bi, text, boundaries);
\r
86 doMultipleSelectionTest(bi, text);
\r
// Forward pass over `text` with `bi`, collecting text.substring(lastP, p) for
// each step into a Vector.
//
// Errors reported (via errln):
//   - first() did not return 0
//   - next() did not move forward from the previous position
//   - iteration hit DONE before the last position equalled text.length()
//
// NOTE(review): the assignments to p / lastP, the conditions guarding the
// first two errln calls, and the return statement are not present in this
// listing; presumably the collected Vector is returned. Confirm.
89 private Vector _testFirstAndNext(BreakIterator bi, String text) {
\r
92 Vector result = new Vector();
\r
95 errln("first() returned " + p + " instead of 0");
\r
96 while (p != BreakIterator.DONE) {
\r
98 if (p != BreakIterator.DONE) {
\r
100 errln("next() failed to move forward: next() on position "
\r
101 + lastP + " yielded " + p);
\r
103 result.addElement(text.substring(lastP, p));
\r
106 if (lastP != text.length())
\r
107 errln("next() returned DONE prematurely: offset was "
\r
108 + lastP + " instead of " + text.length());
\r
// Backward pass over `text` with `bi`. Each fragment text.substring(p, lastP)
// is inserted at index 0 of the result Vector, so the fragments end up in
// text order even though they are discovered back to front.
//
// Errors reported (via errln):
//   - last() did not return text.length()
//   - previous() did not move backward from the previous position
//   - iteration hit DONE prematurely (message expects offset 0)
//
// NOTE(review): the assignments to p / lastP, some guarding conditions, and
// the return statement are not present in this listing; presumably the
// collected Vector is returned. Confirm.
115 private Vector _testLastAndPrevious(BreakIterator bi, String text) {
\r
118 Vector result = new Vector();
\r
120 if (p != text.length())
\r
121 errln("last() returned " + p + " instead of " + text.length());
\r
122 while (p != BreakIterator.DONE) {
\r
124 if (p != BreakIterator.DONE) {
\r
126 errln("previous() failed to move backward: previous() on position "
\r
127 + lastP + " yielded " + p);
\r
129 result.insertElementAt(text.substring(p, lastP), 0);
\r
133 errln("previous() returned DONE prematurely: offset was "
\r
134 + lastP + " instead of 0");
\r
// Compares two fragment lists element by element and reports where they
// diverge.
//
//   f1Name, f2Name - labels used in the log/error output for each list
//   f1, f2         - Vectors of String fragments to compare
//
// While both lists have elements left, the current pair is compared with
// equals(); matching fragments are echoed through debugLogln. On a mismatch
// the temp indices advance through each list, accumulating fragment lengths
// (tempT1 / tempT2) until the two running totals are equal again or a list
// runs out. The fragments in the mismatching stretch of each list are then
// logged and a single "Discrepancy" error is raised.
//
// NOTE(review): declarations, index increments and the else branch structure
// are not present in this listing; the description above is limited to the
// visible statements.
141 private void compareFragmentLists(String f1Name, String f2Name, Vector f1, Vector f2) {
\r
149 while (p1 < f1.size() && p2 < f2.size()) {
\r
150 s1 = (String)f1.elementAt(p1);
\r
151 s2 = (String)f2.elementAt(p2);
\r
155 if (s1.equals(s2)) {
\r
156 debugLogln(" >" + s1 + "<");
\r
166 while (tempT1 != tempT2 && tempP1 < f1.size() && tempP2 < f2.size()) {
\r
167 while (tempT1 < tempT2 && tempP1 < f1.size()) {
\r
168 tempT1 += ((String)f1.elementAt(tempP1)).length();
\r
171 while (tempT2 < tempT1 && tempP2 < f2.size()) {
\r
172 tempT2 += ((String)f2.elementAt(tempP2)).length();
\r
176 logln("*** " + f1Name + " has:");
\r
177 while (p1 <= tempP1 && p1 < f1.size()) {
\r
178 s1 = (String)f1.elementAt(p1);
\r
180 debugLogln(" *** >" + s1 + "<");
\r
183 logln("***** " + f2Name + " has:");
\r
184 while (p2 <= tempP2 && p2 < f2.size()) {
\r
185 s2 = (String)f2.elementAt(p2);
\r
187 debugLogln(" ***** >" + s2 + "<");
\r
190 errln("Discrepancy between " + f1Name + " and " + f2Name);
\r
// Checks bi.following(i) for every offset i from 0 through text.length()
// inclusive, logging each result and reporting an error when it differs from
// boundaries[p].
//
//   boundaries - expected break positions, as built by generalIteratorTest
//
// NOTE(review): the declaration of p and the statement executed when
// i == boundaries[p] are not present in this listing; presumably p is
// advanced to the next expected boundary there. Confirm.
195 private void _testFollowing(BreakIterator bi, String text, int[] boundaries) {
\r
196 logln("testFollowing():");
\r
198 for (int i = 0; i <= text.length(); i++) {
\r
199 if (i == boundaries[p])
\r
202 int b = bi.following(i);
\r
203 logln("bi.following(" + i + ") -> " + b);
\r
204 if (b != boundaries[p])
\r
205 errln("Wrong result from following() for " + i + ": expected " + boundaries[p]
\r
// Checks bi.preceding(i) for every offset i from 0 through text.length()
// inclusive, logging each result and reporting an error when it differs from
// boundaries[p].
//
// Unlike _testFollowing, the boundary test (i == boundaries[p + 1]) comes
// after the preceding() call and looks one slot ahead.
//
// NOTE(review): the declaration of p and the statement guarded by the final
// `if` are not present in this listing; presumably p is incremented there.
// Confirm.
210 private void _testPreceding(BreakIterator bi, String text, int[] boundaries) {
\r
211 logln("testPreceding():");
\r
213 for (int i = 0; i <= text.length(); i++) {
\r
214 int b = bi.preceding(i);
\r
215 logln("bi.preceding(" + i + ") -> " + b);
\r
216 if (b != boundaries[p])
\r
217 errln("Wrong result from preceding() for " + i + ": expected " + boundaries[p]
\r
220 if (i == boundaries[p + 1])
\r
// Checks bi.isBoundary(i) for every offset i from 0 through text.length()
// inclusive and logs each answer. Offsets equal to boundaries[p] take the
// branch whose error reads "expected true, got false"; the other error reads
// "expected false, got true".
//
// NOTE(review): the declarations of p and isB, the inner conditions on isB,
// and the advance of p are not present in this listing.
225 private void _testIsBoundary(BreakIterator bi, String text, int[] boundaries) {
\r
226 logln("testIsBoundary():");
\r
229 for (int i = 0; i <= text.length(); i++) {
\r
230 isB = bi.isBoundary(i);
\r
231 logln("bi.isBoundary(" + i + ") -> " + isB);
\r
233 if (i == boundaries[p]) {
\r
235 errln("Wrong result from isBoundary() for " + i + ": expected true, got false");
\r
240 errln("Wrong result from isBoundary() for " + i + ": expected false, got true");
\r
// Cross-checks the multi-step call next(count) against repeated single steps.
//
// A clone of `iterator` is used as the test iterator. In the forward pass the
// clone is reset with first() and then asked for next(count); the answer must
// equal the offset reached by stepping `iterator` with next(). The backward
// pass does the same starting from last(), stepping `iterator` with
// previous(). Both passes loop until the stepped offset is DONE.
//
// NOTE(review): the declaration and updates of `count` and `testOffset` are
// not present in this listing; presumably count grows in the forward pass and
// goes negative in the backward pass. Confirm.
// NOTE(review): in the backward pass the log and error texts still say
// "next()" although the reference offset comes from iterator.previous();
// the wording may mislead when reading failures.
245 private void doMultipleSelectionTest(BreakIterator iterator, String testText)
\r
247 logln("Multiple selection test...");
\r
248 BreakIterator testIterator = (BreakIterator)iterator.clone();
\r
249 int offset = iterator.first();
\r
254 testOffset = testIterator.first();
\r
255 testOffset = testIterator.next(count);
\r
256 logln("next(" + count + ") -> " + testOffset);
\r
257 if (offset != testOffset)
\r
258 errln("next(n) and next() not returning consistent results: for step " + count + ", next(n) returned " + testOffset + " and next() had " + offset);
\r
260 if (offset != BreakIterator.DONE) {
\r
262 offset = iterator.next();
\r
264 } while (offset != BreakIterator.DONE);
\r
266 // now do it backwards...
\r
267 offset = iterator.last();
\r
271 testOffset = testIterator.last();
\r
272 testOffset = testIterator.next(count);
\r
273 logln("next(" + count + ") -> " + testOffset);
\r
274 if (offset != testOffset)
\r
275 errln("next(n) and next() not returning consistent results: for step " + count + ", next(n) returned " + testOffset + " and next() had " + offset);
\r
277 if (offset != BreakIterator.DONE) {
\r
279 offset = iterator.previous();
\r
281 } while (offset != BreakIterator.DONE);
\r
// Brute-force invariant check over every ordered pair of characters drawn
// from testChars.
//
//   tb        - the iterator under test
//   testChars - the characters to combine
//
// Part 1: a four-character work buffer holds <x> CR LF <y>; positions 0 and 3
// are replaced by each pair from testChars and the iterator is walked over
// the result. A break reported between the CR and the LF is an error.
//
// Part 2: after appending "aaaa", position 1 receives a character c and
// position 2 receives a second character; a break between the two is an
// error. Per the in-code comment this targets breaks before a non-spacing
// mark that is not preceded by a line terminator.
//
// NOTE(review): the statements guarded by the line-terminator test, the
// mark-type test, the per-break conditions and the errorCount >= 75 checks
// are not present in this listing. Presumably the first two skip the
// candidate (continue) and the last aborts after 75 errors. Confirm.
285 private void doOtherInvariantTest(BreakIterator tb, String testChars)
\r
287 StringBuffer work = new StringBuffer("a\r\na");
\r
288 int errorCount = 0;
\r
290 // a break should never occur between CR and LF
\r
291 for (int i = 0; i < testChars.length(); i++) {
\r
292 work.setCharAt(0, testChars.charAt(i));
\r
293 for (int j = 0; j < testChars.length(); j++) {
\r
294 work.setCharAt(3, testChars.charAt(j));
\r
295 tb.setText(work.toString());
\r
296 for (int k = tb.first(); k != BreakIterator.DONE; k = tb.next())
\r
298 errln("Break between CR and LF in string U+" + Integer.toHexString(
\r
299 (int)(work.charAt(0))) + ", U+d U+a U+" + Integer.toHexString(
\r
300 (int)(work.charAt(3))));
\r
302 if (errorCount >= 75)
\r
308 // a break should never occur before a non-spacing mark, unless it's preceded
\r
309 // by a line terminator
\r
311 work.append("aaaa");
\r
312 for (int i = 0; i < testChars.length(); i++) {
\r
313 char c = testChars.charAt(i);
\r
314 if (c == '\n' || c == '\r' || c == '\u2029' || c == '\u2028' || c == '\u0003')
\r
316 work.setCharAt(1, c);
\r
317 for (int j = 0; j < testChars.length(); j++) {
\r
318 c = testChars.charAt(j);
\r
319 if (Character.getType(c) != Character.NON_SPACING_MARK && Character.getType(c)
\r
320 != Character.ENCLOSING_MARK)
\r
322 work.setCharAt(2, c);
\r
323 tb.setText(work.toString());
\r
324 for (int k = tb.first(); k != BreakIterator.DONE; k = tb.next())
\r
326 errln("Break between U+" + Integer.toHexString((int)(work.charAt(1)))
\r
327 + " and U+" + Integer.toHexString((int)(work.charAt(2))));
\r
329 if (errorCount >= 75)
\r
// Logs `s` through logln after rebuilding it character by character into a
// fresh buffer. Characters from ' ' up to (but excluding) U+007F take one
// branch; for the others the hex value is computed with Integer.toHexString
// and left-padded with zeros to four digits.
//
// NOTE(review): the append calls for both branches are not present in this
// listing; presumably printable characters are copied verbatim and the rest
// are written as an escaped hex code. Confirm the exact escape prefix.
336 public void debugLogln(String s) {
\r
337 final String zeros = "0000";
\r
339 StringBuffer out = new StringBuffer();
\r
340 for (int i = 0; i < s.length(); i++) {
\r
341 char c = s.charAt(i);
\r
342 if (c >= ' ' && c < '\u007f')
\r
346 temp = Integer.toHexString((int)c);
\r
347 out.append(zeros.substring(0, 4 - temp.length()));
\r
351 logln(out.toString());
\r
354 //=========================================================================
\r
356 //=========================================================================
\r
// Word-break regression test: "aa" + U+0300 (a combining mark) + "a" is
// expected to stay together as one word, followed by a single space segment.
362 public void TestBug4097779() {
\r
363 Vector wordSelectionData = new Vector();
\r
365 wordSelectionData.addElement("aa\u0300a");
\r
366 wordSelectionData.addElement(" ");
\r
368 generalIteratorTest(wordBreak, wordSelectionData);
\r
// Word-break test on Korean text supplied twice: first as precomposed Hangul
// syllables, then as the equivalent conjoining jamo. Each word is expected as
// one segment, with every separating space as its own segment.
374 public void TestBug4098467Words() {
\r
375 Vector wordSelectionData = new Vector();
\r
377 // What follows is a string of Korean characters (I found it in the Yellow Pages
\r
378 // ad for the Korean Presbyterian Church of San Francisco, and I hope I transcribed
\r
379 // it correctly), first as precomposed syllables, and then as conjoining jamo.
\r
380 // Both sequences should be semantically identical and break the same way.
\r
381 // precomposed syllables...
\r
382 wordSelectionData.addElement("\uc0c1\ud56d");
\r
383 wordSelectionData.addElement(" ");
\r
384 wordSelectionData.addElement("\ud55c\uc778");
\r
385 wordSelectionData.addElement(" ");
\r
386 wordSelectionData.addElement("\uc5f0\ud569");
\r
387 wordSelectionData.addElement(" ");
\r
388 wordSelectionData.addElement("\uc7a5\ub85c\uad50\ud68c");
\r
389 wordSelectionData.addElement(" ");
\r
390 // conjoining jamo...
\r
391 wordSelectionData.addElement("\u1109\u1161\u11bc\u1112\u1161\u11bc");
\r
392 wordSelectionData.addElement(" ");
\r
393 wordSelectionData.addElement("\u1112\u1161\u11ab\u110b\u1175\u11ab");
\r
394 wordSelectionData.addElement(" ");
\r
395 wordSelectionData.addElement("\u110b\u1167\u11ab\u1112\u1161\u11b8");
\r
396 wordSelectionData.addElement(" ");
\r
397 wordSelectionData.addElement("\u110c\u1161\u11bc\u1105\u1169\u1100\u116d\u1112\u116c");
\r
398 wordSelectionData.addElement(" ");
\r
400 generalIteratorTest(wordBreak, wordSelectionData);
\r
// Sentence-break regression test for bug 4111338. Three passages mix CJK text
// with the Latin word "JAVA" and each ends in U+2029; a fourth is a quoted
// English sentence ending in U+2029. Each passage is expected to be exactly
// one sentence, i.e. no break where CJK and Latin letters meet.
407 public void TestBug4111338() {
\r
408 Vector sentenceSelectionData = new Vector();
\r
410 // test for bug #4111338: Don't break sentences at the boundary between CJK
\r
411 // and other letters
\r
412 sentenceSelectionData.addElement("\u5487\u67ff\ue591\u5017\u61b3\u60a1\u9510\u8165:\"JAVA\u821c"
\r
413 + "\u8165\u7fc8\u51ce\u306d,\u2494\u56d8\u4ec0\u60b1\u8560\u51ba"
\r
414 + "\u611d\u57b6\u2510\u5d46\".\u2029");
\r
415 sentenceSelectionData.addElement("\u5487\u67ff\ue591\u5017\u61b3\u60a1\u9510\u8165\u9de8"
\r
416 + "\u97e4JAVA\u821c\u8165\u7fc8\u51ce\u306d\ue30b\u2494\u56d8\u4ec0"
\r
417 + "\u60b1\u8560\u51ba\u611d\u57b6\u2510\u5d46\u97e5\u7751\u2029");
\r
418 sentenceSelectionData.addElement("\u5487\u67ff\ue591\u5017\u61b3\u60a1\u9510\u8165\u9de8\u97e4"
\r
419 + "\u6470\u8790JAVA\u821c\u8165\u7fc8\u51ce\u306d\ue30b\u2494\u56d8"
\r
420 + "\u4ec0\u60b1\u8560\u51ba\u611d\u57b6\u2510\u5d46\u97e5\u7751\u2029");
\r
421 sentenceSelectionData.addElement("He said, \"I can go there.\"\u2029");
\r
423 generalIteratorTest(sentenceBreak, sentenceSelectionData);
\r
// Sentence-break regression test: three sentences containing or ending in
// digits ("1998.", "30th.") are each expected as a single segment.
430 public void TestBug4143071() {
\r
431 Vector sentenceSelectionData = new Vector();
\r
433 // Make sure sentences that end with digits work right
\r
434 sentenceSelectionData.addElement("Today is the 27th of May, 1998. ");
\r
435 sentenceSelectionData.addElement("Tomorrow will be 28 May 1998. ");
\r
436 sentenceSelectionData.addElement("The day after will be the 30th.\u2029");
\r
438 generalIteratorTest(sentenceBreak, sentenceSelectionData);
\r
// Sentence-break regression test: a sentence ending in the capitalised
// abbreviation "VM." followed by a second sentence; exactly two segments are
// expected.
444 public void TestBug4152416() {
\r
445 Vector sentenceSelectionData = new Vector();
\r
447 // Make sure sentences ending with a capital letter are treated correctly
\r
448 sentenceSelectionData.addElement("The type of all primitive "
\r
449 + "<code>boolean</code> values accessed in the target VM. ");
\r
450 sentenceSelectionData.addElement("Calls to xxx will return an "
\r
451 + "implementor of this interface.\u2029");
\r
453 generalIteratorTest(sentenceBreak, sentenceSelectionData);
\r
// Sentence-break regression test using three Javadoc-style sentences that
// contain inline HTML tags and punctuation; each is expected to come back as
// one segment.
459 public void TestBug4152117() {
\r
460 Vector sentenceSelectionData = new Vector();
\r
462 // Make sure sentence breaking is handling punctuation correctly
\r
463 // [COULD NOT REPRODUCE THIS BUG, BUT TEST IS HERE TO MAKE SURE
\r
464 // IT DOESN'T CROP UP]
\r
465 sentenceSelectionData.addElement("Constructs a randomly generated "
\r
466 + "BigInteger, uniformly distributed over the range <tt>0</tt> "
\r
467 + "to <tt>(2<sup>numBits</sup> - 1)</tt>, inclusive. ");
\r
468 sentenceSelectionData.addElement("The uniformity of the distribution "
\r
469 + "assumes that a fair source of random bits is provided in "
\r
470 + "<tt>rnd</tt>. ");
\r
471 sentenceSelectionData.addElement("Note that this constructor always "
\r
472 + "constructs a non-negative BigInteger.\u2029");
\r
474 generalIteratorTest(sentenceBreak, sentenceSelectionData);
\r
// General line-break test. The expected segments cover three situations:
// breaks after hyphens and after trailing spaces, breaks after explicit line
// terminators (LF, CR, U+2028) and a tab, and runs of CR / LF / CRLF where
// CRLF stays in one segment while a lone CR forms a segment by itself.
477 public void TestLineBreak() {
\r
478 Vector lineSelectionData = new Vector();
\r
480 lineSelectionData.addElement("Multi-");
\r
481 lineSelectionData.addElement("Level ");
\r
482 lineSelectionData.addElement("example ");
\r
483 lineSelectionData.addElement("of ");
\r
484 lineSelectionData.addElement("a ");
\r
485 lineSelectionData.addElement("semi-");
\r
486 lineSelectionData.addElement("idiotic ");
\r
487 lineSelectionData.addElement("non-");
\r
488 lineSelectionData.addElement("sensical ");
\r
489 lineSelectionData.addElement("(non-");
\r
490 lineSelectionData.addElement("important) ");
\r
491 lineSelectionData.addElement("sentence. ");
\r
493 lineSelectionData.addElement("Hi ");
\r
494 lineSelectionData.addElement("Hello ");
\r
495 lineSelectionData.addElement("How\n");
\r
496 lineSelectionData.addElement("are\r");
\r
497 lineSelectionData.addElement("you\u2028");
\r
498 lineSelectionData.addElement("fine.\t");
\r
499 lineSelectionData.addElement("good. ");
\r
501 lineSelectionData.addElement("Now\r");
\r
502 lineSelectionData.addElement("is\n");
\r
503 lineSelectionData.addElement("the\r\n");
\r
504 lineSelectionData.addElement("time\n");
\r
505 lineSelectionData.addElement("\r");
\r
506 lineSelectionData.addElement("for\r");
\r
507 lineSelectionData.addElement("\r");
\r
508 lineSelectionData.addElement("all");
\r
510 generalIteratorTest(lineBreak, lineSelectionData);
\r
// Line-break regression test on CJK ideographs followed by runs of
// punctuation (U+3001, U+3002, ASCII comma and period). Each ideograph plus
// its trailing punctuation run is expected as one segment.
516 public void TestBug4068133() {
\r
517 Vector lineSelectionData = new Vector();
\r
519 lineSelectionData.addElement("\u96f6");
\r
520 lineSelectionData.addElement("\u4e00\u3002");
\r
521 lineSelectionData.addElement("\u4e8c\u3001");
\r
522 lineSelectionData.addElement("\u4e09\u3002\u3001");
\r
523 lineSelectionData.addElement("\u56db\u3001\u3002\u3001");
\r
524 lineSelectionData.addElement("\u4e94,");
\r
525 lineSelectionData.addElement("\u516d.");
\r
526 lineSelectionData.addElement("\u4e03.\u3001,\u3002");
\r
527 lineSelectionData.addElement("\u516b");
\r
529 generalIteratorTest(lineBreak, lineSelectionData);
\r
// Line-break regression test: "foo" and "bar" joined by U+00A0 (no-break
// space) are expected to stay in one segment. The U+FEFF variant is disabled
// (commented out) in the source.
535 public void TestBug4086052() {
\r
536 Vector lineSelectionData = new Vector();
\r
538 lineSelectionData.addElement("foo\u00a0bar ");
\r
539 // lineSelectionData.addElement("foo\ufeffbar");
\r
541 generalIteratorTest(lineBreak, lineSelectionData);
\r
// Line-break regression test: no break is expected after the commas in
// "dog,cat,mouse ", while "(one)" and "(two)" + LF are expected as separate
// segments.
547 public void TestBug4097920() {
\r
548 Vector lineSelectionData = new Vector();
\r
550 lineSelectionData.addElement("dog,cat,mouse ");
\r
551 lineSelectionData.addElement("(one)");
\r
552 lineSelectionData.addElement("(two)\n");
\r
553 generalIteratorTest(lineBreak, lineSelectionData);
\r
// Line-break regression test: the fullwidth forms U+FF0E, U+FF01 and U+FF1F
// are each expected to stay attached to the ideograph that precedes them.
561 public void TestBug4117554Lines() {
\r
562 Vector lineSelectionData = new Vector();
\r
564 // Fullwidth .!? should be treated as postJwrd
\r
565 lineSelectionData.addElement("\u4e01\uff0e");
\r
566 lineSelectionData.addElement("\u4e02\uff01");
\r
567 lineSelectionData.addElement("\u4e03\uff1f");
\r
569 generalIteratorTest(lineBreak, lineSelectionData);
\r
// Line-break test: tokens that mix letters and digits must not be split
// internally; the only expected breaks follow the trailing spaces.
572 public void TestLettersAndDigits() {
\r
573 // a character sequence such as "X11" or "30F3" or "native2ascii" should
\r
574 // be kept together as a single word
\r
575 Vector lineSelectionData = new Vector();
\r
577 lineSelectionData.addElement("X11 ");
\r
578 lineSelectionData.addElement("30F3 ");
\r
579 lineSelectionData.addElement("native2ascii");
\r
581 generalIteratorTest(lineBreak, lineSelectionData);
\r
// Base letter + combining mark pairs used as single expected segments in
// TestCharacterBreak. Each is a two-char string: an ASCII letter followed by
// a combining character in the U+03xx range.
585 private static final String graveS = "S\u0300";
\r
586 private static final String acuteBelowI = "i\u0317";
\r
587 private static final String acuteE = "e\u0301";
\r
588 private static final String circumflexA = "a\u0302";
\r
589 private static final String tildeE = "e\u0303";
\r
// Character-break test. Expected segments are single letters, the
// letter+combining-mark constants (each one segment), and line terminators:
// a lone LF or CR is its own segment and CR LF stays together as one.
591 public void TestCharacterBreak() {
\r
592 Vector characterSelectionData = new Vector();
\r
594 characterSelectionData.addElement(graveS);
\r
595 characterSelectionData.addElement(acuteBelowI);
\r
596 characterSelectionData.addElement("m");
\r
597 characterSelectionData.addElement("p");
\r
598 characterSelectionData.addElement("l");
\r
599 characterSelectionData.addElement(acuteE);
\r
600 characterSelectionData.addElement(" ");
\r
601 characterSelectionData.addElement("s");
\r
602 characterSelectionData.addElement(circumflexA);
\r
603 characterSelectionData.addElement("m");
\r
604 characterSelectionData.addElement("p");
\r
605 characterSelectionData.addElement("l");
\r
606 characterSelectionData.addElement(tildeE);
\r
607 characterSelectionData.addElement(".");
\r
608 characterSelectionData.addElement("w");
\r
609 characterSelectionData.addElement(circumflexA);
\r
610 characterSelectionData.addElement("w");
\r
611 characterSelectionData.addElement("a");
\r
612 characterSelectionData.addElement("f");
\r
613 characterSelectionData.addElement("q");
\r
614 characterSelectionData.addElement("\n");
\r
615 characterSelectionData.addElement("\r");
\r
616 characterSelectionData.addElement("\r\n");
\r
617 characterSelectionData.addElement("\n");
\r
619 generalIteratorTest(characterBreak, characterSelectionData);
\r
// Character-break counterpart of TestBug4098467Words: the same Korean text,
// first as precomposed syllables (one segment per syllable) and then as
// conjoining jamo, where each jamo sequence forming one syllable is expected
// to stay together as a single segment.
625 public void TestBug4098467Characters() {
\r
626 Vector characterSelectionData = new Vector();
\r
628 // What follows is a string of Korean characters (I found it in the Yellow Pages
\r
629 // ad for the Korean Presbyterian Church of San Francisco, and I hope I transcribed
\r
630 // it correctly), first as precomposed syllables, and then as conjoining jamo.
\r
631 // Both sequences should be semantically identical and break the same way.
\r
632 // precomposed syllables...
\r
633 characterSelectionData.addElement("\uc0c1");
\r
634 characterSelectionData.addElement("\ud56d");
\r
635 characterSelectionData.addElement(" ");
\r
636 characterSelectionData.addElement("\ud55c");
\r
637 characterSelectionData.addElement("\uc778");
\r
638 characterSelectionData.addElement(" ");
\r
639 characterSelectionData.addElement("\uc5f0");
\r
640 characterSelectionData.addElement("\ud569");
\r
641 characterSelectionData.addElement(" ");
\r
642 characterSelectionData.addElement("\uc7a5");
\r
643 characterSelectionData.addElement("\ub85c");
\r
644 characterSelectionData.addElement("\uad50");
\r
645 characterSelectionData.addElement("\ud68c");
\r
646 characterSelectionData.addElement(" ");
\r
647 // conjoining jamo...
\r
648 characterSelectionData.addElement("\u1109\u1161\u11bc");
\r
649 characterSelectionData.addElement("\u1112\u1161\u11bc");
\r
650 characterSelectionData.addElement(" ");
\r
651 characterSelectionData.addElement("\u1112\u1161\u11ab");
\r
652 characterSelectionData.addElement("\u110b\u1175\u11ab");
\r
653 characterSelectionData.addElement(" ");
\r
654 characterSelectionData.addElement("\u110b\u1167\u11ab");
\r
655 characterSelectionData.addElement("\u1112\u1161\u11b8");
\r
656 characterSelectionData.addElement(" ");
\r
657 characterSelectionData.addElement("\u110c\u1161\u11bc");
\r
658 characterSelectionData.addElement("\u1105\u1169");
\r
659 characterSelectionData.addElement("\u1100\u116d");
\r
660 characterSelectionData.addElement("\u1112\u116c");
\r
662 generalIteratorTest(characterBreak, characterSelectionData);
\r
// Title-break test: after a leading space segment, each word together with
// its trailing space is expected as one segment.
665 public void TestTitleBreak()
\r
667 Vector titleData = new Vector();
\r
668 titleData.addElement(" ");
\r
669 titleData.addElement("This ");
\r
670 titleData.addElement("is ");
\r
671 titleData.addElement("a ");
\r
672 titleData.addElement("simple ");
\r
673 titleData.addElement("sample ");
\r
674 titleData.addElement("sentence. ");
\r
675 titleData.addElement("This ");
\r
677 generalIteratorTest(titleBreak, titleData);
\r
// Regression test for isBoundary() on a StringCharacterIterator restricted to
// a sub-range of the string. The word iterator is given the range
// [begin, end) of "...Hello, World!..." (end is three characters before the
// string end) and isBoundary(index) is called for index = -1 .. begin.
//
// An IllegalArgumentException caught for an index >= begin is an error. A
// call that returns normally is reported with "Didn't get exception ...".
//
// NOTE(review): the declaration of `begin`, the try statement and the
// condition guarding the "Didn't get exception" error are not present in this
// listing; presumably that error is raised only when index < begin. Confirm.
685 public void TestBug4153072() {
\r
686 BreakIterator iter = BreakIterator.getWordInstance();
\r
687 String str = "...Hello, World!...";
\r
689 int end = str.length() - 3;
\r
690 // not used boolean gotException = false;
\r
693 iter.setText(new StringCharacterIterator(str, begin, end, begin));
\r
694 for (int index = -1; index < begin + 1; ++index) {
\r
696 iter.isBoundary(index);
\r
698 errln("Didn't get exception with offset = " + index +
\r
699 " and begin index = " + begin);
\r
701 catch (IllegalArgumentException e) {
\r
702 if (index >= begin)
\r
703 errln("Got exception with offset = " + index +
\r
704 " and begin index = " + begin);
\r
// Line-break regression test: the fullwidth comma U+FF0C is expected to stay
// with the preceding character, with the next character as its own segment.
710 public void TestBug4146175Lines() {
\r
711 Vector lineSelectionData = new Vector();
\r
713 // the fullwidth comma should stick to the preceding Japanese character
\r
714 lineSelectionData.addElement("\u7d42\uff0c");
\r
715 lineSelectionData.addElement("\u308f");
\r
717 generalIteratorTest(lineBreak, lineSelectionData);
\r
// Fixed pool of sample characters for the invariant tests: control
// characters, ASCII punctuation/digits/letters, Latin-1 symbols, modifier
// letters, combining marks (U+0300..U+0304, U+20DD..U+20E0), Hebrew and
// Devanagari characters, general-punctuation format/space characters
// including U+2028/U+2029, and Roman numerals U+2160..U+2164.
720 private static final String cannedTestChars
\r
721 = "\u0000\u0001\u0002\u0003\u0004 !\"#$%&()+-01234<=>ABCDE[]^_`abcde{}|\u00a0\u00a2"
\r
722 + "\u00a3\u00a4\u00a5\u00a6\u00a7\u00a8\u00a9\u00ab\u00ad\u00ae\u00af\u00b0\u00b2\u00b3"
\r
723 + "\u00b4\u00b9\u00bb\u00bc\u00bd\u02b0\u02b1\u02b2\u02b3\u02b4\u0300\u0301\u0302\u0303"
\r
724 + "\u0304\u05d0\u05d1\u05d2\u05d3\u05d4\u0903\u093e\u093f\u0940\u0949\u0f3a\u0f3b\u2000"
\r
725 + "\u2001\u2002\u200c\u200d\u200e\u200f\u2010\u2011\u2012\u2028\u2029\u202a\u203e\u203f"
\r
726 + "\u2040\u20dd\u20de\u20df\u20e0\u2160\u2161\u2162\u2163\u2164";
\r
// Runs doOtherInvariantTest with a fresh sentence iterator over
// cannedTestChars extended with '.', ',', U+3001, U+3002, three hiragana
// characters (U+3041..U+3043) and U+FEFF.
728 public void TestSentenceInvariants()
\r
730 BreakIterator e = BreakIterator.getSentenceInstance();
\r
731 doOtherInvariantTest(e, cannedTestChars + ".,\u3001\u3002\u3041\u3042\u3043\ufeff");
\r
// Runs the general line-break test with a single expected fragment `text`.
// NOTE(review): the declaration of `text` is not present in this listing;
// judging by the method name it is presumably the empty string. Confirm.
734 public void TestEmptyString()
\r
737 Vector x = new Vector();
\r
738 x.addElement(text);
\r
740 generalIteratorTest(lineBreak, x);
\r
// Smoke test for BreakIterator.getAvailableLocales() and
// getAvailableULocales(): an empty array from either call is an error.
// For the ULocale variant the number of locales is logged.
// NOTE(review): the else keyword before the final logln is not present in
// this listing; presumably the count is only logged when the list is
// non-empty. Confirm.
743 public void TestGetAvailableLocales()
\r
745 Locale[] locList = BreakIterator.getAvailableLocales();
\r
747 if (locList.length == 0)
\r
748 errln("getAvailableLocales() returned an empty list!");
\r
749 // I have no idea how to test this function...
\r
751 com.ibm.icu.util.ULocale[] ulocList = BreakIterator.getAvailableULocales();
\r
752 if (ulocList.length == 0) {
\r
753 errln("getAvailableULocales() returned an empty list!");
\r
755 logln("getAvailableULocales() returned " + ulocList.length + " locales");
\r
// Word-break test on "boo.": first() must return 0, the following next()
// must return 3 (just before the period), and the end of the string (4) must
// then be reachable.
// The last check short-circuits: wb.next() is only called when wb.current()
// is not already 4.
763 public void TestEndBehavior()
\r
765 String testString = "boo.";
\r
766 BreakIterator wb = BreakIterator.getWordInstance();
\r
767 wb.setText(testString);
\r
769 if (wb.first() != 0)
\r
770 errln("Didn't get break at beginning of string.");
\r
771 if (wb.next() != 3)
\r
772 errln("Didn't get break before period in \"boo.\"");
\r
773 if (wb.current() != 4 && wb.next() != 4)
\r
774 errln("Didn't get break at end of string.");
\r
777 // The following two tests are ported from ICU4C 1.8.1 [Richard/GCL]
\r
779 * Port From: ICU4C v1.8.1 : textbounds : IntlTestTextBoundary
\r
780 * Source File: $ICU4CRoot/source/test/intltest/ittxtbd.cpp
\r
783 * test methods preceding, following and isBoundary
\r
// Consistency test for following(), preceding() and isBoundary() on the text
// "aaa bbb ccc", using a word iterator for the default locale.
//
// Visible checks: following(p2+1) and preceding(p2+1) are compared with
// neighbouring boundaries (error texts "f!=p3" and "p!=p2"), two adjacency
// relations are checked ("p1+1!=p2", "p3+1!=p4"), and isBoundary must be true
// at p2 and p3 but false at p2+1.
//
// NOTE(review): the statements that compute p1..p4 and the conditions
// guarding the first four errln calls are not present in this listing.
785 public void TestPreceding() {
\r
786 String words3 = "aaa bbb ccc";
\r
787 BreakIterator e = BreakIterator.getWordInstance(Locale.getDefault());
\r
788 e.setText( words3 );
\r
795 int f = e.following(p2+1);
\r
796 int p = e.preceding(p2+1);
\r
798 errln("IntlTestTextBoundary::TestPreceding: f!=p3");
\r
800 errln("IntlTestTextBoundary::TestPreceding: p!=p2");
\r
803 errln("IntlTestTextBoundary::TestPreceding: p1+1!=p2");
\r
806 errln("IntlTestTextBoundary::TestPreceding: p3+1!=p4");
\r
808 if (!e.isBoundary(p2) || e.isBoundary(p2+1) || !e.isBoundary(p3))
\r
810 errln("IntlTestTextBoundary::TestPreceding: isBoundary err");
\r
// Line-break test with apostrophes: the quoted word "'foobles'. " and the
// contraction "Don't " are each expected as a single segment, i.e. no break
// at the apostrophe.
818 public void TestLineBreakContractions() {
\r
819 Vector expected = new Vector();
\r
820 expected.add("These ");
\r
821 expected.add("are ");
\r
822 expected.add("'foobles'. ");
\r
823 expected.add("Don't ");
\r
824 expected.add("you ");
\r
825 expected.add("like ");
\r
826 expected.add("them?");
\r
827 generalIteratorTest(lineBreak, expected);
\r
// For every ULocale from BreakIterator.getAvailableULocales() and every
// iterator kind 0..4, calls BreakIterator.getBreakInstance(loc, type).
// A failed creation or any thrown Exception is reported with the type and
// locale.
//
// NOTE(review): the declaration of `type`, the assignment of `loc` inside
// the loop, the try statement and the condition guarding the first errln are
// not present in this listing; presumably that errln fires when the returned
// instance is null. Confirm.
// NOTE(review): the catch reports only e.getMessage(), so the exception
// class and stack trace are lost in the test output.
833 public void TestT5615() {
\r
834 com.ibm.icu.util.ULocale[] ulocales = BreakIterator.getAvailableULocales();
\r
836 com.ibm.icu.util.ULocale loc = null;
\r
838 for (int i = 0; i < ulocales.length; i++) {
\r
840 for (type = 0; type < 5 /* 5 = BreakIterator.KIND_COUNT */; ++type) {
\r
841 BreakIterator brk = BreakIterator.getBreakInstance(loc, type);
\r
843 errln("ERR: Failed to create an instance type: " + type + " / locale: " + loc);
\r
847 } catch (Exception e) {
\r
848 errln("ERR: Failed to create an instance type: " + type + " / locale: " + loc + " / exception: " + e.getMessage());
\r
853 * Tests the constructors public DictionaryBasedBreakIterator(String rules, ... public
\r
854 * DictionaryBasedBreakIterator(InputStream compiledRules, ...
\r
// Negative tests for DictionaryBasedBreakIterator constructors, reached
// through two local subclasses (needed to call the superclass constructors).
//
// Three cases are visible; in each, reaching the errln after the constructor
// call means no exception was thrown and counts as a failure:
//   1. TestDictionaryBasedBreakIterator(null)
//   2. TestDictionaryBasedBreakIterator(stream over an empty temp file)
//   3. TestDictionaryBasedBreakIterator1(), which calls
//      super((InputStream) null, (InputStream) null)
//
// NOTE(review): the try statements, the body of the first subclass
// constructor and the catch bodies are not present in this listing.
// NOTE(review): no close() on the FileInputStream / DataInputStream and no
// delete of the temp file are visible here; if they are also absent from the
// complete file, this test leaks a file handle and leaves a "dummy*" temp
// file behind.
// NOTE(review): the first failure message says "empty string" although the
// argument passed is a null InputStream; the messages also read "is suppose
// to" and "an null". Consider correcting the wording.
856 public void TestDictionaryBasedBreakIterator() throws IOException {
\r
857 // The following class allows the testing of the constructor
\r
858 // public DictionaryBasedBreakIterator(String rules, ...
\r
859 class TestDictionaryBasedBreakIterator extends DictionaryBasedBreakIterator {
\r
860 public TestDictionaryBasedBreakIterator(InputStream is) throws IOException {
\r
865 @SuppressWarnings("unused")
\r
866 TestDictionaryBasedBreakIterator td = new TestDictionaryBasedBreakIterator(null);
\r
867 errln("DictionaryBasedBreakIterator constructor is suppose to return an "
\r
868 + "exception for an empty string.");
\r
869 } catch (Exception e) {
\r
873 File file = File.createTempFile("dummy", "");
\r
874 FileInputStream fis = new FileInputStream(file);
\r
875 DataInputStream dis = new DataInputStream(fis);
\r
876 @SuppressWarnings("unused")
\r
877 TestDictionaryBasedBreakIterator td = new TestDictionaryBasedBreakIterator(dis);
\r
878 errln("DictionaryBasedBreakIterator constructor is suppose to return an "
\r
879 + "exception for a temporary file with EOF.");
\r
880 } catch (Exception e) {
\r
883 // The following class allows the testing of the constructor
\r
884 // public DictionaryBasedBreakIterator(InputStream compiledRules, ...
\r
885 class TestDictionaryBasedBreakIterator1 extends DictionaryBasedBreakIterator {
\r
886 public TestDictionaryBasedBreakIterator1() throws IOException {
\r
887 super((InputStream) null, (InputStream) null);
\r
892 @SuppressWarnings("unused")
\r
893 TestDictionaryBasedBreakIterator1 td1 = new TestDictionaryBasedBreakIterator1();
\r
894 errln("DictionaryBasedBreakIterator constructor is suppose to return an "
\r
895 + "exception for an null input stream.");
\r
896 } catch (Exception e) {
\r