2 *******************************************************************************
3 * Copyright (C) 1996-2010, International Business Machines Corporation and *
4 * others. All Rights Reserved. *
5 *******************************************************************************
7 package com.ibm.icu.dev.test.rbbi;
9 import java.io.DataInputStream;
11 import java.io.FileInputStream;
12 import java.io.IOException;
13 import java.io.InputStream;
14 import java.text.StringCharacterIterator;
15 import java.util.ArrayList;
16 import java.util.List;
17 import java.util.Locale;
19 import com.ibm.icu.dev.test.TestFmwk;
20 import com.ibm.icu.text.BreakIterator;
21 import com.ibm.icu.text.DictionaryBasedBreakIterator;
23 public class BreakIteratorTest extends TestFmwk
// Iterators shared by the test methods; init() assigns one of each kind
// (character, word, line, sentence, title) from the BreakIterator factory methods.
25 private BreakIterator characterBreak;
26 private BreakIterator wordBreak;
27 private BreakIterator lineBreak;
28 private BreakIterator sentenceBreak;
29 private BreakIterator titleBreak;
31 public static void main(String[] args) throws Exception {
32 new BreakIteratorTest().run(args);
// No-argument constructor, used by main().
// NOTE(review): the constructor body is not visible in this excerpt.
34 public BreakIteratorTest()
38 protected void init(){
39 characterBreak = BreakIterator.getCharacterInstance();
40 wordBreak = BreakIterator.getWordInstance();
41 lineBreak = BreakIterator.getLineInstance();
42 //logln("Creating sentence iterator...");
43 sentenceBreak = BreakIterator.getSentenceInstance();
44 //logln("Finished creating sentence iterator...");
45 titleBreak = BreakIterator.getTitleInstance();
47 //=========================================================================
48 // general test subroutines
49 //=========================================================================
// Common driver for the data-driven tests. expectedResult holds the fragments the
// iterator is expected to produce. The method compares forward against backward
// iteration, then (only if that logged no new errors) expected against actual, and
// finally passes a table of boundary offsets to the following/preceding/isBoundary
// helpers and runs the next(n) consistency check.
// NOTE(review): the embedded line numbers skip inside this method (53, 56-57, 59-61,
// 68, 72-74, 77, 80, ...), so statements such as the declaration of `text` and
// whatever fills `buffer` are not visible here -- confirm against the complete file.
51 private void generalIteratorTest(BreakIterator bi, List<String> expectedResult) {
52 StringBuffer buffer = new StringBuffer();
54 for (int i = 0; i < expectedResult.size(); i++) {
55 text = expectedResult.get(i);
58 text = buffer.toString();
62 List<String> nextResults = _testFirstAndNext(bi, text);
63 List<String> previousResults = _testLastAndPrevious(bi, text);
65 logln("comparing forward and backward...");
// Remember the error count so the expected-vs-actual comparison below is skipped
// when the forward/backward comparison has already reported a problem.
66 int errs = getErrorCount();
67 compareFragmentLists("forward iteration", "backward iteration", nextResults,
69 if (getErrorCount() == errs) {
70 logln("comparing expected and actual...");
71 compareFragmentLists("expected result", "actual result", expectedResult,
// Offset table for the helpers: DONE in the first and last slots; every entry from
// index 2 on is derived from the entry before it (a running total).
75 int[] boundaries = new int[expectedResult.size() + 3];
76 boundaries[0] = BreakIterator.DONE;
78 for (int i = 0; i < expectedResult.size(); i++)
79 boundaries[i + 2] = boundaries[i + 1] + (expectedResult.get(i)).
81 boundaries[boundaries.length - 1] = BreakIterator.DONE;
83 _testFollowing(bi, text, boundaries);
84 _testPreceding(bi, text, boundaries);
85 _testIsBoundary(bi, text, boundaries);
87 doMultipleSelectionTest(bi, text);
// Walks the text forward and returns the substrings between consecutive positions,
// in text order. The visible error messages cover three cases: a starting position
// other than 0, a step that fails to move forward, and DONE being reached while
// lastP is not yet text.length().
// NOTE(review): the lines that assign p and lastP are not visible in this excerpt
// (the messages suggest bi.first() and bi.next()) -- confirm in the full file.
90 private List<String> _testFirstAndNext(BreakIterator bi, String text) {
93 List<String> result = new ArrayList<String>();
96 errln("first() returned " + p + " instead of 0");
97 while (p != BreakIterator.DONE) {
99 if (p != BreakIterator.DONE) {
101 errln("next() failed to move forward: next() on position "
102 + lastP + " yielded " + p);
// Record the fragment between the previous position and the new one.
104 result.add(text.substring(lastP, p));
// After the loop the last valid position must be the end of the text.
107 if (lastP != text.length())
108 errln("next() returned DONE prematurely: offset was "
109 + lastP + " instead of " + text.length());
// Backward counterpart of _testFirstAndNext: walks from the end of the text and
// returns the substrings between consecutive positions. Each fragment is inserted
// at index 0, so the returned list is in text order even though it is built
// back to front.
// NOTE(review): the lines that assign p and lastP, and the condition guarding the
// final "prematurely" message, are not visible in this excerpt -- confirm.
116 private List<String> _testLastAndPrevious(BreakIterator bi, String text) {
119 List<String> result = new ArrayList<String>();
// The starting position must be the end of the text.
121 if (p != text.length())
122 errln("last() returned " + p + " instead of " + text.length());
123 while (p != BreakIterator.DONE) {
125 if (p != BreakIterator.DONE) {
127 errln("previous() failed to move backward: previous() on position "
128 + lastP + " yielded " + p);
// Prepend, so the fragments end up in forward order.
130 result.add(0, text.substring(p, lastP));
134 errln("previous() returned DONE prematurely: offset was "
135 + lastP + " instead of 0");
// Compares two fragment lists side by side; f1Name and f2Name label them in the log.
// Where the lists disagree, the inner loops add up fragment lengths from each list
// (tempT1 / tempT2) until the two totals are equal again or a list runs out; the
// fragments covered on each side are then logged and one error is reported.
// NOTE(review): the comparison of s1 with s2 and the initialisation/advancing of
// p1, p2, tempP1 and tempP2 are not visible in this excerpt -- confirm.
142 private void compareFragmentLists(String f1Name, String f2Name, List<String> f1, List<String> f2) {
150 while (p1 < f1.size() && p2 < f2.size()) {
157 debugLogln(" >" + s1 + "<");
// Resynchronise: advance whichever side has the smaller running length.
167 while (tempT1 != tempT2 && tempP1 < f1.size() && tempP2 < f2.size()) {
168 while (tempT1 < tempT2 && tempP1 < f1.size()) {
169 tempT1 += (f1.get(tempP1)).length();
172 while (tempT2 < tempT1 && tempP2 < f2.size()) {
173 tempT2 += (f2.get(tempP2)).length();
// Dump the disagreeing stretch of the first list, then of the second.
177 logln("*** " + f1Name + " has:");
178 while (p1 <= tempP1 && p1 < f1.size()) {
181 debugLogln(" *** >" + s1 + "<");
184 logln("***** " + f2Name + " has:");
185 while (p2 <= tempP2 && p2 < f2.size()) {
188 debugLogln(" ***** >" + s2 + "<");
191 errln("Discrepancy between " + f1Name + " and " + f2Name);
// Calls bi.following(i) for every offset 0..text.length() and compares the result
// with boundaries[p]. The check `i == boundaries[p]` comes before the call.
// NOTE(review): the declaration of p and the statement controlled by that check are
// not visible in this excerpt (presumably p is advanced) -- confirm.
196 private void _testFollowing(BreakIterator bi, String text, int[] boundaries) {
197 logln("testFollowing():");
199 for (int i = 0; i <= text.length(); i++) {
200 if (i == boundaries[p])
203 int b = bi.following(i);
204 logln("bi.following(" + i + ") -> " + b);
205 if (b != boundaries[p])
206 errln("Wrong result from following() for " + i + ": expected " + boundaries[p]
// Calls bi.preceding(i) for every offset 0..text.length() and compares the result
// with boundaries[p]. Unlike _testFollowing, the index check (against
// boundaries[p + 1]) comes after the comparison.
// NOTE(review): the declaration of p and the statement controlled by the final check
// are not visible in this excerpt (presumably p is advanced) -- confirm.
211 private void _testPreceding(BreakIterator bi, String text, int[] boundaries) {
212 logln("testPreceding():");
214 for (int i = 0; i <= text.length(); i++) {
215 int b = bi.preceding(i);
216 logln("bi.preceding(" + i + ") -> " + b);
217 if (b != boundaries[p])
218 errln("Wrong result from preceding() for " + i + ": expected " + boundaries[p]
221 if (i == boundaries[p + 1])
// Calls bi.isBoundary(i) for every offset 0..text.length(). An offset equal to
// boundaries[p] is expected to be a boundary; the two messages report a false
// result where true was expected and vice versa.
// NOTE(review): the declarations of p and isB and the conditions directly guarding
// the two errln calls are not visible in this excerpt -- confirm.
226 private void _testIsBoundary(BreakIterator bi, String text, int[] boundaries) {
227 logln("testIsBoundary():");
230 for (int i = 0; i <= text.length(); i++) {
231 isB = bi.isBoundary(i);
232 logln("bi.isBoundary(" + i + ") -> " + isB);
234 if (i == boundaries[p]) {
236 errln("Wrong result from isBoundary() for " + i + ": expected true, got false");
241 errln("Wrong result from isBoundary() for " + i + ": expected false, got true");
// Checks that next(n) on a clone agrees with stepping the original iterator one
// boundary at a time. In each pass the clone is repositioned (first() in the forward
// pass, last() in the backward pass) and then moved with next(count); the result is
// compared with the original iterator's offset.
// NOTE(review): the declaration and updates of `count` and the `do {` openers are
// not visible in this excerpt -- confirm. Also note that the backward pass steps the
// original with previous() while its error text still says "next()".
246 private void doMultipleSelectionTest(BreakIterator iterator, String testText)
248 logln("Multiple selection test...");
// Independent copy, so moving it does not disturb `iterator`.
249 BreakIterator testIterator = (BreakIterator)iterator.clone();
250 int offset = iterator.first();
// The value assigned here is overwritten on the next line; the call is made to
// reposition the clone at the start.
255 testOffset = testIterator.first();
256 testOffset = testIterator.next(count);
257 logln("next(" + count + ") -> " + testOffset);
258 if (offset != testOffset)
259 errln("next(n) and next() not returning consistent results: for step " + count + ", next(n) returned " + testOffset + " and next() had " + offset);
261 if (offset != BreakIterator.DONE) {
263 offset = iterator.next();
265 } while (offset != BreakIterator.DONE);
267 // now do it backwards...
268 offset = iterator.last();
// As above: last() repositions the clone, and its return value is overwritten.
272 testOffset = testIterator.last();
273 testOffset = testIterator.next(count);
274 logln("next(" + count + ") -> " + testOffset);
275 if (offset != testOffset)
276 errln("next(n) and next() not returning consistent results: for step " + count + ", next(n) returned " + testOffset + " and next() had " + offset);
278 if (offset != BreakIterator.DONE) {
280 offset = iterator.previous();
282 } while (offset != BreakIterator.DONE);
// Invariant checks over every ordered pair of characters taken from testChars, using
// a small scratch buffer `work` that is rewritten and handed to tb for each pair.
// Two invariants are exercised (see the original comments below): no break between
// CR and LF, and no break before a non-spacing or enclosing mark unless a line
// terminator precedes it.
// NOTE(review): the declaration of errorCount, the conditions that select which
// break position k counts as a failure, the statements after the `errorCount >= 75`
// checks, and any re-initialisation of `work` between the two parts are not visible
// in this excerpt -- confirm against the complete file.
286 private void doOtherInvariantTest(BreakIterator tb, String testChars)
// Positions 1 and 2 hold CR LF; positions 0 and 3 are replaced in the loops below.
288 StringBuffer work = new StringBuffer("a\r\na");
291 // a break should never occur between CR and LF
292 for (int i = 0; i < testChars.length(); i++) {
293 work.setCharAt(0, testChars.charAt(i));
294 for (int j = 0; j < testChars.length(); j++) {
295 work.setCharAt(3, testChars.charAt(j));
296 tb.setText(work.toString());
297 for (int k = tb.first(); k != BreakIterator.DONE; k = tb.next())
299 errln("Break between CR and LF in string U+" + Integer.toHexString(
300 (int)(work.charAt(0))) + ", U+d U+a U+" + Integer.toHexString(
301 (int)(work.charAt(3))));
303 if (errorCount >= 75)
309 // a break should never occur before a non-spacing mark, unless it's preceded
310 // by a line terminator
313 for (int i = 0; i < testChars.length(); i++) {
314 char c = testChars.charAt(i);
// Line terminators (and U+0003) are tested for here as the preceding character.
315 if (c == '\n' || c == '\r' || c == '\u2029' || c == '\u2028' || c == '\u0003')
317 work.setCharAt(1, c);
318 for (int j = 0; j < testChars.length(); j++) {
319 c = testChars.charAt(j);
// Tests whether the second character is neither a non-spacing nor an enclosing mark.
320 if (Character.getType(c) != Character.NON_SPACING_MARK && Character.getType(c)
321 != Character.ENCLOSING_MARK)
323 work.setCharAt(2, c);
324 tb.setText(work.toString());
325 for (int k = tb.first(); k != BreakIterator.DONE; k = tb.next())
327 errln("Break between U+" + Integer.toHexString((int)(work.charAt(1)))
328 + " and U+" + Integer.toHexString((int)(work.charAt(2))));
330 if (errorCount >= 75)
// Logs s through logln() after rebuilding it in `out`. Characters from space up to
// (but not including) U+007F take one branch; for the others the code computes the
// hex value of the character and a run of leading zeros that pads it to four digits.
// NOTE(review): the declaration of temp and the append calls for the plain character,
// the escape prefix and temp itself are not visible in this excerpt -- confirm.
337 public void debugLogln(String s) {
// Source of padding zeros for hex values shorter than four digits.
338 final String zeros = "0000";
340 StringBuffer out = new StringBuffer();
341 for (int i = 0; i < s.length(); i++) {
342 char c = s.charAt(i);
343 if (c >= ' ' && c < '\u007f')
347 temp = Integer.toHexString((int)c);
348 out.append(zeros.substring(0, 4 - temp.length()));
352 logln(out.toString());
355 //=========================================================================
357 //=========================================================================
363 public void TestBug4097779() {
364 List<String> wordSelectionData = new ArrayList<String>(2);
366 wordSelectionData.add("aa\u0300a");
367 wordSelectionData.add(" ");
369 generalIteratorTest(wordBreak, wordSelectionData);
375 public void TestBug4098467Words() {
376 List<String> wordSelectionData = new ArrayList<String>();
378 // What follows is a string of Korean characters (I found it in the Yellow Pages
379 // ad for the Korean Presbyterian Church of San Francisco, and I hope I transcribed
380 // it correctly), first as precomposed syllables, and then as conjoining jamo.
381 // Both sequences should be semantically identical and break the same way.
382 // precomposed syllables...
383 wordSelectionData.add("\uc0c1\ud56d");
384 wordSelectionData.add(" ");
385 wordSelectionData.add("\ud55c\uc778");
386 wordSelectionData.add(" ");
387 wordSelectionData.add("\uc5f0\ud569");
388 wordSelectionData.add(" ");
389 wordSelectionData.add("\uc7a5\ub85c\uad50\ud68c");
390 wordSelectionData.add(" ");
391 // conjoining jamo...
392 wordSelectionData.add("\u1109\u1161\u11bc\u1112\u1161\u11bc");
393 wordSelectionData.add(" ");
394 wordSelectionData.add("\u1112\u1161\u11ab\u110b\u1175\u11ab");
395 wordSelectionData.add(" ");
396 wordSelectionData.add("\u110b\u1167\u11ab\u1112\u1161\u11b8");
397 wordSelectionData.add(" ");
398 wordSelectionData.add("\u110c\u1161\u11bc\u1105\u1169\u1100\u116d\u1112\u116c");
399 wordSelectionData.add(" ");
401 generalIteratorTest(wordBreak, wordSelectionData);
408 public void TestBug4111338() {
409 List<String> sentenceSelectionData = new ArrayList<String>();
411 // test for bug #4111338: Don't break sentences at the boundary between CJK
413 sentenceSelectionData.add("\u5487\u67ff\ue591\u5017\u61b3\u60a1\u9510\u8165:\"JAVA\u821c"
414 + "\u8165\u7fc8\u51ce\u306d,\u2494\u56d8\u4ec0\u60b1\u8560\u51ba"
415 + "\u611d\u57b6\u2510\u5d46\".\u2029");
416 sentenceSelectionData.add("\u5487\u67ff\ue591\u5017\u61b3\u60a1\u9510\u8165\u9de8"
417 + "\u97e4JAVA\u821c\u8165\u7fc8\u51ce\u306d\ue30b\u2494\u56d8\u4ec0"
418 + "\u60b1\u8560\u51ba\u611d\u57b6\u2510\u5d46\u97e5\u7751\u2029");
419 sentenceSelectionData.add("\u5487\u67ff\ue591\u5017\u61b3\u60a1\u9510\u8165\u9de8\u97e4"
420 + "\u6470\u8790JAVA\u821c\u8165\u7fc8\u51ce\u306d\ue30b\u2494\u56d8"
421 + "\u4ec0\u60b1\u8560\u51ba\u611d\u57b6\u2510\u5d46\u97e5\u7751\u2029");
422 sentenceSelectionData.add("He said, \"I can go there.\"\u2029");
424 generalIteratorTest(sentenceBreak, sentenceSelectionData);
431 public void TestBug4143071() {
432 List<String> sentenceSelectionData = new ArrayList<String>(3);
434 // Make sure sentences that end with digits work right
435 sentenceSelectionData.add("Today is the 27th of May, 1998. ");
436 sentenceSelectionData.add("Tomorrow will be 28 May 1998. ");
437 sentenceSelectionData.add("The day after will be the 30th.\u2029");
439 generalIteratorTest(sentenceBreak, sentenceSelectionData);
445 public void TestBug4152416() {
446 List<String> sentenceSelectionData = new ArrayList<String>(2);
448 // Make sure sentences ending with a capital letter are treated correctly
449 sentenceSelectionData.add("The type of all primitive "
450 + "<code>boolean</code> values accessed in the target VM. ");
451 sentenceSelectionData.add("Calls to xxx will return an "
452 + "implementor of this interface.\u2029");
454 generalIteratorTest(sentenceBreak, sentenceSelectionData);
// Sentence-break data for bug 4152117: three sentences containing inline markup and
// punctuation, run through the generic checks with the sentence iterator.
// NOTE(review): numbered line 471 is not visible in this excerpt. The string on
// line 470 ends in an open concatenation, so the tail of the second sentence is
// missing here -- take it from the complete file.
460 public void TestBug4152117() {
461 List<String> sentenceSelectionData = new ArrayList<String>(3);
463 // Make sure sentence breaking is handling punctuation correctly
464 // [COULD NOT REPRODUCE THIS BUG, BUT TEST IS HERE TO MAKE SURE
465 // IT DOESN'T CROP UP]
466 sentenceSelectionData.add("Constructs a randomly generated "
467 + "BigInteger, uniformly distributed over the range <tt>0</tt> "
468 + "to <tt>(2<sup>numBits</sup> - 1)</tt>, inclusive. ");
469 sentenceSelectionData.add("The uniformity of the distribution "
470 + "assumes that a fair source of random bits is provided in "
472 sentenceSelectionData.add("Note that this constructor always "
473 + "constructs a non-negative BigInteger.\u2029");
475 generalIteratorTest(sentenceBreak, sentenceSelectionData);
478 public void TestLineBreak() {
479 List<String> lineSelectionData = new ArrayList<String>();
481 lineSelectionData.add("Multi-");
482 lineSelectionData.add("Level ");
483 lineSelectionData.add("example ");
484 lineSelectionData.add("of ");
485 lineSelectionData.add("a ");
486 lineSelectionData.add("semi-");
487 lineSelectionData.add("idiotic ");
488 lineSelectionData.add("non-");
489 lineSelectionData.add("sensical ");
490 lineSelectionData.add("(non-");
491 lineSelectionData.add("important) ");
492 lineSelectionData.add("sentence. ");
494 lineSelectionData.add("Hi ");
495 lineSelectionData.add("Hello ");
496 lineSelectionData.add("How\n");
497 lineSelectionData.add("are\r");
498 lineSelectionData.add("you\u2028");
499 lineSelectionData.add("fine.\t");
500 lineSelectionData.add("good. ");
502 lineSelectionData.add("Now\r");
503 lineSelectionData.add("is\n");
504 lineSelectionData.add("the\r\n");
505 lineSelectionData.add("time\n");
506 lineSelectionData.add("\r");
507 lineSelectionData.add("for\r");
508 lineSelectionData.add("\r");
509 lineSelectionData.add("all");
511 generalIteratorTest(lineBreak, lineSelectionData);
517 public void TestBug4068133() {
518 List<String> lineSelectionData = new ArrayList<String>(9);
520 lineSelectionData.add("\u96f6");
521 lineSelectionData.add("\u4e00\u3002");
522 lineSelectionData.add("\u4e8c\u3001");
523 lineSelectionData.add("\u4e09\u3002\u3001");
524 lineSelectionData.add("\u56db\u3001\u3002\u3001");
525 lineSelectionData.add("\u4e94,");
526 lineSelectionData.add("\u516d.");
527 lineSelectionData.add("\u4e03.\u3001,\u3002");
528 lineSelectionData.add("\u516b");
530 generalIteratorTest(lineBreak, lineSelectionData);
536 public void TestBug4086052() {
537 List<String> lineSelectionData = new ArrayList<String>(1);
539 lineSelectionData.add("foo\u00a0bar ");
540 // lineSelectionData.addElement("foo\ufeffbar");
542 generalIteratorTest(lineBreak, lineSelectionData);
548 public void TestBug4097920() {
549 List<String> lineSelectionData = new ArrayList<String>(3);
551 lineSelectionData.add("dog,cat,mouse ");
552 lineSelectionData.add("(one)");
553 lineSelectionData.add("(two)\n");
554 generalIteratorTest(lineBreak, lineSelectionData);
562 public void TestBug4117554Lines() {
563 List<String> lineSelectionData = new ArrayList<String>(3);
565 // Fullwidth .!? should be treated as postJwrd
566 lineSelectionData.add("\u4e01\uff0e");
567 lineSelectionData.add("\u4e02\uff01");
568 lineSelectionData.add("\u4e03\uff1f");
570 generalIteratorTest(lineBreak, lineSelectionData);
573 public void TestLettersAndDigits() {
574 // a character sequence such as "X11" or "30F3" or "native2ascii" should
575 // be kept together as a single word
576 List<String> lineSelectionData = new ArrayList<String>(3);
578 lineSelectionData.add("X11 ");
579 lineSelectionData.add("30F3 ");
580 lineSelectionData.add("native2ascii");
582 generalIteratorTest(lineBreak, lineSelectionData);
// Base letter plus one combining mark (U+0300 grave, U+0317 acute below, U+0301
// acute, U+0302 circumflex, U+0303 tilde). TestCharacterBreak adds each of these as
// a single expected fragment.
586 private static final String graveS = "S\u0300";
587 private static final String acuteBelowI = "i\u0317";
588 private static final String acuteE = "e\u0301";
589 private static final String circumflexA = "a\u0302";
590 private static final String tildeE = "e\u0303";
592 public void TestCharacterBreak() {
593 List<String> characterSelectionData = new ArrayList<String>();
595 characterSelectionData.add(graveS);
596 characterSelectionData.add(acuteBelowI);
597 characterSelectionData.add("m");
598 characterSelectionData.add("p");
599 characterSelectionData.add("l");
600 characterSelectionData.add(acuteE);
601 characterSelectionData.add(" ");
602 characterSelectionData.add("s");
603 characterSelectionData.add(circumflexA);
604 characterSelectionData.add("m");
605 characterSelectionData.add("p");
606 characterSelectionData.add("l");
607 characterSelectionData.add(tildeE);
608 characterSelectionData.add(".");
609 characterSelectionData.add("w");
610 characterSelectionData.add(circumflexA);
611 characterSelectionData.add("w");
612 characterSelectionData.add("a");
613 characterSelectionData.add("f");
614 characterSelectionData.add("q");
615 characterSelectionData.add("\n");
616 characterSelectionData.add("\r");
617 characterSelectionData.add("\r\n");
618 characterSelectionData.add("\n");
620 generalIteratorTest(characterBreak, characterSelectionData);
626 public void TestBug4098467Characters() {
627 List<String> characterSelectionData = new ArrayList<String>();
629 // What follows is a string of Korean characters (I found it in the Yellow Pages
630 // ad for the Korean Presbyterian Church of San Francisco, and I hope I transcribed
631 // it correctly), first as precomposed syllables, and then as conjoining jamo.
632 // Both sequences should be semantically identical and break the same way.
633 // precomposed syllables...
634 characterSelectionData.add("\uc0c1");
635 characterSelectionData.add("\ud56d");
636 characterSelectionData.add(" ");
637 characterSelectionData.add("\ud55c");
638 characterSelectionData.add("\uc778");
639 characterSelectionData.add(" ");
640 characterSelectionData.add("\uc5f0");
641 characterSelectionData.add("\ud569");
642 characterSelectionData.add(" ");
643 characterSelectionData.add("\uc7a5");
644 characterSelectionData.add("\ub85c");
645 characterSelectionData.add("\uad50");
646 characterSelectionData.add("\ud68c");
647 characterSelectionData.add(" ");
648 // conjoining jamo...
649 characterSelectionData.add("\u1109\u1161\u11bc");
650 characterSelectionData.add("\u1112\u1161\u11bc");
651 characterSelectionData.add(" ");
652 characterSelectionData.add("\u1112\u1161\u11ab");
653 characterSelectionData.add("\u110b\u1175\u11ab");
654 characterSelectionData.add(" ");
655 characterSelectionData.add("\u110b\u1167\u11ab");
656 characterSelectionData.add("\u1112\u1161\u11b8");
657 characterSelectionData.add(" ");
658 characterSelectionData.add("\u110c\u1161\u11bc");
659 characterSelectionData.add("\u1105\u1169");
660 characterSelectionData.add("\u1100\u116d");
661 characterSelectionData.add("\u1112\u116c");
663 generalIteratorTest(characterBreak, characterSelectionData);
// Title-break data: a list of word-plus-space fragments run through the generic
// checks with the title iterator.
// NOTE(review): numbered line 672, between the "is " and "simple " entries, is not
// visible in this excerpt and may hold another fragment -- confirm against the
// complete file before relying on this list.
666 public void TestTitleBreak()
668 List<String> titleData = new ArrayList<String>();
670 titleData.add("This ");
671 titleData.add("is ");
673 titleData.add("simple ");
674 titleData.add("sample ");
675 titleData.add("sentence. ");
676 titleData.add("This ");
678 generalIteratorTest(titleBreak, titleData);
// Bug 4153072: isBoundary() on a word iterator whose text is a sub-range of a longer
// string. The iterator is given a StringCharacterIterator limited to [begin, end)
// and positioned at begin; isBoundary() is then called with offsets from -1 up to
// and including begin. The two messages report the opposite failures: no exception
// where one was expected, and an exception where none was expected.
// NOTE(review): the declaration of `begin`, the `try` opener and the conditions that
// decide which offsets must throw are not visible in this excerpt -- confirm.
686 public void TestBug4153072() {
687 BreakIterator iter = BreakIterator.getWordInstance();
688 String str = "...Hello, World!...";
// Excludes the last three characters of str from the iterated range.
690 int end = str.length() - 3;
691 // not used boolean gotException = false;
694 iter.setText(new StringCharacterIterator(str, begin, end, begin));
695 for (int index = -1; index < begin + 1; ++index) {
697 iter.isBoundary(index);
699 errln("Didn't get exception with offset = " + index +
700 " and begin index = " + begin);
702 catch (IllegalArgumentException e) {
704 errln("Got exception with offset = " + index +
705 " and begin index = " + begin);
711 public void TestBug4146175Lines() {
712 List<String> lineSelectionData = new ArrayList<String>(2);
714 // the fullwidth comma should stick to the preceding Japanese character
715 lineSelectionData.add("\u7d42\uff0c");
716 lineSelectionData.add("\u308f");
718 generalIteratorTest(lineBreak, lineSelectionData);
// Character pool for the invariant tests (TestSentenceInvariants appends a few more
// characters to it). It mixes control characters, ASCII punctuation, digits and
// letters with assorted non-ASCII characters, including combining marks
// (U+0300..U+0304, U+20DD..U+20E0) and the separators U+2028 and U+2029.
721 private static final String cannedTestChars
722 = "\u0000\u0001\u0002\u0003\u0004 !\"#$%&()+-01234<=>ABCDE[]^_`abcde{}|\u00a0\u00a2"
723 + "\u00a3\u00a4\u00a5\u00a6\u00a7\u00a8\u00a9\u00ab\u00ad\u00ae\u00af\u00b0\u00b2\u00b3"
724 + "\u00b4\u00b9\u00bb\u00bc\u00bd\u02b0\u02b1\u02b2\u02b3\u02b4\u0300\u0301\u0302\u0303"
725 + "\u0304\u05d0\u05d1\u05d2\u05d3\u05d4\u0903\u093e\u093f\u0940\u0949\u0f3a\u0f3b\u2000"
726 + "\u2001\u2002\u200c\u200d\u200e\u200f\u2010\u2011\u2012\u2028\u2029\u202a\u203e\u203f"
727 + "\u2040\u20dd\u20de\u20df\u20e0\u2160\u2161\u2162\u2163\u2164";
729 public void TestSentenceInvariants()
731 BreakIterator e = BreakIterator.getSentenceInstance();
732 doOtherInvariantTest(e, cannedTestChars + ".,\u3001\u3002\u3041\u3042\u3043\ufeff");
// Runs the generic checks with the line iterator on the list x.
// NOTE(review): the statements that fill x are not visible in this excerpt; the
// method name suggests a single empty string -- confirm against the complete file.
735 public void TestEmptyString()
738 List<String> x = new ArrayList<String>(1);
741 generalIteratorTest(lineBreak, x);
// Sanity check for the two locale-listing methods: an empty result from either
// getAvailableLocales() or getAvailableULocales() is reported as an error.
// NOTE(review): numbered line 755, between the errln and the logln below, is not
// visible in this excerpt, so it cannot be told from here whether the logln runs
// unconditionally or only in an else branch -- confirm.
744 public void TestGetAvailableLocales()
746 Locale[] locList = BreakIterator.getAvailableLocales();
748 if (locList.length == 0)
749 errln("getAvailableLocales() returned an empty list!");
750 // I have no idea how to test this function...
752 com.ibm.icu.util.ULocale[] ulocList = BreakIterator.getAvailableULocales();
753 if (ulocList.length == 0) {
754 errln("getAvailableULocales() returned an empty list!");
756 logln("getAvailableULocales() returned " + ulocList.length + " locales");
// Checks word-iterator behaviour at the ends of the short string "boo.": a break at
// the beginning, one before the period, and one at the end of the string.
// NOTE(review): the conditions guarding the first two errln calls are not visible in
// this excerpt -- confirm against the complete file.
764 public void TestEndBehavior()
766 String testString = "boo.";
767 BreakIterator wb = BreakIterator.getWordInstance();
768 wb.setText(testString);
771 errln("Didn't get break at beginning of string.");
773 errln("Didn't get break before period in \"boo.\"");
// && short-circuits: next() is called only when current() is not already 4, so the
// error fires only if neither the current position nor the next one is 4.
774 if (wb.current() != 4 && wb.next() != 4)
775 errln("Didn't get break at end of string.");
778 // The following two tests are ported from ICU4C 1.8.1 [Richard/GCL]
780 * Port From: ICU4C v1.8.1 : textbounds : IntlTestTextBoundary
781 * Source File: $ICU4CRoot/source/test/intltest/ittxtbd.cpp
784 * test methods preceding, following and isBoundary
// Ported check of following(), preceding() and isBoundary() on a word iterator for
// the default locale, using the text "aaa bbb ccc". Starting one past position p2,
// following() is expected to give p3 and preceding() to give p2 (per the messages);
// p2 and p3 must be boundaries and p2+1 must not be.
// NOTE(review): the statements that set the text and compute p1..p4, and the
// conditions guarding the first four errln calls, are not visible in this excerpt --
// confirm against the complete file.
786 public void TestPreceding() {
787 String words3 = "aaa bbb ccc";
788 BreakIterator e = BreakIterator.getWordInstance(Locale.getDefault());
796 int f = e.following(p2+1);
797 int p = e.preceding(p2+1);
799 errln("IntlTestTextBoundary::TestPreceding: f!=p3");
801 errln("IntlTestTextBoundary::TestPreceding: p!=p2");
804 errln("IntlTestTextBoundary::TestPreceding: p1+1!=p2");
807 errln("IntlTestTextBoundary::TestPreceding: p3+1!=p4");
809 if (!e.isBoundary(p2) || e.isBoundary(p2+1) || !e.isBoundary(p3))
811 errln("IntlTestTextBoundary::TestPreceding: isBoundary err");
819 public void TestLineBreakContractions() {
820 List<String> expected = new ArrayList<String>(7);
821 expected.add("These ");
822 expected.add("are ");
823 expected.add("'foobles'. ");
824 expected.add("Don't ");
825 expected.add("you ");
826 expected.add("like ");
827 expected.add("them?");
828 generalIteratorTest(lineBreak, expected);
// For the locales returned by getAvailableULocales(), requests a break iterator of
// each kind 0..4 via getBreakInstance(loc, type). A failed creation and any
// exception are both reported as errors, with the kind and locale in the message.
// NOTE(review): the assignment of loc inside the loop, the declaration of `type`,
// the `try` opener and the condition guarding the first errln are not visible in
// this excerpt -- confirm against the complete file.
834 public void TestT5615() {
835 com.ibm.icu.util.ULocale[] ulocales = BreakIterator.getAvailableULocales();
837 com.ibm.icu.util.ULocale loc = null;
839 for (int i = 0; i < ulocales.length; i++) {
// The bound 5 is hard-coded; the inline comment ties it to BreakIterator.KIND_COUNT.
841 for (type = 0; type < 5 /* 5 = BreakIterator.KIND_COUNT */; ++type) {
842 BreakIterator brk = BreakIterator.getBreakInstance(loc, type);
844 errln("ERR: Failed to create an instance type: " + type + " / locale: " + loc);
848 } catch (Exception e) {
849 errln("ERR: Failed to create an instance type: " + type + " / locale: " + loc + " / exception: " + e.getMessage());
854 * Tests the constructors public DictionaryBasedBreakIterator(String rules, ... public
855 * DictionaryBasedBreakIterator(InputStream compiledRules, ...
857 public void TestDictionaryBasedBreakIterator() throws IOException {
858 // The following class allows the testing of the constructor
859 // public DictionaryBasedBreakIterator(String rules, ...
860 class TestDictionaryBasedBreakIterator extends DictionaryBasedBreakIterator {
861 public TestDictionaryBasedBreakIterator(InputStream is) throws IOException {
866 @SuppressWarnings("unused")
867 TestDictionaryBasedBreakIterator td = new TestDictionaryBasedBreakIterator(null);
868 errln("DictionaryBasedBreakIterator constructor is suppose to return an "
869 + "exception for an empty string.");
870 } catch (Exception e) {
874 File file = File.createTempFile("dummy", "");
875 FileInputStream fis = new FileInputStream(file);
876 DataInputStream dis = new DataInputStream(fis);
877 @SuppressWarnings("unused")
878 TestDictionaryBasedBreakIterator td = new TestDictionaryBasedBreakIterator(dis);
879 errln("DictionaryBasedBreakIterator constructor is suppose to return an "
880 + "exception for a temporary file with EOF.");
881 } catch (Exception e) {
884 // The following class allows the testing of the constructor
885 // public DictionaryBasedBreakIterator(InputStream compiledRules, ...
886 class TestDictionaryBasedBreakIterator1 extends DictionaryBasedBreakIterator {
887 public TestDictionaryBasedBreakIterator1() throws IOException {
888 super((InputStream) null, (InputStream) null);
893 @SuppressWarnings("unused")
894 TestDictionaryBasedBreakIterator1 td1 = new TestDictionaryBasedBreakIterator1();
895 errln("DictionaryBasedBreakIterator constructor is suppose to return an "
896 + "exception for an null input stream.");
897 } catch (Exception e) {