]> gitweb.fperrin.net Git - Dictionary.git/blob - jars/icu4j-4_8_1_1/main/tests/core/src/com/ibm/icu/dev/test/rbbi/BreakIteratorTest.java
Added flags.
[Dictionary.git] / jars / icu4j-4_8_1_1 / main / tests / core / src / com / ibm / icu / dev / test / rbbi / BreakIteratorTest.java
1 /*
2  *******************************************************************************
3  * Copyright (C) 1996-2010, International Business Machines Corporation and    *
4  * others. All Rights Reserved.                                                *
5  *******************************************************************************
6  */
7 package com.ibm.icu.dev.test.rbbi;
8
9 import java.io.DataInputStream;
10 import java.io.File;
11 import java.io.FileInputStream;
12 import java.io.IOException;
13 import java.io.InputStream;
14 import java.text.StringCharacterIterator;
15 import java.util.ArrayList;
16 import java.util.List;
17 import java.util.Locale;
18
19 import com.ibm.icu.dev.test.TestFmwk;
20 import com.ibm.icu.text.BreakIterator;
21 import com.ibm.icu.text.DictionaryBasedBreakIterator;
22
23 public class BreakIteratorTest extends TestFmwk
24 {
25     private BreakIterator characterBreak; // assigned in init() from BreakIterator.getCharacterInstance()
26     private BreakIterator wordBreak; // assigned in init() from BreakIterator.getWordInstance()
27     private BreakIterator lineBreak; // assigned in init() from BreakIterator.getLineInstance()
28     private BreakIterator sentenceBreak; // assigned in init() from BreakIterator.getSentenceInstance()
29     private BreakIterator titleBreak; // assigned in init() from BreakIterator.getTitleInstance()
30
31     public static void main(String[] args) throws Exception {
32         new BreakIteratorTest().run(args);
33     }
34     public BreakIteratorTest()
35     {
36
37     }
38     protected void init(){
39         characterBreak = BreakIterator.getCharacterInstance();
40         wordBreak = BreakIterator.getWordInstance();
41         lineBreak = BreakIterator.getLineInstance();
42         //logln("Creating sentence iterator...");
43         sentenceBreak = BreakIterator.getSentenceInstance();
44         //logln("Finished creating sentence iterator...");
45         titleBreak = BreakIterator.getTitleInstance();
46     }
47     //=========================================================================
48     // general test subroutines
49     //=========================================================================
50
51     private void generalIteratorTest(BreakIterator bi, List<String> expectedResult) {
52         StringBuffer buffer = new StringBuffer();
53         String text;
54         for (int i = 0; i < expectedResult.size(); i++) {
55             text = expectedResult.get(i);
56             buffer.append(text);
57         }
58         text = buffer.toString();
59
60         bi.setText(text);
61
62         List<String> nextResults = _testFirstAndNext(bi, text);
63         List<String> previousResults = _testLastAndPrevious(bi, text);
64
65         logln("comparing forward and backward...");
66         int errs = getErrorCount();
67         compareFragmentLists("forward iteration", "backward iteration", nextResults,
68                         previousResults);
69         if (getErrorCount() == errs) {
70             logln("comparing expected and actual...");
71             compareFragmentLists("expected result", "actual result", expectedResult,
72                             nextResults);
73         }
74
75         int[] boundaries = new int[expectedResult.size() + 3];
76         boundaries[0] = BreakIterator.DONE;
77         boundaries[1] = 0;
78         for (int i = 0; i < expectedResult.size(); i++)
79             boundaries[i + 2] = boundaries[i + 1] + (expectedResult.get(i)).
80                             length();
81         boundaries[boundaries.length - 1] = BreakIterator.DONE;
82
83         _testFollowing(bi, text, boundaries);
84         _testPreceding(bi, text, boundaries);
85         _testIsBoundary(bi, text, boundaries);
86
87         doMultipleSelectionTest(bi, text);
88     }
89
90     private List<String> _testFirstAndNext(BreakIterator bi, String text) {
91         int p = bi.first();
92         int lastP = p;
93         List<String> result = new ArrayList<String>();
94
95         if (p != 0)
96             errln("first() returned " + p + " instead of 0");
97         while (p != BreakIterator.DONE) {
98             p = bi.next();
99             if (p != BreakIterator.DONE) {
100                 if (p <= lastP)
101                     errln("next() failed to move forward: next() on position "
102                                     + lastP + " yielded " + p);
103
104                 result.add(text.substring(lastP, p));
105             }
106             else {
107                 if (lastP != text.length())
108                     errln("next() returned DONE prematurely: offset was "
109                                     + lastP + " instead of " + text.length());
110             }
111             lastP = p;
112         }
113         return result;
114     }
115
116     private List<String> _testLastAndPrevious(BreakIterator bi, String text) {
117         int p = bi.last();
118         int lastP = p;
119         List<String> result = new ArrayList<String>();
120
121         if (p != text.length())
122             errln("last() returned " + p + " instead of " + text.length());
123         while (p != BreakIterator.DONE) {
124             p = bi.previous();
125             if (p != BreakIterator.DONE) {
126                 if (p >= lastP)
127                     errln("previous() failed to move backward: previous() on position "
128                                     + lastP + " yielded " + p);
129
130                 result.add(0, text.substring(p, lastP));
131             }
132             else {
133                 if (lastP != 0)
134                     errln("previous() returned DONE prematurely: offset was "
135                                     + lastP + " instead of 0");
136             }
137             lastP = p;
138         }
139         return result;
140     }
141
142     private void compareFragmentLists(String f1Name, String f2Name, List<String> f1, List<String> f2) {
143         int p1 = 0;
144         int p2 = 0;
145         String s1;
146         String s2;
147         int t1 = 0;
148         int t2 = 0;
149
150         while (p1 < f1.size() && p2 < f2.size()) {
151             s1 = f1.get(p1);
152             s2 = f2.get(p2);
153             t1 += s1.length();
154             t2 += s2.length();
155
156             if (s1.equals(s2)) {
157                 debugLogln("   >" + s1 + "<");
158                 ++p1;
159                 ++p2;
160             }
161             else {
162                 int tempT1 = t1;
163                 int tempT2 = t2;
164                 int tempP1 = p1;
165                 int tempP2 = p2;
166
167                 while (tempT1 != tempT2 && tempP1 < f1.size() && tempP2 < f2.size()) {
168                     while (tempT1 < tempT2 && tempP1 < f1.size()) {
169                         tempT1 += (f1.get(tempP1)).length();
170                         ++tempP1;
171                     }
172                     while (tempT2 < tempT1 && tempP2 < f2.size()) {
173                         tempT2 += (f2.get(tempP2)).length();
174                         ++tempP2;
175                     }
176                 }
177                 logln("*** " + f1Name + " has:");
178                 while (p1 <= tempP1 && p1 < f1.size()) {
179                     s1 = f1.get(p1);
180                     t1 += s1.length();
181                     debugLogln(" *** >" + s1 + "<");
182                     ++p1;
183                 }
184                 logln("***** " + f2Name + " has:");
185                 while (p2 <= tempP2 && p2 < f2.size()) {
186                     s2 = f2.get(p2);
187                     t2 += s2.length();
188                     debugLogln(" ***** >" + s2 + "<");
189                     ++p2;
190                 }
191                 errln("Discrepancy between " + f1Name + " and " + f2Name);
192             }
193         }
194     }
195
196     private void _testFollowing(BreakIterator bi, String text, int[] boundaries) {
197         logln("testFollowing():");
198         int p = 2;
199         for (int i = 0; i <= text.length(); i++) {
200             if (i == boundaries[p])
201                 ++p;
202
203             int b = bi.following(i);
204             logln("bi.following(" + i + ") -> " + b);
205             if (b != boundaries[p])
206                 errln("Wrong result from following() for " + i + ": expected " + boundaries[p]
207                                 + ", got " + b);
208         }
209     }
210
211     private void _testPreceding(BreakIterator bi, String text, int[] boundaries) {
212         logln("testPreceding():");
213         int p = 0;
214         for (int i = 0; i <= text.length(); i++) {
215             int b = bi.preceding(i);
216             logln("bi.preceding(" + i + ") -> " + b);
217             if (b != boundaries[p])
218                 errln("Wrong result from preceding() for " + i + ": expected " + boundaries[p]
219                                 + ", got " + b);
220
221             if (i == boundaries[p + 1])
222                 ++p;
223         }
224     }
225
226     private void _testIsBoundary(BreakIterator bi, String text, int[] boundaries) {
227         logln("testIsBoundary():");
228         int p = 1;
229         boolean isB;
230         for (int i = 0; i <= text.length(); i++) {
231             isB = bi.isBoundary(i);
232             logln("bi.isBoundary(" + i + ") -> " + isB);
233
234             if (i == boundaries[p]) {
235                 if (!isB)
236                     errln("Wrong result from isBoundary() for " + i + ": expected true, got false");
237                 ++p;
238             }
239             else {
240                 if (isB)
241                     errln("Wrong result from isBoundary() for " + i + ": expected false, got true");
242             }
243         }
244     }
245
246     private void doMultipleSelectionTest(BreakIterator iterator, String testText)
247     {
248         logln("Multiple selection test...");
249         BreakIterator testIterator = (BreakIterator)iterator.clone();
250         int offset = iterator.first();
251         int testOffset;
252         int count = 0;
253
254         do {
255             testOffset = testIterator.first();
256             testOffset = testIterator.next(count);
257             logln("next(" + count + ") -> " + testOffset);
258             if (offset != testOffset)
259                 errln("next(n) and next() not returning consistent results: for step " + count + ", next(n) returned " + testOffset + " and next() had " + offset);
260
261             if (offset != BreakIterator.DONE) {
262                 count++;
263                 offset = iterator.next();
264             }
265         } while (offset != BreakIterator.DONE);
266
267         // now do it backwards...
268         offset = iterator.last();
269         count = 0;
270
271         do {
272             testOffset = testIterator.last();
273             testOffset = testIterator.next(count);
274             logln("next(" + count + ") -> " + testOffset);
275             if (offset != testOffset)
276                 errln("next(n) and next() not returning consistent results: for step " + count + ", next(n) returned " + testOffset + " and next() had " + offset);
277
278             if (offset != BreakIterator.DONE) {
279                 count--;
280                 offset = iterator.previous();
281             }
282         } while (offset != BreakIterator.DONE);
283     }
284
285
286     private void doOtherInvariantTest(BreakIterator tb, String testChars)
287     {
288         StringBuffer work = new StringBuffer("a\r\na");
289         int errorCount = 0;
290
291         // a break should never occur between CR and LF
292         for (int i = 0; i < testChars.length(); i++) {
293             work.setCharAt(0, testChars.charAt(i));
294             for (int j = 0; j < testChars.length(); j++) {
295                 work.setCharAt(3, testChars.charAt(j));
296                 tb.setText(work.toString());
297                 for (int k = tb.first(); k != BreakIterator.DONE; k = tb.next())
298                     if (k == 2) {
299                         errln("Break between CR and LF in string U+" + Integer.toHexString(
300                                 (int)(work.charAt(0))) + ", U+d U+a U+" + Integer.toHexString(
301                                 (int)(work.charAt(3))));
302                         errorCount++;
303                         if (errorCount >= 75)
304                             return;
305                     }
306             }
307         }
308
309         // a break should never occur before a non-spacing mark, unless it's preceded
310         // by a line terminator
311         work.setLength(0);
312         work.append("aaaa");
313         for (int i = 0; i < testChars.length(); i++) {
314             char c = testChars.charAt(i);
315             if (c == '\n' || c == '\r' || c == '\u2029' || c == '\u2028' || c == '\u0003')
316                 continue;
317             work.setCharAt(1, c);
318             for (int j = 0; j < testChars.length(); j++) {
319                 c = testChars.charAt(j);
320                 if (Character.getType(c) != Character.NON_SPACING_MARK && Character.getType(c)
321                         != Character.ENCLOSING_MARK)
322                     continue;
323                 work.setCharAt(2, c);
324                 tb.setText(work.toString());
325                 for (int k = tb.first(); k != BreakIterator.DONE; k = tb.next())
326                     if (k == 2) {
327                         errln("Break between U+" + Integer.toHexString((int)(work.charAt(1)))
328                                 + " and U+" + Integer.toHexString((int)(work.charAt(2))));
329                         errorCount++;
330                         if (errorCount >= 75)
331                             return;
332                     }
333             }
334         }
335     }
336
337     public void debugLogln(String s) {
338         final String zeros = "0000";
339         String temp;
340         StringBuffer out = new StringBuffer();
341         for (int i = 0; i < s.length(); i++) {
342             char c = s.charAt(i);
343             if (c >= ' ' && c < '\u007f')
344                 out.append(c);
345             else {
346                 out.append("\\u");
347                 temp = Integer.toHexString((int)c);
348                 out.append(zeros.substring(0, 4 - temp.length()));
349                 out.append(temp);
350             }
351         }
352         logln(out.toString());
353     }
354
355     //=========================================================================
356     // tests
357     //=========================================================================
358
359
360     /**
361      * @bug 4097779
362      */
363     public void TestBug4097779() {
364         List<String> wordSelectionData = new ArrayList<String>(2);
365
366         wordSelectionData.add("aa\u0300a");
367         wordSelectionData.add(" ");
368
369         generalIteratorTest(wordBreak, wordSelectionData);
370     }
371
372     /**
373      * @bug 4098467
374      */
375     public void TestBug4098467Words() {
376         List<String> wordSelectionData = new ArrayList<String>();
377
378         // What follows is a string of Korean characters (I found it in the Yellow Pages
379         // ad for the Korean Presbyterian Church of San Francisco, and I hope I transcribed
380         // it correctly), first as precomposed syllables, and then as conjoining jamo.
381         // Both sequences should be semantically identical and break the same way.
382         // precomposed syllables...
383         wordSelectionData.add("\uc0c1\ud56d");
384         wordSelectionData.add(" ");
385         wordSelectionData.add("\ud55c\uc778");
386         wordSelectionData.add(" ");
387         wordSelectionData.add("\uc5f0\ud569");
388         wordSelectionData.add(" ");
389         wordSelectionData.add("\uc7a5\ub85c\uad50\ud68c");
390         wordSelectionData.add(" ");
391         // conjoining jamo...
392         wordSelectionData.add("\u1109\u1161\u11bc\u1112\u1161\u11bc");
393         wordSelectionData.add(" ");
394         wordSelectionData.add("\u1112\u1161\u11ab\u110b\u1175\u11ab");
395         wordSelectionData.add(" ");
396         wordSelectionData.add("\u110b\u1167\u11ab\u1112\u1161\u11b8");
397         wordSelectionData.add(" ");
398         wordSelectionData.add("\u110c\u1161\u11bc\u1105\u1169\u1100\u116d\u1112\u116c");
399         wordSelectionData.add(" ");
400
401         generalIteratorTest(wordBreak, wordSelectionData);
402     }
403
404
405     /**
406      * @bug 4111338
407      */
408     public void TestBug4111338() {
409         List<String> sentenceSelectionData = new ArrayList<String>();
410
411         // test for bug #4111338: Don't break sentences at the boundary between CJK
412         // and other letters
413         sentenceSelectionData.add("\u5487\u67ff\ue591\u5017\u61b3\u60a1\u9510\u8165:\"JAVA\u821c"
414                 + "\u8165\u7fc8\u51ce\u306d,\u2494\u56d8\u4ec0\u60b1\u8560\u51ba"
415                 + "\u611d\u57b6\u2510\u5d46\".\u2029");
416         sentenceSelectionData.add("\u5487\u67ff\ue591\u5017\u61b3\u60a1\u9510\u8165\u9de8"
417                 + "\u97e4JAVA\u821c\u8165\u7fc8\u51ce\u306d\ue30b\u2494\u56d8\u4ec0"
418                 + "\u60b1\u8560\u51ba\u611d\u57b6\u2510\u5d46\u97e5\u7751\u2029");
419         sentenceSelectionData.add("\u5487\u67ff\ue591\u5017\u61b3\u60a1\u9510\u8165\u9de8\u97e4"
420                 + "\u6470\u8790JAVA\u821c\u8165\u7fc8\u51ce\u306d\ue30b\u2494\u56d8"
421                 + "\u4ec0\u60b1\u8560\u51ba\u611d\u57b6\u2510\u5d46\u97e5\u7751\u2029");
422         sentenceSelectionData.add("He said, \"I can go there.\"\u2029");
423
424         generalIteratorTest(sentenceBreak, sentenceSelectionData);
425     }
426
427
428     /**
429      * @bug 4143071
430      */
431     public void TestBug4143071() {
432         List<String> sentenceSelectionData = new ArrayList<String>(3);
433
434         // Make sure sentences that end with digits work right
435         sentenceSelectionData.add("Today is the 27th of May, 1998.  ");
436         sentenceSelectionData.add("Tomorrow will be 28 May 1998.  ");
437         sentenceSelectionData.add("The day after will be the 30th.\u2029");
438
439         generalIteratorTest(sentenceBreak, sentenceSelectionData);
440     }
441
442     /**
443      * @bug 4152416
444      */
445     public void TestBug4152416() {
446         List<String> sentenceSelectionData = new ArrayList<String>(2);
447
448         // Make sure sentences ending with a capital letter are treated correctly
449         sentenceSelectionData.add("The type of all primitive "
450                 + "<code>boolean</code> values accessed in the target VM.  ");
451         sentenceSelectionData.add("Calls to xxx will return an "
452                 + "implementor of this interface.\u2029");
453
454         generalIteratorTest(sentenceBreak, sentenceSelectionData);
455     }
456
457     /**
458      * @bug 4152117
459      */
460     public void TestBug4152117() {
461         List<String> sentenceSelectionData = new ArrayList<String>(3);
462
463         // Make sure sentence breaking is handling punctuation correctly
464         // [COULD NOT REPRODUCE THIS BUG, BUT TEST IS HERE TO MAKE SURE
465         // IT DOESN'T CROP UP]
466         sentenceSelectionData.add("Constructs a randomly generated "
467                 + "BigInteger, uniformly distributed over the range <tt>0</tt> "
468                 + "to <tt>(2<sup>numBits</sup> - 1)</tt>, inclusive.  ");
469         sentenceSelectionData.add("The uniformity of the distribution "
470                 + "assumes that a fair source of random bits is provided in "
471                 + "<tt>rnd</tt>.  ");
472         sentenceSelectionData.add("Note that this constructor always "
473                 + "constructs a non-negative BigInteger.\u2029");
474
475         generalIteratorTest(sentenceBreak, sentenceSelectionData);
476     }
477
478     public void TestLineBreak() {
479         List<String> lineSelectionData = new ArrayList<String>();
480
481         lineSelectionData.add("Multi-");
482         lineSelectionData.add("Level ");
483         lineSelectionData.add("example ");
484         lineSelectionData.add("of ");
485         lineSelectionData.add("a ");
486         lineSelectionData.add("semi-");
487         lineSelectionData.add("idiotic ");
488         lineSelectionData.add("non-");
489         lineSelectionData.add("sensical ");
490         lineSelectionData.add("(non-");
491         lineSelectionData.add("important) ");
492         lineSelectionData.add("sentence. ");
493
494         lineSelectionData.add("Hi  ");
495         lineSelectionData.add("Hello ");
496         lineSelectionData.add("How\n");
497         lineSelectionData.add("are\r");
498         lineSelectionData.add("you\u2028");
499         lineSelectionData.add("fine.\t");
500         lineSelectionData.add("good.  ");
501
502         lineSelectionData.add("Now\r");
503         lineSelectionData.add("is\n");
504         lineSelectionData.add("the\r\n");
505         lineSelectionData.add("time\n");
506         lineSelectionData.add("\r");
507         lineSelectionData.add("for\r");
508         lineSelectionData.add("\r");
509         lineSelectionData.add("all");
510
511         generalIteratorTest(lineBreak, lineSelectionData);
512     }
513
514     /**
515      * @bug 4068133
516      */
517     public void TestBug4068133() {
518         List<String> lineSelectionData = new ArrayList<String>(9);
519
520         lineSelectionData.add("\u96f6");
521         lineSelectionData.add("\u4e00\u3002");
522         lineSelectionData.add("\u4e8c\u3001");
523         lineSelectionData.add("\u4e09\u3002\u3001");
524         lineSelectionData.add("\u56db\u3001\u3002\u3001");
525         lineSelectionData.add("\u4e94,");
526         lineSelectionData.add("\u516d.");
527         lineSelectionData.add("\u4e03.\u3001,\u3002");
528         lineSelectionData.add("\u516b");
529
530         generalIteratorTest(lineBreak, lineSelectionData);
531     }
532
533     /**
534      * @bug 4086052
535      */
536     public void TestBug4086052() {
537         List<String> lineSelectionData = new ArrayList<String>(1);
538
539         lineSelectionData.add("foo\u00a0bar ");
540 //        lineSelectionData.addElement("foo\ufeffbar");
541
542         generalIteratorTest(lineBreak, lineSelectionData);
543     }
544
545     /**
546      * @bug 4097920
547      */
548     public void TestBug4097920() {
549         List<String> lineSelectionData = new ArrayList<String>(3);
550
551         lineSelectionData.add("dog,cat,mouse ");
552         lineSelectionData.add("(one)");
553         lineSelectionData.add("(two)\n");
554         generalIteratorTest(lineBreak, lineSelectionData);
555     }
556
557  
558
559     /**
560      * @bug 4117554
561      */
562     public void TestBug4117554Lines() {
563         List<String> lineSelectionData = new ArrayList<String>(3);
564
565         // Fullwidth .!? should be treated as postJwrd
566         lineSelectionData.add("\u4e01\uff0e");
567         lineSelectionData.add("\u4e02\uff01");
568         lineSelectionData.add("\u4e03\uff1f");
569
570         generalIteratorTest(lineBreak, lineSelectionData);
571     }
572
573     public void TestLettersAndDigits() {
574         // a character sequence such as "X11" or "30F3" or "native2ascii" should
575         // be kept together as a single word
576         List<String> lineSelectionData = new ArrayList<String>(3);
577
578         lineSelectionData.add("X11 ");
579         lineSelectionData.add("30F3 ");
580         lineSelectionData.add("native2ascii");
581
582         generalIteratorTest(lineBreak, lineSelectionData);
583     }
584
585
586     private static final String graveS = "S\u0300"; // 'S' + U+0300; one expected fragment in TestCharacterBreak
587     private static final String acuteBelowI = "i\u0317"; // 'i' + U+0317; one expected fragment in TestCharacterBreak
588     private static final String acuteE = "e\u0301"; // 'e' + U+0301; one expected fragment in TestCharacterBreak
589     private static final String circumflexA = "a\u0302"; // 'a' + U+0302; one expected fragment in TestCharacterBreak
590     private static final String tildeE = "e\u0303"; // 'e' + U+0303; one expected fragment in TestCharacterBreak
591
592     public void TestCharacterBreak() {
593         List<String> characterSelectionData = new ArrayList<String>();
594
595         characterSelectionData.add(graveS);
596         characterSelectionData.add(acuteBelowI);
597         characterSelectionData.add("m");
598         characterSelectionData.add("p");
599         characterSelectionData.add("l");
600         characterSelectionData.add(acuteE);
601         characterSelectionData.add(" ");
602         characterSelectionData.add("s");
603         characterSelectionData.add(circumflexA);
604         characterSelectionData.add("m");
605         characterSelectionData.add("p");
606         characterSelectionData.add("l");
607         characterSelectionData.add(tildeE);
608         characterSelectionData.add(".");
609         characterSelectionData.add("w");
610         characterSelectionData.add(circumflexA);
611         characterSelectionData.add("w");
612         characterSelectionData.add("a");
613         characterSelectionData.add("f");
614         characterSelectionData.add("q");
615         characterSelectionData.add("\n");
616         characterSelectionData.add("\r");
617         characterSelectionData.add("\r\n");
618         characterSelectionData.add("\n");
619
620         generalIteratorTest(characterBreak, characterSelectionData);
621     }
622
623     /**
624      * @bug 4098467
625      */
626     public void TestBug4098467Characters() {
627         List<String> characterSelectionData = new ArrayList<String>();
628
629         // What follows is a string of Korean characters (I found it in the Yellow Pages
630         // ad for the Korean Presbyterian Church of San Francisco, and I hope I transcribed
631         // it correctly), first as precomposed syllables, and then as conjoining jamo.
632         // Both sequences should be semantically identical and break the same way.
633         // precomposed syllables...
634         characterSelectionData.add("\uc0c1");
635         characterSelectionData.add("\ud56d");
636         characterSelectionData.add(" ");
637         characterSelectionData.add("\ud55c");
638         characterSelectionData.add("\uc778");
639         characterSelectionData.add(" ");
640         characterSelectionData.add("\uc5f0");
641         characterSelectionData.add("\ud569");
642         characterSelectionData.add(" ");
643         characterSelectionData.add("\uc7a5");
644         characterSelectionData.add("\ub85c");
645         characterSelectionData.add("\uad50");
646         characterSelectionData.add("\ud68c");
647         characterSelectionData.add(" ");
648         // conjoining jamo...
649         characterSelectionData.add("\u1109\u1161\u11bc");
650         characterSelectionData.add("\u1112\u1161\u11bc");
651         characterSelectionData.add(" ");
652         characterSelectionData.add("\u1112\u1161\u11ab");
653         characterSelectionData.add("\u110b\u1175\u11ab");
654         characterSelectionData.add(" ");
655         characterSelectionData.add("\u110b\u1167\u11ab");
656         characterSelectionData.add("\u1112\u1161\u11b8");
657         characterSelectionData.add(" ");
658         characterSelectionData.add("\u110c\u1161\u11bc");
659         characterSelectionData.add("\u1105\u1169");
660         characterSelectionData.add("\u1100\u116d");
661         characterSelectionData.add("\u1112\u116c");
662
663         generalIteratorTest(characterBreak, characterSelectionData);
664     }
665
666     public void TestTitleBreak()
667     {
668         List<String> titleData = new ArrayList<String>();
669         titleData.add("   ");
670         titleData.add("This ");
671         titleData.add("is ");
672         titleData.add("a ");
673         titleData.add("simple ");
674         titleData.add("sample ");
675         titleData.add("sentence. ");
676         titleData.add("This ");
677
678         generalIteratorTest(titleBreak, titleData);
679     }
680
681
682
683     /*
684      * @bug 4153072
685      */
686     public void TestBug4153072() {
687         BreakIterator iter = BreakIterator.getWordInstance();
688         String str = "...Hello, World!...";
689         int begin = 3;
690         int end = str.length() - 3;
691         // not used boolean gotException = false;
692  
693
694         iter.setText(new StringCharacterIterator(str, begin, end, begin));
695         for (int index = -1; index < begin + 1; ++index) {
696             try {
697                 iter.isBoundary(index);
698                 if (index < begin)
699                     errln("Didn't get exception with offset = " + index +
700                                     " and begin index = " + begin);
701             }
702             catch (IllegalArgumentException e) {
703                 if (index >= begin)
704                     errln("Got exception with offset = " + index +
705                                     " and begin index = " + begin);
706             }
707         }
708     }
709
710
711     public void TestBug4146175Lines() {
712         List<String> lineSelectionData = new ArrayList<String>(2);
713
714         // the fullwidth comma should stick to the preceding Japanese character
715         lineSelectionData.add("\u7d42\uff0c");
716         lineSelectionData.add("\u308f");
717
718         generalIteratorTest(lineBreak, lineSelectionData);
719     }
720
721     private static final String cannedTestChars // character pool; TestSentenceInvariants appends more characters and passes it to doOtherInvariantTest
722         = "\u0000\u0001\u0002\u0003\u0004 !\"#$%&()+-01234<=>ABCDE[]^_`abcde{}|\u00a0\u00a2"
723         + "\u00a3\u00a4\u00a5\u00a6\u00a7\u00a8\u00a9\u00ab\u00ad\u00ae\u00af\u00b0\u00b2\u00b3"
724         + "\u00b4\u00b9\u00bb\u00bc\u00bd\u02b0\u02b1\u02b2\u02b3\u02b4\u0300\u0301\u0302\u0303"
725         + "\u0304\u05d0\u05d1\u05d2\u05d3\u05d4\u0903\u093e\u093f\u0940\u0949\u0f3a\u0f3b\u2000"
726         + "\u2001\u2002\u200c\u200d\u200e\u200f\u2010\u2011\u2012\u2028\u2029\u202a\u203e\u203f"
727         + "\u2040\u20dd\u20de\u20df\u20e0\u2160\u2161\u2162\u2163\u2164";
728
729     public void TestSentenceInvariants()
730     {
731         BreakIterator e = BreakIterator.getSentenceInstance();
732         doOtherInvariantTest(e, cannedTestChars + ".,\u3001\u3002\u3041\u3042\u3043\ufeff");
733     }
734
735     public void TestEmptyString()
736     {
737         String text = "";
738         List<String> x = new ArrayList<String>(1);
739         x.add(text);
740
741         generalIteratorTest(lineBreak, x);
742     }
743
744     public void TestGetAvailableLocales()
745     {
746         Locale[] locList = BreakIterator.getAvailableLocales();
747
748         if (locList.length == 0)
749             errln("getAvailableLocales() returned an empty list!");
750         // I have no idea how to test this function...
751         
752         com.ibm.icu.util.ULocale[] ulocList = BreakIterator.getAvailableULocales();
753         if (ulocList.length == 0) {
754             errln("getAvailableULocales() returned an empty list!");        
755         } else {
756             logln("getAvailableULocales() returned " + ulocList.length + " locales");
757         }
758     }
759
760     
761     /**
762      * @bug 4068137
763      */
764     public void TestEndBehavior()
765     {
766         String testString = "boo.";
767         BreakIterator wb = BreakIterator.getWordInstance();
768         wb.setText(testString);
769
770         if (wb.first() != 0)
771             errln("Didn't get break at beginning of string.");
772         if (wb.next() != 3)
773             errln("Didn't get break before period in \"boo.\"");
774         if (wb.current() != 4 && wb.next() != 4)
775             errln("Didn't get break at end of string.");
776     }
777
778     // The Following two tests are ported from ICU4C 1.8.1 [Richard/GCL]
779     /**
780      * Port From:   ICU4C v1.8.1 : textbounds : IntlTestTextBoundary
781      * Source File: $ICU4CRoot/source/test/intltest/ittxtbd.cpp
782      **/
783     /**
784      * test methods preceding, following and isBoundary
785      **/
786     public void TestPreceding() {
787         String words3 = "aaa bbb ccc";
788         BreakIterator e = BreakIterator.getWordInstance(Locale.getDefault());
789         e.setText( words3 );
790         e.first();
791         int p1 = e.next();
792         int p2 = e.next();
793         int p3 = e.next();
794         int p4 = e.next();
795
796         int f = e.following(p2+1);
797         int p = e.preceding(p2+1);
798         if (f!=p3)
799             errln("IntlTestTextBoundary::TestPreceding: f!=p3");
800         if (p!=p2)
801             errln("IntlTestTextBoundary::TestPreceding: p!=p2");
802
803         if (p1+1!=p2)
804             errln("IntlTestTextBoundary::TestPreceding: p1+1!=p2");
805
806         if (p3+1!=p4)
807             errln("IntlTestTextBoundary::TestPreceding: p3+1!=p4");
808
809         if (!e.isBoundary(p2) || e.isBoundary(p2+1) || !e.isBoundary(p3))
810         {
811             errln("IntlTestTextBoundary::TestPreceding: isBoundary err");
812         }
813     }
814
815     
816     /**
817      * Bug 4450804
818      */
819     public void TestLineBreakContractions() {
820         List<String> expected = new ArrayList<String>(7);
821         expected.add("These ");
822         expected.add("are ");
823         expected.add("'foobles'. ");
824         expected.add("Don't ");
825         expected.add("you ");
826         expected.add("like ");
827         expected.add("them?");
828         generalIteratorTest(lineBreak, expected);
829     }
830
831     /**
832      * Ticket#5615
833      */
834     public void TestT5615() {
835         com.ibm.icu.util.ULocale[] ulocales = BreakIterator.getAvailableULocales();
836         int type = 0;
837         com.ibm.icu.util.ULocale loc = null;
838         try {
839             for (int i = 0; i < ulocales.length; i++) {
840                 loc = ulocales[i];
841                 for (type = 0; type < 5 /* 5 = BreakIterator.KIND_COUNT */; ++type) {
842                     BreakIterator brk = BreakIterator.getBreakInstance(loc, type);
843                     if (brk == null) {
844                         errln("ERR: Failed to create an instance type: " + type + " / locale: " + loc);
845                     }
846                 }
847             }
848         } catch (Exception e) {
849             errln("ERR: Failed to create an instance type: " + type + " / locale: " + loc + " / exception: " + e.getMessage());
850         }
851     }
852     
853     /*
854      * Tests the constructors public DictionaryBasedBreakIterator(String rules, ... public
855      * DictionaryBasedBreakIterator(InputStream compiledRules, ...
856      */
857     public void TestDictionaryBasedBreakIterator() throws IOException {
858         // The following class allows the testing of the constructor
859         // public DictionaryBasedBreakIterator(String rules, ...
860         class TestDictionaryBasedBreakIterator extends DictionaryBasedBreakIterator {
861             public TestDictionaryBasedBreakIterator(InputStream is) throws IOException {
862                 super("", is);
863             }
864         }
865         try {
866             @SuppressWarnings("unused")
867             TestDictionaryBasedBreakIterator td = new TestDictionaryBasedBreakIterator(null);
868             errln("DictionaryBasedBreakIterator constructor is suppose to return an "
869                     + "exception for an empty string.");
870         } catch (Exception e) {
871         }
872         
873         try {
874             File file = File.createTempFile("dummy", "");
875             FileInputStream fis = new FileInputStream(file);
876             DataInputStream dis = new DataInputStream(fis);
877             @SuppressWarnings("unused")
878             TestDictionaryBasedBreakIterator td = new TestDictionaryBasedBreakIterator(dis);
879             errln("DictionaryBasedBreakIterator constructor is suppose to return an "
880                     + "exception for a temporary file with EOF.");
881         } catch (Exception e) {
882         }
883         
884         // The following class allows the testing of the constructor
885         // public DictionaryBasedBreakIterator(InputStream compiledRules, ...
886         class TestDictionaryBasedBreakIterator1 extends DictionaryBasedBreakIterator {
887             public TestDictionaryBasedBreakIterator1() throws IOException {
888                 super((InputStream) null, (InputStream) null);
889             }
890
891         }
892         try {
893             @SuppressWarnings("unused")
894             TestDictionaryBasedBreakIterator1 td1 = new TestDictionaryBasedBreakIterator1();
895             errln("DictionaryBasedBreakIterator constructor is suppose to return an "
896                     + "exception for an null input stream.");
897         } catch (Exception e) {
898         }
899     }   
900 }