]> gitweb.fperrin.net Git - Dictionary.git/blob - jars/icu4j-4_4_2-src/main/tests/core/src/com/ibm/icu/dev/test/rbbi/BreakIteratorTest.java
go
[Dictionary.git] / jars / icu4j-4_4_2-src / main / tests / core / src / com / ibm / icu / dev / test / rbbi / BreakIteratorTest.java
1 /*\r
2  *******************************************************************************\r
3  * Copyright (C) 1996-2010, International Business Machines Corporation and    *\r
4  * others. All Rights Reserved.                                                *\r
5  *******************************************************************************\r
6  */\r
7 package com.ibm.icu.dev.test.rbbi;\r
8 \r
9 import java.io.DataInputStream;\r
10 import java.io.File;\r
11 import java.io.FileInputStream;\r
12 import java.io.IOException;\r
13 import java.io.InputStream;\r
14 import java.text.StringCharacterIterator;\r
15 import java.util.Locale;\r
16 import java.util.Vector;\r
17 \r
18 import com.ibm.icu.dev.test.TestFmwk;\r
19 import com.ibm.icu.text.BreakIterator;\r
20 import com.ibm.icu.text.DictionaryBasedBreakIterator;\r
21 \r
22 public class BreakIteratorTest extends TestFmwk\r
23 {\r
24     private BreakIterator characterBreak;\r
25     private BreakIterator wordBreak;\r
26     private BreakIterator lineBreak;\r
27     private BreakIterator sentenceBreak;\r
28     private BreakIterator titleBreak;\r
29 \r
30     public static void main(String[] args) throws Exception {\r
31         new BreakIteratorTest().run(args);\r
32     }\r
33     public BreakIteratorTest()\r
34     {\r
35 \r
36     }\r
37     protected void init(){\r
38         characterBreak = BreakIterator.getCharacterInstance();\r
39         wordBreak = BreakIterator.getWordInstance();\r
40         lineBreak = BreakIterator.getLineInstance();\r
41         //logln("Creating sentence iterator...");\r
42         sentenceBreak = BreakIterator.getSentenceInstance();\r
43         //logln("Finished creating sentence iterator...");\r
44         titleBreak = BreakIterator.getTitleInstance();\r
45     }\r
46     //=========================================================================\r
47     // general test subroutines\r
48     //=========================================================================\r
49 \r
50     private void generalIteratorTest(BreakIterator bi, Vector expectedResult) {\r
51         StringBuffer buffer = new StringBuffer();\r
52         String text;\r
53         for (int i = 0; i < expectedResult.size(); i++) {\r
54             text = (String)expectedResult.elementAt(i);\r
55             buffer.append(text);\r
56         }\r
57         text = buffer.toString();\r
58 \r
59         bi.setText(text);\r
60 \r
61         Vector nextResults = _testFirstAndNext(bi, text);\r
62         Vector previousResults = _testLastAndPrevious(bi, text);\r
63 \r
64         logln("comparing forward and backward...");\r
65         int errs = getErrorCount();\r
66         compareFragmentLists("forward iteration", "backward iteration", nextResults,\r
67                         previousResults);\r
68         if (getErrorCount() == errs) {\r
69             logln("comparing expected and actual...");\r
70             compareFragmentLists("expected result", "actual result", expectedResult,\r
71                             nextResults);\r
72         }\r
73 \r
74         int[] boundaries = new int[expectedResult.size() + 3];\r
75         boundaries[0] = BreakIterator.DONE;\r
76         boundaries[1] = 0;\r
77         for (int i = 0; i < expectedResult.size(); i++)\r
78             boundaries[i + 2] = boundaries[i + 1] + ((String)expectedResult.elementAt(i)).\r
79                             length();\r
80         boundaries[boundaries.length - 1] = BreakIterator.DONE;\r
81 \r
82         _testFollowing(bi, text, boundaries);\r
83         _testPreceding(bi, text, boundaries);\r
84         _testIsBoundary(bi, text, boundaries);\r
85 \r
86         doMultipleSelectionTest(bi, text);\r
87     }\r
88 \r
89     private Vector _testFirstAndNext(BreakIterator bi, String text) {\r
90         int p = bi.first();\r
91         int lastP = p;\r
92         Vector result = new Vector();\r
93 \r
94         if (p != 0)\r
95             errln("first() returned " + p + " instead of 0");\r
96         while (p != BreakIterator.DONE) {\r
97             p = bi.next();\r
98             if (p != BreakIterator.DONE) {\r
99                 if (p <= lastP)\r
100                     errln("next() failed to move forward: next() on position "\r
101                                     + lastP + " yielded " + p);\r
102 \r
103                 result.addElement(text.substring(lastP, p));\r
104             }\r
105             else {\r
106                 if (lastP != text.length())\r
107                     errln("next() returned DONE prematurely: offset was "\r
108                                     + lastP + " instead of " + text.length());\r
109             }\r
110             lastP = p;\r
111         }\r
112         return result;\r
113     }\r
114 \r
115     private Vector _testLastAndPrevious(BreakIterator bi, String text) {\r
116         int p = bi.last();\r
117         int lastP = p;\r
118         Vector result = new Vector();\r
119 \r
120         if (p != text.length())\r
121             errln("last() returned " + p + " instead of " + text.length());\r
122         while (p != BreakIterator.DONE) {\r
123             p = bi.previous();\r
124             if (p != BreakIterator.DONE) {\r
125                 if (p >= lastP)\r
126                     errln("previous() failed to move backward: previous() on position "\r
127                                     + lastP + " yielded " + p);\r
128 \r
129                 result.insertElementAt(text.substring(p, lastP), 0);\r
130             }\r
131             else {\r
132                 if (lastP != 0)\r
133                     errln("previous() returned DONE prematurely: offset was "\r
134                                     + lastP + " instead of 0");\r
135             }\r
136             lastP = p;\r
137         }\r
138         return result;\r
139     }\r
140 \r
141     private void compareFragmentLists(String f1Name, String f2Name, Vector f1, Vector f2) {\r
142         int p1 = 0;\r
143         int p2 = 0;\r
144         String s1;\r
145         String s2;\r
146         int t1 = 0;\r
147         int t2 = 0;\r
148 \r
149         while (p1 < f1.size() && p2 < f2.size()) {\r
150             s1 = (String)f1.elementAt(p1);\r
151             s2 = (String)f2.elementAt(p2);\r
152             t1 += s1.length();\r
153             t2 += s2.length();\r
154 \r
155             if (s1.equals(s2)) {\r
156                 debugLogln("   >" + s1 + "<");\r
157                 ++p1;\r
158                 ++p2;\r
159             }\r
160             else {\r
161                 int tempT1 = t1;\r
162                 int tempT2 = t2;\r
163                 int tempP1 = p1;\r
164                 int tempP2 = p2;\r
165 \r
166                 while (tempT1 != tempT2 && tempP1 < f1.size() && tempP2 < f2.size()) {\r
167                     while (tempT1 < tempT2 && tempP1 < f1.size()) {\r
168                         tempT1 += ((String)f1.elementAt(tempP1)).length();\r
169                         ++tempP1;\r
170                     }\r
171                     while (tempT2 < tempT1 && tempP2 < f2.size()) {\r
172                         tempT2 += ((String)f2.elementAt(tempP2)).length();\r
173                         ++tempP2;\r
174                     }\r
175                 }\r
176                 logln("*** " + f1Name + " has:");\r
177                 while (p1 <= tempP1 && p1 < f1.size()) {\r
178                     s1 = (String)f1.elementAt(p1);\r
179                     t1 += s1.length();\r
180                     debugLogln(" *** >" + s1 + "<");\r
181                     ++p1;\r
182                 }\r
183                 logln("***** " + f2Name + " has:");\r
184                 while (p2 <= tempP2 && p2 < f2.size()) {\r
185                     s2 = (String)f2.elementAt(p2);\r
186                     t2 += s2.length();\r
187                     debugLogln(" ***** >" + s2 + "<");\r
188                     ++p2;\r
189                 }\r
190                 errln("Discrepancy between " + f1Name + " and " + f2Name);\r
191             }\r
192         }\r
193     }\r
194 \r
195     private void _testFollowing(BreakIterator bi, String text, int[] boundaries) {\r
196         logln("testFollowing():");\r
197         int p = 2;\r
198         for (int i = 0; i <= text.length(); i++) {\r
199             if (i == boundaries[p])\r
200                 ++p;\r
201 \r
202             int b = bi.following(i);\r
203             logln("bi.following(" + i + ") -> " + b);\r
204             if (b != boundaries[p])\r
205                 errln("Wrong result from following() for " + i + ": expected " + boundaries[p]\r
206                                 + ", got " + b);\r
207         }\r
208     }\r
209 \r
210     private void _testPreceding(BreakIterator bi, String text, int[] boundaries) {\r
211         logln("testPreceding():");\r
212         int p = 0;\r
213         for (int i = 0; i <= text.length(); i++) {\r
214             int b = bi.preceding(i);\r
215             logln("bi.preceding(" + i + ") -> " + b);\r
216             if (b != boundaries[p])\r
217                 errln("Wrong result from preceding() for " + i + ": expected " + boundaries[p]\r
218                                 + ", got " + b);\r
219 \r
220             if (i == boundaries[p + 1])\r
221                 ++p;\r
222         }\r
223     }\r
224 \r
225     private void _testIsBoundary(BreakIterator bi, String text, int[] boundaries) {\r
226         logln("testIsBoundary():");\r
227         int p = 1;\r
228         boolean isB;\r
229         for (int i = 0; i <= text.length(); i++) {\r
230             isB = bi.isBoundary(i);\r
231             logln("bi.isBoundary(" + i + ") -> " + isB);\r
232 \r
233             if (i == boundaries[p]) {\r
234                 if (!isB)\r
235                     errln("Wrong result from isBoundary() for " + i + ": expected true, got false");\r
236                 ++p;\r
237             }\r
238             else {\r
239                 if (isB)\r
240                     errln("Wrong result from isBoundary() for " + i + ": expected false, got true");\r
241             }\r
242         }\r
243     }\r
244 \r
245     private void doMultipleSelectionTest(BreakIterator iterator, String testText)\r
246     {\r
247         logln("Multiple selection test...");\r
248         BreakIterator testIterator = (BreakIterator)iterator.clone();\r
249         int offset = iterator.first();\r
250         int testOffset;\r
251         int count = 0;\r
252 \r
253         do {\r
254             testOffset = testIterator.first();\r
255             testOffset = testIterator.next(count);\r
256             logln("next(" + count + ") -> " + testOffset);\r
257             if (offset != testOffset)\r
258                 errln("next(n) and next() not returning consistent results: for step " + count + ", next(n) returned " + testOffset + " and next() had " + offset);\r
259 \r
260             if (offset != BreakIterator.DONE) {\r
261                 count++;\r
262                 offset = iterator.next();\r
263             }\r
264         } while (offset != BreakIterator.DONE);\r
265 \r
266         // now do it backwards...\r
267         offset = iterator.last();\r
268         count = 0;\r
269 \r
270         do {\r
271             testOffset = testIterator.last();\r
272             testOffset = testIterator.next(count);\r
273             logln("next(" + count + ") -> " + testOffset);\r
274             if (offset != testOffset)\r
275                 errln("next(n) and next() not returning consistent results: for step " + count + ", next(n) returned " + testOffset + " and next() had " + offset);\r
276 \r
277             if (offset != BreakIterator.DONE) {\r
278                 count--;\r
279                 offset = iterator.previous();\r
280             }\r
281         } while (offset != BreakIterator.DONE);\r
282     }\r
283 \r
284 \r
285     private void doOtherInvariantTest(BreakIterator tb, String testChars)\r
286     {\r
287         StringBuffer work = new StringBuffer("a\r\na");\r
288         int errorCount = 0;\r
289 \r
290         // a break should never occur between CR and LF\r
291         for (int i = 0; i < testChars.length(); i++) {\r
292             work.setCharAt(0, testChars.charAt(i));\r
293             for (int j = 0; j < testChars.length(); j++) {\r
294                 work.setCharAt(3, testChars.charAt(j));\r
295                 tb.setText(work.toString());\r
296                 for (int k = tb.first(); k != BreakIterator.DONE; k = tb.next())\r
297                     if (k == 2) {\r
298                         errln("Break between CR and LF in string U+" + Integer.toHexString(\r
299                                 (int)(work.charAt(0))) + ", U+d U+a U+" + Integer.toHexString(\r
300                                 (int)(work.charAt(3))));\r
301                         errorCount++;\r
302                         if (errorCount >= 75)\r
303                             return;\r
304                     }\r
305             }\r
306         }\r
307 \r
308         // a break should never occur before a non-spacing mark, unless it's preceded\r
309         // by a line terminator\r
310         work.setLength(0);\r
311         work.append("aaaa");\r
312         for (int i = 0; i < testChars.length(); i++) {\r
313             char c = testChars.charAt(i);\r
314             if (c == '\n' || c == '\r' || c == '\u2029' || c == '\u2028' || c == '\u0003')\r
315                 continue;\r
316             work.setCharAt(1, c);\r
317             for (int j = 0; j < testChars.length(); j++) {\r
318                 c = testChars.charAt(j);\r
319                 if (Character.getType(c) != Character.NON_SPACING_MARK && Character.getType(c)\r
320                         != Character.ENCLOSING_MARK)\r
321                     continue;\r
322                 work.setCharAt(2, c);\r
323                 tb.setText(work.toString());\r
324                 for (int k = tb.first(); k != BreakIterator.DONE; k = tb.next())\r
325                     if (k == 2) {\r
326                         errln("Break between U+" + Integer.toHexString((int)(work.charAt(1)))\r
327                                 + " and U+" + Integer.toHexString((int)(work.charAt(2))));\r
328                         errorCount++;\r
329                         if (errorCount >= 75)\r
330                             return;\r
331                     }\r
332             }\r
333         }\r
334     }\r
335 \r
336     public void debugLogln(String s) {\r
337         final String zeros = "0000";\r
338         String temp;\r
339         StringBuffer out = new StringBuffer();\r
340         for (int i = 0; i < s.length(); i++) {\r
341             char c = s.charAt(i);\r
342             if (c >= ' ' && c < '\u007f')\r
343                 out.append(c);\r
344             else {\r
345                 out.append("\\u");\r
346                 temp = Integer.toHexString((int)c);\r
347                 out.append(zeros.substring(0, 4 - temp.length()));\r
348                 out.append(temp);\r
349             }\r
350         }\r
351         logln(out.toString());\r
352     }\r
353 \r
354     //=========================================================================\r
355     // tests\r
356     //=========================================================================\r
357 \r
358 \r
359     /**\r
360      * @bug 4097779\r
361      */\r
362     public void TestBug4097779() {\r
363         Vector wordSelectionData = new Vector();\r
364 \r
365         wordSelectionData.addElement("aa\u0300a");\r
366         wordSelectionData.addElement(" ");\r
367 \r
368         generalIteratorTest(wordBreak, wordSelectionData);\r
369     }\r
370 \r
371     /**\r
372      * @bug 4098467\r
373      */\r
374     public void TestBug4098467Words() {\r
375         Vector wordSelectionData = new Vector();\r
376 \r
377         // What follows is a string of Korean characters (I found it in the Yellow Pages\r
378         // ad for the Korean Presbyterian Church of San Francisco, and I hope I transcribed\r
379         // it correctly), first as precomposed syllables, and then as conjoining jamo.\r
380         // Both sequences should be semantically identical and break the same way.\r
381         // precomposed syllables...\r
382         wordSelectionData.addElement("\uc0c1\ud56d");\r
383         wordSelectionData.addElement(" ");\r
384         wordSelectionData.addElement("\ud55c\uc778");\r
385         wordSelectionData.addElement(" ");\r
386         wordSelectionData.addElement("\uc5f0\ud569");\r
387         wordSelectionData.addElement(" ");\r
388         wordSelectionData.addElement("\uc7a5\ub85c\uad50\ud68c");\r
389         wordSelectionData.addElement(" ");\r
390         // conjoining jamo...\r
391         wordSelectionData.addElement("\u1109\u1161\u11bc\u1112\u1161\u11bc");\r
392         wordSelectionData.addElement(" ");\r
393         wordSelectionData.addElement("\u1112\u1161\u11ab\u110b\u1175\u11ab");\r
394         wordSelectionData.addElement(" ");\r
395         wordSelectionData.addElement("\u110b\u1167\u11ab\u1112\u1161\u11b8");\r
396         wordSelectionData.addElement(" ");\r
397         wordSelectionData.addElement("\u110c\u1161\u11bc\u1105\u1169\u1100\u116d\u1112\u116c");\r
398         wordSelectionData.addElement(" ");\r
399 \r
400         generalIteratorTest(wordBreak, wordSelectionData);\r
401     }\r
402 \r
403 \r
404     /**\r
405      * @bug 4111338\r
406      */\r
407     public void TestBug4111338() {\r
408         Vector sentenceSelectionData = new Vector();\r
409 \r
410         // test for bug #4111338: Don't break sentences at the boundary between CJK\r
411         // and other letters\r
412         sentenceSelectionData.addElement("\u5487\u67ff\ue591\u5017\u61b3\u60a1\u9510\u8165:\"JAVA\u821c"\r
413                 + "\u8165\u7fc8\u51ce\u306d,\u2494\u56d8\u4ec0\u60b1\u8560\u51ba"\r
414                 + "\u611d\u57b6\u2510\u5d46\".\u2029");\r
415         sentenceSelectionData.addElement("\u5487\u67ff\ue591\u5017\u61b3\u60a1\u9510\u8165\u9de8"\r
416                 + "\u97e4JAVA\u821c\u8165\u7fc8\u51ce\u306d\ue30b\u2494\u56d8\u4ec0"\r
417                 + "\u60b1\u8560\u51ba\u611d\u57b6\u2510\u5d46\u97e5\u7751\u2029");\r
418         sentenceSelectionData.addElement("\u5487\u67ff\ue591\u5017\u61b3\u60a1\u9510\u8165\u9de8\u97e4"\r
419                 + "\u6470\u8790JAVA\u821c\u8165\u7fc8\u51ce\u306d\ue30b\u2494\u56d8"\r
420                 + "\u4ec0\u60b1\u8560\u51ba\u611d\u57b6\u2510\u5d46\u97e5\u7751\u2029");\r
421         sentenceSelectionData.addElement("He said, \"I can go there.\"\u2029");\r
422 \r
423         generalIteratorTest(sentenceBreak, sentenceSelectionData);\r
424     }\r
425 \r
426 \r
427     /**\r
428      * @bug 4143071\r
429      */\r
430     public void TestBug4143071() {\r
431         Vector sentenceSelectionData = new Vector();\r
432 \r
433         // Make sure sentences that end with digits work right\r
434         sentenceSelectionData.addElement("Today is the 27th of May, 1998.  ");\r
435         sentenceSelectionData.addElement("Tomorrow will be 28 May 1998.  ");\r
436         sentenceSelectionData.addElement("The day after will be the 30th.\u2029");\r
437 \r
438         generalIteratorTest(sentenceBreak, sentenceSelectionData);\r
439     }\r
440 \r
441     /**\r
442      * @bug 4152416\r
443      */\r
444     public void TestBug4152416() {\r
445         Vector sentenceSelectionData = new Vector();\r
446 \r
447         // Make sure sentences ending with a capital letter are treated correctly\r
448         sentenceSelectionData.addElement("The type of all primitive "\r
449                 + "<code>boolean</code> values accessed in the target VM.  ");\r
450         sentenceSelectionData.addElement("Calls to xxx will return an "\r
451                 + "implementor of this interface.\u2029");\r
452 \r
453         generalIteratorTest(sentenceBreak, sentenceSelectionData);\r
454     }\r
455 \r
456     /**\r
457      * @bug 4152117\r
458      */\r
459     public void TestBug4152117() {\r
460         Vector sentenceSelectionData = new Vector();\r
461 \r
462         // Make sure sentence breaking is handling punctuation correctly\r
463         // [COULD NOT REPRODUCE THIS BUG, BUT TEST IS HERE TO MAKE SURE\r
464         // IT DOESN'T CROP UP]\r
465         sentenceSelectionData.addElement("Constructs a randomly generated "\r
466                 + "BigInteger, uniformly distributed over the range <tt>0</tt> "\r
467                 + "to <tt>(2<sup>numBits</sup> - 1)</tt>, inclusive.  ");\r
468         sentenceSelectionData.addElement("The uniformity of the distribution "\r
469                 + "assumes that a fair source of random bits is provided in "\r
470                 + "<tt>rnd</tt>.  ");\r
471         sentenceSelectionData.addElement("Note that this constructor always "\r
472                 + "constructs a non-negative BigInteger.\u2029");\r
473 \r
474         generalIteratorTest(sentenceBreak, sentenceSelectionData);\r
475     }\r
476 \r
477     public void TestLineBreak() {\r
478         Vector lineSelectionData = new Vector();\r
479 \r
480         lineSelectionData.addElement("Multi-");\r
481         lineSelectionData.addElement("Level ");\r
482         lineSelectionData.addElement("example ");\r
483         lineSelectionData.addElement("of ");\r
484         lineSelectionData.addElement("a ");\r
485         lineSelectionData.addElement("semi-");\r
486         lineSelectionData.addElement("idiotic ");\r
487         lineSelectionData.addElement("non-");\r
488         lineSelectionData.addElement("sensical ");\r
489         lineSelectionData.addElement("(non-");\r
490         lineSelectionData.addElement("important) ");\r
491         lineSelectionData.addElement("sentence. ");\r
492 \r
493         lineSelectionData.addElement("Hi  ");\r
494         lineSelectionData.addElement("Hello ");\r
495         lineSelectionData.addElement("How\n");\r
496         lineSelectionData.addElement("are\r");\r
497         lineSelectionData.addElement("you\u2028");\r
498         lineSelectionData.addElement("fine.\t");\r
499         lineSelectionData.addElement("good.  ");\r
500 \r
501         lineSelectionData.addElement("Now\r");\r
502         lineSelectionData.addElement("is\n");\r
503         lineSelectionData.addElement("the\r\n");\r
504         lineSelectionData.addElement("time\n");\r
505         lineSelectionData.addElement("\r");\r
506         lineSelectionData.addElement("for\r");\r
507         lineSelectionData.addElement("\r");\r
508         lineSelectionData.addElement("all");\r
509 \r
510         generalIteratorTest(lineBreak, lineSelectionData);\r
511     }\r
512 \r
513     /**\r
514      * @bug 4068133\r
515      */\r
516     public void TestBug4068133() {\r
517         Vector lineSelectionData = new Vector();\r
518 \r
519         lineSelectionData.addElement("\u96f6");\r
520         lineSelectionData.addElement("\u4e00\u3002");\r
521         lineSelectionData.addElement("\u4e8c\u3001");\r
522         lineSelectionData.addElement("\u4e09\u3002\u3001");\r
523         lineSelectionData.addElement("\u56db\u3001\u3002\u3001");\r
524         lineSelectionData.addElement("\u4e94,");\r
525         lineSelectionData.addElement("\u516d.");\r
526         lineSelectionData.addElement("\u4e03.\u3001,\u3002");\r
527         lineSelectionData.addElement("\u516b");\r
528 \r
529         generalIteratorTest(lineBreak, lineSelectionData);\r
530     }\r
531 \r
532     /**\r
533      * @bug 4086052\r
534      */\r
535     public void TestBug4086052() {\r
536         Vector lineSelectionData = new Vector();\r
537 \r
538         lineSelectionData.addElement("foo\u00a0bar ");\r
539 //        lineSelectionData.addElement("foo\ufeffbar");\r
540 \r
541         generalIteratorTest(lineBreak, lineSelectionData);\r
542     }\r
543 \r
544     /**\r
545      * @bug 4097920\r
546      */\r
547     public void TestBug4097920() {\r
548         Vector lineSelectionData = new Vector();\r
549 \r
550         lineSelectionData.addElement("dog,cat,mouse ");\r
551         lineSelectionData.addElement("(one)");\r
552         lineSelectionData.addElement("(two)\n");\r
553         generalIteratorTest(lineBreak, lineSelectionData);\r
554     }\r
555 \r
556  \r
557 \r
558     /**\r
559      * @bug 4117554\r
560      */\r
561     public void TestBug4117554Lines() {\r
562         Vector lineSelectionData = new Vector();\r
563 \r
564         // Fullwidth .!? should be treated as postJwrd\r
565         lineSelectionData.addElement("\u4e01\uff0e");\r
566         lineSelectionData.addElement("\u4e02\uff01");\r
567         lineSelectionData.addElement("\u4e03\uff1f");\r
568 \r
569         generalIteratorTest(lineBreak, lineSelectionData);\r
570     }\r
571 \r
572     public void TestLettersAndDigits() {\r
573         // a character sequence such as "X11" or "30F3" or "native2ascii" should\r
574         // be kept together as a single word\r
575         Vector lineSelectionData = new Vector();\r
576 \r
577         lineSelectionData.addElement("X11 ");\r
578         lineSelectionData.addElement("30F3 ");\r
579         lineSelectionData.addElement("native2ascii");\r
580 \r
581         generalIteratorTest(lineBreak, lineSelectionData);\r
582     }\r
583 \r
584 \r
585     private static final String graveS = "S\u0300";\r
586     private static final String acuteBelowI = "i\u0317";\r
587     private static final String acuteE = "e\u0301";\r
588     private static final String circumflexA = "a\u0302";\r
589     private static final String tildeE = "e\u0303";\r
590 \r
591     public void TestCharacterBreak() {\r
592         Vector characterSelectionData = new Vector();\r
593 \r
594         characterSelectionData.addElement(graveS);\r
595         characterSelectionData.addElement(acuteBelowI);\r
596         characterSelectionData.addElement("m");\r
597         characterSelectionData.addElement("p");\r
598         characterSelectionData.addElement("l");\r
599         characterSelectionData.addElement(acuteE);\r
600         characterSelectionData.addElement(" ");\r
601         characterSelectionData.addElement("s");\r
602         characterSelectionData.addElement(circumflexA);\r
603         characterSelectionData.addElement("m");\r
604         characterSelectionData.addElement("p");\r
605         characterSelectionData.addElement("l");\r
606         characterSelectionData.addElement(tildeE);\r
607         characterSelectionData.addElement(".");\r
608         characterSelectionData.addElement("w");\r
609         characterSelectionData.addElement(circumflexA);\r
610         characterSelectionData.addElement("w");\r
611         characterSelectionData.addElement("a");\r
612         characterSelectionData.addElement("f");\r
613         characterSelectionData.addElement("q");\r
614         characterSelectionData.addElement("\n");\r
615         characterSelectionData.addElement("\r");\r
616         characterSelectionData.addElement("\r\n");\r
617         characterSelectionData.addElement("\n");\r
618 \r
619         generalIteratorTest(characterBreak, characterSelectionData);\r
620     }\r
621 \r
622     /**\r
623      * @bug 4098467\r
624      */\r
625     public void TestBug4098467Characters() {\r
626         Vector characterSelectionData = new Vector();\r
627 \r
628         // What follows is a string of Korean characters (I found it in the Yellow Pages\r
629         // ad for the Korean Presbyterian Church of San Francisco, and I hope I transcribed\r
630         // it correctly), first as precomposed syllables, and then as conjoining jamo.\r
631         // Both sequences should be semantically identical and break the same way.\r
632         // precomposed syllables...\r
633         characterSelectionData.addElement("\uc0c1");\r
634         characterSelectionData.addElement("\ud56d");\r
635         characterSelectionData.addElement(" ");\r
636         characterSelectionData.addElement("\ud55c");\r
637         characterSelectionData.addElement("\uc778");\r
638         characterSelectionData.addElement(" ");\r
639         characterSelectionData.addElement("\uc5f0");\r
640         characterSelectionData.addElement("\ud569");\r
641         characterSelectionData.addElement(" ");\r
642         characterSelectionData.addElement("\uc7a5");\r
643         characterSelectionData.addElement("\ub85c");\r
644         characterSelectionData.addElement("\uad50");\r
645         characterSelectionData.addElement("\ud68c");\r
646         characterSelectionData.addElement(" ");\r
647         // conjoining jamo...\r
648         characterSelectionData.addElement("\u1109\u1161\u11bc");\r
649         characterSelectionData.addElement("\u1112\u1161\u11bc");\r
650         characterSelectionData.addElement(" ");\r
651         characterSelectionData.addElement("\u1112\u1161\u11ab");\r
652         characterSelectionData.addElement("\u110b\u1175\u11ab");\r
653         characterSelectionData.addElement(" ");\r
654         characterSelectionData.addElement("\u110b\u1167\u11ab");\r
655         characterSelectionData.addElement("\u1112\u1161\u11b8");\r
656         characterSelectionData.addElement(" ");\r
657         characterSelectionData.addElement("\u110c\u1161\u11bc");\r
658         characterSelectionData.addElement("\u1105\u1169");\r
659         characterSelectionData.addElement("\u1100\u116d");\r
660         characterSelectionData.addElement("\u1112\u116c");\r
661 \r
662         generalIteratorTest(characterBreak, characterSelectionData);\r
663     }\r
664 \r
665     public void TestTitleBreak()\r
666     {\r
667         Vector titleData = new Vector();\r
668         titleData.addElement("   ");\r
669         titleData.addElement("This ");\r
670         titleData.addElement("is ");\r
671         titleData.addElement("a ");\r
672         titleData.addElement("simple ");\r
673         titleData.addElement("sample ");\r
674         titleData.addElement("sentence. ");\r
675         titleData.addElement("This ");\r
676 \r
677         generalIteratorTest(titleBreak, titleData);\r
678     }\r
679 \r
680 \r
681 \r
682     /*\r
683      * @bug 4153072\r
684      */\r
685     public void TestBug4153072() {\r
686         BreakIterator iter = BreakIterator.getWordInstance();\r
687         String str = "...Hello, World!...";\r
688         int begin = 3;\r
689         int end = str.length() - 3;\r
690         // not used boolean gotException = false;\r
691  \r
692 \r
693         iter.setText(new StringCharacterIterator(str, begin, end, begin));\r
694         for (int index = -1; index < begin + 1; ++index) {\r
695             try {\r
696                 iter.isBoundary(index);\r
697                 if (index < begin)\r
698                     errln("Didn't get exception with offset = " + index +\r
699                                     " and begin index = " + begin);\r
700             }\r
701             catch (IllegalArgumentException e) {\r
702                 if (index >= begin)\r
703                     errln("Got exception with offset = " + index +\r
704                                     " and begin index = " + begin);\r
705             }\r
706         }\r
707     }\r
708 \r
709 \r
710     public void TestBug4146175Lines() {\r
711         Vector lineSelectionData = new Vector();\r
712 \r
713         // the fullwidth comma should stick to the preceding Japanese character\r
714         lineSelectionData.addElement("\u7d42\uff0c");\r
715         lineSelectionData.addElement("\u308f");\r
716 \r
717         generalIteratorTest(lineBreak, lineSelectionData);\r
718     }\r
719 \r
720     private static final String cannedTestChars\r
721         = "\u0000\u0001\u0002\u0003\u0004 !\"#$%&()+-01234<=>ABCDE[]^_`abcde{}|\u00a0\u00a2"\r
722         + "\u00a3\u00a4\u00a5\u00a6\u00a7\u00a8\u00a9\u00ab\u00ad\u00ae\u00af\u00b0\u00b2\u00b3"\r
723         + "\u00b4\u00b9\u00bb\u00bc\u00bd\u02b0\u02b1\u02b2\u02b3\u02b4\u0300\u0301\u0302\u0303"\r
724         + "\u0304\u05d0\u05d1\u05d2\u05d3\u05d4\u0903\u093e\u093f\u0940\u0949\u0f3a\u0f3b\u2000"\r
725         + "\u2001\u2002\u200c\u200d\u200e\u200f\u2010\u2011\u2012\u2028\u2029\u202a\u203e\u203f"\r
726         + "\u2040\u20dd\u20de\u20df\u20e0\u2160\u2161\u2162\u2163\u2164";\r
727 \r
728     public void TestSentenceInvariants()\r
729     {\r
730         BreakIterator e = BreakIterator.getSentenceInstance();\r
731         doOtherInvariantTest(e, cannedTestChars + ".,\u3001\u3002\u3041\u3042\u3043\ufeff");\r
732     }\r
733 \r
734     public void TestEmptyString()\r
735     {\r
736         String text = "";\r
737         Vector x = new Vector();\r
738         x.addElement(text);\r
739 \r
740         generalIteratorTest(lineBreak, x);\r
741     }\r
742 \r
743     public void TestGetAvailableLocales()\r
744     {\r
745         Locale[] locList = BreakIterator.getAvailableLocales();\r
746 \r
747         if (locList.length == 0)\r
748             errln("getAvailableLocales() returned an empty list!");\r
749         // I have no idea how to test this function...\r
750         \r
751         com.ibm.icu.util.ULocale[] ulocList = BreakIterator.getAvailableULocales();\r
752         if (ulocList.length == 0) {\r
753             errln("getAvailableULocales() returned an empty list!");        \r
754         } else {\r
755             logln("getAvailableULocales() returned " + ulocList.length + " locales");\r
756         }\r
757     }\r
758 \r
759     \r
760     /**\r
761      * @bug 4068137\r
762      */\r
763     public void TestEndBehavior()\r
764     {\r
765         String testString = "boo.";\r
766         BreakIterator wb = BreakIterator.getWordInstance();\r
767         wb.setText(testString);\r
768 \r
769         if (wb.first() != 0)\r
770             errln("Didn't get break at beginning of string.");\r
771         if (wb.next() != 3)\r
772             errln("Didn't get break before period in \"boo.\"");\r
773         if (wb.current() != 4 && wb.next() != 4)\r
774             errln("Didn't get break at end of string.");\r
775     }\r
776 \r
777     // The Following two tests are ported from ICU4C 1.8.1 [Richard/GCL]\r
778     /**\r
779      * Port From:   ICU4C v1.8.1 : textbounds : IntlTestTextBoundary\r
780      * Source File: $ICU4CRoot/source/test/intltest/ittxtbd.cpp\r
781      **/\r
782     /**\r
783      * test methods preceding, following and isBoundary\r
784      **/\r
785     public void TestPreceding() {\r
786         String words3 = "aaa bbb ccc";\r
787         BreakIterator e = BreakIterator.getWordInstance(Locale.getDefault());\r
788         e.setText( words3 );\r
789         e.first();\r
790         int p1 = e.next();\r
791         int p2 = e.next();\r
792         int p3 = e.next();\r
793         int p4 = e.next();\r
794 \r
795         int f = e.following(p2+1);\r
796         int p = e.preceding(p2+1);\r
797         if (f!=p3)\r
798             errln("IntlTestTextBoundary::TestPreceding: f!=p3");\r
799         if (p!=p2)\r
800             errln("IntlTestTextBoundary::TestPreceding: p!=p2");\r
801 \r
802         if (p1+1!=p2)\r
803             errln("IntlTestTextBoundary::TestPreceding: p1+1!=p2");\r
804 \r
805         if (p3+1!=p4)\r
806             errln("IntlTestTextBoundary::TestPreceding: p3+1!=p4");\r
807 \r
808         if (!e.isBoundary(p2) || e.isBoundary(p2+1) || !e.isBoundary(p3))\r
809         {\r
810             errln("IntlTestTextBoundary::TestPreceding: isBoundary err");\r
811         }\r
812     }\r
813 \r
814     \r
815     /**\r
816      * Bug 4450804\r
817      */\r
818     public void TestLineBreakContractions() {\r
819         Vector expected = new Vector();\r
820         expected.add("These ");\r
821         expected.add("are ");\r
822         expected.add("'foobles'. ");\r
823         expected.add("Don't ");\r
824         expected.add("you ");\r
825         expected.add("like ");\r
826         expected.add("them?");\r
827         generalIteratorTest(lineBreak, expected);\r
828     }\r
829 \r
830     /**\r
831      * Ticket#5615\r
832      */\r
833     public void TestT5615() {\r
834         com.ibm.icu.util.ULocale[] ulocales = BreakIterator.getAvailableULocales();\r
835         int type = 0;\r
836         com.ibm.icu.util.ULocale loc = null;\r
837         try {\r
838             for (int i = 0; i < ulocales.length; i++) {\r
839                 loc = ulocales[i];\r
840                 for (type = 0; type < 5 /* 5 = BreakIterator.KIND_COUNT */; ++type) {\r
841                     BreakIterator brk = BreakIterator.getBreakInstance(loc, type);\r
842                     if (brk == null) {\r
843                         errln("ERR: Failed to create an instance type: " + type + " / locale: " + loc);\r
844                     }\r
845                 }\r
846             }\r
847         } catch (Exception e) {\r
848             errln("ERR: Failed to create an instance type: " + type + " / locale: " + loc + " / exception: " + e.getMessage());\r
849         }\r
850     }\r
851     \r
852     /*\r
853      * Tests the constructors public DictionaryBasedBreakIterator(String rules, ... public\r
854      * DictionaryBasedBreakIterator(InputStream compiledRules, ...\r
855      */\r
856     public void TestDictionaryBasedBreakIterator() throws IOException {\r
857         // The following class allows the testing of the constructor\r
858         // public DictionaryBasedBreakIterator(String rules, ...\r
859         class TestDictionaryBasedBreakIterator extends DictionaryBasedBreakIterator {\r
860             public TestDictionaryBasedBreakIterator(InputStream is) throws IOException {\r
861                 super("", is);\r
862             }\r
863         }\r
864         try {\r
865             @SuppressWarnings("unused")\r
866             TestDictionaryBasedBreakIterator td = new TestDictionaryBasedBreakIterator(null);\r
867             errln("DictionaryBasedBreakIterator constructor is suppose to return an "\r
868                     + "exception for an empty string.");\r
869         } catch (Exception e) {\r
870         }\r
871         \r
872         try {\r
873             File file = File.createTempFile("dummy", "");\r
874             FileInputStream fis = new FileInputStream(file);\r
875             DataInputStream dis = new DataInputStream(fis);\r
876             @SuppressWarnings("unused")\r
877             TestDictionaryBasedBreakIterator td = new TestDictionaryBasedBreakIterator(dis);\r
878             errln("DictionaryBasedBreakIterator constructor is suppose to return an "\r
879                     + "exception for a temporary file with EOF.");\r
880         } catch (Exception e) {\r
881         }\r
882         \r
883         // The following class allows the testing of the constructor\r
884         // public DictionaryBasedBreakIterator(InputStream compiledRules, ...\r
885         class TestDictionaryBasedBreakIterator1 extends DictionaryBasedBreakIterator {\r
886             public TestDictionaryBasedBreakIterator1() throws IOException {\r
887                 super((InputStream) null, (InputStream) null);\r
888             }\r
889 \r
890         }\r
891         try {\r
892             @SuppressWarnings("unused")\r
893             TestDictionaryBasedBreakIterator1 td1 = new TestDictionaryBasedBreakIterator1();\r
894             errln("DictionaryBasedBreakIterator constructor is suppose to return an "\r
895                     + "exception for an null input stream.");\r
896         } catch (Exception e) {\r
897         }\r
898     }   \r
899 }