]> gitweb.fperrin.net Git - Dictionary.git/blob - jars/icu4j-4_8_1_1/main/tests/core/src/com/ibm/icu/dev/test/rbbi/RBBITestMonkey.java
Added flags.
[Dictionary.git] / jars / icu4j-4_8_1_1 / main / tests / core / src / com / ibm / icu / dev / test / rbbi / RBBITestMonkey.java
1 /*
2  *******************************************************************************
3  * Copyright (C) 2003-2010 International Business Machines Corporation and     *
4  * others. All Rights Reserved.                                                *
5  *******************************************************************************
6  */
7  package com.ibm.icu.dev.test.rbbi;
8
9
10 // Monkey testing of RuleBasedBreakIterator
11 import java.util.ArrayList;
12 import java.util.Arrays;
13 import java.util.List;
14 import java.util.Locale;
15
16 import com.ibm.icu.dev.test.TestFmwk;
17 import com.ibm.icu.lang.UCharacter;
18 import com.ibm.icu.lang.UProperty;
19 import com.ibm.icu.text.BreakIterator;
20 import com.ibm.icu.text.RuleBasedBreakIterator;
21 import com.ibm.icu.text.UTF16;
22 import com.ibm.icu.text.UnicodeSet;
23
24
25 /**
26  * Monkey tests for RBBI.  These tests have independent implementations of
27  * the Unicode TR boundary rules, and compare results between these and ICU's
28  * implementation, using random data.
29  * 
30  * Tests cover Grapheme Cluster (char), Word and Line breaks
31  * 
32  * Ported from ICU4C, original code in file source/test/intltest/rbbitst.cpp
33  *
34  */
35 public class RBBITestMonkey extends TestFmwk {
36     
37     public static void main(String[] args) {   // Command-line entry point.
38         new RBBITestMonkey().run(args);         // Passes the arguments to run() on a fresh test instance.
39     }
40     
41 //
42 //     class RBBIMonkeyKind
43 //
44 //        Monkey Test for Break Iteration
45 //        Abstract interface class.   Concrete derived classes independently
46 //        implement the break rules for different iterator types.
47 //
48 //        The Monkey Test itself doesn't know which type of break iterator it is
49 //        testing, but works purely in terms of the interface defined here.
50 //
51     abstract static class RBBIMonkeyKind {
52     
53         // Return a List of UnicodeSets, representing the character classes used
54         //   for this type of iterator.
55         abstract  List  charClasses();
56
57         // Set the test text on which subsequent calls to next() will operate
58         abstract  void   setText(StringBuffer text);
59
60         // Find the next break position, starting from the specified position.
61         // Return -1 when the starting position is already at or past the end of the text.
62         abstract   int   next(int i);
63         
64         // A Character Property, one of the constants defined in class UProperty.
65         //   The value of this property will be displayed for the characters
66         //    near any test failure.  
67         int   fCharProperty;
68     }
69
70  
71     /**
72      * Monkey test subclass for testing Character (Grapheme Cluster) boundaries.
73      */
74     static class RBBICharMonkey extends RBBIMonkeyKind {
75         List                      fSets;        // The character classes handed out by charClasses().
76
77         UnicodeSet                fCRLFSet;
78         UnicodeSet                fControlSet;
79         UnicodeSet                fExtendSet;
80         UnicodeSet                fPrependSet;
81         UnicodeSet                fSpacingSet;
82         UnicodeSet                fLSet;
83         UnicodeSet                fVSet;
84         UnicodeSet                fTSet;
85         UnicodeSet                fLVSet;
86         UnicodeSet                fLVTSet;
87         UnicodeSet                fHangulSet;
88         UnicodeSet                fAnySet;
89
90         StringBuffer              fText;        // The text being scanned; installed by setText().
91
92
93     RBBICharMonkey() {
94         fText       = null;
95         fCharProperty = UProperty.GRAPHEME_CLUSTER_BREAK;
96         fCRLFSet    = new UnicodeSet("[\\r\\n]");
97         fControlSet = new UnicodeSet("[\\p{Grapheme_Cluster_Break = Control}]");
98         fExtendSet  = new UnicodeSet("[\\p{Grapheme_Cluster_Break = Extend}]");
99         fPrependSet = new UnicodeSet("[\\p{Grapheme_Cluster_Break = Prepend}]");
100         fSpacingSet = new UnicodeSet("[\\p{Grapheme_Cluster_Break = SpacingMark}]");
101         fLSet       = new UnicodeSet("[\\p{Grapheme_Cluster_Break = L}]");
102         fVSet       = new UnicodeSet("[\\p{Grapheme_Cluster_Break = V}]");
103         fTSet       = new UnicodeSet("[\\p{Grapheme_Cluster_Break = T}]");
104         fLVSet      = new UnicodeSet("[\\p{Grapheme_Cluster_Break = LV}]");
105         fLVTSet     = new UnicodeSet("[\\p{Grapheme_Cluster_Break = LVT}]");
106         fHangulSet  = new UnicodeSet();        // Built up below as the union of the L, V, T, LV and LVT sets.
107         fHangulSet.addAll(fLSet);
108         fHangulSet.addAll(fVSet);
109         fHangulSet.addAll(fTSet);
110         fHangulSet.addAll(fLVSet);
111         fHangulSet.addAll(fLVTSet);
112
113         fAnySet     = new UnicodeSet("[\\u0000-\\U0010ffff]");
114
115         fSets       = new ArrayList();         // Only the sets added below are exposed through charClasses().
116         fSets.add(fCRLFSet);
117         fSets.add(fControlSet);
118         fSets.add(fExtendSet);
119         fSets.add(fPrependSet);
120         fSets.add(fSpacingSet);
121         fSets.add(fHangulSet);
122         fSets.add(fAnySet);
123      }
124
125
126     void setText(StringBuffer s) {
127         fText = s;
128     }
129     
130     List charClasses() {
131         return fSets;
132     }
133     
134     int next(int prevPos) {
135         int    p1, p2, p3;    // Indices of the significant code points around the
136                               //   break position being tested.  The candidate break
137                               //   location is before p2.
138     
139         int     breakPos = -1;
140     
141         int   c1, c2, c3;     // The code points at p1, p2 & p3.
142         
143         // Previous break at end of string.  return DONE.
144         if (prevPos >= fText.length()) {
145             return -1;
146         }
147         p1 = p2 = p3 = prevPos;
148         c3 =  UTF16.charAt(fText, prevPos);
149         c1 = c2 = 0;
150     
151         // Loop runs once per "significant" character position in the input text.
152         for (;;) {
153             // Move all of the positions forward in the input string.
154             p1 = p2;  c1 = c2;
155             p2 = p3;  c2 = c3;
156     
157             // Advance p3 by one codepoint; c3 becomes -1 once p3 reaches the end of the text.
158             p3 = moveIndex32(fText, p3, 1);
159             c3 = (p3>=fText.length())? -1: UTF16.charAt(fText, p3);
160     
161             if (p1 == p2) {
162                 // Still warming up the loop.  (won't work with zero length strings, but we don't care)
163                 continue;
164             }
165             if (p2 == fText.length()) {
166                 // Reached end of string.  Always a break position.
167                 break;
168             }
169     
170             // Rule  GB3   CR x LF
171             //     No Extend or Format characters may appear between the CR and LF,
172             //     which requires the additional check for p2 immediately following p1.
173             //
174             if (c1==0x0D && c2==0x0A && p1==(p2-1)) {
175                 continue;
176             }
177     
178             // Rule (GB4).   ( Control | CR | LF ) <break>
179             if (fControlSet.contains(c1) ||
180                 c1 == 0x0D ||
181                 c1 == 0x0A)  {
182                 break;
183             }
184     
185             // Rule (GB5)    <break>  ( Control | CR | LF )
186             //
187             if (fControlSet.contains(c2) ||
188                 c2 == 0x0D ||
189                 c2 == 0x0A)  {
190                 break;
191             }
192     
193     
194             // Rule (GB6)  L x ( L | V | LV | LVT )
195             if (fLSet.contains(c1) &&
196                 (fLSet.contains(c2)  ||
197                     fVSet.contains(c2)  ||
198                     fLVSet.contains(c2) ||
199                     fLVTSet.contains(c2))) {
200                 continue;
201             }
202     
203             // Rule (GB7)    ( LV | V )  x  ( V | T )
204             if ((fLVSet.contains(c1) || fVSet.contains(c1)) &&
205                 (fVSet.contains(c2) || fTSet.contains(c2)))  {
206                 continue;
207             }
208     
209             // Rule (GB8)    ( LVT | T)  x T
210             if ((fLVTSet.contains(c1) || fTSet.contains(c1)) &&
211                 fTSet.contains(c2))  {
212                 continue;
213             }
214     
215             // Rule (GB9)    x  Extend
216             if (fExtendSet.contains(c2))  {
217                 continue;
218             }
219     
220             // Rule (GB9a)   x  SpacingMark
221             if (fSpacingSet.contains(c2)) {
222                 continue;
223             }
224     
225             // Rule (GB9b)   Prepend x
226             if (fPrependSet.contains(c1)) {
227                 continue;
228             }
229     
230             // Rule (GB10)  Any  <break>  Any
231             break;
232         }
233     
234         breakPos = p2;
235         return breakPos;
236         }
237     }
238
239
240     /**
241      * 
242      * Word Monkey Test Class
243      *
244      * 
245      * 
246      */
247     static class RBBIWordMonkey extends RBBIMonkeyKind {
248         List                      fSets;        // The character classes handed out by charClasses().
249         StringBuffer              fText;        // The text being scanned; installed by setText().
250
251         UnicodeSet                fCRSet;
252         UnicodeSet                fLFSet;
253         UnicodeSet                fNewlineSet;
254         UnicodeSet                fKatakanaSet;
255         UnicodeSet                fALetterSet;
256         UnicodeSet                fMidNumLetSet;
257         UnicodeSet                fMidLetterSet;
258         UnicodeSet                fMidNumSet;
259         UnicodeSet                fNumericSet;
260         UnicodeSet                fFormatSet;
261         UnicodeSet                fExtendSet;
262         UnicodeSet                fExtendNumLetSet;
263         UnicodeSet                fOtherSet;
264
265         
266         RBBIWordMonkey() {
267             fCharProperty    = UProperty.WORD_BREAK;
268
269             fCRSet           = new UnicodeSet("[\\p{Word_Break = CR}]");
270             fLFSet           = new UnicodeSet("[\\p{Word_Break = LF}]");
271             fNewlineSet      = new UnicodeSet("[\\p{Word_Break = Newline}]");
272             fALetterSet      = new UnicodeSet("[\\p{Word_Break = ALetter}]");
273             fKatakanaSet     = new UnicodeSet("[\\p{Word_Break = Katakana}]");
274             fMidNumLetSet    = new UnicodeSet("[\\p{Word_Break = MidNumLet}]");
275             fMidLetterSet    = new UnicodeSet("[\\p{Word_Break = MidLetter}]");
276             fMidNumSet       = new UnicodeSet("[\\p{Word_Break = MidNum}]");
277             fNumericSet      = new UnicodeSet("[\\p{Word_Break = Numeric}]");
278             fFormatSet       = new UnicodeSet("[\\p{Word_Break = Format}]");
279             fExtendNumLetSet = new UnicodeSet("[\\p{Word_Break = ExtendNumLet}]");
280             fExtendSet       = new UnicodeSet("[\\p{Word_Break = Extend}]");
281             // NOTE(review): fMidNumLetSet is not subtracted from fOtherSet below, so fOtherSet may still contain it - confirm intended.
282             fOtherSet        = new UnicodeSet();
283             fOtherSet.complement();
284             fOtherSet.removeAll(fCRSet);
285             fOtherSet.removeAll(fLFSet);
286             fOtherSet.removeAll(fNewlineSet);
287             fOtherSet.removeAll(fALetterSet);
288             fOtherSet.removeAll(fKatakanaSet);
289             fOtherSet.removeAll(fMidLetterSet);
290             fOtherSet.removeAll(fMidNumSet);
291             fOtherSet.removeAll(fNumericSet);
292             fOtherSet.removeAll(fFormatSet);
293             fOtherSet.removeAll(fExtendSet);
294             fOtherSet.removeAll(fExtendNumLetSet);
295             // Inhibit dictionary characters from being tested at all.
296             fOtherSet.removeAll(new UnicodeSet("[\\p{LineBreak = Complex_Context}]"));
297
298             fSets            = new ArrayList();
299             fSets.add(fCRSet);
300             fSets.add(fLFSet);
301             fSets.add(fNewlineSet);
302             fSets.add(fALetterSet);
303             fSets.add(fKatakanaSet);
304             fSets.add(fMidLetterSet);
305             fSets.add(fMidNumLetSet);
306             fSets.add(fMidNumSet);
307             fSets.add(fNumericSet);
308             fSets.add(fFormatSet);
309             fSets.add(fExtendSet);
310             fSets.add(fExtendNumLetSet);
311             fSets.add(fOtherSet);
312         }
313         
314         
315         List  charClasses() {
316          return fSets;  
317         }
318         
319         void   setText(StringBuffer s) { 
320             fText = s;        
321         }   
322
323         int   next(int prevPos) {  
324             int    /*p0,*/ p1, p2, p3;      // Indices of the significant code points around the 
325                                         //   break position being tested.  The candidate break
326                                         //   location is before p2.
327             int     breakPos = -1;
328             
329             int c0, c1, c2, c3;   // The code points at p0, p1, p2 & p3.  (p0 itself is not tracked, only c0.)
330             
331             // Previous break at end of string.  return DONE.
332             if (prevPos >= fText.length()) {
333                 return -1;
334             }
335             /*p0 =*/ p1 = p2 = p3 = prevPos;
336             c3 = UTF16.charAt(fText, prevPos);
337             c0 = c1 = c2 = 0;
338             
339             
340
341             // Loop runs once per "significant" character position in the input text.
342             for (;;) {
343                 // Move all of the positions forward in the input string.
344                 /*p0 = p1;*/  c0 = c1;
345                 p1 = p2;  c1 = c2;
346                 p2 = p3;  c2 = c3;
347                 
348                 // Advance p3 by    X(Extend | Format)*   Rule 4
349                 //    But do not advance over Extend & Format following a new line. (Unicode 5.1 change)
350                 do {
351                     p3 = moveIndex32(fText, p3, 1);
352                     c3 = -1;
353                     if (p3>=fText.length()) {
354                         break;
355                     }
356                     c3 = UTF16.charAt(fText, p3);
357                     if (fCRSet.contains(c2) || fLFSet.contains(c2) || fNewlineSet.contains(c2)) {
358                         break;
359                     }
360                 }
361                 while (setContains(fFormatSet, c3) || setContains(fExtendSet, c3));
362
363                 if (p1 == p2) {
364                     // Still warming up the loop.  (won't work with zero length strings, but we don't care)
365                     continue;
366                 }
367                 if (p2 == fText.length()) {
368                     // Reached end of string.  Always a break position.
369                     break;
370                 }
371
372                 // Rule (3)   CR x LF
373                 //     No Extend or Format characters may appear between the CR and LF.
374                 //     No explicit adjacency check is made here: the scan loop above stops after a
375                 //     single step whenever c2 is a CR, LF or Newline, so nothing is skipped after a CR.
376                 if (c1==0x0D && c2==0x0A) {
377                     continue;
378                 }
379                 
380                 // Rule (3a)  Break before and after newlines (including CR and LF)
381                 //
382                 if (fCRSet.contains(c1) || fLFSet.contains(c1) || fNewlineSet.contains(c1)) {
383                     break;
384                 }
385                 if (fCRSet.contains(c2) || fLFSet.contains(c2) || fNewlineSet.contains(c2)) {
386                     break;
387                 }
388
389                 // Rule (5).   ALetter x ALetter
390                 if (fALetterSet.contains(c1) &&
391                         fALetterSet.contains(c2))  {
392                     continue;
393                 }
394                 
395                 // Rule (6)  ALetter  x  (MidLetter | MidNumLet)  ALetter
396                 //
397                 if ( fALetterSet.contains(c1) &&
398                         (fMidLetterSet.contains(c2) || fMidNumLetSet.contains(c2)) &&
399                         setContains(fALetterSet, c3)) {
400                     continue;
401                 }
402                 
403                 
404                 // Rule (7)  ALetter (MidLetter | MidNumLet)   x  ALetter
405                 if (fALetterSet.contains(c0) &&
406                         (fMidLetterSet.contains(c1) ||  fMidNumLetSet.contains(c1))  &&
407                         fALetterSet.contains(c2)) {
408                     continue;
409                 }
410                 
411                 //  Rule (8)    Numeric x Numeric
412                 if (fNumericSet.contains(c1) &&
413                         fNumericSet.contains(c2))  {
414                     continue;
415                 }
416                 
417                 // Rule (9)    ALetter x Numeric
418                 if (fALetterSet.contains(c1) &&
419                         fNumericSet.contains(c2))  {
420                     continue;
421                 }
422
423                 // Rule (10)    Numeric x ALetter
424                 if (fNumericSet.contains(c1) &&
425                         fALetterSet.contains(c2))  {
426                     continue;
427                 }
428                 
429                 // Rule (11)   Numeric (MidNum | MidNumLet)  x  Numeric
430                 if ( fNumericSet.contains(c0) &&
431                         (fMidNumSet.contains(c1) || fMidNumLetSet.contains(c1))  && 
432                         fNumericSet.contains(c2)) {
433                     continue;
434                 }
435                 
436                 // Rule (12)  Numeric x (MidNum | MidNumLet) Numeric
437                 if (fNumericSet.contains(c1) &&
438                         (fMidNumSet.contains(c2) || fMidNumLetSet.contains(c2)) &&
439                         setContains(fNumericSet, c3)) {
440                     continue;
441                 }
442                 
443                 // Rule (13)  Katakana x Katakana
444                 if (fKatakanaSet.contains(c1) &&
445                         fKatakanaSet.contains(c2))  {
446                     continue;
447                 }
448                 
449                 // Rule 13a  (ALetter | Numeric | Katakana | ExtendNumLet) x ExtendNumLet
450                 if ((fALetterSet.contains(c1) || fNumericSet.contains(c1) ||
451                         fKatakanaSet.contains(c1) || fExtendNumLetSet.contains(c1)) &&
452                         fExtendNumLetSet.contains(c2)) {
453                     continue;
454                 }
455                 // Rule 13b   ExtendNumLet x (ALetter | Numeric | Katakana | ExtendNumLet)
456                 if (fExtendNumLetSet.contains(c1) &&
457                         (fALetterSet.contains(c2) || fNumericSet.contains(c2) ||
458                         fKatakanaSet.contains(c2) || fExtendNumLetSet.contains(c2))) {
459                     continue;
460                 }
461                
462                 // Rule 14.  Break found here.
463                 break;
464             }
465             
466             breakPos = p2;
467             return breakPos;
468         }
469         
470     }
471
472  
473     static class RBBILineMonkey extends RBBIMonkeyKind {
474         
475         List        fSets;
476         
477         UnicodeSet  fBK;
478         UnicodeSet  fCR;
479         UnicodeSet  fLF;
480         UnicodeSet  fCM;
481         UnicodeSet  fNL;
482         UnicodeSet  fSG;
483         UnicodeSet  fWJ;
484         UnicodeSet  fZW;
485         UnicodeSet  fGL;
486         UnicodeSet  fCB;
487         UnicodeSet  fSP;
488         UnicodeSet  fB2;
489         UnicodeSet  fBA;
490         UnicodeSet  fBB;
491         UnicodeSet  fHY;
492         UnicodeSet  fCL;
493         UnicodeSet  fCP;
494         UnicodeSet  fEX;
495         UnicodeSet  fIN;
496         UnicodeSet  fNS;
497         UnicodeSet  fOP;
498         UnicodeSet  fQU;
499         UnicodeSet  fIS;
500         UnicodeSet  fNU;
501         UnicodeSet  fPO;
502         UnicodeSet  fPR;
503         UnicodeSet  fSY;
504         UnicodeSet  fAI;
505         UnicodeSet  fAL;
506         UnicodeSet  fID;
507         UnicodeSet  fSA;
508         UnicodeSet  fJL;
509         UnicodeSet  fJV;
510         UnicodeSet  fJT;
511         UnicodeSet  fH2;
512         UnicodeSet  fH3;
513         UnicodeSet  fXX;
514         
515         StringBuffer  fText;
516         int           fOrigPositions;
517         
518         
519         
520         RBBILineMonkey()
521         {
522             fCharProperty  = UProperty.LINE_BREAK;
523             fSets          = new ArrayList();
524             
525             fBK    = new UnicodeSet("[\\p{Line_Break=BK}]");
526             fCR    = new UnicodeSet("[\\p{Line_break=CR}]");
527             fLF    = new UnicodeSet("[\\p{Line_break=LF}]");
528             fCM    = new UnicodeSet("[\\p{Line_break=CM}]");
529             fNL    = new UnicodeSet("[\\p{Line_break=NL}]");
530             fWJ    = new UnicodeSet("[\\p{Line_break=WJ}]");
531             fZW    = new UnicodeSet("[\\p{Line_break=ZW}]");
532             fGL    = new UnicodeSet("[\\p{Line_break=GL}]");
533             fCB    = new UnicodeSet("[\\p{Line_break=CB}]");
534             fSP    = new UnicodeSet("[\\p{Line_break=SP}]");
535             fB2    = new UnicodeSet("[\\p{Line_break=B2}]");
536             fBA    = new UnicodeSet("[\\p{Line_break=BA}]");
537             fBB    = new UnicodeSet("[\\p{Line_break=BB}]");
538             fHY    = new UnicodeSet("[\\p{Line_break=HY}]");
539             fCL    = new UnicodeSet("[\\p{Line_break=CL}]");
540             fCP    = new UnicodeSet("[\\p{Line_break=CP}]");
541             fEX    = new UnicodeSet("[\\p{Line_break=EX}]");
542             fIN    = new UnicodeSet("[\\p{Line_break=IN}]");
543             fNS    = new UnicodeSet("[\\p{Line_break=NS}]");
544             fOP    = new UnicodeSet("[\\p{Line_break=OP}]");
545             fQU    = new UnicodeSet("[\\p{Line_break=QU}]");
546             fIS    = new UnicodeSet("[\\p{Line_break=IS}]");
547             fNU    = new UnicodeSet("[\\p{Line_break=NU}]");
548             fPO    = new UnicodeSet("[\\p{Line_break=PO}]");
549             fPR    = new UnicodeSet("[\\p{Line_break=PR}]");
550             fSY    = new UnicodeSet("[\\p{Line_break=SY}]");
551             fAI    = new UnicodeSet("[\\p{Line_break=AI}]");
552             fAL    = new UnicodeSet("[\\p{Line_break=AL}]");
553             fID    = new UnicodeSet("[\\p{Line_break=ID}]");
554             fSA    = new UnicodeSet("[\\p{Line_break=SA}]");
555             fJL    = new UnicodeSet("[\\p{Line_break=JL}]");
556             fJV    = new UnicodeSet("[\\p{Line_break=JV}]");
557             fJT    = new UnicodeSet("[\\p{Line_break=JT}]");
558             fH2    = new UnicodeSet("[\\p{Line_break=H2}]");
559             fH3    = new UnicodeSet("[\\p{Line_break=H3}]");
560             fSG    = new UnicodeSet("[\\ud800-\\udfff]");
561             fXX    = new UnicodeSet("[\\p{Line_break=XX}]");
562
563             
564             fAL.addAll(fXX);     // Default behavior for XX is identical to AL
565             fAL.addAll(fAI);     // Default behavior for AI is identical to AL
566             fAL.addAll(fSA);     // Default behavior for SA is XX, which defaults to AL
567             fAL.addAll(fSG);     // Default behavior for SG (unpaired surrogates) is AL
568             
569             
570             
571             fSets.add(fBK);
572             fSets.add(fCR);
573             fSets.add(fLF);
574             fSets.add(fCM);
575             fSets.add(fNL);
576             fSets.add(fWJ);
577             fSets.add(fZW);
578             fSets.add(fGL);
579             fSets.add(fCB);
580             fSets.add(fSP);
581             fSets.add(fB2);
582             fSets.add(fBA);
583             fSets.add(fBB);
584             fSets.add(fHY);
585             fSets.add(fH2);
586             fSets.add(fH3);
587             fSets.add(fCL);
588             fSets.add(fCP);
589             fSets.add(fEX);
590             fSets.add(fIN);
591             fSets.add(fJL);
592             fSets.add(fJT);
593             fSets.add(fJV);
594             fSets.add(fNS);
595             fSets.add(fOP);
596             fSets.add(fQU);
597             fSets.add(fIS);
598             fSets.add(fNU);
599             fSets.add(fPO);
600             fSets.add(fPR);
601             fSets.add(fSY);
602             fSets.add(fAI);
603             fSets.add(fAL);
604             fSets.add(fID);
605             fSets.add(fWJ);
606             fSets.add(fSA);
607             fSets.add(fSG);
608             
609         }
610         
611         void setText(StringBuffer s) {
612             fText       = s;
613         }
614         
615         
616         
617
618         int next(int startPos) {
619             int    pos;       //  Index of the char following a potential break position
620             int    thisChar;  //  Character at above position "pos"
621             
622             int    prevPos;   //  Index of the char preceding a potential break position
623             int    prevChar;  //  Character at above position.  Note that prevChar
624                               //   and thisChar may not be adjacent because combining
625                               //   characters between them will be ignored.
626             
627             int    nextPos;   //  Index of the next character following pos.
628                               //     Usually skips over combining marks.
629             int    tPos;      //  temp value.
630             int    matchVals[]  = null;       // Number  Expression Match Results
631  
632             
633             if (startPos >= fText.length()) {
634                 return -1;
635             }
636             
637             
638             // Initial values for loop.  Loop will run the first time without finding breaks,
639             //                           while the invalid values shift out and the "this" and
640             //                           "prev" positions are filled in with good values.
641             pos      = prevPos   = -1;    // Invalid value, serves as flag for initial loop iteration.
642             thisChar = prevChar  = 0;
643             nextPos  = startPos;
644             
645             
646             // Loop runs once per position in the test text, until a break position
647             //  is found.  In each iteration, we are testing for a possible break
648             //  just preceding the character at index "pos".  The character preceding
649             //  this char is at postion "prevPos"; because of combining sequences,
650             //  "prevPos" can be arbitrarily far before "pos".
651             for (;;) {
652                 // Advance to the next position to be tested.
653                 prevPos   = pos;
654                 prevChar  = thisChar;
655                 pos       = nextPos;
656                 nextPos   = moveIndex32(fText, pos, 1);
657                 
658                 // Rule LB2 - Break at end of text.
659                 if (pos >= fText.length()) {
660                     break;
661                 }
662                 
663                 // Rule LB 9 - adjust for combining sequences.
664                 //             We do this rule out-of-order because the adjustment does
665                 //             not effect the way that rules LB 3 through LB 6 match,
666                 //             and doing it here rather than after LB 6 is substantially
667                 //             simpler when combining sequences do occur.
668                 
669                 
670                 // LB 9         Keep combining sequences together.
671                 //              advance over any CM class chars at "pos", 
672                 //              result is "nextPos" for the following loop iteration.
673                 thisChar  = UTF16.charAt(fText, pos);
674                 if (!(fSP.contains(thisChar) || fBK.contains(thisChar) || thisChar==0x0d ||
675                         thisChar==0x0a || fNL.contains(thisChar) || fZW.contains(thisChar) )) {
676                     for (;;) {
677                         if (nextPos == fText.length()) {
678                             break;   
679                         }
680                         int nextChar = UTF16.charAt(fText, nextPos);
681                         if (!fCM.contains(nextChar)) {
682                             break;
683                         }
684                         nextPos = moveIndex32(fText, nextPos, 1);
685                     }
686                 }
687                 
688                 // LB 9 Treat X CM* as if it were X
689                 //        No explicit action required.
690                 
691                 // LB 10     Treat any remaining combining mark as AL
692                 if (fCM.contains(thisChar)) {
693                     thisChar = 'A';   
694                 }
695
696                 
697                 // If the loop is still warming up - if we haven't shifted the initial
698                 //   -1 positions out of prevPos yet - loop back to advance the
699                 //    position in the input without any further looking for breaks.
700                 if (prevPos == -1) {
701                     continue;
702                 }
703                 
704                 // LB 4  Always break after hard line breaks,
705                 if (fBK.contains(prevChar)) {
706                     break;
707                 }
708                 
709                 // LB 5  Break after CR, LF, NL, but not inside CR LF
710                 if (fCR.contains(prevChar) && fLF.contains(thisChar)) {
711                     continue;
712                 }
713                 if  (fCR.contains(prevChar) ||
714                      fLF.contains(prevChar) ||
715                      fNL.contains(prevChar))  {
716                     break;
717                 }
718                 
719                 // LB 6  Don't break before hard line breaks
720                 if (fBK.contains(thisChar) || fCR.contains(thisChar) ||
721                         fLF.contains(thisChar) || fNL.contains(thisChar) ) {
722                     continue;
723                 }
724                 
725                 
726                 // LB 7  Don't break before spaces or zero-width space.
727                 if (fSP.contains(thisChar)) {
728                     continue;
729                 }
730                 
731                 if (fZW.contains(thisChar)) {
732                     continue;
733                 }
734                 
735                 // LB 8  Break after zero width space
736                 if (fZW.contains(prevChar)) {
737                     break;
738                 }
739                 
740                 //  LB 9, 10  Already done, at top of loop.
741                 //
742                 
743                 
744                 // LB 11
745                 //    x  WJ
746                 //    WJ  x
747                 if (fWJ.contains(thisChar) || fWJ.contains(prevChar)) {
748                     continue;
749                 }
750                 
751                 
752                 // LB 12
753                 //        GL x
754                 if (fGL.contains(prevChar)) {
755                     continue;
756                 }
757                 
758                 // LB 12a
759                 //    [^SP BA HY] x GL
760                 if (!(fSP.contains(prevChar) ||
761                       fBA.contains(prevChar) ||
762                       fHY.contains(prevChar)     ) && fGL.contains(thisChar)) {
763                     continue;
764                 }
765
766                 
767                 
768                 // LB 13  Don't break before closings.
769                 //       NU x CL, NU x CP  and NU x IS are not matched here so that they will
770                 //       fall into LB 17 and the more general number regular expression.
771                 //
772                 if (!fNU.contains(prevChar) && fCL.contains(thisChar) ||
773                     !fNU.contains(prevChar) && fCP.contains(thisChar) ||
774                                                fEX.contains(thisChar) ||
775                     !fNU.contains(prevChar) && fIS.contains(thisChar) ||
776                     !fNU.contains(prevChar) && fSY.contains(thisChar))    {
777                     continue;
778                 }
779                 
780                 // LB 14  Don't break after OP SP*
781                 //       Scan backwards, checking for this sequence.
782                 //       The OP char could include combining marks, so we actually check for
783                 //           OP CM* SP* x
784                 tPos = prevPos;
785                 if (fSP.contains(prevChar)) {
786                     while (tPos > 0 && fSP.contains(UTF16.charAt(fText, tPos))) {
787                         tPos=moveIndex32(fText, tPos, -1);
788                     }
789                 }
790                 while (tPos > 0 && fCM.contains(UTF16.charAt(fText, tPos))) {
791                     tPos=moveIndex32(fText, tPos, -1);
792                 }
793                 if (fOP.contains(UTF16.charAt(fText, tPos))) {
794                     continue;
795                 }
796                 
797                 // LB 15 Do not break within "[ 
798                 //       QU CM* SP* x OP
799                 if (fOP.contains(thisChar)) {
800                     // Scan backwards from prevChar to see if it is preceded by QU CM* SP*
801                     tPos = prevPos;
802                     while (tPos > 0 && fSP.contains(UTF16.charAt(fText, tPos))) {
803                         tPos = moveIndex32(fText, tPos, -1);
804                     }
805                     while (tPos > 0 && fCM.contains(UTF16.charAt(fText, tPos))) {
806                         tPos = moveIndex32(fText, tPos, -1);
807                     }
808                     if (fQU.contains(UTF16.charAt(fText, tPos))) {
809                         continue;
810                     }
811                 }               
812                 
813                 // LB 16   (CL | CP) SP* x NS
814                 if (fNS.contains(thisChar)) {
815                     tPos = prevPos;
816                     while (tPos > 0 && fSP.contains(UTF16.charAt(fText, tPos))) {
817                         tPos = moveIndex32(fText, tPos, -1);
818                     }
819                     while (tPos > 0 && fCM.contains(UTF16.charAt(fText, tPos))) {
820                         tPos = moveIndex32(fText, tPos, -1);
821                     }
822                     if (fCL.contains(UTF16.charAt(fText, tPos)) || fCP.contains(UTF16.charAt(fText, tPos))) {
823                         continue;
824                     }
825                 }               
826                 
827                                
828                 // LB 17        B2 SP* x B2
829                 if (fB2.contains(thisChar)) {
830                     tPos = prevPos;
831                     while (tPos > 0 && fSP.contains(UTF16.charAt(fText, tPos))) {
832                         tPos = moveIndex32(fText, tPos, -1);
833                     }
834                     while (tPos > 0 && fCM.contains(UTF16.charAt(fText, tPos))) {
835                         tPos = moveIndex32(fText, tPos, -1);
836                     }
837                     if (fB2.contains(UTF16.charAt(fText, tPos))) {
838                         continue;
839                     }
840                 }               
841                 
842                 // LB 18    break after space
843                 if (fSP.contains(prevChar)) {
844                     break;
845                 }
846                 
847                 // LB 19
848                 //    x   QU
849                 //    QU  x
850                 if (fQU.contains(thisChar) || fQU.contains(prevChar)) {
851                     continue;
852                 }
853                 
854                 // LB 20  Break around a CB
855                 if (fCB.contains(thisChar) || fCB.contains(prevChar)) {
856                     break;
857                 }
858                 
859                 // LB 21
860                 if (fBA.contains(thisChar) ||
861                         fHY.contains(thisChar) ||
862                         fNS.contains(thisChar) ||
863                         fBB.contains(prevChar) )   {
864                     continue;
865                 }
866                 
867                 // LB 22
868                 if (fAL.contains(prevChar) && fIN.contains(thisChar) ||
869                         fID.contains(prevChar) && fIN.contains(thisChar) ||
870                         fIN.contains(prevChar) && fIN.contains(thisChar) ||
871                         fNU.contains(prevChar) && fIN.contains(thisChar) )   {
872                     continue;
873                 }
874                 
875                 
876                 // LB 23    ID x PO    (Note:  Leading CM behaves like ID)
877                 //          AL x NU
878                 //          NU x AL
879                 if (fID.contains(prevChar) && fPO.contains(thisChar) ||
880                         fAL.contains(prevChar) && fNU.contains(thisChar) ||
881                         fNU.contains(prevChar) && fAL.contains(thisChar) )   {
882                     continue;
883                 }
884                 
885                 // LB 24  Do not break between prefix and letters or ideographs.
886                 //        PR x ID
887                 //        PR x AL
888                 //        PO x AL
889                 if (fPR.contains(prevChar) && fID.contains(thisChar) ||
890                     fPR.contains(prevChar) && fAL.contains(thisChar) ||
891                     fPO.contains(prevChar) && fAL.contains(thisChar))  {
892                     continue;
893                 }
894                 
895                 
896                 // LB 25    Numbers
897                 matchVals = LBNumberCheck(fText, prevPos, matchVals);
898                 if (matchVals[0] != -1) {
899                     // Matched a number.  But could have been just a single digit, which would
900                     //    not represent a "no break here" between prevChar and thisChar
901                     int numEndIdx = matchVals[1];  // idx of first char following num
902                     if (numEndIdx > pos) {
903                         // Number match includes at least the two chars being checked
904                         if (numEndIdx > nextPos) {
905                             // Number match includes additional chars.  Update pos and nextPos
906                             //   so that next loop iteration will continue at the end of the number,
907                             //   checking for breaks between last char in number & whatever follows.
908                             nextPos = numEndIdx;
909                             pos     = numEndIdx;
910                             do {
911                                 pos = moveIndex32(fText, pos, -1);  
912                                 thisChar = UTF16.charAt(fText, pos);
913                             }
914                             while (fCM.contains(thisChar));
915                         }
916                         continue;
917                     }
918                 }
919                 
920                 
921                 // LB 26  Do not break Korean Syllables
922                 if (fJL.contains(prevChar) && (fJL.contains(thisChar) ||
923                                                 fJV.contains(thisChar) ||
924                                                 fH2.contains(thisChar) ||
925                                                 fH3.contains(thisChar))) {
926                                                     continue;
927                                                 }
928
929                 if ((fJV.contains(prevChar) || fH2.contains(prevChar))  &&
930                     (fJV.contains(thisChar) || fJT.contains(thisChar))) {
931                         continue;
932                 }
933
934                 if ((fJT.contains(prevChar) || fH3.contains(prevChar)) &&
935                     fJT.contains(thisChar)) {
936                         continue;
937                 }
938
939                 // LB 27 Treat a Korean Syllable Block the same as ID
940                 if ((fJL.contains(prevChar) || fJV.contains(prevChar) ||
941                     fJT.contains(prevChar) || fH2.contains(prevChar) || fH3.contains(prevChar)) &&
942                     fIN.contains(thisChar)) {
943                         continue;
944                     }
945                 if ((fJL.contains(prevChar) || fJV.contains(prevChar) ||
946                     fJT.contains(prevChar) || fH2.contains(prevChar) || fH3.contains(prevChar)) &&
947                     fPO.contains(thisChar)) {
948                         continue;
949                     }
950                 if (fPR.contains(prevChar) && (fJL.contains(thisChar) || fJV.contains(thisChar) ||
951                     fJT.contains(thisChar) || fH2.contains(thisChar) || fH3.contains(thisChar))) {
952                         continue;
953                     }
954
955                 
956                 
957                 // LB 28 Do not break between alphabetics
958                 if (fAL.contains(prevChar) && fAL.contains(thisChar)) {
959                     continue;
960                 }
961                 
962                 // LB 29  Do not break between numeric punctuation and alphabetics
963                 if (fIS.contains(prevChar) && fAL.contains(thisChar)) {
964                     continue;
965                 }
966                 
967                 // LB 30    Do not break between letters, numbers, or ordinary symbols and opening or closing punctuation.
968                 //          (AL | NU) x OP
969                 //          CP x (AL | NU)
970                 if ((fAL.contains(prevChar) || fNU.contains(prevChar)) && fOP.contains(thisChar)) {
971                     continue;
972                 }
973                 if (fCP.contains(prevChar) && (fAL.contains(thisChar) || fNU.contains(thisChar))) {
974                     continue;
975                 }
976
977               
978                 // LB 31    Break everywhere else
979                 break;            
980             }
981             
982             return pos;
983         }
984         
985         
986         
987         // Match the following regular expression in the input text.
988         //    ((PR | PO) CM*)? ((OP | HY) CM*)? NU CM* ((NU | IS | SY) CM*) * ((CL | CP) CM*)?  (PR | PO) CM*)?
989         //      0    0   1       3    3    4              7    7    7    7      9    9    9     11   11    (match states)
990         //  retVals array  [0]  index of the start of the match, or -1 if no match
991         //                 [1]  index of first char following the match.
992         //  Can not use Java regex because need supplementary character support,
993         //     and because Unicode char properties version must be the same as in
994         //     the version of ICU being tested.
995         private int[] LBNumberCheck(StringBuffer s, int startIdx, int[] retVals) {
996             if (retVals == null) {
997                 retVals = new int[2];
998              }
999             retVals[0]     = -1;  // Indicates no match.
1000             int matchState = 0;
1001             int idx        = startIdx;
1002             
1003             matchLoop: for (idx = startIdx; idx<s.length(); idx = moveIndex32(s, idx, 1)){
1004                 int c = UTF16.charAt(s, idx);
1005                 int cLBType = UCharacter.getIntPropertyValue(c, UProperty.LINE_BREAK);
1006                 switch (matchState) {
1007                     case 0:   
1008                         if (cLBType == UCharacter.LineBreak.PREFIX_NUMERIC ||
1009                             cLBType == UCharacter.LineBreak.POSTFIX_NUMERIC) {
1010                             matchState = 1;  
1011                             break;
1012                         }
1013                         if (cLBType == UCharacter.LineBreak.OPEN_PUNCTUATION) {
1014                             matchState = 4;
1015                             break;
1016                         }
1017                         if (cLBType == UCharacter.LineBreak.HYPHEN) {
1018                             matchState = 4;
1019                             break;
1020                         }
1021                         if (cLBType == UCharacter.LineBreak.NUMERIC) {
1022                             matchState = 7;
1023                             break;
1024                         }
1025                         break matchLoop;   /* No Match  */
1026                         
1027                     case 1:
1028                         if (cLBType == UCharacter.LineBreak.COMBINING_MARK) {
1029                             matchState = 1;
1030                             break;
1031                         }
1032                         if (cLBType == UCharacter.LineBreak.OPEN_PUNCTUATION) {
1033                             matchState = 4;
1034                             break;
1035                         }
1036                         if (cLBType == UCharacter.LineBreak.HYPHEN) {
1037                             matchState = 4;
1038                             break;
1039                         }
1040                         if (cLBType == UCharacter.LineBreak.NUMERIC) {
1041                             matchState = 7;
1042                             break;
1043                         }
1044                         break matchLoop;   /* No Match  */
1045                         
1046                         
1047                     case 4:
1048                         if (cLBType == UCharacter.LineBreak.COMBINING_MARK) {
1049                             matchState = 4;
1050                             break;
1051                         }
1052                         if (cLBType == UCharacter.LineBreak.NUMERIC) {
1053                             matchState = 7;
1054                             break;
1055                         }
1056                         break matchLoop;   /* No Match  */
1057                         //    ((PR | PO) CM*)? ((OP | HY) CM*)? NU CM* ((NU | IS | SY) CM*) * (CL CM*)?  (PR | PO) CM*)?
1058                         //      0    0   1       3    3    4              7    7    7    7      9   9     11   11    (match states)
1059                  
1060                     case 7:
1061                         if (cLBType == UCharacter.LineBreak.COMBINING_MARK) {
1062                             matchState = 7;
1063                             break;                           
1064                         }
1065                         if (cLBType == UCharacter.LineBreak.NUMERIC) {
1066                             matchState = 7;
1067                             break;                           
1068                         }
1069                         if (cLBType == UCharacter.LineBreak.INFIX_NUMERIC) {
1070                             matchState = 7;
1071                             break;                           
1072                         }
1073                         if (cLBType == UCharacter.LineBreak.BREAK_SYMBOLS) {
1074                             matchState = 7;
1075                             break;       
1076                         }
1077                         if (cLBType == UCharacter.LineBreak.CLOSE_PUNCTUATION) {
1078                             matchState = 9;
1079                             break;                           
1080                         }
1081                         if (cLBType == UCharacter.LineBreak.CLOSE_PARENTHESIS) {
1082                             matchState = 9;
1083                             break;                           
1084                         }
1085                         if (cLBType == UCharacter.LineBreak.POSTFIX_NUMERIC) {
1086                             matchState = 11;
1087                             break;                           
1088                         }
1089                         if (cLBType == UCharacter.LineBreak.PREFIX_NUMERIC) {
1090                             matchState = 11;
1091                             break;                           
1092                         }
1093
1094                         break matchLoop;    // Match Complete.
1095                     case 9:
1096                         if (cLBType == UCharacter.LineBreak.COMBINING_MARK) {
1097                             matchState = 9;
1098                             break;                           
1099                         }
1100                         if (cLBType == UCharacter.LineBreak.POSTFIX_NUMERIC) {
1101                             matchState = 11;
1102                             break;                           
1103                         }
1104                         if (cLBType == UCharacter.LineBreak.PREFIX_NUMERIC) {
1105                             matchState = 11;
1106                             break;                           
1107                         }
1108                         break matchLoop;    // Match Complete.
1109                     case 11:
1110                         if (cLBType == UCharacter.LineBreak.COMBINING_MARK) {
1111                             matchState = 11;
1112                             break;                           
1113                         }
1114                         break matchLoop;    // Match Complete.
1115                 }
1116             }
1117             if (matchState > 4) {
1118                 retVals[0] = startIdx;   
1119                  retVals[1] = idx;   
1120             }
1121             return retVals;
1122         }
1123         
1124         
1125         List  charClasses() {
1126             return fSets;
1127         }
1128         
1129         
1130     
1131     }
1132
1133      
1134     /**
1135      * 
1136      * Sentence Monkey Test Class
1137      *
1138      * 
1139      * 
1140      */
1141     static class RBBISentenceMonkey extends RBBIMonkeyKind {
1142         List                 fSets;
1143         StringBuffer         fText;
1144
1145         UnicodeSet           fSepSet;
1146         UnicodeSet           fFormatSet;
1147         UnicodeSet           fSpSet;
1148         UnicodeSet           fLowerSet;
1149         UnicodeSet           fUpperSet;
1150         UnicodeSet           fOLetterSet;
1151         UnicodeSet           fNumericSet;
1152         UnicodeSet           fATermSet;
1153         UnicodeSet           fSContinueSet;
1154         UnicodeSet           fSTermSet;
1155         UnicodeSet           fCloseSet;
1156         UnicodeSet           fOtherSet;
1157         UnicodeSet           fExtendSet;
1158
1159  
1160         
1161         RBBISentenceMonkey() {
1162             fCharProperty  = UProperty.SENTENCE_BREAK;
1163
1164             fSets            = new ArrayList();
1165
1166             //  Separator Set Note:  Beginning with Unicode 5.1, CR and LF were removed from the separator
1167             //                       set and made into character classes of their own.  For the monkey impl,
1168             //                       they remain in SEP, since Sep always appears with CR and LF in the rules.
1169             fSepSet          = new UnicodeSet("[\\p{Sentence_Break = Sep} \\u000a \\u000d]");
1170             fFormatSet       = new UnicodeSet("[\\p{Sentence_Break = Format}]");
1171             fSpSet           = new UnicodeSet("[\\p{Sentence_Break = Sp}]");
1172             fLowerSet        = new UnicodeSet("[\\p{Sentence_Break = Lower}]");
1173             fUpperSet        = new UnicodeSet("[\\p{Sentence_Break = Upper}]");
1174             fOLetterSet      = new UnicodeSet("[\\p{Sentence_Break = OLetter}]");
1175             fNumericSet      = new UnicodeSet("[\\p{Sentence_Break = Numeric}]");
1176             fATermSet        = new UnicodeSet("[\\p{Sentence_Break = ATerm}]");
1177             fSContinueSet    = new UnicodeSet("[\\p{Sentence_Break = SContinue}]");
1178             fSTermSet        = new UnicodeSet("[\\p{Sentence_Break = STerm}]");
1179             fCloseSet        = new UnicodeSet("[\\p{Sentence_Break = Close}]");
1180             fExtendSet       = new UnicodeSet("[\\p{Sentence_Break = Extend}]");
1181             fOtherSet        = new UnicodeSet();
1182
1183
1184             fOtherSet.complement();
1185             fOtherSet.removeAll(fSepSet);
1186             fOtherSet.removeAll(fFormatSet);
1187             fOtherSet.removeAll(fSpSet);
1188             fOtherSet.removeAll(fLowerSet);
1189             fOtherSet.removeAll(fUpperSet);
1190             fOtherSet.removeAll(fOLetterSet);
1191             fOtherSet.removeAll(fNumericSet);
1192             fOtherSet.removeAll(fATermSet);
1193             fOtherSet.removeAll(fSContinueSet);
1194             fOtherSet.removeAll(fSTermSet);
1195             fOtherSet.removeAll(fCloseSet);
1196             fOtherSet.removeAll(fExtendSet);
1197
1198             fSets.add(fSepSet);
1199             fSets.add(fFormatSet);
1200
1201             fSets.add(fSpSet);
1202             fSets.add(fLowerSet);
1203             fSets.add(fUpperSet);
1204             fSets.add(fOLetterSet);
1205             fSets.add(fNumericSet);
1206             fSets.add(fATermSet);
1207             fSets.add(fSContinueSet);
1208             fSets.add(fSTermSet);
1209             fSets.add(fCloseSet);
1210             fSets.add(fOtherSet);
1211             fSets.add(fExtendSet);
1212         }
1213         
1214         
1215         List  charClasses() {
1216             return fSets;  
1217         }
1218         
1219         void   setText(StringBuffer s) { 
1220             fText = s;        
1221         }   
1222
1223         
1224         //      moveBack()   Find the "significant" code point preceding the index i.
1225         //      Skips over ($Extend | $Format)*
1226         // 
1227         private int moveBack(int i) {
1228             
1229             if (i <= 0) {
1230                 return -1;
1231             }
1232             
1233             int      c;
1234             int      j = i;
1235             do {
1236                 j = moveIndex32(fText, j, -1);
1237                 c = UTF16.charAt(fText, j);
1238             }
1239             while (j>0 &&(fFormatSet.contains(c) || fExtendSet.contains(c)));
1240             return j;
1241         }
1242         
1243         
1244         int moveForward(int i) {
1245             if (i>=fText.length()) {
1246                 return fText.length();
1247             }
1248             int   c;
1249             int   j = i;
1250             do {
1251                 j = moveIndex32(fText, j, 1);
1252                 c = cAt(j);
1253             }
1254             while (c>=0 && (fFormatSet.contains(c) || fExtendSet.contains(c)));
1255             return j;
1256            
1257         }
1258         
1259         int cAt(int pos) {
1260             if (pos<0 || pos>=fText.length()) {
1261                 return -1;
1262             }
1263             return UTF16.charAt(fText, pos);
1264         }
1265
1266         int   next(int prevPos) {  
1267             int    /*p0,*/ p1, p2, p3;      // Indices of the significant code points around the 
1268                                         //   break position being tested.  The candidate break
1269                                         //   location is before p2.
1270             int     breakPos = -1;
1271             
1272             int c0, c1, c2, c3;         // The code points at p0, p1, p2 & p3.
1273             int c;
1274             
1275             // Prev break at end of string.  return DONE.
1276             if (prevPos >= fText.length()) {
1277                 return -1;
1278             }
1279             /*p0 =*/ p1 = p2 = p3 = prevPos;
1280             c3 = UTF16.charAt(fText, prevPos);
1281             c0 = c1 = c2 = 0;
1282             
1283             // Loop runs once per "significant" character position in the input text.
1284             for (;;) {
1285                 // Move all of the positions forward in the input string.
1286                 /*p0 = p1;*/  c0 = c1;
1287                 p1 = p2;  c1 = c2;
1288                 p2 = p3;  c2 = c3;
1289                 
1290                 // Advancd p3 by  X(Extend | Format)*   Rule 4
1291                 p3 = moveForward(p3);
1292                 c3 = cAt(p3);
1293                 
1294                 // Rule (3) CR x LF
1295                 if (c1==0x0d && c2==0x0a && p2==(p1+1)) {
1296                     continue;
1297                 }
1298                 
1299                 // Rule (4)    Sep  <break>
1300                 if (fSepSet.contains(c1)) {
1301                     p2 = p1+1;   // Separators don't combine with Extend or Format
1302                     break;
1303                 }               
1304
1305                 if (p2 >= fText.length()) {
1306                     // Reached end of string.  Always a break position.
1307                     break;
1308                 }
1309
1310                 if (p2 == prevPos) {
1311                     // Still warming up the loop.  (won't work with zero length strings, but we don't care)
1312                     continue;
1313                 }
1314
1315                 // Rule (6).   ATerm x Numeric
1316                 if (fATermSet.contains(c1) &&  fNumericSet.contains(c2))  {
1317                     continue;
1318                 }
1319
1320                 // Rule (7).  Upper ATerm  x  Uppper
1321                 if (fUpperSet.contains(c0) && fATermSet.contains(c1) && fUpperSet.contains(c2)) {
1322                     continue;
1323                 }
1324
1325                 // Rule (8)  ATerm Close* Sp*  x  (not (OLettter | Upper | Lower | Sep))* Lower
1326                 //           Note:  Sterm | ATerm are added to the negated part of the expression by a 
1327                 //                  note to the Unicode 5.0 documents.
1328                 int p8 = p1;
1329                 while (p8>0 && fSpSet.contains(cAt(p8))) {
1330                     p8 = moveBack(p8);
1331                 }
1332                 while (p8>0 && fCloseSet.contains(cAt(p8))) {
1333                     p8 = moveBack(p8);
1334                 }
1335                 if (fATermSet.contains(cAt(p8))) {
1336                     p8=p2;
1337                     for (;;) {
1338                         c = cAt(p8);
1339                         if (c==-1 || fOLetterSet.contains(c) || fUpperSet.contains(c) ||
1340                             fLowerSet.contains(c) || fSepSet.contains(c) ||
1341                             fATermSet.contains(c) || fSTermSet.contains(c))  
1342                          {
1343                             break;
1344                         }
1345                         p8 = moveForward(p8);
1346                     }
1347                     if (p8<fText.length() && fLowerSet.contains(cAt(p8))) {
1348                         continue;
1349                     }
1350                 }
1351                 
1352                 // Rule 8a  (STerm | ATerm) Close* Sp* x (SContinue | Sterm | ATerm)
1353                 if (fSContinueSet.contains(c2) || fSTermSet.contains(c2) || fATermSet.contains(c2)) {
1354                     p8 = p1;
1355                     while (setContains(fSpSet, cAt(p8))) {
1356                         p8 = moveBack(p8);
1357                     }
1358                     while (setContains(fCloseSet, cAt(p8))) {
1359                         p8 = moveBack(p8);
1360                     }
1361                     c = cAt(p8);
1362                     if (setContains(fSTermSet, c) || setContains(fATermSet, c)) {
1363                         continue;
1364                     }
1365                 }
1366
1367
1368                 // Rule (9)  (STerm | ATerm) Close*  x  (Close | Sp | Sep | CR | LF)
1369                 int p9 = p1;
1370                 while (p9>0 && fCloseSet.contains(cAt(p9))) {
1371                     p9 = moveBack(p9);
1372                 }
1373                 c = cAt(p9);
1374                 if ((fSTermSet.contains(c) || fATermSet.contains(c))) {
1375                     if (fCloseSet.contains(c2) || fSpSet.contains(c2) || fSepSet.contains(c2)) {
1376                         continue;
1377                     }
1378                 }
1379
1380                 // Rule (10)  (Sterm | ATerm) Close* Sp*  x  (Sp | Sep | CR | LF)
1381                 int p10 = p1;
1382                 while (p10>0 && fSpSet.contains(cAt(p10))) {
1383                     p10 = moveBack(p10);
1384                 }
1385                 while (p10>0 && fCloseSet.contains(cAt(p10))) {
1386                     p10 = moveBack(p10);
1387                 }
1388                 if (fSTermSet.contains(cAt(p10)) || fATermSet.contains(cAt(p10))) {
1389                     if (fSpSet.contains(c2) || fSepSet.contains(c2)) {
1390                         continue;
1391                     }
1392                 }
1393
1394                 // Rule (11)  (STerm | ATerm) Close* Sp*   <break>
1395                 int p11 = p1;
1396                 if (p11>0 && fSepSet.contains(cAt(p11))) {
1397                     p11 = moveBack(p11);
1398                 }
1399                 while (p11>0 && fSpSet.contains(cAt(p11))) {
1400                     p11 = moveBack(p11);
1401                 }
1402                 while (p11>0 && fCloseSet.contains(cAt(p11))) {
1403                     p11 = moveBack(p11);
1404                 }
1405                 if (fSTermSet.contains(cAt(p11)) || fATermSet.contains(cAt(p11))) {
1406                     break;
1407                 }
1408
1409                 //  Rule (12)  Any x Any
1410                 continue;
1411             }
1412             breakPos = p2;
1413             return breakPos;
1414         }
1415            
1416
1417         
1418     }
1419
1420  
1421     /**
1422      * Move an index into a string by n code points.
1423      *   Similar to UTF16.moveCodePointOffset, but without the exceptions, which were
1424      *   complicating usage.
1425      * @param s   a Text string
1426      * @param pos The starting code unit index into the text string
1427      * @param amt The amount to adjust the string by.
1428      * @return    The adjusted code unit index, pinned to the string's length, or
1429      *            unchanged if input index was outside of the string.
1430      */
1431     static int moveIndex32(StringBuffer s, int pos, int amt) {
1432         int i;
1433         char  c;
1434         if (amt>0) {
1435             for (i=0; i<amt; i++) {
1436                 if (pos >= s.length()) {
1437                     return s.length();                   
1438                 }
1439                 c = s.charAt(pos);
1440                 pos++;
1441                 if (UTF16.isLeadSurrogate(c) && pos < s.length()) {
1442                     c = s.charAt(pos);
1443                     if (UTF16.isTrailSurrogate(c)) {
1444                         pos++;   
1445                     }
1446                 }
1447             }
1448         } else {
1449             for (i=0; i>amt; i--) {
1450                 if (pos <= 0) {
1451                     return 0;   
1452                 }
1453                 pos--;
1454                 c = s.charAt(pos);
1455                 if (UTF16.isTrailSurrogate(c) && pos >= 0) {
1456                     c = s.charAt(pos);
1457                     if (UTF16.isLeadSurrogate(c)) {
1458                         pos--;   
1459                     }
1460                 }
1461             }
1462         }
1463         return pos;
1464     }
1465     
1466     /**
1467      * No-exceptions form of UnicodeSet.contains(c).
1468      *    Simplifies loops that terminate with an end-of-input character value.
1469      * @param s  A unicode set
1470      * @param c  A code point value
1471      * @return   true if the set contains c.
1472      */
1473     static boolean setContains(UnicodeSet s, int c) {
1474         if (c<0 || c>UTF16.CODEPOINT_MAX_VALUE ) {
1475             return false;
1476         }
1477         return s.contains(c);
1478     }
1479     
1480     
1481     /**
1482      * return the index of the next code point in the input text.
1483      * @param i the preceding index
1484      * @return
1485      */
1486     static int  nextCP(StringBuffer s, int i) {
1487         if (i == -1) {
1488             // End of Input indication.  Continue to return end value.
1489             return -1;
1490         }
1491         int  retVal = i + 1;
1492         if (retVal > s.length()) {
1493             return -1;
1494         }
1495         int  c = UTF16.charAt(s, i);
1496         if (c >= UTF16.SUPPLEMENTARY_MIN_VALUE && UTF16.isLeadSurrogate(s.charAt(i))) {
1497             retVal++;
1498         }
1499         return retVal;
1500     }
1501     
1502     
1503     /**
1504      * random number generator.  Not using Java's built-in Randoms for two reasons:
1505      *    1.  Using this code allows obtaining the same sequences as those from the ICU4C monkey test.
1506      *    2.  We need to get and restore the seed from values occurring in the middle
1507      *        of a long sequence, to more easily reproduce failing cases.
1508      */
1509     private static int m_seed = 1;
1510     private static int  m_rand()
1511     {
1512         m_seed = m_seed * 1103515245 + 12345;
1513         return (int)(m_seed >>> 16) % 32768;
1514     }
1515
1516     // Helper function for formatting error output.
1517     //   Append a string into a fixed-size field in a StringBuffer.
1518     //   Blank-pad the string if it is shorter than the field.
1519     //   Truncate the source string if it is too long.
1520     //
1521     private static void appendToBuf(StringBuffer dest, String src, int fieldLen) {
1522         int appendLen = src.length();
1523         if (appendLen >= fieldLen) {
1524             dest.append(src.substring(0, fieldLen));
1525         } else {
1526             dest.append(src);
1527             while (appendLen < fieldLen) {
1528                 dest.append(' ');
1529                 appendLen++;
1530             }
1531         }
1532     }
1533
1534     // Helper function for formatting error output.
1535     // Display a code point in "\\uxxxx" or "\Uxxxxxxxx" format
1536     private static void appendCharToBuf(StringBuffer dest, int c, int fieldLen) {
1537            String hexChars = "0123456789abcdef";
1538            if (c < 0x10000) {
1539                 dest.append("\\u");
1540                 for (int bn=12; bn>=0; bn-=4) {
1541                     dest.append(hexChars.charAt((((int)c)>>bn)&0xf));
1542                 }
1543                 appendToBuf(dest, " ", fieldLen-6);
1544             } else {
1545                 dest.append("\\U");
1546                 for (int bn=28; bn>=0; bn-=4) {
1547                     dest.append(hexChars.charAt((((int)c)>>bn)&0xf));
1548                 }
1549                 appendToBuf(dest, " ", fieldLen-10);
1550
1551             }
1552        }
1553     
1554 /**
1555  *  Run a RBBI monkey test.  Common routine, for all break iterator types.
1556  *    Parameters:
1557  *       bi      - the break iterator to use
1558  *       mk      - MonkeyKind, abstraction for obtaining expected results
1559  *       name    - Name of test (char, word, etc.) for use in error messages
1560  *       seed    - Seed for starting random number generator (parameter from user)
1561  *       numIterations
1562  */
1563 void RunMonkey(BreakIterator  bi, RBBIMonkeyKind mk, String name, int  seed, int numIterations) {
1564     int              TESTSTRINGLEN = 500;
1565     StringBuffer     testText         = new StringBuffer();
1566     int              numCharClasses;
1567     List             chClasses;
1568     int[]            expected         = new int[TESTSTRINGLEN*2 + 1];
1569     int              expectedCount    = 0;
1570     boolean[]        expectedBreaks   = new boolean[TESTSTRINGLEN*2 + 1];
1571     boolean[]        forwardBreaks    = new boolean[TESTSTRINGLEN*2 + 1];
1572     boolean[]        reverseBreaks    = new boolean[TESTSTRINGLEN*2 + 1];
1573     boolean[]        isBoundaryBreaks = new boolean[TESTSTRINGLEN*2 + 1];
1574     boolean[]        followingBreaks  = new boolean[TESTSTRINGLEN*2 + 1];
1575     boolean[]        precedingBreaks  = new boolean[TESTSTRINGLEN*2 + 1];
1576     int              i;
1577     int              loopCount        = 0;
1578     boolean          printTestData    = false;
1579     boolean          printBreaksFromBI = false;
1580
1581     m_seed = seed;
1582
1583     numCharClasses = mk.charClasses().size();
1584     chClasses      = mk.charClasses();
1585
1586     // Verify that the character classes all have at least one member.
1587     for (i=0; i<numCharClasses; i++) {
1588         UnicodeSet s = (UnicodeSet)chClasses.get(i);
1589         if (s == null || s.size() == 0) {
1590             errln("Character Class " + i + " is null or of zero size.");
1591             return;
1592         }
1593     }
1594
1595     //--------------------------------------------------------------------------------------------
1596     //
1597     //  Debugging settings.  Comment out everything in the following block for normal operation
1598     //
1599     //--------------------------------------------------------------------------------------------
1600     // numIterations = -1;  
1601     // RuleBasedBreakIterator_New.fTrace = true;
1602     // m_seed = 859056465;
1603     // TESTSTRINGLEN = 50;
1604     // printTestData = true;
1605     // printBreaksFromBI = true;
1606     // ((RuleBasedBreakIterator_New)bi).dump();
1607     
1608     //--------------------------------------------------------------------------------------------
1609     //
1610     //  End of Debugging settings.  
1611     //
1612     //--------------------------------------------------------------------------------------------
1613     
1614     int  dotsOnLine = 0;
1615      while (loopCount < numIterations || numIterations == -1) {
1616         if (numIterations == -1 && loopCount % 10 == 0) {
1617             // If test is running in an infinite loop, display a periodic tic so
1618             //   we can tell that it is making progress.
1619             System.out.print(".");
1620             if (dotsOnLine++ >= 80){
1621                 System.out.println();
1622                 dotsOnLine = 0;
1623             }
1624         }
1625         // Save current random number seed, so that we can recreate the random numbers
1626         //   for this loop iteration in event of an error.
1627         seed = m_seed;
1628
1629         testText.setLength(0);
1630         // Populate a test string with data.
1631         if (printTestData) {
1632             System.out.println("Test Data string ..."); 
1633         }
1634         for (i=0; i<TESTSTRINGLEN; i++) {
1635             int        aClassNum = m_rand() % numCharClasses;
1636             UnicodeSet classSet  = (UnicodeSet)chClasses.get(aClassNum);
1637             int        charIdx   = m_rand() % classSet.size();
1638             int        c         = classSet.charAt(charIdx);
1639             if (c < 0) {   // TODO:  deal with sets containing strings.
1640                 errln("c < 0");
1641             }
1642             UTF16.appendCodePoint(testText, c);
1643             if (printTestData) {
1644                 System.out.print(Integer.toHexString(c) + " ");
1645             }
1646         }
1647         if (printTestData) {
1648             System.out.println(); 
1649         }
1650
1651         Arrays.fill(expected, 0);
1652         Arrays.fill(expectedBreaks, false);
1653         Arrays.fill(forwardBreaks, false);
1654         Arrays.fill(reverseBreaks, false);
1655         Arrays.fill(isBoundaryBreaks, false);
1656         Arrays.fill(followingBreaks, false);
1657         Arrays.fill(precedingBreaks, false);
1658  
1659         // Calculate the expected results for this test string.
1660         mk.setText(testText);
1661         expectedCount = 0;
1662         expectedBreaks[0] = true;
1663         expected[expectedCount ++] = 0;
1664         int breakPos = 0;
1665         int lastBreakPos = -1;
1666         for (;;) {
1667             lastBreakPos = breakPos;
1668             breakPos = mk.next(breakPos);
1669             if (breakPos == -1) {
1670                 break;
1671             }
1672             if (breakPos > testText.length()) {
1673                 errln("breakPos > testText.length()");
1674             }
1675             if (lastBreakPos >= breakPos) {
1676                 errln("Next() not increasing.");
1677                 // break;
1678             }
1679             expectedBreaks[breakPos] = true;
1680             expected[expectedCount ++] = breakPos;
1681         }
1682
1683         // Find the break positions using forward iteration
1684         if (printBreaksFromBI) {
1685             System.out.println("Breaks from BI...");  
1686         }
1687         bi.setText(testText.toString());
1688         for (i=bi.first(); i != BreakIterator.DONE; i=bi.next()) {
1689             if (i < 0 || i > testText.length()) {
1690                 errln(name + " break monkey test: Out of range value returned by breakIterator::next()");
1691                 break;
1692             }
1693             if (printBreaksFromBI) {
1694                 System.out.print(Integer.toHexString(i) + " ");
1695             }
1696             forwardBreaks[i] = true;
1697         }
1698         if (printBreaksFromBI) {
1699             System.out.println();
1700         }
1701
1702         // Find the break positions using reverse iteration
1703         for (i=bi.last(); i != BreakIterator.DONE; i=bi.previous()) {
1704             if (i < 0 || i > testText.length()) {
1705                 errln(name + " break monkey test: Out of range value returned by breakIterator.next()" + name);
1706                 break;
1707             }
1708             reverseBreaks[i] = true;
1709         }
1710
1711         // Find the break positions using isBoundary() tests.
1712         for (i=0; i<=testText.length(); i++) {
1713             isBoundaryBreaks[i] = bi.isBoundary(i);
1714         }
1715
1716         // Find the break positions using the following() function.
1717         lastBreakPos = 0;
1718         followingBreaks[0] = true;
1719         for (i=0; i<testText.length(); i++) {
1720             breakPos = bi.following(i);
1721             if (breakPos <= i ||
1722                 breakPos < lastBreakPos ||
1723                 breakPos > testText.length() ||
1724                 breakPos > lastBreakPos && lastBreakPos > i ) {
1725                 errln(name + " break monkey test: " +
1726                     "Out of range value returned by BreakIterator::following().\n" +
1727                     "index=" + i + "following returned=" + breakPos +
1728                     "lastBreak=" + lastBreakPos);
1729                 precedingBreaks[i] = !expectedBreaks[i];   // Forces an error.
1730             } else {
1731                 followingBreaks[breakPos] = true;
1732                 lastBreakPos = breakPos;
1733             }
1734         }
1735         
1736         // Find the break positions using the preceding() function.
1737         lastBreakPos = testText.length();
1738         precedingBreaks[testText.length()] = true;
1739         for (i=testText.length(); i>0; i--) {
1740             breakPos = bi.preceding(i);
1741             if (breakPos >= i ||
1742                 breakPos > lastBreakPos ||
1743                 breakPos < 0 ||
1744                 breakPos < lastBreakPos && lastBreakPos < i ) {
1745                 errln(name + " break monkey test: " +
1746                         "Out of range value returned by BreakIterator::preceding().\n" +
1747                         "index=" + i + "preceding returned=" + breakPos +
1748                         "lastBreak=" + lastBreakPos);
1749                 precedingBreaks[i] = !expectedBreaks[i];   // Forces an error.
1750             } else {
1751                 precedingBreaks[breakPos] = true;
1752                 lastBreakPos = breakPos;
1753             }
1754         }
1755
1756         
1757
1758         // Compare the expected and actual results.
1759         for (i=0; i<=testText.length(); i++) {
1760             String errorType = null;
1761             if  (forwardBreaks[i] != expectedBreaks[i]) {
1762                 errorType = "next()";
1763             } else if (reverseBreaks[i] != forwardBreaks[i]) {
1764                 errorType = "previous()";
1765             } else if (isBoundaryBreaks[i] != expectedBreaks[i]) {
1766                 errorType = "isBoundary()";
1767             } else if (followingBreaks[i] != expectedBreaks[i]) {
1768                 errorType = "following()";
1769             } else if (precedingBreaks[i] != expectedBreaks[i]) {
1770                 errorType = "preceding()";
1771             }
1772
1773
1774             if (errorType != null) {
1775                 // Format a range of the test text that includes the failure as
1776                 //  a data item that can be included in the rbbi test data file.
1777
1778                 // Start of the range is the last point where expected and actual results
1779                 //   both agreed that there was a break position.
1780                 int startContext = i;
1781                 int count = 0;
1782                 for (;;) {
1783                     if (startContext==0) { break; }
1784                     startContext --;
1785                     if (expectedBreaks[startContext]) {
1786                         if (count == 2) break;
1787                         count ++;
1788                     }
1789                 }
1790
1791                 // End of range is two expected breaks past the start position.
1792                 int endContext = i + 1;
1793                 int ci;
1794                 for (ci=0; ci<2; ci++) {  // Number of items to include in error text.
1795                     for (;;) {
1796                         if (endContext >= testText.length()) {break;}
1797                         if (expectedBreaks[endContext-1]) { 
1798                             if (count == 0) break;
1799                             count --;
1800                         }
1801                         endContext ++;
1802                     }
1803                 }
1804
1805                 // Format looks like   "<data><>\uabcd\uabcd<>\U0001abcd...</data>"
1806                 StringBuffer errorText = new StringBuffer();
1807
1808                 int      c;    // Char from test data
1809                 for (ci = startContext;  ci <= endContext && ci != -1;  ci = nextCP(testText, ci)) {
1810                     if (ci == i) {
1811                         // This is the location of the error.
1812                         errorText.append("<?>---------------------------------\n");
1813                     } else if (expectedBreaks[ci]) {
1814                         // This a non-error expected break position.
1815                         errorText.append("------------------------------------\n");
1816                     }
1817                     if (ci < testText.length()) {
1818                         c = UTF16.charAt(testText, ci);
1819                         appendCharToBuf(errorText, c, 11);
1820                         String gc = UCharacter.getPropertyValueName(UProperty.GENERAL_CATEGORY, UCharacter.getType(c), UProperty.NameChoice.SHORT);
1821                         appendToBuf(errorText, gc, 8);
1822                         int extraProp = UCharacter.getIntPropertyValue(c, mk.fCharProperty);
1823                         String extraPropValue = 
1824                             UCharacter.getPropertyValueName(mk.fCharProperty, extraProp, UProperty.NameChoice.LONG);
1825                         appendToBuf(errorText, extraPropValue, 20);
1826
1827                         String charName = UCharacter.getExtendedName(c);
1828                         appendToBuf(errorText, charName, 40);
1829                         errorText.append('\n');
1830                     }
1831                 }
1832                 if (ci == testText.length() && ci != -1) {
1833                     errorText.append("<>");
1834                 }
1835                 errorText.append("</data>\n");
1836
1837                 // Output the error
1838                 errln(name + " break monkey test error.  " + 
1839                      (expectedBreaks[i]? "Break expected but not found." : "Break found but not expected.") +
1840                       "\nOperation = " + errorType + "; random seed = " + seed + ";  buf Idx = " + i + "\n" +
1841                       errorText);
1842                 break;
1843             }
1844         }
1845
1846         loopCount++;
1847     }
1848 }
1849
1850 public void TestCharMonkey() {
1851     
1852     int        loopCount = 500;
1853     int        seed      = 1;
1854     
1855     if (params.inclusion >= 9) {
1856         loopCount = 10000;
1857     }
1858     
1859     RBBICharMonkey  m = new RBBICharMonkey();
1860     BreakIterator   bi = BreakIterator.getCharacterInstance(Locale.US);
1861     RunMonkey(bi, m, "char", seed, loopCount);
1862 }
1863
1864 public void TestWordMonkey() {
1865     
1866     int        loopCount = 500;
1867     int        seed      = 1;
1868     
1869     if (params.inclusion >= 9) {
1870         loopCount = 10000;
1871     }
1872     
1873     logln("Word Break Monkey Test");
1874     RBBIWordMonkey  m = new RBBIWordMonkey();
1875     BreakIterator   bi = BreakIterator.getWordInstance(Locale.US);
1876     RunMonkey(bi, m, "word", seed, loopCount);
1877 }
1878
1879 public void TestLineMonkey() {
1880     int        loopCount = 500;
1881     int        seed      = 1;
1882     
1883     if (params.inclusion >= 9) {
1884         loopCount = 10000;
1885     }
1886     
1887     logln("Line Break Monkey Test");
1888     RBBILineMonkey  m = new RBBILineMonkey();
1889     BreakIterator   bi = BreakIterator.getLineInstance(Locale.US);
1890     if (params == null) {
1891         loopCount = 50;
1892     }
1893     RunMonkey(bi, m, "line", seed, loopCount);
1894 }
1895
1896 public void TestSentMonkey() {
1897     
1898     int        loopCount = 500;
1899     int        seed      = 1;
1900     
1901     if (params.inclusion >= 9) {
1902         loopCount = 3000;
1903     }
1904     
1905     logln("Sentence Break Monkey Test");
1906     RBBISentenceMonkey  m = new RBBISentenceMonkey();
1907     BreakIterator   bi = BreakIterator.getSentenceInstance(Locale.US);
1908     if (params == null) {
1909         loopCount = 30;
1910     }
1911     RunMonkey(bi, m, "sent", seed, loopCount);
1912 }
1913 //
1914 //  Round-trip monkey tests.
1915 //  Verify that break iterators created from the rule source from the default
1916 //    break iterators still pass the monkey test for the iterator type.
1917 //
1918 //  This is a major test for the Rule Compiler.  The default break iterators are built
1919 //  from pre-compiled binary rule data that was created using ICU4C; these
1920 //  round-trip rule recompile tests verify that the Java rule compiler can
1921 //  rebuild break iterators from the original source rules.
1922 //
1923 public void TestRTCharMonkey() {
1924     
1925     int        loopCount = 200;
1926     int        seed      = 1;
1927     
1928     if (params.inclusion >= 9) {
1929         loopCount = 2000;
1930     }
1931     
1932     RBBICharMonkey  m = new RBBICharMonkey();
1933     BreakIterator   bi = BreakIterator.getCharacterInstance(Locale.US);
1934     String rules = bi.toString();
1935     BreakIterator rtbi = new RuleBasedBreakIterator(rules);
1936     RunMonkey(rtbi, m, "char", seed, loopCount);
1937 }
1938
1939 public void TestRTWordMonkey() {
1940     
1941     int        loopCount = 200;
1942     int        seed      = 1;
1943     
1944     if (params.inclusion >= 9) {
1945         loopCount = 2000;
1946     }
1947     
1948     logln("Word Break Monkey Test");
1949     RBBIWordMonkey  m = new RBBIWordMonkey();
1950     BreakIterator   bi = BreakIterator.getWordInstance(Locale.US);
1951     String rules = bi.toString();
1952     BreakIterator rtbi = new RuleBasedBreakIterator(rules);
1953     RunMonkey(rtbi, m, "word", seed, loopCount);
1954 }
1955
1956 public void TestRTLineMonkey() {
1957     int        loopCount = 200;
1958     int        seed      = 1;
1959     
1960     if (params.inclusion >= 9) {
1961         loopCount = 2000;
1962     }
1963     
1964     logln("Line Break Monkey Test");
1965     RBBILineMonkey  m = new RBBILineMonkey();
1966     BreakIterator   bi = BreakIterator.getLineInstance(Locale.US);
1967     String rules = bi.toString();
1968     BreakIterator rtbi = new RuleBasedBreakIterator(rules);
1969     if (params == null) {
1970         loopCount = 50;
1971     }
1972     RunMonkey(rtbi, m, "line", seed, loopCount);
1973 }
1974
1975 public void TestRTSentMonkey() {
1976     
1977     int        loopCount = 200;
1978     int        seed      = 1;
1979     
1980     if (params.inclusion >= 9) {
1981         loopCount = 1000;
1982     }
1983     
1984     logln("Sentence Break Monkey Test");
1985     RBBISentenceMonkey  m = new RBBISentenceMonkey();
1986     BreakIterator   bi = BreakIterator.getSentenceInstance(Locale.US);
1987     String rules = bi.toString();
1988     BreakIterator rtbi = new RuleBasedBreakIterator(rules);
1989     if (params == null) {
1990         loopCount = 30;
1991     }
1992     RunMonkey(rtbi, m, "sent", seed, loopCount);
1993 }
1994
1995
1996
1997 }
1998