/* ******************************************************************************* * Copyright (C) 2003-2010 International Business Machines Corporation and * * others. All Rights Reserved. * ******************************************************************************* */ package com.ibm.icu.dev.test.rbbi; // Monkey testing of RuleBasedBreakIterator import java.util.ArrayList; import java.util.Arrays; import java.util.List; import java.util.Locale; import com.ibm.icu.dev.test.TestFmwk; import com.ibm.icu.lang.UCharacter; import com.ibm.icu.lang.UProperty; import com.ibm.icu.text.BreakIterator; import com.ibm.icu.text.RuleBasedBreakIterator; import com.ibm.icu.text.UTF16; import com.ibm.icu.text.UnicodeSet; /** * Monkey tests for RBBI. These tests have independent implementations of * the Unicode TR boundary rules, and compare results between these and ICU's * implementation, using random data. * * Tests cover Grapheme Cluster (char), Word and Line breaks * * Ported from ICU4C, original code in file source/test/intltest/rbbitst.cpp * */ public class RBBITestMonkey extends TestFmwk { public static void main(String[] args) { new RBBITestMonkey().run(args); } // // classs RBBIMonkeyKind // // Monkey Test for Break Iteration // Abstract interface class. Concrete derived classes independently // implement the break rules for different iterator types. // // The Monkey Test itself uses doesn't know which type of break iterator it is // testing, but works purely in terms of the interface defined here. // abstract static class RBBIMonkeyKind { // Return a List of UnicodeSets, representing the character classes used // for this type of iterator. abstract List charClasses(); // Set the test text on which subsequent calls to next() will operate abstract void setText(StringBuffer text); // Find the next break postion, starting from the specified position. // Return -1 after reaching end of string. abstract int next(int i); // A Character Property, one of the constants defined in class UProperty. // The value fo this property will be displayed for the characters // near any test failure. int fCharProperty; } /** * Monkey test subclass for testing Character (Grapheme Cluster) boundaries. */ static class RBBICharMonkey extends RBBIMonkeyKind { List fSets; UnicodeSet fCRLFSet; UnicodeSet fControlSet; UnicodeSet fExtendSet; UnicodeSet fPrependSet; UnicodeSet fSpacingSet; UnicodeSet fLSet; UnicodeSet fVSet; UnicodeSet fTSet; UnicodeSet fLVSet; UnicodeSet fLVTSet; UnicodeSet fHangulSet; UnicodeSet fAnySet; StringBuffer fText; RBBICharMonkey() { fText = null; fCharProperty = UProperty.GRAPHEME_CLUSTER_BREAK; fCRLFSet = new UnicodeSet("[\\r\\n]"); fControlSet = new UnicodeSet("[\\p{Grapheme_Cluster_Break = Control}]"); fExtendSet = new UnicodeSet("[\\p{Grapheme_Cluster_Break = Extend}]"); fPrependSet = new UnicodeSet("[\\p{Grapheme_Cluster_Break = Prepend}]"); fSpacingSet = new UnicodeSet("[\\p{Grapheme_Cluster_Break = SpacingMark}]"); fLSet = new UnicodeSet("[\\p{Grapheme_Cluster_Break = L}]"); fVSet = new UnicodeSet("[\\p{Grapheme_Cluster_Break = V}]"); fTSet = new UnicodeSet("[\\p{Grapheme_Cluster_Break = T}]"); fLVSet = new UnicodeSet("[\\p{Grapheme_Cluster_Break = LV}]"); fLVTSet = new UnicodeSet("[\\p{Grapheme_Cluster_Break = LVT}]"); fHangulSet = new UnicodeSet(); fHangulSet.addAll(fLSet); fHangulSet.addAll(fVSet); fHangulSet.addAll(fTSet); fHangulSet.addAll(fLVSet); fHangulSet.addAll(fLVTSet); fAnySet = new UnicodeSet("[\\u0000-\\U0010ffff]"); fSets = new ArrayList(); fSets.add(fCRLFSet); fSets.add(fControlSet); fSets.add(fExtendSet); fSets.add(fPrependSet); fSets.add(fSpacingSet); fSets.add(fHangulSet); fSets.add(fAnySet); } void setText(StringBuffer s) { fText = s; } List charClasses() { return fSets; } int next(int prevPos) { int p1, p2, p3; // Indices of the significant code points around the // break position being tested. The candidate break // location is before p2. int breakPos = -1; int c1, c2, c3; // The code points at p0, p1, p2 & p3. // Previous break at end of string. return DONE. if (prevPos >= fText.length()) { return -1; } p1 = p2 = p3 = prevPos; c3 = UTF16.charAt(fText, prevPos); c1 = c2 = 0; // Loop runs once per "significant" character position in the input text. for (;;) { // Move all of the positions forward in the input string. p1 = p2; c1 = c2; p2 = p3; c2 = c3; // Advance p3 by one codepoint p3 = moveIndex32(fText, p3, 1); c3 = (p3>=fText.length())? -1: UTF16.charAt(fText, p3); if (p1 == p2) { // Still warming up the loop. (won't work with zero length strings, but we don't care) continue; } if (p2 == fText.length()) { // Reached end of string. Always a break position. break; } // Rule GB3 CR x LF // No Extend or Format characters may appear between the CR and LF, // which requires the additional check for p2 immediately following p1. // if (c1==0x0D && c2==0x0A && p1==(p2-1)) { continue; } // Rule (GB4). ( Control | CR | LF ) if (fControlSet.contains(c1) || c1 == 0x0D || c1 == 0x0A) { break; } // Rule (GB5) ( Control | CR | LF ) // if (fControlSet.contains(c2) || c2 == 0x0D || c2 == 0x0A) { break; } // Rule (GB6) L x ( L | V | LV | LVT ) if (fLSet.contains(c1) && (fLSet.contains(c2) || fVSet.contains(c2) || fLVSet.contains(c2) || fLVTSet.contains(c2))) { continue; } // Rule (GB7) ( LV | V ) x ( V | T ) if ((fLVSet.contains(c1) || fVSet.contains(c1)) && (fVSet.contains(c2) || fTSet.contains(c2))) { continue; } // Rule (GB8) ( LVT | T) x T if ((fLVTSet.contains(c1) || fTSet.contains(c1)) && fTSet.contains(c2)) { continue; } // Rule (GB9) Numeric x ALetter if (fExtendSet.contains(c2)) { continue; } // Rule (GB9a) x SpacingMark if (fSpacingSet.contains(c2)) { continue; } // Rule (GB9b) Prepend x if (fPrependSet.contains(c1)) { continue; } // Rule (GB10) Any Any break; } breakPos = p2; return breakPos; } } /** * * Word Monkey Test Class * * * */ static class RBBIWordMonkey extends RBBIMonkeyKind { List fSets; StringBuffer fText; UnicodeSet fCRSet; UnicodeSet fLFSet; UnicodeSet fNewlineSet; UnicodeSet fKatakanaSet; UnicodeSet fALetterSet; UnicodeSet fMidNumLetSet; UnicodeSet fMidLetterSet; UnicodeSet fMidNumSet; UnicodeSet fNumericSet; UnicodeSet fFormatSet; UnicodeSet fExtendSet; UnicodeSet fExtendNumLetSet; UnicodeSet fOtherSet; RBBIWordMonkey() { fCharProperty = UProperty.WORD_BREAK; fCRSet = new UnicodeSet("[\\p{Word_Break = CR}]"); fLFSet = new UnicodeSet("[\\p{Word_Break = LF}]"); fNewlineSet = new UnicodeSet("[\\p{Word_Break = Newline}]"); fALetterSet = new UnicodeSet("[\\p{Word_Break = ALetter}]"); fKatakanaSet = new UnicodeSet("[\\p{Word_Break = Katakana}]"); fMidNumLetSet = new UnicodeSet("[\\p{Word_Break = MidNumLet}]"); fMidLetterSet = new UnicodeSet("[\\p{Word_Break = MidLetter}]"); fMidNumSet = new UnicodeSet("[\\p{Word_Break = MidNum}]"); fNumericSet = new UnicodeSet("[\\p{Word_Break = Numeric}]"); fFormatSet = new UnicodeSet("[\\p{Word_Break = Format}]"); fExtendNumLetSet = new UnicodeSet("[\\p{Word_Break = ExtendNumLet}]"); fExtendSet = new UnicodeSet("[\\p{Word_Break = Extend}]"); fOtherSet = new UnicodeSet(); fOtherSet.complement(); fOtherSet.removeAll(fCRSet); fOtherSet.removeAll(fLFSet); fOtherSet.removeAll(fNewlineSet); fOtherSet.removeAll(fALetterSet); fOtherSet.removeAll(fKatakanaSet); fOtherSet.removeAll(fMidLetterSet); fOtherSet.removeAll(fMidNumSet); fOtherSet.removeAll(fNumericSet); fOtherSet.removeAll(fFormatSet); fOtherSet.removeAll(fExtendSet); fOtherSet.removeAll(fExtendNumLetSet); // Inhibit dictionary characters from being tested at all. fOtherSet.removeAll(new UnicodeSet("[\\p{LineBreak = Complex_Context}]")); fSets = new ArrayList(); fSets.add(fCRSet); fSets.add(fLFSet); fSets.add(fNewlineSet); fSets.add(fALetterSet); fSets.add(fKatakanaSet); fSets.add(fMidLetterSet); fSets.add(fMidNumLetSet); fSets.add(fMidNumSet); fSets.add(fNumericSet); fSets.add(fFormatSet); fSets.add(fExtendSet); fSets.add(fExtendNumLetSet); fSets.add(fOtherSet); } List charClasses() { return fSets; } void setText(StringBuffer s) { fText = s; } int next(int prevPos) { int /*p0,*/ p1, p2, p3; // Indices of the significant code points around the // break position being tested. The candidate break // location is before p2. int breakPos = -1; int c0, c1, c2, c3; // The code points at p0, p1, p2 & p3. // Previous break at end of string. return DONE. if (prevPos >= fText.length()) { return -1; } /*p0 =*/ p1 = p2 = p3 = prevPos; c3 = UTF16.charAt(fText, prevPos); c0 = c1 = c2 = 0; // Loop runs once per "significant" character position in the input text. for (;;) { // Move all of the positions forward in the input string. /*p0 = p1;*/ c0 = c1; p1 = p2; c1 = c2; p2 = p3; c2 = c3; // Advancd p3 by X(Extend | Format)* Rule 4 // But do not advance over Extend & Format following a new line. (Unicode 5.1 change) do { p3 = moveIndex32(fText, p3, 1); c3 = -1; if (p3>=fText.length()) { break; } c3 = UTF16.charAt(fText, p3); if (fCRSet.contains(c2) || fLFSet.contains(c2) || fNewlineSet.contains(c2)) { break; } } while (setContains(fFormatSet, c3) || setContains(fExtendSet, c3)); if (p1 == p2) { // Still warming up the loop. (won't work with zero length strings, but we don't care) continue; } if (p2 == fText.length()) { // Reached end of string. Always a break position. break; } // Rule (3) CR x LF // No Extend or Format characters may appear between the CR and LF, // which requires the additional check for p2 immediately following p1. // if (c1==0x0D && c2==0x0A) { continue; } // Rule (3a) Break before and after newlines (including CR and LF) // if (fCRSet.contains(c1) || fLFSet.contains(c1) || fNewlineSet.contains(c1)) { break; } if (fCRSet.contains(c2) || fLFSet.contains(c2) || fNewlineSet.contains(c2)) { break; } // Rule (5). ALetter x ALetter if (fALetterSet.contains(c1) && fALetterSet.contains(c2)) { continue; } // Rule (6) ALetter x (MidLetter | MidNumLet) ALetter // if ( fALetterSet.contains(c1) && (fMidLetterSet.contains(c2) || fMidNumLetSet.contains(c2)) && setContains(fALetterSet, c3)) { continue; } // Rule (7) ALetter (MidLetter | MidNumLet) x ALetter if (fALetterSet.contains(c0) && (fMidLetterSet.contains(c1) || fMidNumLetSet.contains(c1)) && fALetterSet.contains(c2)) { continue; } // Rule (8) Numeric x Numeric if (fNumericSet.contains(c1) && fNumericSet.contains(c2)) { continue; } // Rule (9) ALetter x Numeric if (fALetterSet.contains(c1) && fNumericSet.contains(c2)) { continue; } // Rule (10) Numeric x ALetter if (fNumericSet.contains(c1) && fALetterSet.contains(c2)) { continue; } // Rule (11) Numeric (MidNum | MidNumLet) x Numeric if ( fNumericSet.contains(c0) && (fMidNumSet.contains(c1) || fMidNumLetSet.contains(c1)) && fNumericSet.contains(c2)) { continue; } // Rule (12) Numeric x (MidNum | MidNumLet) Numeric if (fNumericSet.contains(c1) && (fMidNumSet.contains(c2) || fMidNumLetSet.contains(c2)) && setContains(fNumericSet, c3)) { continue; } // Rule (13) Katakana x Katakana if (fKatakanaSet.contains(c1) && fKatakanaSet.contains(c2)) { continue; } // Rule 13a (ALetter | Numeric | Katakana | ExtendNumLet) x ExtendNumLet if ((fALetterSet.contains(c1) || fNumericSet.contains(c1) || fKatakanaSet.contains(c1) || fExtendNumLetSet.contains(c1)) && fExtendNumLetSet.contains(c2)) { continue; } // Rule 13b ExtendNumLet x (ALetter | Numeric | Katakana | ExtendNumLet) if (fExtendNumLetSet.contains(c1) && (fALetterSet.contains(c2) || fNumericSet.contains(c2) || fKatakanaSet.contains(c2) || fExtendNumLetSet.contains(c2))) { continue; } // Rule 14. Break found here. break; } breakPos = p2; return breakPos; } } static class RBBILineMonkey extends RBBIMonkeyKind { List fSets; UnicodeSet fBK; UnicodeSet fCR; UnicodeSet fLF; UnicodeSet fCM; UnicodeSet fNL; UnicodeSet fSG; UnicodeSet fWJ; UnicodeSet fZW; UnicodeSet fGL; UnicodeSet fCB; UnicodeSet fSP; UnicodeSet fB2; UnicodeSet fBA; UnicodeSet fBB; UnicodeSet fHY; UnicodeSet fCL; UnicodeSet fCP; UnicodeSet fEX; UnicodeSet fIN; UnicodeSet fNS; UnicodeSet fOP; UnicodeSet fQU; UnicodeSet fIS; UnicodeSet fNU; UnicodeSet fPO; UnicodeSet fPR; UnicodeSet fSY; UnicodeSet fAI; UnicodeSet fAL; UnicodeSet fID; UnicodeSet fSA; UnicodeSet fJL; UnicodeSet fJV; UnicodeSet fJT; UnicodeSet fH2; UnicodeSet fH3; UnicodeSet fXX; StringBuffer fText; int fOrigPositions; RBBILineMonkey() { fCharProperty = UProperty.LINE_BREAK; fSets = new ArrayList(); fBK = new UnicodeSet("[\\p{Line_Break=BK}]"); fCR = new UnicodeSet("[\\p{Line_break=CR}]"); fLF = new UnicodeSet("[\\p{Line_break=LF}]"); fCM = new UnicodeSet("[\\p{Line_break=CM}]"); fNL = new UnicodeSet("[\\p{Line_break=NL}]"); fWJ = new UnicodeSet("[\\p{Line_break=WJ}]"); fZW = new UnicodeSet("[\\p{Line_break=ZW}]"); fGL = new UnicodeSet("[\\p{Line_break=GL}]"); fCB = new UnicodeSet("[\\p{Line_break=CB}]"); fSP = new UnicodeSet("[\\p{Line_break=SP}]"); fB2 = new UnicodeSet("[\\p{Line_break=B2}]"); fBA = new UnicodeSet("[\\p{Line_break=BA}]"); fBB = new UnicodeSet("[\\p{Line_break=BB}]"); fHY = new UnicodeSet("[\\p{Line_break=HY}]"); fCL = new UnicodeSet("[\\p{Line_break=CL}]"); fCP = new UnicodeSet("[\\p{Line_break=CP}]"); fEX = new UnicodeSet("[\\p{Line_break=EX}]"); fIN = new UnicodeSet("[\\p{Line_break=IN}]"); fNS = new UnicodeSet("[\\p{Line_break=NS}]"); fOP = new UnicodeSet("[\\p{Line_break=OP}]"); fQU = new UnicodeSet("[\\p{Line_break=QU}]"); fIS = new UnicodeSet("[\\p{Line_break=IS}]"); fNU = new UnicodeSet("[\\p{Line_break=NU}]"); fPO = new UnicodeSet("[\\p{Line_break=PO}]"); fPR = new UnicodeSet("[\\p{Line_break=PR}]"); fSY = new UnicodeSet("[\\p{Line_break=SY}]"); fAI = new UnicodeSet("[\\p{Line_break=AI}]"); fAL = new UnicodeSet("[\\p{Line_break=AL}]"); fID = new UnicodeSet("[\\p{Line_break=ID}]"); fSA = new UnicodeSet("[\\p{Line_break=SA}]"); fJL = new UnicodeSet("[\\p{Line_break=JL}]"); fJV = new UnicodeSet("[\\p{Line_break=JV}]"); fJT = new UnicodeSet("[\\p{Line_break=JT}]"); fH2 = new UnicodeSet("[\\p{Line_break=H2}]"); fH3 = new UnicodeSet("[\\p{Line_break=H3}]"); fSG = new UnicodeSet("[\\ud800-\\udfff]"); fXX = new UnicodeSet("[\\p{Line_break=XX}]"); fAL.addAll(fXX); // Default behavior for XX is identical to AL fAL.addAll(fAI); // Default behavior for AI is identical to AL fAL.addAll(fSA); // Default behavior for SA is XX, which defaults to AL fAL.addAll(fSG); // Default behavior for SG (unpaired surrogates) is AL fSets.add(fBK); fSets.add(fCR); fSets.add(fLF); fSets.add(fCM); fSets.add(fNL); fSets.add(fWJ); fSets.add(fZW); fSets.add(fGL); fSets.add(fCB); fSets.add(fSP); fSets.add(fB2); fSets.add(fBA); fSets.add(fBB); fSets.add(fHY); fSets.add(fH2); fSets.add(fH3); fSets.add(fCL); fSets.add(fCP); fSets.add(fEX); fSets.add(fIN); fSets.add(fJL); fSets.add(fJT); fSets.add(fJV); fSets.add(fNS); fSets.add(fOP); fSets.add(fQU); fSets.add(fIS); fSets.add(fNU); fSets.add(fPO); fSets.add(fPR); fSets.add(fSY); fSets.add(fAI); fSets.add(fAL); fSets.add(fID); fSets.add(fWJ); fSets.add(fSA); fSets.add(fSG); } void setText(StringBuffer s) { fText = s; } int next(int startPos) { int pos; // Index of the char following a potential break position int thisChar; // Character at above position "pos" int prevPos; // Index of the char preceding a potential break position int prevChar; // Character at above position. Note that prevChar // and thisChar may not be adjacent because combining // characters between them will be ignored. int nextPos; // Index of the next character following pos. // Usually skips over combining marks. int tPos; // temp value. int matchVals[] = null; // Number Expression Match Results if (startPos >= fText.length()) { return -1; } // Initial values for loop. Loop will run the first time without finding breaks, // while the invalid values shift out and the "this" and // "prev" positions are filled in with good values. pos = prevPos = -1; // Invalid value, serves as flag for initial loop iteration. thisChar = prevChar = 0; nextPos = startPos; // Loop runs once per position in the test text, until a break position // is found. In each iteration, we are testing for a possible break // just preceding the character at index "pos". The character preceding // this char is at postion "prevPos"; because of combining sequences, // "prevPos" can be arbitrarily far before "pos". for (;;) { // Advance to the next position to be tested. prevPos = pos; prevChar = thisChar; pos = nextPos; nextPos = moveIndex32(fText, pos, 1); // Rule LB2 - Break at end of text. if (pos >= fText.length()) { break; } // Rule LB 9 - adjust for combining sequences. // We do this rule out-of-order because the adjustment does // not effect the way that rules LB 3 through LB 6 match, // and doing it here rather than after LB 6 is substantially // simpler when combining sequences do occur. // LB 9 Keep combining sequences together. // advance over any CM class chars at "pos", // result is "nextPos" for the following loop iteration. thisChar = UTF16.charAt(fText, pos); if (!(fSP.contains(thisChar) || fBK.contains(thisChar) || thisChar==0x0d || thisChar==0x0a || fNL.contains(thisChar) || fZW.contains(thisChar) )) { for (;;) { if (nextPos == fText.length()) { break; } int nextChar = UTF16.charAt(fText, nextPos); if (!fCM.contains(nextChar)) { break; } nextPos = moveIndex32(fText, nextPos, 1); } } // LB 9 Treat X CM* as if it were X // No explicit action required. // LB 10 Treat any remaining combining mark as AL if (fCM.contains(thisChar)) { thisChar = 'A'; } // If the loop is still warming up - if we haven't shifted the initial // -1 positions out of prevPos yet - loop back to advance the // position in the input without any further looking for breaks. if (prevPos == -1) { continue; } // LB 4 Always break after hard line breaks, if (fBK.contains(prevChar)) { break; } // LB 5 Break after CR, LF, NL, but not inside CR LF if (fCR.contains(prevChar) && fLF.contains(thisChar)) { continue; } if (fCR.contains(prevChar) || fLF.contains(prevChar) || fNL.contains(prevChar)) { break; } // LB 6 Don't break before hard line breaks if (fBK.contains(thisChar) || fCR.contains(thisChar) || fLF.contains(thisChar) || fNL.contains(thisChar) ) { continue; } // LB 7 Don't break before spaces or zero-width space. if (fSP.contains(thisChar)) { continue; } if (fZW.contains(thisChar)) { continue; } // LB 8 Break after zero width space if (fZW.contains(prevChar)) { break; } // LB 9, 10 Already done, at top of loop. // // LB 11 // x WJ // WJ x if (fWJ.contains(thisChar) || fWJ.contains(prevChar)) { continue; } // LB 12 // GL x if (fGL.contains(prevChar)) { continue; } // LB 12a // [^SP BA HY] x GL if (!(fSP.contains(prevChar) || fBA.contains(prevChar) || fHY.contains(prevChar) ) && fGL.contains(thisChar)) { continue; } // LB 13 Don't break before closings. // NU x CL, NU x CP and NU x IS are not matched here so that they will // fall into LB 17 and the more general number regular expression. // if (!fNU.contains(prevChar) && fCL.contains(thisChar) || !fNU.contains(prevChar) && fCP.contains(thisChar) || fEX.contains(thisChar) || !fNU.contains(prevChar) && fIS.contains(thisChar) || !fNU.contains(prevChar) && fSY.contains(thisChar)) { continue; } // LB 14 Don't break after OP SP* // Scan backwards, checking for this sequence. // The OP char could include combining marks, so we actually check for // OP CM* SP* x tPos = prevPos; if (fSP.contains(prevChar)) { while (tPos > 0 && fSP.contains(UTF16.charAt(fText, tPos))) { tPos=moveIndex32(fText, tPos, -1); } } while (tPos > 0 && fCM.contains(UTF16.charAt(fText, tPos))) { tPos=moveIndex32(fText, tPos, -1); } if (fOP.contains(UTF16.charAt(fText, tPos))) { continue; } // LB 15 Do not break within "[ // QU CM* SP* x OP if (fOP.contains(thisChar)) { // Scan backwards from prevChar to see if it is preceded by QU CM* SP* tPos = prevPos; while (tPos > 0 && fSP.contains(UTF16.charAt(fText, tPos))) { tPos = moveIndex32(fText, tPos, -1); } while (tPos > 0 && fCM.contains(UTF16.charAt(fText, tPos))) { tPos = moveIndex32(fText, tPos, -1); } if (fQU.contains(UTF16.charAt(fText, tPos))) { continue; } } // LB 16 (CL | CP) SP* x NS if (fNS.contains(thisChar)) { tPos = prevPos; while (tPos > 0 && fSP.contains(UTF16.charAt(fText, tPos))) { tPos = moveIndex32(fText, tPos, -1); } while (tPos > 0 && fCM.contains(UTF16.charAt(fText, tPos))) { tPos = moveIndex32(fText, tPos, -1); } if (fCL.contains(UTF16.charAt(fText, tPos)) || fCP.contains(UTF16.charAt(fText, tPos))) { continue; } } // LB 17 B2 SP* x B2 if (fB2.contains(thisChar)) { tPos = prevPos; while (tPos > 0 && fSP.contains(UTF16.charAt(fText, tPos))) { tPos = moveIndex32(fText, tPos, -1); } while (tPos > 0 && fCM.contains(UTF16.charAt(fText, tPos))) { tPos = moveIndex32(fText, tPos, -1); } if (fB2.contains(UTF16.charAt(fText, tPos))) { continue; } } // LB 18 break after space if (fSP.contains(prevChar)) { break; } // LB 19 // x QU // QU x if (fQU.contains(thisChar) || fQU.contains(prevChar)) { continue; } // LB 20 Break around a CB if (fCB.contains(thisChar) || fCB.contains(prevChar)) { break; } // LB 21 if (fBA.contains(thisChar) || fHY.contains(thisChar) || fNS.contains(thisChar) || fBB.contains(prevChar) ) { continue; } // LB 22 if (fAL.contains(prevChar) && fIN.contains(thisChar) || fID.contains(prevChar) && fIN.contains(thisChar) || fIN.contains(prevChar) && fIN.contains(thisChar) || fNU.contains(prevChar) && fIN.contains(thisChar) ) { continue; } // LB 23 ID x PO (Note: Leading CM behaves like ID) // AL x NU // NU x AL if (fID.contains(prevChar) && fPO.contains(thisChar) || fAL.contains(prevChar) && fNU.contains(thisChar) || fNU.contains(prevChar) && fAL.contains(thisChar) ) { continue; } // LB 24 Do not break between prefix and letters or ideographs. // PR x ID // PR x AL // PO x AL if (fPR.contains(prevChar) && fID.contains(thisChar) || fPR.contains(prevChar) && fAL.contains(thisChar) || fPO.contains(prevChar) && fAL.contains(thisChar)) { continue; } // LB 25 Numbers matchVals = LBNumberCheck(fText, prevPos, matchVals); if (matchVals[0] != -1) { // Matched a number. But could have been just a single digit, which would // not represent a "no break here" between prevChar and thisChar int numEndIdx = matchVals[1]; // idx of first char following num if (numEndIdx > pos) { // Number match includes at least the two chars being checked if (numEndIdx > nextPos) { // Number match includes additional chars. Update pos and nextPos // so that next loop iteration will continue at the end of the number, // checking for breaks between last char in number & whatever follows. nextPos = numEndIdx; pos = numEndIdx; do { pos = moveIndex32(fText, pos, -1); thisChar = UTF16.charAt(fText, pos); } while (fCM.contains(thisChar)); } continue; } } // LB 26 Do not break Korean Syllables if (fJL.contains(prevChar) && (fJL.contains(thisChar) || fJV.contains(thisChar) || fH2.contains(thisChar) || fH3.contains(thisChar))) { continue; } if ((fJV.contains(prevChar) || fH2.contains(prevChar)) && (fJV.contains(thisChar) || fJT.contains(thisChar))) { continue; } if ((fJT.contains(prevChar) || fH3.contains(prevChar)) && fJT.contains(thisChar)) { continue; } // LB 27 Treat a Korean Syllable Block the same as ID if ((fJL.contains(prevChar) || fJV.contains(prevChar) || fJT.contains(prevChar) || fH2.contains(prevChar) || fH3.contains(prevChar)) && fIN.contains(thisChar)) { continue; } if ((fJL.contains(prevChar) || fJV.contains(prevChar) || fJT.contains(prevChar) || fH2.contains(prevChar) || fH3.contains(prevChar)) && fPO.contains(thisChar)) { continue; } if (fPR.contains(prevChar) && (fJL.contains(thisChar) || fJV.contains(thisChar) || fJT.contains(thisChar) || fH2.contains(thisChar) || fH3.contains(thisChar))) { continue; } // LB 28 Do not break between alphabetics if (fAL.contains(prevChar) && fAL.contains(thisChar)) { continue; } // LB 29 Do not break between numeric punctuation and alphabetics if (fIS.contains(prevChar) && fAL.contains(thisChar)) { continue; } // LB 30 Do not break between letters, numbers, or ordinary symbols and opening or closing punctuation. // (AL | NU) x OP // CP x (AL | NU) if ((fAL.contains(prevChar) || fNU.contains(prevChar)) && fOP.contains(thisChar)) { continue; } if (fCP.contains(prevChar) && (fAL.contains(thisChar) || fNU.contains(thisChar))) { continue; } // LB 31 Break everywhere else break; } return pos; } // Match the following regular expression in the input text. // ((PR | PO) CM*)? ((OP | HY) CM*)? NU CM* ((NU | IS | SY) CM*) * ((CL | CP) CM*)? (PR | PO) CM*)? // 0 0 1 3 3 4 7 7 7 7 9 9 9 11 11 (match states) // retVals array [0] index of the start of the match, or -1 if no match // [1] index of first char following the match. // Can not use Java regex because need supplementary character support, // and because Unicode char properties version must be the same as in // the version of ICU being tested. private int[] LBNumberCheck(StringBuffer s, int startIdx, int[] retVals) { if (retVals == null) { retVals = new int[2]; } retVals[0] = -1; // Indicates no match. int matchState = 0; int idx = startIdx; matchLoop: for (idx = startIdx; idx 4) { retVals[0] = startIdx; retVals[1] = idx; } return retVals; } List charClasses() { return fSets; } } /** * * Sentence Monkey Test Class * * * */ static class RBBISentenceMonkey extends RBBIMonkeyKind { List fSets; StringBuffer fText; UnicodeSet fSepSet; UnicodeSet fFormatSet; UnicodeSet fSpSet; UnicodeSet fLowerSet; UnicodeSet fUpperSet; UnicodeSet fOLetterSet; UnicodeSet fNumericSet; UnicodeSet fATermSet; UnicodeSet fSContinueSet; UnicodeSet fSTermSet; UnicodeSet fCloseSet; UnicodeSet fOtherSet; UnicodeSet fExtendSet; RBBISentenceMonkey() { fCharProperty = UProperty.SENTENCE_BREAK; fSets = new ArrayList(); // Separator Set Note: Beginning with Unicode 5.1, CR and LF were removed from the separator // set and made into character classes of their own. For the monkey impl, // they remain in SEP, since Sep always appears with CR and LF in the rules. fSepSet = new UnicodeSet("[\\p{Sentence_Break = Sep} \\u000a \\u000d]"); fFormatSet = new UnicodeSet("[\\p{Sentence_Break = Format}]"); fSpSet = new UnicodeSet("[\\p{Sentence_Break = Sp}]"); fLowerSet = new UnicodeSet("[\\p{Sentence_Break = Lower}]"); fUpperSet = new UnicodeSet("[\\p{Sentence_Break = Upper}]"); fOLetterSet = new UnicodeSet("[\\p{Sentence_Break = OLetter}]"); fNumericSet = new UnicodeSet("[\\p{Sentence_Break = Numeric}]"); fATermSet = new UnicodeSet("[\\p{Sentence_Break = ATerm}]"); fSContinueSet = new UnicodeSet("[\\p{Sentence_Break = SContinue}]"); fSTermSet = new UnicodeSet("[\\p{Sentence_Break = STerm}]"); fCloseSet = new UnicodeSet("[\\p{Sentence_Break = Close}]"); fExtendSet = new UnicodeSet("[\\p{Sentence_Break = Extend}]"); fOtherSet = new UnicodeSet(); fOtherSet.complement(); fOtherSet.removeAll(fSepSet); fOtherSet.removeAll(fFormatSet); fOtherSet.removeAll(fSpSet); fOtherSet.removeAll(fLowerSet); fOtherSet.removeAll(fUpperSet); fOtherSet.removeAll(fOLetterSet); fOtherSet.removeAll(fNumericSet); fOtherSet.removeAll(fATermSet); fOtherSet.removeAll(fSContinueSet); fOtherSet.removeAll(fSTermSet); fOtherSet.removeAll(fCloseSet); fOtherSet.removeAll(fExtendSet); fSets.add(fSepSet); fSets.add(fFormatSet); fSets.add(fSpSet); fSets.add(fLowerSet); fSets.add(fUpperSet); fSets.add(fOLetterSet); fSets.add(fNumericSet); fSets.add(fATermSet); fSets.add(fSContinueSet); fSets.add(fSTermSet); fSets.add(fCloseSet); fSets.add(fOtherSet); fSets.add(fExtendSet); } List charClasses() { return fSets; } void setText(StringBuffer s) { fText = s; } // moveBack() Find the "significant" code point preceding the index i. // Skips over ($Extend | $Format)* // private int moveBack(int i) { if (i <= 0) { return -1; } int c; int j = i; do { j = moveIndex32(fText, j, -1); c = UTF16.charAt(fText, j); } while (j>0 &&(fFormatSet.contains(c) || fExtendSet.contains(c))); return j; } int moveForward(int i) { if (i>=fText.length()) { return fText.length(); } int c; int j = i; do { j = moveIndex32(fText, j, 1); c = cAt(j); } while (c>=0 && (fFormatSet.contains(c) || fExtendSet.contains(c))); return j; } int cAt(int pos) { if (pos<0 || pos>=fText.length()) { return -1; } return UTF16.charAt(fText, pos); } int next(int prevPos) { int /*p0,*/ p1, p2, p3; // Indices of the significant code points around the // break position being tested. The candidate break // location is before p2. int breakPos = -1; int c0, c1, c2, c3; // The code points at p0, p1, p2 & p3. int c; // Prev break at end of string. return DONE. if (prevPos >= fText.length()) { return -1; } /*p0 =*/ p1 = p2 = p3 = prevPos; c3 = UTF16.charAt(fText, prevPos); c0 = c1 = c2 = 0; // Loop runs once per "significant" character position in the input text. for (;;) { // Move all of the positions forward in the input string. /*p0 = p1;*/ c0 = c1; p1 = p2; c1 = c2; p2 = p3; c2 = c3; // Advancd p3 by X(Extend | Format)* Rule 4 p3 = moveForward(p3); c3 = cAt(p3); // Rule (3) CR x LF if (c1==0x0d && c2==0x0a && p2==(p1+1)) { continue; } // Rule (4) Sep if (fSepSet.contains(c1)) { p2 = p1+1; // Separators don't combine with Extend or Format break; } if (p2 >= fText.length()) { // Reached end of string. Always a break position. break; } if (p2 == prevPos) { // Still warming up the loop. (won't work with zero length strings, but we don't care) continue; } // Rule (6). ATerm x Numeric if (fATermSet.contains(c1) && fNumericSet.contains(c2)) { continue; } // Rule (7). Upper ATerm x Uppper if (fUpperSet.contains(c0) && fATermSet.contains(c1) && fUpperSet.contains(c2)) { continue; } // Rule (8) ATerm Close* Sp* x (not (OLettter | Upper | Lower | Sep))* Lower // Note: Sterm | ATerm are added to the negated part of the expression by a // note to the Unicode 5.0 documents. int p8 = p1; while (p8>0 && fSpSet.contains(cAt(p8))) { p8 = moveBack(p8); } while (p8>0 && fCloseSet.contains(cAt(p8))) { p8 = moveBack(p8); } if (fATermSet.contains(cAt(p8))) { p8=p2; for (;;) { c = cAt(p8); if (c==-1 || fOLetterSet.contains(c) || fUpperSet.contains(c) || fLowerSet.contains(c) || fSepSet.contains(c) || fATermSet.contains(c) || fSTermSet.contains(c)) { break; } p8 = moveForward(p8); } if (p80 && fCloseSet.contains(cAt(p9))) { p9 = moveBack(p9); } c = cAt(p9); if ((fSTermSet.contains(c) || fATermSet.contains(c))) { if (fCloseSet.contains(c2) || fSpSet.contains(c2) || fSepSet.contains(c2)) { continue; } } // Rule (10) (Sterm | ATerm) Close* Sp* x (Sp | Sep | CR | LF) int p10 = p1; while (p10>0 && fSpSet.contains(cAt(p10))) { p10 = moveBack(p10); } while (p10>0 && fCloseSet.contains(cAt(p10))) { p10 = moveBack(p10); } if (fSTermSet.contains(cAt(p10)) || fATermSet.contains(cAt(p10))) { if (fSpSet.contains(c2) || fSepSet.contains(c2)) { continue; } } // Rule (11) (STerm | ATerm) Close* Sp* int p11 = p1; if (p11>0 && fSepSet.contains(cAt(p11))) { p11 = moveBack(p11); } while (p11>0 && fSpSet.contains(cAt(p11))) { p11 = moveBack(p11); } while (p11>0 && fCloseSet.contains(cAt(p11))) { p11 = moveBack(p11); } if (fSTermSet.contains(cAt(p11)) || fATermSet.contains(cAt(p11))) { break; } // Rule (12) Any x Any continue; } breakPos = p2; return breakPos; } } /** * Move an index into a string by n code points. * Similar to UTF16.moveCodePointOffset, but without the exceptions, which were * complicating usage. * @param s a Text string * @param pos The starting code unit index into the text string * @param amt The amount to adjust the string by. * @return The adjusted code unit index, pinned to the string's length, or * unchanged if input index was outside of the string. */ static int moveIndex32(StringBuffer s, int pos, int amt) { int i; char c; if (amt>0) { for (i=0; i= s.length()) { return s.length(); } c = s.charAt(pos); pos++; if (UTF16.isLeadSurrogate(c) && pos < s.length()) { c = s.charAt(pos); if (UTF16.isTrailSurrogate(c)) { pos++; } } } } else { for (i=0; i>amt; i--) { if (pos <= 0) { return 0; } pos--; c = s.charAt(pos); if (UTF16.isTrailSurrogate(c) && pos >= 0) { c = s.charAt(pos); if (UTF16.isLeadSurrogate(c)) { pos--; } } } } return pos; } /** * No-exceptions form of UnicodeSet.contains(c). * Simplifies loops that terminate with an end-of-input character value. * @param s A unicode set * @param c A code point value * @return true if the set contains c. */ static boolean setContains(UnicodeSet s, int c) { if (c<0 || c>UTF16.CODEPOINT_MAX_VALUE ) { return false; } return s.contains(c); } /** * return the index of the next code point in the input text. * @param i the preceding index * @return */ static int nextCP(StringBuffer s, int i) { if (i == -1) { // End of Input indication. Continue to return end value. return -1; } int retVal = i + 1; if (retVal > s.length()) { return -1; } int c = UTF16.charAt(s, i); if (c >= UTF16.SUPPLEMENTARY_MIN_VALUE && UTF16.isLeadSurrogate(s.charAt(i))) { retVal++; } return retVal; } /** * random number generator. Not using Java's built-in Randoms for two reasons: * 1. Using this code allows obtaining the same sequences as those from the ICU4C monkey test. * 2. We need to get and restore the seed from values occurring in the middle * of a long sequence, to more easily reproduce failing cases. */ private static int m_seed = 1; private static int m_rand() { m_seed = m_seed * 1103515245 + 12345; return (int)(m_seed >>> 16) % 32768; } // Helper function for formatting error output. // Append a string into a fixed-size field in a StringBuffer. // Blank-pad the string if it is shorter than the field. // Truncate the source string if it is too long. // private static void appendToBuf(StringBuffer dest, String src, int fieldLen) { int appendLen = src.length(); if (appendLen >= fieldLen) { dest.append(src.substring(0, fieldLen)); } else { dest.append(src); while (appendLen < fieldLen) { dest.append(' '); appendLen++; } } } // Helper function for formatting error output. // Display a code point in "\\uxxxx" or "\Uxxxxxxxx" format private static void appendCharToBuf(StringBuffer dest, int c, int fieldLen) { String hexChars = "0123456789abcdef"; if (c < 0x10000) { dest.append("\\u"); for (int bn=12; bn>=0; bn-=4) { dest.append(hexChars.charAt((((int)c)>>bn)&0xf)); } appendToBuf(dest, " ", fieldLen-6); } else { dest.append("\\U"); for (int bn=28; bn>=0; bn-=4) { dest.append(hexChars.charAt((((int)c)>>bn)&0xf)); } appendToBuf(dest, " ", fieldLen-10); } } /** * Run a RBBI monkey test. Common routine, for all break iterator types. * Parameters: * bi - the break iterator to use * mk - MonkeyKind, abstraction for obtaining expected results * name - Name of test (char, word, etc.) for use in error messages * seed - Seed for starting random number generator (parameter from user) * numIterations */ void RunMonkey(BreakIterator bi, RBBIMonkeyKind mk, String name, int seed, int numIterations) { int TESTSTRINGLEN = 500; StringBuffer testText = new StringBuffer(); int numCharClasses; List chClasses; int[] expected = new int[TESTSTRINGLEN*2 + 1]; int expectedCount = 0; boolean[] expectedBreaks = new boolean[TESTSTRINGLEN*2 + 1]; boolean[] forwardBreaks = new boolean[TESTSTRINGLEN*2 + 1]; boolean[] reverseBreaks = new boolean[TESTSTRINGLEN*2 + 1]; boolean[] isBoundaryBreaks = new boolean[TESTSTRINGLEN*2 + 1]; boolean[] followingBreaks = new boolean[TESTSTRINGLEN*2 + 1]; boolean[] precedingBreaks = new boolean[TESTSTRINGLEN*2 + 1]; int i; int loopCount = 0; boolean printTestData = false; boolean printBreaksFromBI = false; m_seed = seed; numCharClasses = mk.charClasses().size(); chClasses = mk.charClasses(); // Verify that the character classes all have at least one member. for (i=0; i= 80){ System.out.println(); dotsOnLine = 0; } } // Save current random number seed, so that we can recreate the random numbers // for this loop iteration in event of an error. seed = m_seed; testText.setLength(0); // Populate a test string with data. if (printTestData) { System.out.println("Test Data string ..."); } for (i=0; i testText.length()) { errln("breakPos > testText.length()"); } if (lastBreakPos >= breakPos) { errln("Next() not increasing."); // break; } expectedBreaks[breakPos] = true; expected[expectedCount ++] = breakPos; } // Find the break positions using forward iteration if (printBreaksFromBI) { System.out.println("Breaks from BI..."); } bi.setText(testText.toString()); for (i=bi.first(); i != BreakIterator.DONE; i=bi.next()) { if (i < 0 || i > testText.length()) { errln(name + " break monkey test: Out of range value returned by breakIterator::next()"); break; } if (printBreaksFromBI) { System.out.print(Integer.toHexString(i) + " "); } forwardBreaks[i] = true; } if (printBreaksFromBI) { System.out.println(); } // Find the break positions using reverse iteration for (i=bi.last(); i != BreakIterator.DONE; i=bi.previous()) { if (i < 0 || i > testText.length()) { errln(name + " break monkey test: Out of range value returned by breakIterator.next()" + name); break; } reverseBreaks[i] = true; } // Find the break positions using isBoundary() tests. for (i=0; i<=testText.length(); i++) { isBoundaryBreaks[i] = bi.isBoundary(i); } // Find the break positions using the following() function. lastBreakPos = 0; followingBreaks[0] = true; for (i=0; i testText.length() || breakPos > lastBreakPos && lastBreakPos > i ) { errln(name + " break monkey test: " + "Out of range value returned by BreakIterator::following().\n" + "index=" + i + "following returned=" + breakPos + "lastBreak=" + lastBreakPos); precedingBreaks[i] = !expectedBreaks[i]; // Forces an error. } else { followingBreaks[breakPos] = true; lastBreakPos = breakPos; } } // Find the break positions using the preceding() function. lastBreakPos = testText.length(); precedingBreaks[testText.length()] = true; for (i=testText.length(); i>0; i--) { breakPos = bi.preceding(i); if (breakPos >= i || breakPos > lastBreakPos || breakPos < 0 || breakPos < lastBreakPos && lastBreakPos < i ) { errln(name + " break monkey test: " + "Out of range value returned by BreakIterator::preceding().\n" + "index=" + i + "preceding returned=" + breakPos + "lastBreak=" + lastBreakPos); precedingBreaks[i] = !expectedBreaks[i]; // Forces an error. } else { precedingBreaks[breakPos] = true; lastBreakPos = breakPos; } } // Compare the expected and actual results. for (i=0; i<=testText.length(); i++) { String errorType = null; if (forwardBreaks[i] != expectedBreaks[i]) { errorType = "next()"; } else if (reverseBreaks[i] != forwardBreaks[i]) { errorType = "previous()"; } else if (isBoundaryBreaks[i] != expectedBreaks[i]) { errorType = "isBoundary()"; } else if (followingBreaks[i] != expectedBreaks[i]) { errorType = "following()"; } else if (precedingBreaks[i] != expectedBreaks[i]) { errorType = "preceding()"; } if (errorType != null) { // Format a range of the test text that includes the failure as // a data item that can be included in the rbbi test data file. // Start of the range is the last point where expected and actual results // both agreed that there was a break position. int startContext = i; int count = 0; for (;;) { if (startContext==0) { break; } startContext --; if (expectedBreaks[startContext]) { if (count == 2) break; count ++; } } // End of range is two expected breaks past the start position. int endContext = i + 1; int ci; for (ci=0; ci<2; ci++) { // Number of items to include in error text. for (;;) { if (endContext >= testText.length()) {break;} if (expectedBreaks[endContext-1]) { if (count == 0) break; count --; } endContext ++; } } // Format looks like "<>\uabcd\uabcd<>\U0001abcd..." StringBuffer errorText = new StringBuffer(); int c; // Char from test data for (ci = startContext; ci <= endContext && ci != -1; ci = nextCP(testText, ci)) { if (ci == i) { // This is the location of the error. errorText.append("---------------------------------\n"); } else if (expectedBreaks[ci]) { // This a non-error expected break position. errorText.append("------------------------------------\n"); } if (ci < testText.length()) { c = UTF16.charAt(testText, ci); appendCharToBuf(errorText, c, 11); String gc = UCharacter.getPropertyValueName(UProperty.GENERAL_CATEGORY, UCharacter.getType(c), UProperty.NameChoice.SHORT); appendToBuf(errorText, gc, 8); int extraProp = UCharacter.getIntPropertyValue(c, mk.fCharProperty); String extraPropValue = UCharacter.getPropertyValueName(mk.fCharProperty, extraProp, UProperty.NameChoice.LONG); appendToBuf(errorText, extraPropValue, 20); String charName = UCharacter.getExtendedName(c); appendToBuf(errorText, charName, 40); errorText.append('\n'); } } if (ci == testText.length() && ci != -1) { errorText.append("<>"); } errorText.append("\n"); // Output the error errln(name + " break monkey test error. " + (expectedBreaks[i]? "Break expected but not found." : "Break found but not expected.") + "\nOperation = " + errorType + "; random seed = " + seed + "; buf Idx = " + i + "\n" + errorText); break; } } loopCount++; } } public void TestCharMonkey() { int loopCount = 500; int seed = 1; if (params.inclusion >= 9) { loopCount = 10000; } RBBICharMonkey m = new RBBICharMonkey(); BreakIterator bi = BreakIterator.getCharacterInstance(Locale.US); RunMonkey(bi, m, "char", seed, loopCount); } public void TestWordMonkey() { int loopCount = 500; int seed = 1; if (params.inclusion >= 9) { loopCount = 10000; } logln("Word Break Monkey Test"); RBBIWordMonkey m = new RBBIWordMonkey(); BreakIterator bi = BreakIterator.getWordInstance(Locale.US); RunMonkey(bi, m, "word", seed, loopCount); } public void TestLineMonkey() { int loopCount = 500; int seed = 1; if (params.inclusion >= 9) { loopCount = 10000; } logln("Line Break Monkey Test"); RBBILineMonkey m = new RBBILineMonkey(); BreakIterator bi = BreakIterator.getLineInstance(Locale.US); if (params == null) { loopCount = 50; } RunMonkey(bi, m, "line", seed, loopCount); } public void TestSentMonkey() { int loopCount = 500; int seed = 1; if (params.inclusion >= 9) { loopCount = 3000; } logln("Sentence Break Monkey Test"); RBBISentenceMonkey m = new RBBISentenceMonkey(); BreakIterator bi = BreakIterator.getSentenceInstance(Locale.US); if (params == null) { loopCount = 30; } RunMonkey(bi, m, "sent", seed, loopCount); } // // Round-trip monkey tests. // Verify that break iterators created from the rule source from the default // break iterators still pass the monkey test for the iterator type. // // This is a major test for the Rule Compiler. The default break iterators are built // from pre-compiled binary rule data that was created using ICU4C; these // round-trip rule recompile tests verify that the Java rule compiler can // rebuild break iterators from the original source rules. // public void TestRTCharMonkey() { int loopCount = 200; int seed = 1; if (params.inclusion >= 9) { loopCount = 2000; } RBBICharMonkey m = new RBBICharMonkey(); BreakIterator bi = BreakIterator.getCharacterInstance(Locale.US); String rules = bi.toString(); BreakIterator rtbi = new RuleBasedBreakIterator(rules); RunMonkey(rtbi, m, "char", seed, loopCount); } public void TestRTWordMonkey() { int loopCount = 200; int seed = 1; if (params.inclusion >= 9) { loopCount = 2000; } logln("Word Break Monkey Test"); RBBIWordMonkey m = new RBBIWordMonkey(); BreakIterator bi = BreakIterator.getWordInstance(Locale.US); String rules = bi.toString(); BreakIterator rtbi = new RuleBasedBreakIterator(rules); RunMonkey(rtbi, m, "word", seed, loopCount); } public void TestRTLineMonkey() { int loopCount = 200; int seed = 1; if (params.inclusion >= 9) { loopCount = 2000; } logln("Line Break Monkey Test"); RBBILineMonkey m = new RBBILineMonkey(); BreakIterator bi = BreakIterator.getLineInstance(Locale.US); String rules = bi.toString(); BreakIterator rtbi = new RuleBasedBreakIterator(rules); if (params == null) { loopCount = 50; } RunMonkey(rtbi, m, "line", seed, loopCount); } public void TestRTSentMonkey() { int loopCount = 200; int seed = 1; if (params.inclusion >= 9) { loopCount = 1000; } logln("Sentence Break Monkey Test"); RBBISentenceMonkey m = new RBBISentenceMonkey(); BreakIterator bi = BreakIterator.getSentenceInstance(Locale.US); String rules = bi.toString(); BreakIterator rtbi = new RuleBasedBreakIterator(rules); if (params == null) { loopCount = 30; } RunMonkey(rtbi, m, "sent", seed, loopCount); } }