]> gitweb.fperrin.net Git - Dictionary.git/blob - jars/icu4j-52_1/main/tests/core/src/com/ibm/icu/dev/test/rbbi/RBBITestExtended.java
Clean up imports.
[Dictionary.git] / jars / icu4j-52_1 / main / tests / core / src / com / ibm / icu / dev / test / rbbi / RBBITestExtended.java
1 /*
2  * Created on May 5, 2004
3  *
4  * Copyright (C) 2004-2013 International Business Machines Corporation and others.
5  * All Rights Reserved.
6  *
7  */
8 package com.ibm.icu.dev.test.rbbi;
9
10 import java.io.IOException;
11 import java.io.InputStream;
12 import java.io.InputStreamReader;
13 import java.util.Arrays;
14
15 import com.ibm.icu.dev.test.TestFmwk;
16 import com.ibm.icu.impl.Utility;
17 import com.ibm.icu.lang.UCharacter;
18 import com.ibm.icu.text.BreakIterator;
19 import com.ibm.icu.text.UTF16;
20 import com.ibm.icu.util.ULocale;
21
22
23 /**
24  * Rule based break iterator data driven test.
25  *      Perform the tests from the file rbbitst.txt.
26  *      The test data file is common to both ICU4C and ICU4J.
27  *      See the data file for a description of the tests.
28  *
29  */
30 public class RBBITestExtended extends TestFmwk {
31
32     public static void main(String[] args)throws Exception {
33         new RBBITestExtended().run(args);
34     }
35
36
37 public RBBITestExtended() {
38     }
39
40
41
42 static class TestParams {
43     BreakIterator   bi;
44     StringBuffer    dataToBreak    = new StringBuffer();
45     int[]           expectedBreaks = new int[1000];
46     int[]           srcLine        = new int[1000];
47     int[]           srcCol         = new int[1000];
48     ULocale         currentLocale  = new ULocale("en_US");
49 }
50
51
52 public void TestExtended() {
53     TestParams     tp = new TestParams();
54
55
56     //
57     //  Open and read the test data file.
58     //
59     InputStreamReader isr = null;
60     StringBuffer  testFileBuf = new StringBuffer();
61     try {
62         InputStream is = RBBITestExtended.class.getResourceAsStream("rbbitst.txt");
63         if (is == null) {
64             errln("Could not open test data file rbbitst.txt");
65             return;
66         }
67         isr = new InputStreamReader(is, "UTF-8");
68         int c;
69         int count = 0;
70         for (;;) {
71             c = isr.read();
72             if (c < 0) {
73                 break;
74             }
75             count++;
76             if (c==0xFEFF && count==1) {
77                // BOM in the test data file.  Discard it.
78                continue;
79             }
80
81             UTF16.append(testFileBuf, c);
82         }
83
84     } catch (IOException e) {
85         errln(e.toString());
86         return;
87     }
88
89     String testString = testFileBuf.toString();
90
91
92     final int  PARSE_COMMENT = 1;
93     final int  PARSE_TAG     = 2;
94     final int  PARSE_DATA    = 3;
95     final int  PARSE_NUM     = 4;
96
97     int parseState = PARSE_TAG;
98
99     int savedState = PARSE_TAG;
100
101     final char CH_LF        = 0x0a;
102     final char CH_CR        = 0x0d;
103     final char CH_HASH      = 0x23;
104     /*static const UChar CH_PERIOD    = 0x2e;*/
105     final char CH_LT        = 0x3c;
106     final char CH_GT        = 0x3e;
107     final char CH_BACKSLASH = 0x5c;
108     final char CH_BULLET    = 0x2022;
109
110     int    lineNum  = 1;
111     int    colStart = 0;
112     int    column   = 0;
113     int    charIdx  = 0;
114     int    i;
115
116     int    tagValue = 0;       // The numeric value of a <nnn> tag.
117     int    len = testString.length();
118
119     for (charIdx = 0; charIdx < len; ) {
120         int  c = UTF16.charAt(testString, charIdx);
121         charIdx++;
122         if (c == CH_CR && charIdx<len && testString.charAt(charIdx) == CH_LF) {
123             // treat CRLF as a unit
124             c = CH_LF;
125             charIdx++;
126         }
127         if (c == CH_LF || c == CH_CR) {
128             lineNum++;
129             colStart = charIdx;
130         }
131         column = charIdx - colStart + 1;
132
133         switch (parseState) {
134         case PARSE_COMMENT:
135             if (c == 0x0a || c == 0x0d) {
136                 parseState = savedState;
137             }
138             break;
139
140         case PARSE_TAG:
141             {
142             if (c == CH_HASH) {
143                 parseState = PARSE_COMMENT;
144                 savedState = PARSE_TAG;
145                 break;
146             }
147             if (UCharacter.isWhitespace(c)) {
148                 break;
149             }
150            if (testString.startsWith("<word>", charIdx-1)) {
151                 tp.bi = BreakIterator.getWordInstance(tp.currentLocale);
152                 charIdx += 5;
153                 break;
154             }
155             if (testString.startsWith("<char>", charIdx-1)) {
156                 tp.bi = BreakIterator.getCharacterInstance(tp.currentLocale);
157                 charIdx += 5;
158                 break;
159             }
160             if (testString.startsWith("<line>", charIdx-1)) {
161                 tp.bi = BreakIterator.getLineInstance(tp.currentLocale);
162                 charIdx += 5;
163                 break;
164             }
165             if (testString.startsWith("<sent>", charIdx-1)) {
166                 tp.bi = BreakIterator.getSentenceInstance(tp.currentLocale);
167                 charIdx += 5;
168                 break;
169             }
170             if (testString.startsWith("<title>", charIdx-1)) {
171                 tp.bi = BreakIterator.getTitleInstance(tp.currentLocale);
172                 charIdx += 6;
173                 break;
174             }
175             if (testString.startsWith("<locale ", charIdx-1)) {
176                 int closeIndex = testString.indexOf(">", charIdx);
177                 if (closeIndex < 0) {
178                     errln("line" + lineNum + ": missing close on <locale  tag.");
179                     break;
180                 }
181                 String localeName = testString.substring(charIdx+6, closeIndex);
182                 localeName = localeName.trim();
183                 tp.currentLocale = new ULocale(localeName);
184                 charIdx = closeIndex+1;
185                 break;
186             }
187             if (testString.startsWith("<data>", charIdx-1)) {
188                 parseState = PARSE_DATA;
189                 charIdx += 5;
190                 tp.dataToBreak.setLength(0);
191                 Arrays.fill(tp.expectedBreaks, 0);
192                 Arrays.fill(tp.srcCol, 0);
193                 Arrays.fill(tp.srcLine, 0);
194                 break;
195             }
196
197             errln("line" + lineNum + ": Tag expected in test file.");
198             return;
199             //parseState = PARSE_COMMENT;
200             //savedState = PARSE_DATA;
201             }
202
203         case PARSE_DATA:
204             if (c == CH_BULLET) {
205                 int  breakIdx = tp.dataToBreak.length();
206                 tp.expectedBreaks[breakIdx] = -1;
207                 tp.srcLine[breakIdx]        = lineNum;
208                 tp.srcCol[breakIdx]         = column;
209                 break;
210             }
211
212             if (testString.startsWith("</data>", charIdx-1))  {
213                 // Add final entry to mappings from break location to source file position.
214                 //  Need one extra because last break position returned is after the
215                 //    last char in the data, not at the last char.
216                 int idx = tp.dataToBreak.length();
217                 tp.srcLine[idx] = lineNum;
218                 tp.srcCol[idx]  = column;
219
220                 parseState = PARSE_TAG;
221                 charIdx += 6;
222
223                 // RUN THE TEST!
224                 executeTest(tp);
225                 break;
226             }
227
228            if (testString.startsWith("\\N{", charIdx-1)) {
229                int nameEndIdx = testString.indexOf('}', charIdx);
230                if (nameEndIdx == -1) {
231                    errln("Error in named character in test file at line " + lineNum +
232                            ", col " + column);
233                }
234                 // Named character, e.g. \N{COMBINING GRAVE ACCENT}
235                 // Get the code point from the name and insert it into the test data.
236                 String charName = testString.substring(charIdx+2, nameEndIdx);
237                 c = UCharacter.getCharFromName(charName);
238                 if (c == -1) {
239                     errln("Error in named character in test file at line " + lineNum +
240                             ", col " + column);
241                 } else {
242                     // Named code point was recognized.  Insert it
243                     //   into the test data.
244                     UTF16.append(tp.dataToBreak, c);
245                     for (i = tp.dataToBreak.length()-1; i>=0 && tp.srcLine[i]==0; i--) {
246                         tp.srcLine[i] = lineNum;
247                         tp.srcCol[i]  = column;
248                     }
249
250                  }
251                 if (nameEndIdx > charIdx) {
252                     charIdx = nameEndIdx+1;
253                 }
254                 break;
255             }
256
257             if (testString.startsWith("<>", charIdx-1)) {
258                 charIdx++;
259                 int  breakIdx = tp.dataToBreak.length();
260                 tp.expectedBreaks[breakIdx] = -1;
261                 tp.srcLine[breakIdx]        = lineNum;
262                 tp.srcCol[breakIdx]         = column;
263                 break;
264             }
265
266             if (c == CH_LT) {
267                 tagValue   = 0;
268                 parseState = PARSE_NUM;
269                 break;
270             }
271
272             if (c == CH_HASH && column==3) {   // TODO:  why is column off so far?
273                 parseState = PARSE_COMMENT;
274                 savedState = PARSE_DATA;
275                 break;
276             }
277
278             if (c == CH_BACKSLASH) {
279                 // Check for \ at end of line, a line continuation.
280                 //     Advance over (discard) the newline
281                 int cp = UTF16.charAt(testString, charIdx);
282                 if (cp == CH_CR && charIdx<len && UTF16.charAt(testString, charIdx+1) == CH_LF) {
283                     // We have a CR LF
284                     //  Need an extra increment of the input ptr to move over both of them
285                     charIdx++;
286                 }
287                 if (cp == CH_LF || cp == CH_CR) {
288                     lineNum++;
289                     column   = 0;
290                     charIdx++;
291                     colStart = charIdx;
292                     break;
293                 }
294
295                 // Let unescape handle the back slash.
296                 int  charIdxAr[] = new int[1];
297                 charIdxAr[0] = charIdx;
298                 cp = Utility.unescapeAt(testString, charIdxAr);
299                 if (cp != -1) {
300                     // Escape sequence was recognized.  Insert the char
301                     //   into the test data.
302                     charIdx = charIdxAr[0];
303                     UTF16.append(tp.dataToBreak, cp);
304                     for (i=tp.dataToBreak.length()-1; i>=0 && tp.srcLine[i]==0; i--) {
305                         tp.srcLine[i] = lineNum;
306                         tp.srcCol[i]  = column;
307                     }
308
309                     break;
310                 }
311
312
313                 // Not a recognized backslash escape sequence.
314                 // Take the next char as a literal.
315                 //  TODO:  Should this be an error?
316                 c = UTF16.charAt(testString,charIdx);
317                 charIdx = UTF16.moveCodePointOffset(testString, charIdx, 1);
318              }
319
320             // Normal, non-escaped data char.
321             UTF16.append(tp.dataToBreak, c);
322
323             // Save the mapping from offset in the data to line/column numbers in
324             //   the original input file.  Will be used for better error messages only.
325             //   If there's an expected break before this char, the slot in the mapping
326             //     vector will already be set for this char; don't overwrite it.
327             for (i=tp.dataToBreak.length()-1; i>=0 && tp.srcLine[i]==0; i--) {
328                 tp.srcLine[i] = lineNum;
329                 tp.srcCol[i]  = column;
330             }
331             break;
332
333
334         case PARSE_NUM:
335             // We are parsing an expected numeric tag value, like <1234>,
336             //   within a chunk of data.
337             if (UCharacter.isWhitespace(c)) {
338                 break;
339             }
340
341             if (c == CH_GT) {
342                 // Finished the number.  Add the info to the expected break data,
343                 //   and switch parse state back to doing plain data.
344                 parseState = PARSE_DATA;
345                 if (tagValue == 0) {
346                     tagValue = -1;
347                 }
348                 int  breakIdx = tp.dataToBreak.length();
349                 tp.expectedBreaks[breakIdx] = tagValue;
350                 tp.srcLine[breakIdx]        = lineNum;
351                 tp.srcCol[breakIdx]         = column;
352                 break;
353             }
354
355             if (UCharacter.isDigit(c)) {
356                 tagValue = tagValue*10 + UCharacter.digit(c);
357                 break;
358             }
359
360             errln("Syntax Error in test file at line "+ lineNum +", col %d" + column);
361             return;
362
363             // parseState = PARSE_COMMENT;   // TODO: unreachable.  Don't stop on errors.
364             // break;
365         }
366
367
368
369     }
370 }
371
372 void executeTest(TestParams t) {
373     int    bp;
374     int    prevBP;
375     int    i;
376
377     if (t.bi == null) {
378         return;
379     }
380
381     t.bi.setText(t.dataToBreak.toString());
382     //
383     //  Run the iterator forward
384     //
385     prevBP = -1;
386     for (bp = t.bi.first(); bp != BreakIterator.DONE; bp = t.bi.next()) {
387         if (prevBP ==  bp) {
388             // Fail for lack of forward progress.
389             errln("Forward Iteration, no forward progress.  Break Pos=" + bp +
390                     "  File line,col=" + t.srcLine[bp] + ", " + t.srcCol[bp]);
391             break;
392         }
393
394         // Check that there were we didn't miss an expected break between the last one
395         //  and this one.
396         for (i=prevBP+1; i<bp; i++) {
397             if (t.expectedBreaks[i] != 0) {
398                 errln("Forward Iteration, break expected, but not found.  Pos=" + i +
399                     "  File line,col= " + t.srcLine[i] + ", " + t.srcCol[i]);
400             }
401         }
402
403         // Check that the break we did find was expected
404         if (t.expectedBreaks[bp] == 0) {
405             errln("Forward Iteration, break found, but not expected.  Pos=" + bp +
406                     "  File line,col= " + t.srcLine[bp] + ", " + t.srcCol[bp]);
407         } else {
408             // The break was expected.
409             //   Check that the {nnn} tag value is correct.
410             int expectedTagVal = t.expectedBreaks[bp];
411             if (expectedTagVal == -1) {
412                 expectedTagVal = 0;
413             }
414             int line = t.srcLine[bp];
415             int rs = t.bi.getRuleStatus();
416             if (rs != expectedTagVal) {
417                 errln("Incorrect status for forward break.  Pos = " + bp +
418                         ".  File line,col = " + line + ", " + t.srcCol[bp] + "\n" +
419                       "          Actual, Expected status = " + rs + ", " + expectedTagVal);
420             }
421             int[] fillInArray = new int[4];
422             int numStatusVals = t.bi.getRuleStatusVec(fillInArray);
423             assertTrue("", numStatusVals >= 1);
424             assertEquals("", expectedTagVal, fillInArray[0]);
425         }
426
427
428         prevBP = bp;
429     }
430
431     // Verify that there were no missed expected breaks after the last one found
432     for (i=prevBP+1; i<t.dataToBreak.length()+1; i++) {
433         if (t.expectedBreaks[i] != 0) {
434             errln("Forward Iteration, break expected, but not found.  Pos=" + i +
435                     "  File line,col= " + t.srcLine[i] + ", " + t.srcCol[i]);
436        }
437     }
438
439
440     //
441     //  Run the iterator backwards, verify that the same breaks are found.
442     //
443     prevBP = t.dataToBreak.length()+2;  // start with a phony value for the last break pos seen.
444     for (bp = t.bi.last(); bp != BreakIterator.DONE; bp = t.bi.previous()) {
445         if (prevBP ==  bp) {
446             // Fail for lack of progress.
447             errln("Reverse Iteration, no progress.  Break Pos=" + bp +
448                     "File line,col=" + t.srcLine[bp] + " " +  t.srcCol[bp]);
449             break;
450         }
451
452         // Check that we didn't miss an expected break between the last one
453         //  and this one.  (UVector returns zeros for index out of bounds.)
454         for (i=prevBP-1; i>bp; i--) {
455             if (t.expectedBreaks[i] != 0) {
456                 errln("Reverse Itertion, break expected, but not found.  Pos=" + i +
457                     "  File line,col= " + t.srcLine[i] + ", " + t.srcCol[i]);
458             }
459         }
460
461         // Check that the break we did find was expected
462         if (t.expectedBreaks[bp] == 0) {
463             errln("Reverse Itertion, break found, but not expected.  Pos=" + bp +
464                     "  File line,col= " + t.srcLine[bp] + ", " + t.srcCol[bp]);
465         } else {
466             // The break was expected.
467             //   Check that the {nnn} tag value is correct.
468             int expectedTagVal = t.expectedBreaks[bp];
469             if (expectedTagVal == -1) {
470                 expectedTagVal = 0;
471             }
472             int line = t.srcLine[bp];
473             int rs = t.bi.getRuleStatus();
474             if (rs != expectedTagVal) {
475                 errln("Incorrect status for reverse break.  Pos=  " + bp +
476                         "File line,col= " + line + ", " + t.srcCol[bp] + "\n" +
477                       "          Actual, Expected status = " + rs + ", " + expectedTagVal);
478             }
479         }
480
481         prevBP = bp;
482     }
483
484     // Verify that there were no missed breaks prior to the last one found
485     for (i=prevBP-1; i>=0; i--) {
486         if (t.expectedBreaks[i] != 0) {
487             errln("Reverse Itertion, break expected, but not found.  Pos=" + i +
488                     "  File line,col= " + t.srcLine[i] + ", " + t.srcCol[i]);
489          }
490     }
491     // Check isBoundary()
492     for (i=0; i<=t.dataToBreak.length(); i++) {
493         boolean boundaryExpected = (t.expectedBreaks[i] != 0);
494         boolean boundaryFound    = t.bi.isBoundary(i);
495         if (boundaryExpected != boundaryFound) {
496             errln("isBoundary(" + i + ") incorrect.\n" +
497                   "  File line,col= " + t.srcLine[i] + ", " + t.srcCol[i] +
498                   "    Expected, Actual= " + boundaryExpected + ", " + boundaryFound);
499         }
500     }
501
502     // Check following()
503     for (i=0; i<=t.dataToBreak.length(); i++) {
504         int actualBreak = t.bi.following(i);
505         int expectedBreak = BreakIterator.DONE;
506         for (int j=i+1; j < t.expectedBreaks.length; j++) {
507             if (t.expectedBreaks[j] != 0) {
508                 expectedBreak = j;
509                 break;
510             }
511         }
512         if (expectedBreak != actualBreak) {
513             errln("following(" + i + ") incorrect.\n" +
514                     "  File line,col= " + t.srcLine[i] + ", " + t.srcCol[i] +
515                     "    Expected, Actual= " + expectedBreak + ", " + actualBreak);
516         }
517     }
518
519     // Check preceding()
520     for (i=t.dataToBreak.length(); i>=0; i--) {
521         int actualBreak = t.bi.preceding(i);
522         int expectedBreak = BreakIterator.DONE;
523
524         for (int j=i-1; j >= 0; j--) {
525             if (t.expectedBreaks[j] != 0) {
526                 expectedBreak = j;
527                 break;
528             }
529         }
530         if (expectedBreak != actualBreak) {
531             errln("preceding(" + i + ") incorrect.\n" +
532                     "  File line,col= " + t.srcLine[i] + ", " + t.srcCol[i] +
533                     "    Expected, Actual= " + expectedBreak + ", " + actualBreak);
534         }
535     }
536
537 }
538
539
540
541
542 }