/*
 **********************************************************************
 * Copyright (c) 2002-2008, International Business Machines           *
 * Corporation and others. All Rights Reserved.                       *
 **********************************************************************
 */
\r
7 package com.ibm.icu.dev.test.perf;
\r
9 import java.io.FileInputStream;
\r
10 import java.util.ArrayList;
\r
12 public class BreakIteratorPerformanceTest extends PerfTest {
\r
14 String fileContents;
\r
16 com.ibm.icu.text.BreakIterator iSentenceIter;
\r
17 com.ibm.icu.text.BreakIterator iWordIter;
\r
18 com.ibm.icu.text.BreakIterator iLineIter;
\r
19 com.ibm.icu.text.BreakIterator iCharacterIter;
\r
20 java.text.BreakIterator jSentenceIter;
\r
21 java.text.BreakIterator jWordIter;
\r
22 java.text.BreakIterator jLineIter;
\r
23 java.text.BreakIterator jCharacterIter;
\r
24 String[] iSentences;
\r
27 String[] iCharacters;
\r
28 String[] jSentences;
\r
31 String[] jCharacters;
\r
33 public static void main(String[] args) throws Exception {
\r
34 new BreakIteratorPerformanceTest().run(args);
\r
37 protected void setup(String[] args) {
\r
39 // read in the input file, being careful with a possible BOM
\r
40 FileInputStream in = new FileInputStream(fileName);
\r
41 BOMFreeReader reader = new BOMFreeReader(in, encoding);
\r
42 fileContents = new String(readToEOS(reader));
\r
44 // // get rid of any characters that may cause differences between ICU4J and Java BreakIterator
\r
45 // // fileContents = fileContents.replaceAll("[\t\f\r\n\\-/ ]+", " ");
\r
47 // StringTokenizer tokenizer = new StringTokenizer(fileContents, "\t\f\r\n-/ ");
\r
48 // while (tokenizer.hasMoreTokens())
\r
49 // res += tokenizer.nextToken() + " ";
\r
50 // fileContents = res.trim();
\r
52 // create the break iterators with respect to locale
\r
53 if (locale == null) {
\r
54 iSentenceIter = com.ibm.icu.text.BreakIterator.getSentenceInstance();
\r
55 iWordIter = com.ibm.icu.text.BreakIterator.getWordInstance();
\r
56 iLineIter = com.ibm.icu.text.BreakIterator.getLineInstance();
\r
57 iCharacterIter = com.ibm.icu.text.BreakIterator.getCharacterInstance();
\r
59 jSentenceIter = java.text.BreakIterator.getSentenceInstance();
\r
60 jWordIter = java.text.BreakIterator.getWordInstance();
\r
61 jLineIter = java.text.BreakIterator.getLineInstance();
\r
62 jCharacterIter = java.text.BreakIterator.getCharacterInstance();
\r
64 iSentenceIter = com.ibm.icu.text.BreakIterator.getSentenceInstance(locale);
\r
65 iWordIter = com.ibm.icu.text.BreakIterator.getWordInstance(locale);
\r
66 iLineIter = com.ibm.icu.text.BreakIterator.getLineInstance(locale);
\r
67 iCharacterIter = com.ibm.icu.text.BreakIterator.getCharacterInstance(locale);
\r
69 jSentenceIter = java.text.BreakIterator.getSentenceInstance(locale);
\r
70 jWordIter = java.text.BreakIterator.getWordInstance(locale);
\r
71 jLineIter = java.text.BreakIterator.getLineInstance(locale);
\r
72 jCharacterIter = java.text.BreakIterator.getCharacterInstance(locale);
\r
75 iSentences = init(iSentenceIter);
\r
76 iWords = init(iWordIter);
\r
77 iLines = init(iLineIter);
\r
78 iCharacters = init(iCharacterIter);
\r
79 jSentences = init(jSentenceIter);
\r
80 jWords = init(jWordIter);
\r
81 jLines = init(jLineIter);
\r
82 jCharacters = init(jCharacterIter);
\r
84 } catch (Exception ex) {
\r
85 ex.printStackTrace();
\r
86 throw new RuntimeException(ex.getMessage());
\r
89 // we created some heavy objects, so lets try to clean up a little before running the tests
\r
93 private String[] init(com.ibm.icu.text.BreakIterator iter) {
\r
94 // set the string to iterate on
\r
95 iter.setText(fileContents);
\r
97 // produce a token list
\r
98 ArrayList tokenList = new ArrayList();
\r
99 int start = iter.first();
\r
100 for (int end = iter.next(); end != com.ibm.icu.text.BreakIterator.DONE; start = end, end = iter.next())
\r
101 tokenList.add(fileContents.substring(start, end));
\r
103 // return the token list as a string array
\r
104 return (String[]) tokenList.toArray(new String[0]);
\r
107 private String[] init(java.text.BreakIterator iter) {
\r
108 // set the string to iterate on
\r
109 iter.setText(fileContents);
\r
111 // produce a token list
\r
112 ArrayList tokenList = new ArrayList();
\r
113 int start = iter.first();
\r
114 for (int end = iter.next(); end != com.ibm.icu.text.BreakIterator.DONE; start = end, end = iter.next())
\r
115 tokenList.add(fileContents.substring(start, end));
\r
117 // return the token list as a string array
\r
118 return (String[]) tokenList.toArray(new String[0]);
\r
121 PerfTest.Function createTestICU(final com.ibm.icu.text.BreakIterator iIter, final String[] correct,
\r
122 final String breakType) {
\r
123 return new PerfTest.Function() {
\r
124 public void call() {
\r
126 int start = iIter.first();
\r
127 for (int end = iIter.next(); end != com.ibm.icu.text.BreakIterator.DONE; start = end, end = iIter
\r
129 if (!correct[k++].equals(fileContents.substring(start, end)))
\r
130 throw new RuntimeException("ICU4J BreakIterator gave the wrong answer for " + breakType + " "
\r
131 + (k - 1) + " during the performance test. Cannot continue the performance test.");
\r
132 if (k != correct.length)
\r
133 throw new RuntimeException("ICU4J BreakIterator gave the wrong number of " + breakType
\r
134 + "s during the performance test. Cannot continue the performance test.");
\r
137 public long getOperationsPerIteration() {
\r
138 return fileContents.length();
\r
143 PerfTest.Function createTestJava(final java.text.BreakIterator jIter, final String[] correct, final String breakType) {
\r
144 return new PerfTest.Function() {
\r
145 public void call() {
\r
147 int start = jIter.first();
\r
148 for (int end = jIter.next(); end != java.text.BreakIterator.DONE; start = end, end = jIter.next())
\r
149 if (!correct[k++].equals(fileContents.substring(start, end)))
\r
150 throw new RuntimeException("Java BreakIterator gave the wrong answer for " + breakType + " "
\r
151 + (k - 1) + " during the performance test. Cannot continue the performance test.");
\r
152 if (k != correct.length)
\r
153 throw new RuntimeException("Java BreakIterator gave the wrong number of " + breakType
\r
154 + "s during the performance test. Cannot continue the performance test.");
\r
157 public long getOperationsPerIteration() {
\r
158 return fileContents.length();
\r
163 PerfTest.Function TestICUSentences() {
\r
164 return createTestICU(iSentenceIter, iSentences, "sentence");
\r
167 PerfTest.Function TestICUWords() {
\r
168 return createTestICU(iWordIter, iWords, "word");
\r
171 PerfTest.Function TestICULines() {
\r
172 return createTestICU(iLineIter, iLines, "line");
\r
175 PerfTest.Function TestICUCharacters() {
\r
176 return createTestICU(iCharacterIter, iCharacters, "character");
\r
179 PerfTest.Function TestJavaSentences() {
\r
180 return createTestJava(jSentenceIter, jSentences, "sentence");
\r
183 PerfTest.Function TestJavaWords() {
\r
184 return createTestJava(jWordIter, jWords, "word");
\r
187 PerfTest.Function TestJavaLines() {
\r
188 return createTestJava(jLineIter, jLines, "line");
\r
191 PerfTest.Function TestJavaCharacters() {
\r
192 return createTestJava(jCharacterIter, jCharacters, "character");
\r