]> gitweb.fperrin.net Git - Dictionary.git/blob - jars/icu4j-4_2_1-src/src/com/ibm/icu/dev/test/perf/BreakIteratorPerformanceTest.java
icu4jsrc
[Dictionary.git] / jars / icu4j-4_2_1-src / src / com / ibm / icu / dev / test / perf / BreakIteratorPerformanceTest.java
1 /*\r
2  **********************************************************************\r
3  * Copyright (c) 2002-2008, International Business Machines           *\r
4  * Corporation and others.  All Rights Reserved.                      *\r
5  **********************************************************************\r
6  */\r
7 package com.ibm.icu.dev.test.perf;\r
8 \r
9 import java.io.FileInputStream;\r
10 import java.util.ArrayList;\r
11 \r
12 public class BreakIteratorPerformanceTest extends PerfTest {\r
13 \r
14     String fileContents;\r
15 \r
16     com.ibm.icu.text.BreakIterator iSentenceIter;\r
17     com.ibm.icu.text.BreakIterator iWordIter;\r
18     com.ibm.icu.text.BreakIterator iLineIter;\r
19     com.ibm.icu.text.BreakIterator iCharacterIter;\r
20     java.text.BreakIterator jSentenceIter;\r
21     java.text.BreakIterator jWordIter;\r
22     java.text.BreakIterator jLineIter;\r
23     java.text.BreakIterator jCharacterIter;\r
24     String[] iSentences;\r
25     String[] iWords;\r
26     String[] iLines;\r
27     String[] iCharacters;\r
28     String[] jSentences;\r
29     String[] jWords;\r
30     String[] jLines;\r
31     String[] jCharacters;\r
32 \r
33     public static void main(String[] args) throws Exception {\r
34         new BreakIteratorPerformanceTest().run(args);\r
35     }\r
36 \r
37     protected void setup(String[] args) {\r
38         try {\r
39             // read in the input file, being careful with a possible BOM\r
40             FileInputStream in = new FileInputStream(fileName);\r
41             BOMFreeReader reader = new BOMFreeReader(in, encoding);\r
42             fileContents = new String(readToEOS(reader));\r
43 \r
44             // // get rid of any characters that may cause differences between ICU4J and Java BreakIterator\r
45             // // fileContents = fileContents.replaceAll("[\t\f\r\n\\-/ ]+", " ");\r
46             // String res = "";\r
47             // StringTokenizer tokenizer = new StringTokenizer(fileContents, "\t\f\r\n-/ ");\r
48             // while (tokenizer.hasMoreTokens())\r
49             // res += tokenizer.nextToken() + " ";\r
50             // fileContents = res.trim();\r
51 \r
52             // create the break iterators with respect to locale\r
53             if (locale == null) {\r
54                 iSentenceIter = com.ibm.icu.text.BreakIterator.getSentenceInstance();\r
55                 iWordIter = com.ibm.icu.text.BreakIterator.getWordInstance();\r
56                 iLineIter = com.ibm.icu.text.BreakIterator.getLineInstance();\r
57                 iCharacterIter = com.ibm.icu.text.BreakIterator.getCharacterInstance();\r
58 \r
59                 jSentenceIter = java.text.BreakIterator.getSentenceInstance();\r
60                 jWordIter = java.text.BreakIterator.getWordInstance();\r
61                 jLineIter = java.text.BreakIterator.getLineInstance();\r
62                 jCharacterIter = java.text.BreakIterator.getCharacterInstance();\r
63             } else {\r
64                 iSentenceIter = com.ibm.icu.text.BreakIterator.getSentenceInstance(locale);\r
65                 iWordIter = com.ibm.icu.text.BreakIterator.getWordInstance(locale);\r
66                 iLineIter = com.ibm.icu.text.BreakIterator.getLineInstance(locale);\r
67                 iCharacterIter = com.ibm.icu.text.BreakIterator.getCharacterInstance(locale);\r
68 \r
69                 jSentenceIter = java.text.BreakIterator.getSentenceInstance(locale);\r
70                 jWordIter = java.text.BreakIterator.getWordInstance(locale);\r
71                 jLineIter = java.text.BreakIterator.getLineInstance(locale);\r
72                 jCharacterIter = java.text.BreakIterator.getCharacterInstance(locale);\r
73             }\r
74 \r
75             iSentences = init(iSentenceIter);\r
76             iWords = init(iWordIter);\r
77             iLines = init(iLineIter);\r
78             iCharacters = init(iCharacterIter);\r
79             jSentences = init(jSentenceIter);\r
80             jWords = init(jWordIter);\r
81             jLines = init(jLineIter);\r
82             jCharacters = init(jCharacterIter);\r
83 \r
84         } catch (Exception ex) {\r
85             ex.printStackTrace();\r
86             throw new RuntimeException(ex.getMessage());\r
87         }\r
88 \r
89         // we created some heavy objects, so lets try to clean up a little before running the tests\r
90         gc();\r
91     }\r
92 \r
93     private String[] init(com.ibm.icu.text.BreakIterator iter) {\r
94         // set the string to iterate on\r
95         iter.setText(fileContents);\r
96 \r
97         // produce a token list\r
98         ArrayList tokenList = new ArrayList();\r
99         int start = iter.first();\r
100         for (int end = iter.next(); end != com.ibm.icu.text.BreakIterator.DONE; start = end, end = iter.next())\r
101             tokenList.add(fileContents.substring(start, end));\r
102 \r
103         // return the token list as a string array\r
104         return (String[]) tokenList.toArray(new String[0]);\r
105     }\r
106 \r
107     private String[] init(java.text.BreakIterator iter) {\r
108         // set the string to iterate on\r
109         iter.setText(fileContents);\r
110 \r
111         // produce a token list\r
112         ArrayList tokenList = new ArrayList();\r
113         int start = iter.first();\r
114         for (int end = iter.next(); end != com.ibm.icu.text.BreakIterator.DONE; start = end, end = iter.next())\r
115             tokenList.add(fileContents.substring(start, end));\r
116 \r
117         // return the token list as a string array\r
118         return (String[]) tokenList.toArray(new String[0]);\r
119     }\r
120 \r
121     PerfTest.Function createTestICU(final com.ibm.icu.text.BreakIterator iIter, final String[] correct,\r
122             final String breakType) {\r
123         return new PerfTest.Function() {\r
124             public void call() {\r
125                 int k = 0;\r
126                 int start = iIter.first();\r
127                 for (int end = iIter.next(); end != com.ibm.icu.text.BreakIterator.DONE; start = end, end = iIter\r
128                         .next())\r
129                     if (!correct[k++].equals(fileContents.substring(start, end)))\r
130                         throw new RuntimeException("ICU4J BreakIterator gave the wrong answer for " + breakType + " "\r
131                                 + (k - 1) + " during the performance test. Cannot continue the performance test.");\r
132                 if (k != correct.length)\r
133                     throw new RuntimeException("ICU4J BreakIterator gave the wrong number of " + breakType\r
134                             + "s during the performance test. Cannot continue the performance test.");\r
135             }\r
136 \r
137             public long getOperationsPerIteration() {\r
138                 return fileContents.length();\r
139             }\r
140         };\r
141     }\r
142 \r
143     PerfTest.Function createTestJava(final java.text.BreakIterator jIter, final String[] correct, final String breakType) {\r
144         return new PerfTest.Function() {\r
145             public void call() {\r
146                 int k = 0;\r
147                 int start = jIter.first();\r
148                 for (int end = jIter.next(); end != java.text.BreakIterator.DONE; start = end, end = jIter.next())\r
149                     if (!correct[k++].equals(fileContents.substring(start, end)))\r
150                         throw new RuntimeException("Java BreakIterator gave the wrong answer for " + breakType + " "\r
151                                 + (k - 1) + " during the performance test. Cannot continue the performance test.");\r
152                 if (k != correct.length)\r
153                     throw new RuntimeException("Java BreakIterator gave the wrong number of " + breakType\r
154                             + "s during the performance test. Cannot continue the performance test.");\r
155             }\r
156 \r
157             public long getOperationsPerIteration() {\r
158                 return fileContents.length();\r
159             }\r
160         };\r
161     }\r
162 \r
163     PerfTest.Function TestICUSentences() {\r
164         return createTestICU(iSentenceIter, iSentences, "sentence");\r
165     }\r
166 \r
167     PerfTest.Function TestICUWords() {\r
168         return createTestICU(iWordIter, iWords, "word");\r
169     }\r
170 \r
171     PerfTest.Function TestICULines() {\r
172         return createTestICU(iLineIter, iLines, "line");\r
173     }\r
174 \r
175     PerfTest.Function TestICUCharacters() {\r
176         return createTestICU(iCharacterIter, iCharacters, "character");\r
177     }\r
178 \r
179     PerfTest.Function TestJavaSentences() {\r
180         return createTestJava(jSentenceIter, jSentences, "sentence");\r
181     }\r
182 \r
183     PerfTest.Function TestJavaWords() {\r
184         return createTestJava(jWordIter, jWords, "word");\r
185     }\r
186 \r
187     PerfTest.Function TestJavaLines() {\r
188         return createTestJava(jLineIter, jLines, "line");\r
189     }\r
190 \r
191     PerfTest.Function TestJavaCharacters() {\r
192         return createTestJava(jCharacterIter, jCharacters, "character");\r
193     }\r
194 }\r