2 *******************************************************************************
\r
3 * Copyright (C) 2002-2007, International Business Machines Corporation and *
\r
4 * others. All Rights Reserved. *
\r
5 *******************************************************************************
\r
8 package com.ibm.icu.dev.test.perf;
\r
10 import com.ibm.icu.text.*;
\r
13 import com.ibm.icu.impl.LocaleUtility;
\r
15 public class CollationPerformanceTest {
\r
16 static final String usageString =
\r
17 "usage: collperf options...\n"
\r
18 + "-help Display this message.\n"
\r
19 + "-file file_name utf-16 format file of names.\n"
\r
20 + "-locale name ICU locale to use. Default is en_US\n"
\r
21 + "-rules file_name Collation rules file (overrides locale)\n"
\r
22 //+ "-langid 0x1234 Windows Language ID number. Default to value for -locale option\n"
\r
23 //+ " see http://msdn.microsoft.com/library/psdk/winbase/nls_8xo3.htm\n"
\r
24 //+ "-win Run test using Windows native services. (ICU is default)\n"
\r
25 //+ "-unix Run test using Unix strxfrm, strcoll services.\n"
\r
26 //+ "-uselen Use API with string lengths. Default is null-terminated strings\n"
\r
27 + "-usekeys Run tests using sortkeys rather than strcoll\n"
\r
28 + "-strcmp Run tests using u_strcmp rather than strcoll\n"
\r
29 + "-strcmpCPO Run tests using u_strcmpCodePointOrder rather than strcoll\n"
\r
30 + "-loop nnnn Loopcount for test. Adjust for reasonable total running time.\n"
\r
31 + "-iloop n Inner Loop Count. Default = 1. Number of calls to function\n"
\r
32 + " under test at each call point. For measuring test overhead.\n"
\r
33 + "-terse Terse numbers-only output. Intended for use by scripts.\n"
\r
34 + "-french French accent ordering\n"
\r
35 + "-frenchoff No French accent ordering (for use with French locales.)\n"
\r
36 + "-norm Normalizing mode on\n"
\r
37 + "-shifted Shifted mode\n"
\r
38 + "-lower Lower case first\n"
\r
39 + "-upper Upper case first\n"
\r
40 + "-case Enable separate case level\n"
\r
41 + "-level n Sort level, 1 to 5, for Primary, Secndary, Tertiary, Quaternary, Identical\n"
\r
42 + "-keyhist Produce a table sort key size vs. string length\n"
\r
43 + "-binsearch Binary Search timing test\n"
\r
44 + "-keygen Sort Key Generation timing test\n"
\r
45 + "-qsort Quicksort timing test\n"
\r
46 + "-iter Iteration Performance Test\n"
\r
47 + "-dump Display strings, sort keys and CEs.\n"
\r
48 + "-java Run test using java.text.Collator.\n";
\r
50 //enum {FLAG, NUM, STRING} type;
\r
// Raw command-line option values, one mutable StringBuffer per option.
// Each buffer is handed to an OptionSpec entry in the `options` table;
// processOptions() replaces the buffer's contents when the option is seen
// (flag options receive the text "true") and afterwards converts every buffer
// into the matching typed opt_* field. The initial text is the default value.
51 static StringBuffer temp_opt_fName = new StringBuffer("");
\r
52 static StringBuffer temp_opt_locale = new StringBuffer("en_US");
\r
53 //static StringBuffer temp_opt_langid = new StringBuffer("0"); // Defaults to value corresponding to opt_locale.
\r
54 static StringBuffer temp_opt_rules = new StringBuffer("");
\r
55 static StringBuffer temp_opt_help = new StringBuffer("");
\r
// Loop counts are kept as decimal text and parsed with Integer.parseInt later.
56 static StringBuffer temp_opt_loopCount = new StringBuffer("1");
\r
57 static StringBuffer temp_opt_iLoopCount = new StringBuffer("1");
\r
58 static StringBuffer temp_opt_terse = new StringBuffer("false");
\r
59 static StringBuffer temp_opt_qsort = new StringBuffer("");
\r
60 static StringBuffer temp_opt_binsearch = new StringBuffer("");
\r
// NOTE(review): no OptionSpec in the visible part of the options table refers
// to temp_opt_icu, so it appears to keep its default "true" — confirm.
61 static StringBuffer temp_opt_icu = new StringBuffer("true");
\r
62 //static StringBuffer opt_win = new StringBuffer(""); // Run with Windows native functions.
\r
63 //static StringBuffer opt_unix = new StringBuffer(""); // Run with UNIX strcoll, strxfrm functions.
\r
64 //static StringBuffer opt_uselen = new StringBuffer("");
\r
65 static StringBuffer temp_opt_usekeys = new StringBuffer("");
\r
66 static StringBuffer temp_opt_strcmp = new StringBuffer("");
\r
67 static StringBuffer temp_opt_strcmpCPO = new StringBuffer("");
\r
68 static StringBuffer temp_opt_norm = new StringBuffer("");
\r
69 static StringBuffer temp_opt_keygen = new StringBuffer("");
\r
70 static StringBuffer temp_opt_french = new StringBuffer("");
\r
71 static StringBuffer temp_opt_frenchoff = new StringBuffer("");
\r
72 static StringBuffer temp_opt_shifted = new StringBuffer("");
\r
73 static StringBuffer temp_opt_lower = new StringBuffer("");
\r
74 static StringBuffer temp_opt_upper = new StringBuffer("");
\r
75 static StringBuffer temp_opt_case = new StringBuffer("");
\r
// "0" means no -level was given; setOptions() only changes the collator
// strength when the parsed level is non-zero.
76 static StringBuffer temp_opt_level = new StringBuffer("0");
\r
77 static StringBuffer temp_opt_keyhist = new StringBuffer("");
\r
78 static StringBuffer temp_opt_itertest = new StringBuffer("");
\r
79 static StringBuffer temp_opt_dump = new StringBuffer("");
\r
80 static StringBuffer temp_opt_java = new StringBuffer("");
\r
// Typed option values read by main(), setOptions() and the timing tests.
// processOptions() assigns them from the temp_opt_* buffers once the command
// line has been scanned; the initializers below are the defaults.
83 static String opt_fName = "";
\r
84 static String opt_locale = "en_US";
\r
85 //static int opt_langid = 0; // Defaults to value corresponding to opt_locale.
\r
86 static String opt_rules = "";
\r
87 static boolean opt_help = false;
\r
// opt_loopCount scales the outer loop of each timing test; opt_iLoopCount is
// the inner repeat count used by the key generation test.
88 static int opt_loopCount = 1;
\r
89 static int opt_iLoopCount = 1;
\r
90 static boolean opt_terse = false;
\r
91 static boolean opt_qsort = false;
\r
92 static boolean opt_binsearch = false;
\r
93 static boolean opt_icu = true;
\r
94 //static boolean opt_win = false; // Run with Windows native functions.
\r
95 //static boolean opt_unix = false; // Run with UNIX strcoll, strxfrm functions.
\r
96 //static boolean opt_uselen = false;
\r
97 static boolean opt_usekeys = false;
\r
98 static boolean opt_strcmp = false;
\r
99 static boolean opt_strcmpCPO = false;
\r
100 static boolean opt_norm = false;
\r
101 static boolean opt_keygen = false;
\r
102 static boolean opt_french = false;
\r
103 static boolean opt_frenchoff = false;
\r
104 static boolean opt_shifted = false;
\r
105 static boolean opt_lower = false;
\r
106 static boolean opt_upper = false;
\r
107 static boolean opt_case = false;
\r
// 0 leaves the collator strength untouched; setOptions() switches on this
// value and reports an error for values it does not handle.
108 static int opt_level = 0;
\r
109 static boolean opt_keyhist = false;
\r
110 static boolean opt_itertest = false;
\r
111 static boolean opt_dump = false;
\r
112 static boolean opt_java = false;
\r
114 static OptionSpec[] options = {
\r
115 new OptionSpec("-file", 2, temp_opt_fName),
\r
116 new OptionSpec("-locale", 2, temp_opt_locale),
\r
117 //new OptionSpec("-langid", 1, temp_opt_langid),
\r
118 new OptionSpec("-rules", 2, temp_opt_rules),
\r
119 new OptionSpec("-qsort", 0, temp_opt_qsort),
\r
120 new OptionSpec("-binsearch", 0, temp_opt_binsearch),
\r
121 new OptionSpec("-iter", 0, temp_opt_itertest),
\r
122 //new OptionSpec("-win", 0, temp_opt_win),
\r
123 //new OptionSpec("-unix", 0, temp_opt_unix),
\r
124 //new OptionSpec("-uselen", 0, temp_opt_uselen),
\r
125 new OptionSpec("-usekeys", 0, temp_opt_usekeys),
\r
126 new OptionSpec("-strcmp", 0, temp_opt_strcmp),
\r
127 new OptionSpec("-strcmpCPO", 0, temp_opt_strcmpCPO),
\r
128 new OptionSpec("-norm", 0, temp_opt_norm),
\r
129 new OptionSpec("-french", 0, temp_opt_french),
\r
130 new OptionSpec("-frenchoff", 0, temp_opt_frenchoff),
\r
131 new OptionSpec("-shifted", 0, temp_opt_shifted),
\r
132 new OptionSpec("-lower", 0, temp_opt_lower),
\r
133 new OptionSpec("-upper", 0, temp_opt_upper),
\r
134 new OptionSpec("-case", 0, temp_opt_case),
\r
135 new OptionSpec("-level", 1, temp_opt_level),
\r
136 new OptionSpec("-keyhist", 0, temp_opt_keyhist),
\r
137 new OptionSpec("-keygen", 0, temp_opt_keygen),
\r
138 new OptionSpec("-loop", 1, temp_opt_loopCount),
\r
139 new OptionSpec("-iloop", 1, temp_opt_iLoopCount),
\r
140 new OptionSpec("-terse", 0, temp_opt_terse),
\r
141 new OptionSpec("-dump", 0, temp_opt_dump),
\r
142 new OptionSpec("-help", 0, temp_opt_help),
\r
143 new OptionSpec("-?", 0, temp_opt_help),
\r
144 new OptionSpec("-java", 0, temp_opt_java),
\r
// Collators under test and the number formatters used for reports.
// javaCol and icuCol are assigned in setOptions(); nf and percent are
// created in main() before the tests run.
147 static java.text.Collator javaCol = null;
\r
148 static com.ibm.icu.text.Collator icuCol = null;
\r
149 static NumberFormat nf = null;
\r
150 static NumberFormat percent = null;
\r
// Per-instance test data: readDataLines() creates `list` for the input file
// and copies its elements into the `tests` array.
151 ArrayList list = null;
\r
152 String[] tests = null;
\r
// Printed in the qsort report as the total number of string compares.
// NOTE(review): the statement that increments it is not visible here — confirm.
153 int globalCount = 0;
\r
155 public static void main(String[] args) {
\r
156 CollationPerformanceTest collPerf = new CollationPerformanceTest();
\r
157 if ( !CollationPerformanceTest.processOptions(args) || opt_help || opt_fName.length()==0) {
\r
158 System.out.println(usageString);
\r
162 nf = NumberFormat.getInstance();
\r
163 nf.setMaximumFractionDigits(2);
\r
164 percent = NumberFormat.getPercentInstance();
\r
166 collPerf.setOptions();
\r
167 collPerf.readDataLines();
\r
174 collPerf.doQSort();
\r
177 if (opt_binsearch) {
\r
178 collPerf.doBinarySearch();
\r
182 collPerf.doKeyGen();
\r
186 collPerf.doKeyHist();
\r
189 if (opt_itertest) {
\r
190 collPerf.doIterTest();
\r
195 //Dump file lines, CEs, Sort Keys if requested
\r
197 for(int i = 0; i < list.size(); i++) {
\r
199 String line = com.ibm.icu.impl.Utility.escape((String)list.get(i));
\r
200 System.out.println(line);
\r
202 System.out.print(" CEs: ");
\r
203 CollationElementIterator CEiter = ((com.ibm.icu.text.RuleBasedCollator)icuCol).getCollationElementIterator(line);
\r
207 ce = CEiter.next();
\r
208 if (ce == CollationElementIterator.NULLORDER) {
\r
211 //System.out.print();
\r
212 String outStr = Integer.toHexString(ce);
\r
213 for (int len = 0; len < 8 - outStr.length(); len++) {
\r
214 outStr ='0' + outStr;
\r
216 System.out.print(outStr + " ");
\r
218 System.out.print("\n ");
\r
223 System.out.print("\n ICU Sort Key: ");
\r
224 CollationKey ck = ((com.ibm.icu.text.RuleBasedCollator)icuCol).getCollationKey(line);
\r
225 byte[] cks = ck.toByteArray();
\r
227 for(int k = 0; k < cks.length; k++) {
\r
228 String outStr = Integer.toHexString(cks[k]);
\r
229 switch (outStr.length()) {
\r
230 case 1: outStr = '0' + outStr;
\r
232 case 8: outStr = outStr.substring(6);
\r
235 System.out.print(outStr);
\r
236 System.out.print(" ");
\r
238 if(j > 0 && j % 20 == 0) {
\r
239 System.out.print("\n ");
\r
242 System.out.println("\n");
\r
246 /**---------------------------------------------------------------------------------------
\r
248 * doQSort() The quick sort timing test.
\r
250 *---------------------------------------------------------------------------------------
\r
254 //String[] sortTests = (String[]) tests.clone();
\r
255 //Adjust loop count to compensate for file size. QSort should be nlog(n)
\r
256 double dLoopCount = opt_loopCount * 3000 / ((Math.log(tests.length) / Math.log(10)* tests.length));
\r
262 int adj_loopCount = (int)dLoopCount;
\r
263 if(adj_loopCount < 1) {
\r
268 long startTime = 0;
\r
270 if (opt_icu && opt_usekeys) {
\r
271 startTime = System.currentTimeMillis();
\r
272 qSortImpl_icu_usekeys(tests, 0, tests.length -1, icuCol);
\r
273 endTime = System.currentTimeMillis();
\r
275 if (opt_icu && !opt_usekeys){
\r
276 startTime = System.currentTimeMillis();
\r
277 qSortImpl_nokeys(tests, 0, tests.length -1, icuCol);
\r
278 endTime = System.currentTimeMillis();
\r
280 if (opt_java && opt_usekeys) {
\r
281 startTime = System.currentTimeMillis();
\r
282 qSortImpl_java_usekeys(tests, 0, tests.length -1, javaCol);
\r
283 endTime = System.currentTimeMillis();
\r
285 if (opt_java && !opt_usekeys){
\r
286 startTime = System.currentTimeMillis();
\r
287 qSortImpl_nokeys(tests, 0, tests.length -1, javaCol);
\r
288 endTime = System.currentTimeMillis();
\r
290 long elapsedTime = endTime - startTime;
\r
291 int ns = (int)(1000000 * elapsedTime / (globalCount + 0.0));
\r
293 System.out.println("qsort: total # of string compares = " + globalCount);
\r
294 System.out.println("qsort: time per compare = " + ns);
\r
296 System.out.println(ns);
\r
300 /**---------------------------------------------------------------------------------------
\r
302 * doBinarySearch() Binary Search timing test. Each name from the list
\r
303 * is looked up in the full sorted list of names.
\r
305 *---------------------------------------------------------------------------------------
\r
307 void doBinarySearch() {
\r
311 double dLoopCount = opt_loopCount * 3000 / (Math.log(tests.length) / Math.log(10)* tests.length);
\r
312 long startTime = 0;
\r
313 long elapsedTime = 0;
\r
318 int adj_loopCount = (int)dLoopCount;
\r
319 if(adj_loopCount < 1) {
\r
325 for(;;) { //not really a loop, just allows "break" to work, to simplify
\r
326 //inadvertently running more than one test through here
\r
329 startTime = System.currentTimeMillis();
\r
330 for(loops = 0; loops < adj_loopCount; loops++) {
\r
331 for (int j = 0; j < tests.length; j++) {
\r
332 int hi = tests.length-1;
\r
336 int newGuess = (hi + lo) / 2;
\r
337 if(newGuess == guess){
\r
341 r = tests[j].compareTo(tests[guess]);
\r
354 elapsedTime = System.currentTimeMillis() - startTime;
\r
358 if (opt_strcmpCPO) {
\r
360 startTime = System.currentTimeMillis();
\r
361 for(loops = 0; loops < adj_loopCount; loops++) {
\r
362 for (int j = 0; j < tests.length; j++) {
\r
363 int hi = tests.length-1;
\r
367 int newGuess = (hi + lo) / 2;
\r
368 if(newGuess == guess){
\r
372 r = com.ibm.icu.text.Normalizer.compare(tests[j], tests[guess], Normalizer.COMPARE_CODE_POINT_ORDER);
\r
385 elapsedTime = System.currentTimeMillis() - startTime;
\r
392 startTime = System.currentTimeMillis();
\r
393 for (loops = 0; loops < adj_loopCount; loops++) {
\r
394 for (int j = 0; j < tests.length; j++) {
\r
395 int hi = tests.length - 1;
\r
399 int newGuess = (hi + lo) / 2;
\r
400 if (newGuess == guess) {
\r
405 com.ibm.icu.text.CollationKey sortKey1 = icuCol.getCollationKey(tests[j]);
\r
406 com.ibm.icu.text.CollationKey sortKey2 = icuCol.getCollationKey(tests[guess]);
\r
407 r = sortKey1.compareTo(sortKey2);
\r
410 r = icuCol.compare(tests[j], tests[guess]);
\r
424 elapsedTime = System.currentTimeMillis() - startTime;
\r
430 startTime = System.currentTimeMillis();
\r
431 for (loops = 0; loops < adj_loopCount; loops++) {
\r
432 for (int j = 0; j < tests.length; j++) {
\r
433 int hi = tests.length - 1;
\r
437 int newGuess = (hi + lo) / 2;
\r
438 if (newGuess == guess) {
\r
443 java.text.CollationKey sortKey1 = javaCol.getCollationKey(tests[j]);
\r
444 java.text.CollationKey sortKey2 = javaCol.getCollationKey(tests[guess]);
\r
445 r = sortKey1.compareTo(sortKey2);
\r
448 r = javaCol.compare(tests[j], tests[guess]);
\r
462 elapsedTime = System.currentTimeMillis() - startTime;
\r
467 int ns = (int)((float)(1000000) * (float)elapsedTime / (float)gCount);
\r
469 System.out.println("binary search: total # of string compares = " + gCount);
\r
470 System.out.println("binary search: compares per loop = " + gCount / loops);
\r
471 System.out.println("binary search: time per compare = " + ns);
\r
473 System.out.println(ns);
\r
477 /**---------------------------------------------------------------------------------------
\r
479 * doKeyGen() Key Generation Timing Test
\r
481 *---------------------------------------------------------------------------------------
\r
486 // Adjust loop count to compensate for file size. Should be order n
\r
487 double dLoopCount = opt_loopCount * (1000.0 / (double)list.size());
\r
488 int adj_loopCount = (int)dLoopCount;
\r
489 if (adj_loopCount < 1) adj_loopCount = 1;
\r
491 long startTime = 0;
\r
492 long totalKeyLen = 0;
\r
493 long totalChars = 0;
\r
495 startTime = System.currentTimeMillis();
\r
496 for (int loops=0; loops<adj_loopCount; loops++) {
\r
497 for (int line=0; line < tests.length; line++) {
\r
498 for (int iLoop=0; iLoop < opt_iLoopCount; iLoop++) {
\r
499 totalChars += tests[line].length();
\r
500 byte[] sortKey = javaCol.getCollationKey(tests[line]).toByteArray();
\r
501 totalKeyLen += sortKey.length;
\r
506 startTime = System.currentTimeMillis();
\r
507 for (int loops=0; loops<adj_loopCount; loops++) {
\r
508 for (int line=0; line < tests.length; line++) {
\r
509 for (int iLoop=0; iLoop < opt_iLoopCount; iLoop++) {
\r
510 totalChars += tests[line].length();
\r
511 byte[] sortKey = icuCol.getCollationKey(tests[line]).toByteArray();
\r
512 totalKeyLen += sortKey.length;
\r
518 long elapsedTime = System.currentTimeMillis() - startTime;
\r
519 long ns = (long)(1000000 * elapsedTime / (adj_loopCount * tests.length + 0.0));
\r
521 System.out.println("Sort Key Generation: total # of keys =" + adj_loopCount * tests.length);
\r
522 System.out.println("Sort Key Generation: time per key = " + ns + " ns");
\r
523 System.out.println("Key Length / character = " + nf.format(totalKeyLen / (totalChars + 0.0)));
\r
526 System.out.print(ns + ", ");
\r
527 System.out.println(nf.format(totalKeyLen / (totalChars + 0.0)) + ", ");
\r
531 /**---------------------------------------------------------------------------------------
\r
533 * doKeyHist() Output a table of data for average sort key size vs. string length.
\r
535 *---------------------------------------------------------------------------------------
\r
541 // Find the maximum string length
\r
542 for (int i = 0; i < tests.length; i++) {
\r
543 if (tests[i].length() > maxLen) maxLen = tests[i].length();
\r
546 int[] accumulatedLen = new int[maxLen + 1];
\r
547 int[] numKeysOfSize = new int[maxLen + 1];
\r
549 // Fill the arrays...
\r
550 for (int i = 0; i < tests.length; i++) {
\r
551 int len = tests[i].length();
\r
552 accumulatedLen[len] += icuCol.getCollationKey(tests[i]).toByteArray().length;
\r
553 numKeysOfSize[len] += 1;
\r
556 // And write out averages
\r
557 System.out.println("String Length, Avg Key Length, Avg Key Len per char");
\r
558 for (int i = 1; i <= maxLen; i++) {
\r
559 if (numKeysOfSize[i] > 0) {
\r
560 System.out.println(i + ", " + nf.format(accumulatedLen[i] / (numKeysOfSize[i]+ 0.0)) + ", "
\r
561 + nf.format(accumulatedLen[i] / (numKeysOfSize[i] * i + 0.0)));
\r
567 void doForwardIterTest() {
\r
569 System.out.print("\n\nPerforming forward iteration performance test with ");
\r
570 System.out.println("performance test on strings from file -----------");
\r
572 CollationElementIterator iter = ((RuleBasedCollator)icuCol).getCollationElementIterator("");
\r
576 long startTime = System.currentTimeMillis();
\r
577 while (count < opt_loopCount) {
\r
579 while (linecount < tests.length) {
\r
580 String str = tests[linecount];
\r
582 while (iter.next() != CollationElementIterator.NULLORDER) {
\r
590 long elapsedTime = System.currentTimeMillis() - startTime;
\r
591 System.out.println("elapsedTime " + elapsedTime + " ms");
\r
593 // empty loop recalculation
\r
595 startTime = System.currentTimeMillis();
\r
596 while (count < opt_loopCount) {
\r
598 while (linecount < tests.length) {
\r
599 String str = tests[linecount];
\r
605 elapsedTime -= (System.currentTimeMillis() - startTime);
\r
606 System.out.println("elapsedTime " + elapsedTime + " ms");
\r
608 int ns = (int)(1000000 * elapsedTime / (gCount + 0.0));
\r
609 System.out.println("Total number of strings compared " + tests.length
\r
610 + "in " + opt_loopCount + " loops");
\r
611 System.out.println("Average time per CollationElementIterator.next() nano seconds " + ns);
\r
612 System.out.println("performance test on skipped-5 concatenated strings from file -----------");
\r
614 String totalStr = "";
\r
616 // appending all the strings
\r
618 while (linecount < tests.length) {
\r
619 totalStr += tests[linecount];
\r
620 strlen += tests[linecount].length();
\r
623 System.out.println("Total size of strings " + strlen);
\r
627 iter = ((RuleBasedCollator)icuCol).getCollationElementIterator(totalStr);
\r
628 strlen -= 5; // any left over characters are not iterated,
\r
629 // this is to ensure the backwards and forwards iterators
\r
630 // get the same position
\r
632 startTime = System.currentTimeMillis();
\r
633 while (count < opt_loopCount) {
\r
636 iter.setOffset(strindex);
\r
638 if (iter.next() == CollationElementIterator.NULLORDER) {
\r
645 if (strindex > strlen) {
\r
648 iter.setOffset(strindex);
\r
655 elapsedTime = System.currentTimeMillis() - startTime;
\r
656 System.out.println("elapsedTime " + elapsedTime);
\r
658 // empty loop recalculation
\r
659 int tempgCount = 0;
\r
661 startTime = System.currentTimeMillis();
\r
662 while (count < opt_loopCount) {
\r
665 iter.setOffset(strindex);
\r
671 if (strindex > strlen) {
\r
674 iter.setOffset(strindex);
\r
680 elapsedTime -= (System.currentTimeMillis() - startTime);
\r
681 System.out.println("elapsedTime " + elapsedTime);
\r
683 System.out.println("gCount " + gCount);
\r
684 ns = (int)(1000000 * elapsedTime / (gCount + 0.0));
\r
685 System.out.println("Average time per CollationElementIterator.next() nano seconds " + ns);
\r
688 void doBackwardIterTest() {
\r
689 System.out.print("\n\nPerforming backward iteration performance test with ");
\r
690 System.out.println("performance test on strings from file -----------\n");
\r
692 CollationElementIterator iter = ((RuleBasedCollator)icuCol).getCollationElementIterator("");
\r
696 long startTime = System.currentTimeMillis();
\r
697 while (count < opt_loopCount) {
\r
699 while (linecount < tests.length) {
\r
700 String str = tests[linecount];
\r
702 while (iter.previous() != CollationElementIterator.NULLORDER) {
\r
709 long elapsedTime = System.currentTimeMillis() - startTime;
\r
710 System.out.println("elapsedTime " + elapsedTime + " ms");
\r
712 // empty loop recalculation
\r
714 startTime = System.currentTimeMillis();
\r
715 while (count < opt_loopCount) {
\r
717 while (linecount < tests.length) {
\r
718 String str = tests[linecount];
\r
724 elapsedTime -= (System.currentTimeMillis() - startTime);
\r
725 System.out.println("elapsedTime " + elapsedTime + " ms");
\r
727 int ns = (int)(1000000 * elapsedTime / (gCount + 0.0));
\r
728 System.out.println("Total number of strings compared " + tests.length
\r
729 + "in " + opt_loopCount + " loops");
\r
730 System.out.println("Average time per CollationElementIterator.previous() nano seconds " + ns);
\r
731 System.out.println("performance test on skipped-5 concatenated strings from file -----------");
\r
733 String totalStr = "";
\r
735 // appending all the strings
\r
737 while (linecount < tests.length) {
\r
738 totalStr += tests[linecount];
\r
739 strlen += tests[linecount].length();
\r
742 System.out.println("Total size of strings " + strlen);
\r
747 iter = ((RuleBasedCollator)icuCol).getCollationElementIterator(totalStr);
\r
749 startTime = System.currentTimeMillis();
\r
750 while (count < opt_loopCount) {
\r
753 iter.setOffset(strindex);
\r
755 if (iter.previous() == CollationElementIterator.NULLORDER) {
\r
762 if (strindex > strlen) {
\r
765 iter.setOffset(strindex);
\r
772 elapsedTime = System.currentTimeMillis() - startTime;
\r
773 System.out.println("elapsedTime " + elapsedTime);
\r
775 // empty loop recalculation
\r
777 int tempgCount = 0;
\r
778 startTime = System.currentTimeMillis();
\r
779 while (count < opt_loopCount) {
\r
782 iter.setOffset(strindex);
\r
788 if (strindex > strlen) {
\r
791 iter.setOffset(strindex);
\r
797 elapsedTime -= (System.currentTimeMillis() - startTime);
\r
798 System.out.println("elapsedTime " + elapsedTime);
\r
800 System.out.println("gCount " + gCount);
\r
801 ns = (int)(1000000 * elapsedTime / (gCount + 0.0));
\r
802 System.out.println("Average time per CollationElementIterator.previous() nano seconds " + ns);
\r
806 /**---------------------------------------------------------------------------------------
\r
808 * doIterTest() Iteration test
\r
810 *---------------------------------------------------------------------------------------
\r
812 void doIterTest() {
\r
813 doForwardIterTest();
\r
814 doBackwardIterTest();
\r
817 void setOptions() {
\r
823 if (opt_rules.length() != 0) {
\r
825 icuCol = new com.ibm.icu.text.RuleBasedCollator(getCollationRules(opt_rules));
\r
826 } catch (Exception e) {
\r
827 System.out.println("Cannot open rules:" + e.getMessage());
\r
831 icuCol = com.ibm.icu.text.Collator.getInstance(
\r
832 LocaleUtility.getLocaleFromName(opt_locale));
\r
835 javaCol = java.text.Collator.getInstance(
\r
836 LocaleUtility.getLocaleFromName(opt_locale));
\r
839 javaCol.setDecomposition(java.text.Collator.CANONICAL_DECOMPOSITION);
\r
840 icuCol.setDecomposition(com.ibm.icu.text.Collator.CANONICAL_DECOMPOSITION);
\r
843 if (opt_french && opt_frenchoff) {
\r
844 System.err.println("Error: specified both -french and -frenchoff options.");
\r
848 ((com.ibm.icu.text.RuleBasedCollator)icuCol).setFrenchCollation(true);
\r
850 if (opt_frenchoff) {
\r
851 ((com.ibm.icu.text.RuleBasedCollator)icuCol).setFrenchCollation(false);
\r
855 ((com.ibm.icu.text.RuleBasedCollator)icuCol).setLowerCaseFirst(true);
\r
859 ((com.ibm.icu.text.RuleBasedCollator)icuCol).setUpperCaseFirst(true);
\r
863 ((com.ibm.icu.text.RuleBasedCollator)icuCol).setAlternateHandlingShifted(true);
\r
866 if (opt_level != 0) {
\r
867 switch (opt_level) {
\r
869 javaCol.setStrength(java.text.Collator.PRIMARY);
\r
870 icuCol.setStrength(com.ibm.icu.text.Collator.PRIMARY);
\r
873 javaCol.setStrength(java.text.Collator.SECONDARY);
\r
874 icuCol.setStrength(com.ibm.icu.text.Collator.SECONDARY);
\r
877 javaCol.setStrength(java.text.Collator.TERTIARY);
\r
878 icuCol.setStrength(com.ibm.icu.text.Collator.TERTIARY);
\r
881 icuCol.setStrength(com.ibm.icu.text.Collator.QUATERNARY);
\r
884 javaCol.setStrength(java.text.Collator.IDENTICAL);
\r
885 icuCol.setStrength(com.ibm.icu.text.Collator.IDENTICAL);
\r
888 System.err.println("-level param must be between 1 and 5\n");
\r
892 // load classes at least once before starting
\r
893 javaCol.compare("a", "b");
\r
894 icuCol.compare("a", "b");
\r
897 static boolean processOptions(String[] args) {
\r
899 for (argNum =0; argNum < args.length; argNum++) {
\r
900 for (int i = 0; i < options.length; i++) {
\r
901 if (args[argNum].equalsIgnoreCase(options[i].name)) {
\r
902 switch (options[i].type) {
\r
904 options[i].value.delete(0, options[i].value.capacity()).append("true");
\r
908 if ((argNum >= args.length) || (args[argNum].charAt(0)=='-')) {
\r
909 System.err.println("value expected for"+ options[i].name +"option.\n");
\r
913 /* int value =*/ Integer.parseInt(args[argNum]);
\r
914 options[i].value.delete(0, options[i].value.capacity()).append(args[argNum]);
\r
915 } catch (NumberFormatException e) {
\r
916 System.err.println("Expected: a number value");
\r
922 if ((argNum >= args.length) || (args[argNum].charAt(0)=='-')) {
\r
923 System.err.println("value expected for"+ options[i].name +"option.\n");
\r
926 options[i].value.delete(0, options[i].value.capacity()).append(args[argNum]);
\r
929 System.err.println("Option type error: {FLAG=0, NUM=1, STRING=2}");
\r
936 opt_fName = temp_opt_fName.toString();
\r
937 opt_locale = temp_opt_locale.toString();
\r
938 opt_rules = temp_opt_rules.toString();
\r
939 if (temp_opt_help.toString().equalsIgnoreCase("true")) {
\r
942 opt_loopCount = Integer.parseInt(temp_opt_loopCount.toString());
\r
943 opt_iLoopCount = Integer.parseInt(temp_opt_iLoopCount.toString());
\r
944 if (temp_opt_terse.toString().equalsIgnoreCase("true")) {
\r
947 if (temp_opt_qsort.toString().equalsIgnoreCase("true")) {
\r
950 if (temp_opt_binsearch.toString().equalsIgnoreCase("true")) {
\r
951 opt_binsearch = true;
\r
953 if (temp_opt_icu.toString().equalsIgnoreCase("true")) {
\r
956 if (temp_opt_usekeys.toString().equalsIgnoreCase("true")) {
\r
957 opt_usekeys = true;
\r
959 if (temp_opt_strcmp.toString().equalsIgnoreCase("true")) {
\r
962 if (temp_opt_strcmpCPO.toString().equalsIgnoreCase("true")) {
\r
963 opt_strcmpCPO = true;
\r
965 if (temp_opt_keygen.toString().equalsIgnoreCase("true")) {
\r
968 if (temp_opt_norm.toString().equalsIgnoreCase("true")) {
\r
971 if (temp_opt_french.toString().equalsIgnoreCase("true")) {
\r
974 if (temp_opt_frenchoff.toString().equalsIgnoreCase("true")) {
\r
975 opt_frenchoff = true;
\r
977 if (temp_opt_shifted.toString().equalsIgnoreCase("true")) {
\r
978 opt_shifted = true;
\r
980 if (temp_opt_lower.toString().equalsIgnoreCase("true")) {
\r
983 if (temp_opt_upper.toString().equalsIgnoreCase("true")) {
\r
986 if (temp_opt_case.toString().equalsIgnoreCase("true")) {
\r
989 opt_level = Integer.parseInt(temp_opt_level.toString());
\r
990 if (temp_opt_keyhist.toString().equalsIgnoreCase("true")) {
\r
991 opt_keyhist = true;
\r
993 if (temp_opt_itertest.toString().equalsIgnoreCase("true")) {
\r
994 opt_itertest = true;
\r
996 if (temp_opt_dump.toString().equalsIgnoreCase("true")) {
\r
999 if (temp_opt_java.toString().equalsIgnoreCase("true")) {
\r
1007 * Invoke the runtime's garbage collection procedure repeatedly
\r
1008 * until the amount of free memory stabilizes to within 10%.
\r
1010 private void callGC() {
\r
1011 // From "Java Platform Performance". This is the procedure
\r
1012 // recommended by Javasoft.
\r
1015 Thread.sleep(100);
\r
1016 System.runFinalization();
\r
1017 Thread.sleep(100);
\r
1020 Thread.sleep(100);
\r
1021 System.runFinalization();
\r
1022 Thread.sleep(100);
\r
1023 } catch (InterruptedException e) {}
\r
1026 //private boolean needCRLF = false;
\r
// Modulus for the `(i % DOTMASK) == 0` check that follows; the progress-dot
// output that check once gated is commented out. 0x7FF is 2047.
1028 public int DOTMASK = 0x7FF;
\r
1031 if ((i % DOTMASK) == 0) {
\r
1032 //needCRLF = true;
\r
1033 // It is unclear why a dot would be printed here
\r
1034 //System.out.print('.');
\r
1038 String readDataLine(BufferedReader br) throws Exception {
\r
1039 String originalLine = "";
\r
1043 line = originalLine = br.readLine();
\r
1044 if (line == null) return null;
\r
1045 if (line.length() > 0 && line.charAt(0) == 0xFEFF) line = line.substring(1);
\r
1046 int commentPos = line.indexOf('#');
\r
1047 if (commentPos >= 0) line = line.substring(0, commentPos);
\r
1048 line = line.trim();
\r
1049 } catch (Exception e) {
\r
1050 throw new Exception("Line \"{0}\", \"{1}\"" + originalLine + " "
\r
1051 + line + " " + e.toString());
\r
1056 void readDataLines() {
\r
1057 // Read in the input file.
\r
1058 // File is decoded as UTF-8.
\r
1059 // Lines go onto heap buffers. Global index array to line starts is created.
\r
1060 // Lines themselves are null terminated.
\r
1062 FileInputStream fis = null;
\r
1063 InputStreamReader isr = null;
\r
1064 BufferedReader br = null;
\r
1066 fis = new FileInputStream(opt_fName);
\r
1067 isr = new InputStreamReader(fis, "UTF-8");
\r
1068 br= new BufferedReader(isr, 32*1024);
\r
1069 } catch (Exception e) {
\r
1070 System.err.println("Error: File access exception: " + e.getMessage() + "!");
\r
1076 list = new ArrayList();
\r
1078 String line = null;
\r
1080 line = readDataLine(br);
\r
1081 } catch (Exception e) {
\r
1082 System.err.println("Read File Error" + e.getMessage() + "!");
\r
1086 if (line == null) break;
\r
1087 if (line.length() == 0) continue;
\r
1092 System.out.println("Read " + counter + " lines in file");
\r
1095 int size = list.size();
\r
1096 tests = new String [size];
\r
1098 for (int i = 0; i < size; ++i) {
\r
1099 tests[i] = (String) list.get(i);
\r
1104 * Get the Collator Rules
\r
1105 * The Rule File format:
\r
1106 * 1. leading and trailing whitespaces will be omitted
\r
1107 * 2. text from the first '#' on a line to the end of that line is treated as a comment
\r
1108 * 3. File encoding is ISO-8859-1
\r
1110 String getCollationRules(String ruleFileName) {
\r
1111 FileInputStream fis = null;
\r
1112 InputStreamReader isr = null;
\r
1113 BufferedReader br = null;
\r
1115 fis = new FileInputStream(opt_rules);
\r
1116 isr = new InputStreamReader(fis,"ISO-8859-1");
\r
1117 br= new BufferedReader(isr);
\r
1118 } catch (Exception e) {
\r
1119 System.err.println("Error: File access exception: " + e.getMessage() + "!");
\r
1122 String rules = "";
\r
1126 line = br.readLine();
\r
1127 } catch (IOException e) {
\r
1128 System.err.println("Read File Error" + e.getMessage() + "!");
\r
1131 if (line == null) {
\r
1134 int commentPos = line.indexOf('#');
\r
1135 if (commentPos >= 0) line = line.substring(0, commentPos);
\r
1136 line = line.trim();
\r
1137 rules = rules + line;
\r
1142 //Implementing qsort
\r
1143 void qSortImpl_java_usekeys(String src[], int fromIndex, int toIndex, java.text.Collator c) {
\r
1144 int low = fromIndex;
\r
1145 int high = toIndex;
\r
1146 String middle = "";
\r
1148 middle = src[ (low + high) / 2 ];
\r
1149 while(low <= high) {
\r
1150 while((low < toIndex) && (compare(c.getCollationKey(src[low]), c.getCollationKey(middle)) < 0)) {
\r
1153 while((high > fromIndex) && (compare(c.getCollationKey(src[high]), c.getCollationKey(middle)) > 0)) {
\r
1157 String swap = src[low];
\r
1158 src[low] = src[high];
\r
1164 if(fromIndex < high) {
\r
1165 qSortImpl_java_usekeys(src, fromIndex, high, c);
\r
1168 if(low < toIndex) {
\r
1169 qSortImpl_java_usekeys(src, low, toIndex, c);
\r
1174 void qSortImpl_icu_usekeys(String src[], int fromIndex, int toIndex, com.ibm.icu.text.Collator c) {
\r
1175 int low = fromIndex;
\r
1176 int high = toIndex;
\r
1177 String middle = "";
\r
1179 middle = src[ (low + high) / 2 ];
\r
1180 while(low <= high) {
\r
1181 while((low < toIndex) && (compare(c.getCollationKey(src[low]), c.getCollationKey(middle)) < 0)) {
\r
1184 while((high > fromIndex) && (compare(c.getCollationKey(src[high]), c.getCollationKey(middle)) > 0)) {
\r
1188 String swap = src[low];
\r
1189 src[low] = src[high];
\r
1195 if(fromIndex < high) {
\r
1196 qSortImpl_icu_usekeys(src, fromIndex, high, c);
\r
1199 if(low < toIndex) {
\r
1200 qSortImpl_icu_usekeys(src, low, toIndex, c);
\r
1205 void qSortImpl_nokeys(String src[], int fromIndex, int toIndex, Comparator c) {
\r
1206 int low = fromIndex;
\r
1207 int high = toIndex;
\r
1208 String middle = "";
\r
1210 middle = src[ (low + high) / 2 ];
\r
1211 while(low <= high) {
\r
1212 while((low < toIndex) && (compare(src[low], middle, c) < 0)) {
\r
1215 while((high > fromIndex) && (compare(src[high], middle, c) > 0)) {
\r
1219 String swap = src[low];
\r
1220 src[low] = src[high];
\r
1226 if(fromIndex < high) {
\r
1227 qSortImpl_nokeys(src, fromIndex, high, c);
\r
1230 if(low < toIndex) {
\r
1231 qSortImpl_nokeys(src, low, toIndex, c);
\r
1236 int compare(String source, String target, Comparator c) {
\r
1238 return c.compare(source, target);
\r
1241 int compare(java.text.CollationKey source, java.text.CollationKey target) {
\r
1243 return source.compareTo(target);
\r
1246 int compare(com.ibm.icu.text.CollationKey source, com.ibm.icu.text.CollationKey target) {
\r
1248 return source.compareTo(target);
\r
1251 //Class for command line option
\r
1252 static class OptionSpec {
\r
1255 StringBuffer value;
\r
1256 public OptionSpec(String name, int type, StringBuffer value) {
\r
1259 this.value = value;
\r