2 *******************************************************************************
3 * Copyright (C) 2002-2007, International Business Machines Corporation and *
4 * others. All Rights Reserved. *
5 *******************************************************************************
8 package com.ibm.icu.dev.test.perf;
10 import com.ibm.icu.text.*;
13 import com.ibm.icu.impl.LocaleUtility;
15 public class CollationPerformanceTest {
16 static final String usageString =
17 "usage: collperf options...\n"
18 + "-help Display this message.\n"
19 + "-file file_name utf-16 format file of names.\n"
20 + "-locale name ICU locale to use. Default is en_US\n"
21 + "-rules file_name Collation rules file (overrides locale)\n"
22 //+ "-langid 0x1234 Windows Language ID number. Default to value for -locale option\n"
23 //+ " see http://msdn.microsoft.com/library/psdk/winbase/nls_8xo3.htm\n"
24 //+ "-win Run test using Windows native services. (ICU is default)\n"
25 //+ "-unix Run test using Unix strxfrm, strcoll services.\n"
26 //+ "-uselen Use API with string lengths. Default is null-terminated strings\n"
27 + "-usekeys Run tests using sortkeys rather than strcoll\n"
28 + "-strcmp Run tests using u_strcmp rather than strcoll\n"
29 + "-strcmpCPO Run tests using u_strcmpCodePointOrder rather than strcoll\n"
30 + "-loop nnnn Loopcount for test. Adjust for reasonable total running time.\n"
31 + "-iloop n Inner Loop Count. Default = 1. Number of calls to function\n"
32 + " under test at each call point. For measuring test overhead.\n"
33 + "-terse Terse numbers-only output. Intended for use by scripts.\n"
34 + "-french French accent ordering\n"
35 + "-frenchoff No French accent ordering (for use with French locales.)\n"
36 + "-norm Normalizing mode on\n"
37 + "-shifted Shifted mode\n"
38 + "-lower Lower case first\n"
39 + "-upper Upper case first\n"
40 + "-case Enable separate case level\n"
41 + "-level n Sort level, 1 to 5, for Primary, Secndary, Tertiary, Quaternary, Identical\n"
42 + "-keyhist Produce a table sort key size vs. string length\n"
43 + "-binsearch Binary Search timing test\n"
44 + "-keygen Sort Key Generation timing test\n"
45 + "-qsort Quicksort timing test\n"
46 + "-iter Iteration Performance Test\n"
47 + "-dump Display strings, sort keys and CEs.\n"
48 + "-java Run test using java.text.Collator.\n";
50 //enum {FLAG, NUM, STRING} type;
// Raw command-line option values, one mutable buffer per option.
// Each buffer starts at its default text. processOptions() clears and refills
// the buffer of every switch it recognizes: flag options receive "true",
// numeric and string options receive the argument text. At the end of
// processOptions() the buffers are converted into the typed opt_* fields.
51 static StringBuffer temp_opt_fName = new StringBuffer("");
52 static StringBuffer temp_opt_locale = new StringBuffer("en_US");
53 //static StringBuffer temp_opt_langid = new StringBuffer("0"); // Defaults to value corresponding to opt_locale.
54 static StringBuffer temp_opt_rules = new StringBuffer("");
55 static StringBuffer temp_opt_help = new StringBuffer("");
56 static StringBuffer temp_opt_loopCount = new StringBuffer("1");
57 static StringBuffer temp_opt_iLoopCount = new StringBuffer("1");
58 static StringBuffer temp_opt_terse = new StringBuffer("false");
59 static StringBuffer temp_opt_qsort = new StringBuffer("");
60 static StringBuffer temp_opt_binsearch = new StringBuffer("");
// Starts as "true"; none of the option table entries visible in this file
// is bound to this buffer — NOTE(review): confirm it is never changed.
61 static StringBuffer temp_opt_icu = new StringBuffer("true");
62 //static StringBuffer opt_win = new StringBuffer(""); // Run with Windows native functions.
63 //static StringBuffer opt_unix = new StringBuffer(""); // Run with UNIX strcoll, strxfrm functions.
64 //static StringBuffer opt_uselen = new StringBuffer("");
65 static StringBuffer temp_opt_usekeys = new StringBuffer("");
66 static StringBuffer temp_opt_strcmp = new StringBuffer("");
67 static StringBuffer temp_opt_strcmpCPO = new StringBuffer("");
68 static StringBuffer temp_opt_norm = new StringBuffer("");
69 static StringBuffer temp_opt_keygen = new StringBuffer("");
70 static StringBuffer temp_opt_french = new StringBuffer("");
71 static StringBuffer temp_opt_frenchoff = new StringBuffer("");
72 static StringBuffer temp_opt_shifted = new StringBuffer("");
73 static StringBuffer temp_opt_lower = new StringBuffer("");
74 static StringBuffer temp_opt_upper = new StringBuffer("");
75 static StringBuffer temp_opt_case = new StringBuffer("");
// "0" means no -level was given; the strength switch only runs when opt_level != 0.
76 static StringBuffer temp_opt_level = new StringBuffer("0");
77 static StringBuffer temp_opt_keyhist = new StringBuffer("");
78 static StringBuffer temp_opt_itertest = new StringBuffer("");
79 static StringBuffer temp_opt_dump = new StringBuffer("");
80 static StringBuffer temp_opt_java = new StringBuffer("");
// Typed option values read by the tests. They hold the defaults below until
// processOptions() copies in the parsed values from the temp_opt_* buffers:
// strings are copied as-is, counts and the level go through Integer.parseInt,
// and booleans are set from a case-insensitive comparison with "true".
83 static String opt_fName = "";
84 static String opt_locale = "en_US";
85 //static int opt_langid = 0; // Defaults to value corresponding to opt_locale.
86 static String opt_rules = "";
87 static boolean opt_help = false;
88 static int opt_loopCount = 1;
89 static int opt_iLoopCount = 1;
90 static boolean opt_terse = false;
91 static boolean opt_qsort = false;
92 static boolean opt_binsearch = false;
93 static boolean opt_icu = true;
94 //static boolean opt_win = false; // Run with Windows native functions.
95 //static boolean opt_unix = false; // Run with UNIX strcoll, strxfrm functions.
96 //static boolean opt_uselen = false;
97 static boolean opt_usekeys = false;
98 static boolean opt_strcmp = false;
99 static boolean opt_strcmpCPO = false;
100 static boolean opt_norm = false;
101 static boolean opt_keygen = false;
102 static boolean opt_french = false;
103 static boolean opt_frenchoff = false;
104 static boolean opt_shifted = false;
105 static boolean opt_lower = false;
106 static boolean opt_upper = false;
107 static boolean opt_case = false;
// 0 leaves the collator strength untouched; 1 to 5 select Primary through Identical.
108 static int opt_level = 0;
109 static boolean opt_keyhist = false;
110 static boolean opt_itertest = false;
111 static boolean opt_dump = false;
112 static boolean opt_java = false;
114 static OptionSpec[] options = {
115 new OptionSpec("-file", 2, temp_opt_fName),
116 new OptionSpec("-locale", 2, temp_opt_locale),
117 //new OptionSpec("-langid", 1, temp_opt_langid),
118 new OptionSpec("-rules", 2, temp_opt_rules),
119 new OptionSpec("-qsort", 0, temp_opt_qsort),
120 new OptionSpec("-binsearch", 0, temp_opt_binsearch),
121 new OptionSpec("-iter", 0, temp_opt_itertest),
122 //new OptionSpec("-win", 0, temp_opt_win),
123 //new OptionSpec("-unix", 0, temp_opt_unix),
124 //new OptionSpec("-uselen", 0, temp_opt_uselen),
125 new OptionSpec("-usekeys", 0, temp_opt_usekeys),
126 new OptionSpec("-strcmp", 0, temp_opt_strcmp),
127 new OptionSpec("-strcmpCPO", 0, temp_opt_strcmpCPO),
128 new OptionSpec("-norm", 0, temp_opt_norm),
129 new OptionSpec("-french", 0, temp_opt_french),
130 new OptionSpec("-frenchoff", 0, temp_opt_frenchoff),
131 new OptionSpec("-shifted", 0, temp_opt_shifted),
132 new OptionSpec("-lower", 0, temp_opt_lower),
133 new OptionSpec("-upper", 0, temp_opt_upper),
134 new OptionSpec("-case", 0, temp_opt_case),
135 new OptionSpec("-level", 1, temp_opt_level),
136 new OptionSpec("-keyhist", 0, temp_opt_keyhist),
137 new OptionSpec("-keygen", 0, temp_opt_keygen),
138 new OptionSpec("-loop", 1, temp_opt_loopCount),
139 new OptionSpec("-iloop", 1, temp_opt_iLoopCount),
140 new OptionSpec("-terse", 0, temp_opt_terse),
141 new OptionSpec("-dump", 0, temp_opt_dump),
142 new OptionSpec("-help", 0, temp_opt_help),
143 new OptionSpec("-?", 0, temp_opt_help),
144 new OptionSpec("-java", 0, temp_opt_java),
// JDK collator under test; created from opt_locale during option setup.
147 static java.text.Collator javaCol = null;
// ICU collator under test; built from the rules file when opt_rules is
// non-empty, otherwise obtained for opt_locale.
148 static com.ibm.icu.text.Collator icuCol = null;
// General number formatter, limited to two fraction digits in main().
149 static NumberFormat nf = null;
// Percent formatter created in main().
150 static NumberFormat percent = null;
// Lines of the input file, allocated in readDataLines(); elements are cast to String when read back.
151 ArrayList list = null;
// Array copy of list, filled at the end of readDataLines(); the timing tests iterate over it.
152 String[] tests = null;
155 public static void main(String[] args) {
156 CollationPerformanceTest collPerf = new CollationPerformanceTest();
157 if ( !CollationPerformanceTest.processOptions(args) || opt_help || opt_fName.length()==0) {
158 System.out.println(usageString);
162 nf = NumberFormat.getInstance();
163 nf.setMaximumFractionDigits(2);
164 percent = NumberFormat.getPercentInstance();
166 collPerf.setOptions();
167 collPerf.readDataLines();
178 collPerf.doBinarySearch();
186 collPerf.doKeyHist();
190 collPerf.doIterTest();
195 //Dump file lines, CEs, Sort Keys if requested
197 for(int i = 0; i < list.size(); i++) {
199 String line = com.ibm.icu.impl.Utility.escape((String)list.get(i));
200 System.out.println(line);
202 System.out.print(" CEs: ");
203 CollationElementIterator CEiter = ((com.ibm.icu.text.RuleBasedCollator)icuCol).getCollationElementIterator(line);
208 if (ce == CollationElementIterator.NULLORDER) {
211 //System.out.print();
212 String outStr = Integer.toHexString(ce);
213 for (int len = 0; len < 8 - outStr.length(); len++) {
214 outStr ='0' + outStr;
216 System.out.print(outStr + " ");
218 System.out.print("\n ");
223 System.out.print("\n ICU Sort Key: ");
224 CollationKey ck = ((com.ibm.icu.text.RuleBasedCollator)icuCol).getCollationKey(line);
225 byte[] cks = ck.toByteArray();
227 for(int k = 0; k < cks.length; k++) {
228 String outStr = Integer.toHexString(cks[k]);
229 switch (outStr.length()) {
230 case 1: outStr = '0' + outStr;
232 case 8: outStr = outStr.substring(6);
235 System.out.print(outStr);
236 System.out.print(" ");
238 if(j > 0 && j % 20 == 0) {
239 System.out.print("\n ");
242 System.out.println("\n");
246 /**---------------------------------------------------------------------------------------
248 * doQSort() The quick sort timing test.
250 *---------------------------------------------------------------------------------------
254 //String[] sortTests = (String[]) tests.clone();
255 //Adjust loop count to compensate for file size. QSort should be n log(n)
256 double dLoopCount = opt_loopCount * 3000 / ((Math.log(tests.length) / Math.log(10)* tests.length));
262 int adj_loopCount = (int)dLoopCount;
263 if(adj_loopCount < 1) {
270 if (opt_icu && opt_usekeys) {
271 startTime = System.currentTimeMillis();
272 qSortImpl_icu_usekeys(tests, 0, tests.length -1, icuCol);
273 endTime = System.currentTimeMillis();
275 if (opt_icu && !opt_usekeys){
276 startTime = System.currentTimeMillis();
277 qSortImpl_nokeys(tests, 0, tests.length -1, icuCol);
278 endTime = System.currentTimeMillis();
280 if (opt_java && opt_usekeys) {
281 startTime = System.currentTimeMillis();
282 qSortImpl_java_usekeys(tests, 0, tests.length -1, javaCol);
283 endTime = System.currentTimeMillis();
285 if (opt_java && !opt_usekeys){
286 startTime = System.currentTimeMillis();
287 qSortImpl_nokeys(tests, 0, tests.length -1, javaCol);
288 endTime = System.currentTimeMillis();
290 long elapsedTime = endTime - startTime;
291 int ns = (int)(1000000 * elapsedTime / (globalCount + 0.0));
293 System.out.println("qsort: total # of string compares = " + globalCount);
294 System.out.println("qsort: time per compare = " + ns);
296 System.out.println(ns);
300 /**---------------------------------------------------------------------------------------
302 * doBinarySearch() Binary Search timing test. Each name from the list
303 * is looked up in the full sorted list of names.
305 *---------------------------------------------------------------------------------------
307 void doBinarySearch() {
311 double dLoopCount = opt_loopCount * 3000 / (Math.log(tests.length) / Math.log(10)* tests.length);
313 long elapsedTime = 0;
318 int adj_loopCount = (int)dLoopCount;
319 if(adj_loopCount < 1) {
325 for(;;) { //not really a loop, just allows "break" to work, to simplify
326 //guarding against inadvertently running more than one test through here
329 startTime = System.currentTimeMillis();
330 for(loops = 0; loops < adj_loopCount; loops++) {
331 for (int j = 0; j < tests.length; j++) {
332 int hi = tests.length-1;
336 int newGuess = (hi + lo) / 2;
337 if(newGuess == guess){
341 r = tests[j].compareTo(tests[guess]);
354 elapsedTime = System.currentTimeMillis() - startTime;
360 startTime = System.currentTimeMillis();
361 for(loops = 0; loops < adj_loopCount; loops++) {
362 for (int j = 0; j < tests.length; j++) {
363 int hi = tests.length-1;
367 int newGuess = (hi + lo) / 2;
368 if(newGuess == guess){
372 r = com.ibm.icu.text.Normalizer.compare(tests[j], tests[guess], Normalizer.COMPARE_CODE_POINT_ORDER);
385 elapsedTime = System.currentTimeMillis() - startTime;
392 startTime = System.currentTimeMillis();
393 for (loops = 0; loops < adj_loopCount; loops++) {
394 for (int j = 0; j < tests.length; j++) {
395 int hi = tests.length - 1;
399 int newGuess = (hi + lo) / 2;
400 if (newGuess == guess) {
405 com.ibm.icu.text.CollationKey sortKey1 = icuCol.getCollationKey(tests[j]);
406 com.ibm.icu.text.CollationKey sortKey2 = icuCol.getCollationKey(tests[guess]);
407 r = sortKey1.compareTo(sortKey2);
410 r = icuCol.compare(tests[j], tests[guess]);
424 elapsedTime = System.currentTimeMillis() - startTime;
430 startTime = System.currentTimeMillis();
431 for (loops = 0; loops < adj_loopCount; loops++) {
432 for (int j = 0; j < tests.length; j++) {
433 int hi = tests.length - 1;
437 int newGuess = (hi + lo) / 2;
438 if (newGuess == guess) {
443 java.text.CollationKey sortKey1 = javaCol.getCollationKey(tests[j]);
444 java.text.CollationKey sortKey2 = javaCol.getCollationKey(tests[guess]);
445 r = sortKey1.compareTo(sortKey2);
448 r = javaCol.compare(tests[j], tests[guess]);
462 elapsedTime = System.currentTimeMillis() - startTime;
467 int ns = (int)((float)(1000000) * (float)elapsedTime / (float)gCount);
469 System.out.println("binary search: total # of string compares = " + gCount);
470 System.out.println("binary search: compares per loop = " + gCount / loops);
471 System.out.println("binary search: time per compare = " + ns);
473 System.out.println(ns);
477 /**---------------------------------------------------------------------------------------
479 * doKeyGen() Key Generation Timing Test
481 *---------------------------------------------------------------------------------------
486 // Adjust loop count to compensate for file size. Should be order n
487 double dLoopCount = opt_loopCount * (1000.0 / (double)list.size());
488 int adj_loopCount = (int)dLoopCount;
489 if (adj_loopCount < 1) adj_loopCount = 1;
492 long totalKeyLen = 0;
495 startTime = System.currentTimeMillis();
496 for (int loops=0; loops<adj_loopCount; loops++) {
497 for (int line=0; line < tests.length; line++) {
498 for (int iLoop=0; iLoop < opt_iLoopCount; iLoop++) {
499 totalChars += tests[line].length();
500 byte[] sortKey = javaCol.getCollationKey(tests[line]).toByteArray();
501 totalKeyLen += sortKey.length;
506 startTime = System.currentTimeMillis();
507 for (int loops=0; loops<adj_loopCount; loops++) {
508 for (int line=0; line < tests.length; line++) {
509 for (int iLoop=0; iLoop < opt_iLoopCount; iLoop++) {
510 totalChars += tests[line].length();
511 byte[] sortKey = icuCol.getCollationKey(tests[line]).toByteArray();
512 totalKeyLen += sortKey.length;
518 long elapsedTime = System.currentTimeMillis() - startTime;
519 long ns = (long)(1000000 * elapsedTime / (adj_loopCount * tests.length + 0.0));
521 System.out.println("Sort Key Generation: total # of keys =" + adj_loopCount * tests.length);
522 System.out.println("Sort Key Generation: time per key = " + ns + " ns");
523 System.out.println("Key Length / character = " + nf.format(totalKeyLen / (totalChars + 0.0)));
526 System.out.print(ns + ", ");
527 System.out.println(nf.format(totalKeyLen / (totalChars + 0.0)) + ", ");
531 /**---------------------------------------------------------------------------------------
533 * doKeyHist() Output a table of data for average sort key size vs. string length.
535 *---------------------------------------------------------------------------------------
541 // Find the maximum string length
542 for (int i = 0; i < tests.length; i++) {
543 if (tests[i].length() > maxLen) maxLen = tests[i].length();
546 int[] accumulatedLen = new int[maxLen + 1];
547 int[] numKeysOfSize = new int[maxLen + 1];
549 // Fill the arrays...
550 for (int i = 0; i < tests.length; i++) {
551 int len = tests[i].length();
552 accumulatedLen[len] += icuCol.getCollationKey(tests[i]).toByteArray().length;
553 numKeysOfSize[len] += 1;
556 // And write out averages
557 System.out.println("String Length, Avg Key Length, Avg Key Len per char");
558 for (int i = 1; i <= maxLen; i++) {
559 if (numKeysOfSize[i] > 0) {
560 System.out.println(i + ", " + nf.format(accumulatedLen[i] / (numKeysOfSize[i]+ 0.0)) + ", "
561 + nf.format(accumulatedLen[i] / (numKeysOfSize[i] * i + 0.0)));
567 void doForwardIterTest() {
569 System.out.print("\n\nPerforming forward iteration performance test with ");
570 System.out.println("performance test on strings from file -----------");
572 CollationElementIterator iter = ((RuleBasedCollator)icuCol).getCollationElementIterator("");
576 long startTime = System.currentTimeMillis();
577 while (count < opt_loopCount) {
579 while (linecount < tests.length) {
580 String str = tests[linecount];
582 while (iter.next() != CollationElementIterator.NULLORDER) {
590 long elapsedTime = System.currentTimeMillis() - startTime;
591 System.out.println("elapsedTime " + elapsedTime + " ms");
593 // empty loop recalculation
595 startTime = System.currentTimeMillis();
596 while (count < opt_loopCount) {
598 while (linecount < tests.length) {
599 String str = tests[linecount];
605 elapsedTime -= (System.currentTimeMillis() - startTime);
606 System.out.println("elapsedTime " + elapsedTime + " ms");
608 int ns = (int)(1000000 * elapsedTime / (gCount + 0.0));
609 System.out.println("Total number of strings compared " + tests.length
610 + "in " + opt_loopCount + " loops");
611 System.out.println("Average time per CollationElementIterator.next() nano seconds " + ns);
612 System.out.println("performance test on skipped-5 concatenated strings from file -----------");
614 String totalStr = "";
616 // appending all the strings
618 while (linecount < tests.length) {
619 totalStr += tests[linecount];
620 strlen += tests[linecount].length();
623 System.out.println("Total size of strings " + strlen);
627 iter = ((RuleBasedCollator)icuCol).getCollationElementIterator(totalStr);
628 strlen -= 5; // any left over characters are not iterated,
629 // this is to ensure the backwards and forwards iterators
630 // get the same position
632 startTime = System.currentTimeMillis();
633 while (count < opt_loopCount) {
636 iter.setOffset(strindex);
638 if (iter.next() == CollationElementIterator.NULLORDER) {
645 if (strindex > strlen) {
648 iter.setOffset(strindex);
655 elapsedTime = System.currentTimeMillis() - startTime;
656 System.out.println("elapsedTime " + elapsedTime);
658 // empty loop recalculation
661 startTime = System.currentTimeMillis();
662 while (count < opt_loopCount) {
665 iter.setOffset(strindex);
671 if (strindex > strlen) {
674 iter.setOffset(strindex);
680 elapsedTime -= (System.currentTimeMillis() - startTime);
681 System.out.println("elapsedTime " + elapsedTime);
683 System.out.println("gCount " + gCount);
684 ns = (int)(1000000 * elapsedTime / (gCount + 0.0));
685 System.out.println("Average time per CollationElementIterator.next() nano seconds " + ns);
688 void doBackwardIterTest() {
689 System.out.print("\n\nPerforming backward iteration performance test with ");
690 System.out.println("performance test on strings from file -----------\n");
692 CollationElementIterator iter = ((RuleBasedCollator)icuCol).getCollationElementIterator("");
696 long startTime = System.currentTimeMillis();
697 while (count < opt_loopCount) {
699 while (linecount < tests.length) {
700 String str = tests[linecount];
702 while (iter.previous() != CollationElementIterator.NULLORDER) {
709 long elapsedTime = System.currentTimeMillis() - startTime;
710 System.out.println("elapsedTime " + elapsedTime + " ms");
712 // empty loop recalculation
714 startTime = System.currentTimeMillis();
715 while (count < opt_loopCount) {
717 while (linecount < tests.length) {
718 String str = tests[linecount];
724 elapsedTime -= (System.currentTimeMillis() - startTime);
725 System.out.println("elapsedTime " + elapsedTime + " ms");
727 int ns = (int)(1000000 * elapsedTime / (gCount + 0.0));
728 System.out.println("Total number of strings compared " + tests.length
729 + "in " + opt_loopCount + " loops");
730 System.out.println("Average time per CollationElementIterator.previous() nano seconds " + ns);
731 System.out.println("performance test on skipped-5 concatenated strings from file -----------");
733 String totalStr = "";
735 // appending all the strings
737 while (linecount < tests.length) {
738 totalStr += tests[linecount];
739 strlen += tests[linecount].length();
742 System.out.println("Total size of strings " + strlen);
747 iter = ((RuleBasedCollator)icuCol).getCollationElementIterator(totalStr);
749 startTime = System.currentTimeMillis();
750 while (count < opt_loopCount) {
753 iter.setOffset(strindex);
755 if (iter.previous() == CollationElementIterator.NULLORDER) {
762 if (strindex > strlen) {
765 iter.setOffset(strindex);
772 elapsedTime = System.currentTimeMillis() - startTime;
773 System.out.println("elapsedTime " + elapsedTime);
775 // empty loop recalculation
778 startTime = System.currentTimeMillis();
779 while (count < opt_loopCount) {
782 iter.setOffset(strindex);
788 if (strindex > strlen) {
791 iter.setOffset(strindex);
797 elapsedTime -= (System.currentTimeMillis() - startTime);
798 System.out.println("elapsedTime " + elapsedTime);
800 System.out.println("gCount " + gCount);
801 ns = (int)(1000000 * elapsedTime / (gCount + 0.0));
802 System.out.println("Average time per CollationElementIterator.previous() nano seconds " + ns);
806 /**---------------------------------------------------------------------------------------
808 * doIterTest() Iteration test
810 *---------------------------------------------------------------------------------------
814 doBackwardIterTest();
823 if (opt_rules.length() != 0) {
825 icuCol = new com.ibm.icu.text.RuleBasedCollator(getCollationRules(opt_rules));
826 } catch (Exception e) {
827 System.out.println("Cannot open rules:" + e.getMessage());
831 icuCol = com.ibm.icu.text.Collator.getInstance(
832 LocaleUtility.getLocaleFromName(opt_locale));
835 javaCol = java.text.Collator.getInstance(
836 LocaleUtility.getLocaleFromName(opt_locale));
839 javaCol.setDecomposition(java.text.Collator.CANONICAL_DECOMPOSITION);
840 icuCol.setDecomposition(com.ibm.icu.text.Collator.CANONICAL_DECOMPOSITION);
843 if (opt_french && opt_frenchoff) {
844 System.err.println("Error: specified both -french and -frenchoff options.");
848 ((com.ibm.icu.text.RuleBasedCollator)icuCol).setFrenchCollation(true);
851 ((com.ibm.icu.text.RuleBasedCollator)icuCol).setFrenchCollation(false);
855 ((com.ibm.icu.text.RuleBasedCollator)icuCol).setLowerCaseFirst(true);
859 ((com.ibm.icu.text.RuleBasedCollator)icuCol).setUpperCaseFirst(true);
863 ((com.ibm.icu.text.RuleBasedCollator)icuCol).setAlternateHandlingShifted(true);
866 if (opt_level != 0) {
869 javaCol.setStrength(java.text.Collator.PRIMARY);
870 icuCol.setStrength(com.ibm.icu.text.Collator.PRIMARY);
873 javaCol.setStrength(java.text.Collator.SECONDARY);
874 icuCol.setStrength(com.ibm.icu.text.Collator.SECONDARY);
877 javaCol.setStrength(java.text.Collator.TERTIARY);
878 icuCol.setStrength(com.ibm.icu.text.Collator.TERTIARY);
881 icuCol.setStrength(com.ibm.icu.text.Collator.QUATERNARY);
884 javaCol.setStrength(java.text.Collator.IDENTICAL);
885 icuCol.setStrength(com.ibm.icu.text.Collator.IDENTICAL);
888 System.err.println("-level param must be between 1 and 5\n");
892 // load classes at least once before starting
893 javaCol.compare("a", "b");
894 icuCol.compare("a", "b");
897 static boolean processOptions(String[] args) {
899 for (argNum =0; argNum < args.length; argNum++) {
900 for (int i = 0; i < options.length; i++) {
901 if (args[argNum].equalsIgnoreCase(options[i].name)) {
902 switch (options[i].type) {
904 options[i].value.delete(0, options[i].value.capacity()).append("true");
908 if ((argNum >= args.length) || (args[argNum].charAt(0)=='-')) {
909 System.err.println("value expected for"+ options[i].name +"option.\n");
913 /* int value =*/ Integer.parseInt(args[argNum]);
914 options[i].value.delete(0, options[i].value.capacity()).append(args[argNum]);
915 } catch (NumberFormatException e) {
916 System.err.println("Expected: a number value");
922 if ((argNum >= args.length) || (args[argNum].charAt(0)=='-')) {
923 System.err.println("value expected for"+ options[i].name +"option.\n");
926 options[i].value.delete(0, options[i].value.capacity()).append(args[argNum]);
929 System.err.println("Option type error: {FLAG=0, NUM=1, STRING=2}");
936 opt_fName = temp_opt_fName.toString();
937 opt_locale = temp_opt_locale.toString();
938 opt_rules = temp_opt_rules.toString();
939 if (temp_opt_help.toString().equalsIgnoreCase("true")) {
942 opt_loopCount = Integer.parseInt(temp_opt_loopCount.toString());
943 opt_iLoopCount = Integer.parseInt(temp_opt_iLoopCount.toString());
944 if (temp_opt_terse.toString().equalsIgnoreCase("true")) {
947 if (temp_opt_qsort.toString().equalsIgnoreCase("true")) {
950 if (temp_opt_binsearch.toString().equalsIgnoreCase("true")) {
951 opt_binsearch = true;
953 if (temp_opt_icu.toString().equalsIgnoreCase("true")) {
956 if (temp_opt_usekeys.toString().equalsIgnoreCase("true")) {
959 if (temp_opt_strcmp.toString().equalsIgnoreCase("true")) {
962 if (temp_opt_strcmpCPO.toString().equalsIgnoreCase("true")) {
963 opt_strcmpCPO = true;
965 if (temp_opt_keygen.toString().equalsIgnoreCase("true")) {
968 if (temp_opt_norm.toString().equalsIgnoreCase("true")) {
971 if (temp_opt_french.toString().equalsIgnoreCase("true")) {
974 if (temp_opt_frenchoff.toString().equalsIgnoreCase("true")) {
975 opt_frenchoff = true;
977 if (temp_opt_shifted.toString().equalsIgnoreCase("true")) {
980 if (temp_opt_lower.toString().equalsIgnoreCase("true")) {
983 if (temp_opt_upper.toString().equalsIgnoreCase("true")) {
986 if (temp_opt_case.toString().equalsIgnoreCase("true")) {
989 opt_level = Integer.parseInt(temp_opt_level.toString());
990 if (temp_opt_keyhist.toString().equalsIgnoreCase("true")) {
993 if (temp_opt_itertest.toString().equalsIgnoreCase("true")) {
996 if (temp_opt_dump.toString().equalsIgnoreCase("true")) {
999 if (temp_opt_java.toString().equalsIgnoreCase("true")) {
1007 * Invoke the runtime's garbage collection procedure repeatedly
1008 * until the amount of free memory stabilizes to within 10%.
1010 private void callGC() {
1011 // From "Java Platform Performance". This is the procedure
1012 // recommended by Javasoft.
1016 System.runFinalization();
1021 System.runFinalization();
1023 } catch (InterruptedException e) {}
1026 //private boolean needCRLF = false;
// Interval for the progress check that tests (i % DOTMASK) == 0.
// Despite the name, the value is used as a modulo divisor, not as a bit mask.
1028 public int DOTMASK = 0x7FF;
1031 if ((i % DOTMASK) == 0) {
1033 // It is unclear why a dot was printed here
1034 //System.out.print('.');
1038 String readDataLine(BufferedReader br) throws Exception {
1039 String originalLine = "";
1043 line = originalLine = br.readLine();
1044 if (line == null) return null;
1045 if (line.length() > 0 && line.charAt(0) == 0xFEFF) line = line.substring(1);
1046 int commentPos = line.indexOf('#');
1047 if (commentPos >= 0) line = line.substring(0, commentPos);
1049 } catch (Exception e) {
1050 throw new Exception("Line \"{0}\", \"{1}\"" + originalLine + " "
1051 + line + " " + e.toString());
1056 void readDataLines() {
1057 // Read in the input file.
1058 // File is decoded as UTF-8.
1059 // Lines are gathered in list and then copied into the tests array.
1060 // Lines are held as Java Strings, so no terminator is involved.
1062 FileInputStream fis = null;
1063 InputStreamReader isr = null;
1064 BufferedReader br = null;
1066 fis = new FileInputStream(opt_fName);
1067 isr = new InputStreamReader(fis, "UTF-8");
1068 br= new BufferedReader(isr, 32*1024);
1069 } catch (Exception e) {
1070 System.err.println("Error: File access exception: " + e.getMessage() + "!");
1076 list = new ArrayList();
1080 line = readDataLine(br);
1081 } catch (Exception e) {
1082 System.err.println("Read File Error" + e.getMessage() + "!");
1086 if (line == null) break;
1087 if (line.length() == 0) continue;
1092 System.out.println("Read " + counter + " lines in file");
1095 int size = list.size();
1096 tests = new String [size];
1098 for (int i = 0; i < size; ++i) {
1099 tests[i] = (String) list.get(i);
1104 * Get the Collator Rules
1105 * The Rule File format:
1106 * 1. leading and trailing whitespace will be omitted
1107 * 2. text from a '#' character to the end of the line will be treated as a comment
1108 * 3. File encoding is ISO-8859-1
1110 String getCollationRules(String ruleFileName) {
1111 FileInputStream fis = null;
1112 InputStreamReader isr = null;
1113 BufferedReader br = null;
1115 fis = new FileInputStream(opt_rules);
1116 isr = new InputStreamReader(fis,"ISO-8859-1");
1117 br= new BufferedReader(isr);
1118 } catch (Exception e) {
1119 System.err.println("Error: File access exception: " + e.getMessage() + "!");
1126 line = br.readLine();
1127 } catch (IOException e) {
1128 System.err.println("Read File Error" + e.getMessage() + "!");
1134 int commentPos = line.indexOf('#');
1135 if (commentPos >= 0) line = line.substring(0, commentPos);
1137 rules = rules + line;
1142 //Implementing qsort
1143 void qSortImpl_java_usekeys(String src[], int fromIndex, int toIndex, java.text.Collator c) {
1144 int low = fromIndex;
1148 middle = src[ (low + high) / 2 ];
1149 while(low <= high) {
1150 while((low < toIndex) && (compare(c.getCollationKey(src[low]), c.getCollationKey(middle)) < 0)) {
1153 while((high > fromIndex) && (compare(c.getCollationKey(src[high]), c.getCollationKey(middle)) > 0)) {
1157 String swap = src[low];
1158 src[low] = src[high];
1164 if(fromIndex < high) {
1165 qSortImpl_java_usekeys(src, fromIndex, high, c);
1169 qSortImpl_java_usekeys(src, low, toIndex, c);
1174 void qSortImpl_icu_usekeys(String src[], int fromIndex, int toIndex, com.ibm.icu.text.Collator c) {
1175 int low = fromIndex;
1179 middle = src[ (low + high) / 2 ];
1180 while(low <= high) {
1181 while((low < toIndex) && (compare(c.getCollationKey(src[low]), c.getCollationKey(middle)) < 0)) {
1184 while((high > fromIndex) && (compare(c.getCollationKey(src[high]), c.getCollationKey(middle)) > 0)) {
1188 String swap = src[low];
1189 src[low] = src[high];
1195 if(fromIndex < high) {
1196 qSortImpl_icu_usekeys(src, fromIndex, high, c);
1200 qSortImpl_icu_usekeys(src, low, toIndex, c);
1205 void qSortImpl_nokeys(String src[], int fromIndex, int toIndex, Comparator c) {
1206 int low = fromIndex;
1210 middle = src[ (low + high) / 2 ];
1211 while(low <= high) {
1212 while((low < toIndex) && (compare(src[low], middle, c) < 0)) {
1215 while((high > fromIndex) && (compare(src[high], middle, c) > 0)) {
1219 String swap = src[low];
1220 src[low] = src[high];
1226 if(fromIndex < high) {
1227 qSortImpl_nokeys(src, fromIndex, high, c);
1231 qSortImpl_nokeys(src, low, toIndex, c);
1236 int compare(String source, String target, Comparator c) {
1238 return c.compare(source, target);
1241 int compare(java.text.CollationKey source, java.text.CollationKey target) {
1243 return source.compareTo(target);
1246 int compare(com.ibm.icu.text.CollationKey source, com.ibm.icu.text.CollationKey target) {
1248 return source.compareTo(target);
1251 //Class for command line option
1252 static class OptionSpec {
1256 public OptionSpec(String name, int type, StringBuffer value) {