2 //#if defined(FOUNDATION10) || defined(J2SE13)
5 *******************************************************************************
6 * Copyright (C) 2002-2009, International Business Machines Corporation and *
7 * others. All Rights Reserved. *
8 *******************************************************************************
10 package com.ibm.icu.dev.test.collator;
13 import com.ibm.icu.text.Collator;
14 import com.ibm.icu.text.RuleBasedCollator;
15 import com.ibm.icu.text.UnicodeSet;
17 import com.ibm.icu.dev.test.TestFmwk;
18 import com.ibm.icu.dev.test.util.BNF;
19 import com.ibm.icu.dev.test.util.BagFormatter;
20 import com.ibm.icu.dev.test.util.Quoter;
23 import java.io.IOException;
24 import java.io.PrintWriter;
25 import java.text.ParseException;
26 import java.util.Random;
28 public class RandomCollator extends TestFmwk {
// Command-line entry point: creates a RandomCollator and passes the arguments to its run(...) method.
// run(...) is not declared in the visible part of this class; presumably it is inherited from TestFmwk.
29 public static void main(String[] args) throws Exception {
30 new RandomCollator().run(args);
31 //new CollationAPITest().TestGetTailoredSet();
// Upper bound of the iteration loop in TestRandom (one collator is constructed per iteration).
34 static final int CONSTRUCT_RANDOM_COUNT = 100;
// Count handed to TestCollator.test(...) for every collator that TestRandom constructs.
35 static final int FORMAL_TEST_COUNT = 1000;
// Marker text; the TestRandom log header states that it indicates the ParseException errorOffset.
37 static final String POSITION = "{$$$}";
// Subclass of BagFormatter.Shower that overrides print(String).
// NOTE(review): the body of print(...) is not visible in this excerpt, so its target is unknown here.
40 class Shower extends BagFormatter.Shower {
41 public void print(String arg) {
// Public field holding one Shower instance.
46 public Shower LOG = new Shower();
// Runs CONSTRUCT_RANDOM_COUNT iterations; each one constructs a RuleBasedCollator from the current
// rule string and passes it to TestCollator.test(...) with FORMAL_TEST_COUNT.
// Progress and failures are written to "RandomCollationTestLog.txt" in the "user.dir" directory,
// opened through BagFormatter.openUTF8Writer. A ParseException and any other exception are logged
// separately and each is reported through errln; a final summary line goes to the log and to logln.
// Declared to throw IOException.
49 public void TestRandom() throws IOException {
51 // = java.util.Calendar.getInstance().get(java.util.Calendar.YEAR);
53 // System.out.println("\nTestRandom skipped for 2003");
57 PrintWriter pw = BagFormatter.openUTF8Writer(System.getProperty("user.dir")+File.separator, "RandomCollationTestLog.txt");
58 TestCollator tc = new TestCollator(chars);
59 pw.println("Collation Test Run");
60 pw.println("Note: For parse-exception, " + POSITION + " indicates the errorOffset");
62 pw.println(currentRules);
// Placeholder value so the exception handlers below always have some rule text to print.
63 String rules = "<unknown>";
67 for (int i = 0; i < CONSTRUCT_RANDOM_COUNT; ++i) {
71 Collator c = new RuleBasedCollator(rules.toString());
72 tc.test(c, FORMAL_TEST_COUNT);
78 } catch (ParseException pe) {
80 pw.println("========PARSE EXCEPTION======== (" + i + ")");
// The rule text is written in two pieces, split at the offset reported by the exception.
81 int errorOffset = pe.getErrorOffset();
82 pw.print(rules.substring(0,errorOffset));
84 pw.println(rules.substring(errorOffset));
85 //pw.println("========ERROR======== (" + i + ")");
86 //pe.printStackTrace(pw);
87 //pw.println("========END======== (" + i + ")");
88 errln("ParseException");
89 } catch (Exception e) {
91 pw.println("========OTHER EXCEPTION======== (" + i + ")");
92 e.printStackTrace(pw);
93 pw.println("========RULES======== (" + i + ")");
95 //pw.println("========END======== (" + i + ")");
// This branch handles everything that is not a ParseException, so report it under the same
// "Other Exception" label the summary uses and include the exception itself.
96 errln("Other Exception: " + e);
99 pw.println("Successful: " + sCount
100 + ",\tParseException: " + peCount
101 + ",\tOther Exception: " + oeCount);
102 logln("Successful: " + sCount
103 + ",\tParseException: " + peCount
104 + ",\tOther Exception: " + oeCount);
// Nested test helper that extends TestComparator (declared outside this excerpt).
// TestRandom creates one instance and calls its test(...) method for every collator.
109 public static class TestCollator extends TestComparator {
// Assigns rs a BNF object built with Random(0) and a Quoter.RuleQuoter, whose only rule is
// "$root = <chars>{1,8};" (presumably 1 to 8 repetitions of the set - confirm against BNF).
112 TestCollator(UnicodeSet chars) {
113 rs = new BNF(new Random(0), new Quoter.RuleQuoter())
114 .addRules("$root = " + chars + "{1,8};").complete();
// NOTE(review): body not visible in this excerpt.
117 public Object newObject(Object c) {
// Returns c.toString() passed through the BagFormatter.hex transliterator.
121 public String format(Object c) {
122 return BagFormatter.hex.transliterate(c.toString());
// Rule text that TestRandom prints near the top of its log; initially null.
127 String currentRules = null;
// NOTE(review): body not visible in this excerpt.
130 public String get() {
134 public RandomCollator() {
// Delegates to init(int, int, UnicodeSet) with rule counts 1 and 10 and a fixed character set.
// NOTE(review): the set literal lists 'A' and 'Z' individually ("AZ"), not the range "A-Z" -
// confirm this is intended.
137 protected void init()throws Exception{
138 init(1,10, new UnicodeSet("[AZa-z<\\&\\[\\]]"));
// Stores the character set in this.chars and starts building the bnf generator: Random(0),
// a Quoter.RuleQuoter, the set registered as "$chars", and the collationBNF grammar.
// NOTE(review): minRuleCount and maxRuleCount are not used in the lines visible here.
140 private void init(int minRuleCount, int maxRuleCount, UnicodeSet setOfChars) {
141 this.chars = setOfChars;
142 bnf = new BNF(new Random(0), new Quoter.RuleQuoter())
143 .addSet("$chars", setOfChars)
144 .addRules(collationBNF)
// Grammar text passed to BNF.addRules(...) in init(int, int, UnicodeSet).
// It names the pieces of a collation rule string: relation operators, bracketed commands,
// positions, before-levels, resets and relations, with "$root" as the top-level rule.
// "$chars" is not defined in this text; init registers it separately through addSet("$chars", ...).
// NOTE(review): the meaning of the "N%", "{m,n}" and "~@" notations is defined by the BNF class,
// which is outside this file - confirm there before changing any weights or repeat counts.
148 private static String collationBNF =
149 "$s = ' '? 50%;\r\n" +
150 "$relationList = (" + " '<'" + " | ' <<'" + " | ' ;'" +
151 " | ' <<<'" + " | ' ,'" +
152 " | ' ='" + ");\r\n" +
153 "$alternateOptions = non'-'ignorable | shifted;\r\n" +
154 "$caseFirstOptions = off | upper | lower;\r\n" +
155 "$strengthOptions = '1' | '2' | '3' | '4' | 'I';\r\n" +
156 "$commandList = '['" + " ( alternate ' ' $alternateOptions" + " | backwards' 2'" + " | normalization ' ' $onoff " + " | caseLevel ' ' $onoff " + " | hiraganaQ ' ' $onoff" + " | caseFirst ' ' $caseFirstOptions" + " | strength ' ' $strengthOptions" + " ) ']';\r\n" +
157 "$ignorableTypes = (tertiary | secondary | primary) ' ' ignorable;\r\n" +
158 "$allTypes = variable | regular | implicit | trailing | $ignorableTypes;\r\n" +
159 "$onoff = on | off;\r\n" +
160 "$positionList = '[' (first | last) ' ' $allTypes ']';\r\n" + "$beforeList = '[before ' ('1' | '2' | '3') ']';\r\n" + "$string = $chars{1,5}~@;\r\n" +
161 "$crlf = '\r\n';\r\n" +
162 "$rel1 = '[variable top]' $s ;\r\n" +
163 "$p1 = ($string $s '|' $s)? 25%;\r\n" +
164 "$p2 = ('\\' $s $string $s)? 25%;\r\n" +
165 "$rel2 = $p1 $string $s $p2;\r\n" +
166 "$relation = $relationList $s ($rel1 | $rel2) $crlf;\r\n" +
167 "$command = $commandList $crlf;\r\n" +
168 "$reset = '&' $s ($beforeList $s)? 10% ($positionList | $string 10%) $crlf;\r\n" +
169 "$mostRules = $command 1% | $reset 5% | $relation 25%;\r\n" +
170 "$root = $command{0,5} $reset $mostRules{1,20};\r\n";
176 gc ; C ; Other # Cc | Cf | Cn | Co | Cs
180 gc ; Co ; Private_Use
182 gc ; L ; Letter # Ll | Lm | Lo | Lt | Lu
183 gc ; LC ; Cased_Letter # Ll | Lt | Lu
184 gc ; Ll ; Lowercase_Letter
185 gc ; Lm ; Modifier_Letter
186 gc ; Lo ; Other_Letter
187 gc ; Lt ; Titlecase_Letter
188 gc ; Lu ; Uppercase_Letter
189 gc ; M ; Mark # Mc | Me | Mn
190 gc ; Mc ; Spacing_Mark
191 gc ; Me ; Enclosing_Mark
192 gc ; Mn ; Nonspacing_Mark
193 gc ; N ; Number # Nd | Nl | No
194 gc ; Nd ; Decimal_Number
195 gc ; Nl ; Letter_Number
196 gc ; No ; Other_Number
197 gc ; P ; Punctuation # Pc | Pd | Pe | Pf | Pi | Po | Ps
198 gc ; Pc ; Connector_Punctuation
199 gc ; Pd ; Dash_Punctuation
200 gc ; Pe ; Close_Punctuation
201 gc ; Pf ; Final_Punctuation
202 gc ; Pi ; Initial_Punctuation
203 gc ; Po ; Other_Punctuation
204 gc ; Ps ; Open_Punctuation
205 gc ; S ; Symbol # Sc | Sk | Sm | So
206 gc ; Sc ; Currency_Symbol
207 gc ; Sk ; Modifier_Symbol
208 gc ; Sm ; Math_Symbol
209 gc ; So ; Other_Symbol
210 gc ; Z ; Separator # Zl | Zp | Zs
211 gc ; Zl ; Line_Separator
212 gc ; Zp ; Paragraph_Separator
213 gc ; Zs ; Space_Separator
219 // "& [" position "]"
221 // relation "[variable top]"
222 // relation (chars "|")? chars ("/" chars)?
223 // plus, a reset must come before a relation
225 // the following reflects the above rules, plus allows whitespace.
226 Pick chars = Pick.string(1, 5, Pick.codePoint(uSet)); // insert something needing quotes
227 Pick s = Pick.maybe(0.8, Pick.unquoted(" ")).name("Space"); // optional space
228 Pick CRLF = Pick.unquoted("\r\n");
230 Pick rel1 = Pick.and(Pick.unquoted("[variable top]")).and2(s);
231 Pick p1 = Pick.maybe(0.25, Pick.and(chars).and2(s).and2("|").and2(s));
232 Pick p2 = Pick.maybe(0.25, Pick.and("/").and2(s).and2(chars).and2(s));
233 Pick rel2 = Pick.and(p1).and2(chars).and2(s).and2(p2);
234 Pick relation = Pick.and(Pick.or(relationList)).and2(s)
235 .and2(Pick.or(1, rel1).or2(10, rel2))
236 .and2(CRLF).name("Relation");
238 Pick command = Pick.and(Pick.or(commandList)).and2(CRLF).name("Command");
240 Pick reset = Pick.and("&").and2(s)
241 .and2(0.1, Pick.or(beforeList)).and2(s)
242 .and2(Pick.or(0.1, Pick.or(positionList)).or2(1.0, chars))
243 .and2(CRLF).name("Reset");
244 Pick rule = Pick.and(Pick.or(1, command).or2(5, reset).or2(25, relation)).name("Rule");
245 Pick rules2 = Pick.and(Pick.repeat(0,5,command))
247 .and2(Pick.repeat(1,20,rule)).name("Rules");
248 rules = Pick.Target.make(rules2);
250 static final String[] relationList = {" <", " <<", " <<<", " =", " ;", " ,"};
252 static final String[] commandList = {
253 "[alternate non-ignorable]", "[alternate shifted]",
255 "[normalization off]",
256 "[normalization on]",
271 static final String[] positionList = {
272 "[first tertiary ignorable]",
273 "[last tertiary ignorable]",
274 "[first secondary ignorable]",
275 "[last secondary ignorable]",
276 "[first primary ignorable]",
277 "[last primary ignorable]",
288 static final String[] beforeList = {