2 //#if defined(FOUNDATION10) || defined(J2SE13)
\r
5 *******************************************************************************
\r
6 * Copyright (C) 2002-2009, International Business Machines Corporation and *
\r
7 * others. All Rights Reserved. *
\r
8 *******************************************************************************
\r
10 package com.ibm.icu.dev.test.collator;
\r
13 import com.ibm.icu.text.Collator;
\r
14 import com.ibm.icu.text.RuleBasedCollator;
\r
15 import com.ibm.icu.text.UnicodeSet;
\r
17 import com.ibm.icu.dev.test.TestFmwk;
\r
18 import com.ibm.icu.dev.test.util.BNF;
\r
19 import com.ibm.icu.dev.test.util.BagFormatter;
\r
20 import com.ibm.icu.dev.test.util.Quoter;
\r
22 import java.io.File;
\r
23 import java.io.IOException;
\r
24 import java.io.PrintWriter;
\r
25 import java.text.ParseException;
\r
26 import java.util.Random;
\r
28 public class RandomCollator extends TestFmwk {
\r
29 public static void main(String[] args) throws Exception {
\r
30 new RandomCollator().run(args);
\r
31 //new CollationAPITest().TestGetTailoredSet();
\r
34 static final int CONSTRUCT_RANDOM_COUNT = 100;
\r
35 static final int FORMAL_TEST_COUNT = 1000;
\r
37 static final String POSITION = "{$$$}";
\r
40 class Shower extends BagFormatter.Shower {
\r
41 public void print(String arg) {
\r
46 public Shower LOG = new Shower();
\r
49 public void TestRandom() throws IOException {
\r
51 // = java.util.Calendar.getInstance().get(java.util.Calendar.YEAR);
\r
52 // if (year < 2004) {
\r
53 // System.out.println("\nTestRandom skipped for 2003");
\r
57 PrintWriter pw = BagFormatter.openUTF8Writer(System.getProperty("user.dir")+File.separator, "RandomCollationTestLog.txt");
\r
58 TestCollator tc = new TestCollator(chars);
\r
59 pw.println("Collation Test Run");
\r
60 pw.println("Note: For parse-exception, " + POSITION + " indicates the errorOffset");
\r
61 pw.println("Rules:");
\r
62 pw.println(currentRules);
\r
63 String rules = "<unknown>";
\r
67 for (int i = 0; i < CONSTRUCT_RANDOM_COUNT; ++i) {
\r
71 Collator c = new RuleBasedCollator(rules.toString());
\r
72 tc.test(c, FORMAL_TEST_COUNT);
\r
78 } catch (ParseException pe) {
\r
80 pw.println("========PARSE EXCEPTION======== (" + i + ")");
\r
81 int errorOffset = pe.getErrorOffset();
\r
82 pw.print(rules.substring(0,errorOffset));
\r
84 pw.println(rules.substring(errorOffset));
\r
85 //pw.println("========ERROR======== (" + i + ")");
\r
86 //pe.printStackTrace(pw);
\r
87 //pw.println("========END======== (" + i + ")");
\r
88 errln("ParseException");
\r
89 } catch (Exception e) {
\r
91 pw.println("========OTHER EXCEPTION======== (" + i + ")");
\r
92 e.printStackTrace(pw);
\r
93 pw.println("========RULES======== (" + i + ")");
\r
95 //pw.println("========END======== (" + i + ")");
\r
96 errln("ParseException");
\r
99 pw.println("Successful: " + sCount
\r
100 + ",\tParseException: " + peCount
\r
101 + ",\tOther Exception: " + oeCount);
\r
102 logln("Successful: " + sCount
\r
103 + ",\tParseException: " + peCount
\r
104 + ",\tOther Exception: " + oeCount);
\r
109 public static class TestCollator extends TestComparator {
\r
112 TestCollator(UnicodeSet chars) {
\r
113 rs = new BNF(new Random(0), new Quoter.RuleQuoter())
\r
114 .addRules("$root = " + chars + "{1,8};").complete();
\r
117 public Object newObject(Object c) {
\r
121 public String format(Object c) {
\r
122 return BagFormatter.hex.transliterate(c.toString());
\r
127 String currentRules = null;
\r
130 public String get() {
\r
134 public RandomCollator() {
\r
137 protected void init()throws Exception{
\r
138 init(1,10, new UnicodeSet("[AZa-z<\\&\\[\\]]"));
\r
140 private void init(int minRuleCount, int maxRuleCount, UnicodeSet setOfChars) {
\r
141 this.chars = setOfChars;
\r
142 bnf = new BNF(new Random(0), new Quoter.RuleQuoter())
\r
143 .addSet("$chars", setOfChars)
\r
144 .addRules(collationBNF)
\r
// Grammar text from which collation tailoring rules are generated.
// init(int, int, UnicodeSet) registers "$chars" with addSet and then passes this
// string to addRules on the same BNF builder; $chars is therefore the only
// variable used below that this text does not define itself.
//
// Shape of a generated rule set ($root): $command{0,5}, then one $reset, then
// $mostRules{1,20}. $mostRules picks among $command, $reset and $relation,
// annotated 1%, 5% and 25% respectively.
//   $command  - one bracketed option from $commandList (alternate, backwards,
//               normalization, caseLevel, hiraganaQ, caseFirst, strength).
//   $reset    - '&', an optional $beforeList ("[before 1|2|3]"), then either a
//               $positionList entry ("[first|last <type>]") or a $string.
//   $relation - one operator from $relationList followed by either
//               "[variable top]" ($rel1) or a string with an optional
//               "<string> |" part in front and an optional $p2 part behind ($rel2).
// Each of $command, $reset and $relation ends with $crlf (a literal CR LF).
//
// Definitions are not listed in dependency order: $commandList refers to $onoff,
// which is only defined further down.
//
// NOTE(review): $p2 begins with a quoted backslash (a single backslash once the
// Java escape is resolved), while the rule outline in the comments later in this
// file writes that position as "/" - confirm which one is intended.
// NOTE(review): the builder in init is seeded with new Random(0), so any edit to
// this text, including whitespace inside the literals, presumably changes the
// sequence of generated rules - confirm before reformatting.
148 private static String collationBNF =
\r
149 "$s = ' '? 50%;\r\n" +
\r
150 "$relationList = (" + " '<'" + " | ' <<'" + " | ' ;'" +
\r
151 " | ' <<<'" + " | ' ,'" +
\r
152 " | ' ='" + ");\r\n" +
\r
153 "$alternateOptions = non'-'ignorable | shifted;\r\n" +
\r
154 "$caseFirstOptions = off | upper | lower;\r\n" +
\r
155 "$strengthOptions = '1' | '2' | '3' | '4' | 'I';\r\n" +
\r
156 "$commandList = '['" + " ( alternate ' ' $alternateOptions" + " | backwards' 2'" + " | normalization ' ' $onoff " + " | caseLevel ' ' $onoff " + " | hiraganaQ ' ' $onoff" + " | caseFirst ' ' $caseFirstOptions" + " | strength ' ' $strengthOptions" + " ) ']';\r\n" +
\r
157 "$ignorableTypes = (tertiary | secondary | primary) ' ' ignorable;\r\n" +
\r
158 "$allTypes = variable | regular | implicit | trailing | $ignorableTypes;\r\n" +
\r
159 "$onoff = on | off;\r\n" +
\r
160 "$positionList = '[' (first | last) ' ' $allTypes ']';\r\n" + "$beforeList = '[before ' ('1' | '2' | '3') ']';\r\n" + "$string = $chars{1,5}~@;\r\n" +
\r
161 "$crlf = '\r\n';\r\n" +
\r
162 "$rel1 = '[variable top]' $s ;\r\n" +
\r
163 "$p1 = ($string $s '|' $s)? 25%;\r\n" +
\r
164 "$p2 = ('\\' $s $string $s)? 25%;\r\n" +
\r
165 "$rel2 = $p1 $string $s $p2;\r\n" +
\r
166 "$relation = $relationList $s ($rel1 | $rel2) $crlf;\r\n" +
\r
167 "$command = $commandList $crlf;\r\n" +
\r
168 "$reset = '&' $s ($beforeList $s)? 10% ($positionList | $string 10%) $crlf;\r\n" +
\r
169 "$mostRules = $command 1% | $reset 5% | $relation 25%;\r\n" +
\r
170 "$root = $command{0,5} $reset $mostRules{1,20};\r\n";
\r
176 gc ; C ; Other # Cc | Cf | Cn | Co | Cs
\r
179 gc ; Cn ; Unassigned
\r
180 gc ; Co ; Private_Use
\r
181 gc ; Cs ; Surrogate
\r
182 gc ; L ; Letter # Ll | Lm | Lo | Lt | Lu
\r
183 gc ; LC ; Cased_Letter # Ll | Lt | Lu
\r
184 gc ; Ll ; Lowercase_Letter
\r
185 gc ; Lm ; Modifier_Letter
\r
186 gc ; Lo ; Other_Letter
\r
187 gc ; Lt ; Titlecase_Letter
\r
188 gc ; Lu ; Uppercase_Letter
\r
189 gc ; M ; Mark # Mc | Me | Mn
\r
190 gc ; Mc ; Spacing_Mark
\r
191 gc ; Me ; Enclosing_Mark
\r
192 gc ; Mn ; Nonspacing_Mark
\r
193 gc ; N ; Number # Nd | Nl | No
\r
194 gc ; Nd ; Decimal_Number
\r
195 gc ; Nl ; Letter_Number
\r
196 gc ; No ; Other_Number
\r
197 gc ; P ; Punctuation # Pc | Pd | Pe | Pf | Pi | Po | Ps
\r
198 gc ; Pc ; Connector_Punctuation
\r
199 gc ; Pd ; Dash_Punctuation
\r
200 gc ; Pe ; Close_Punctuation
\r
201 gc ; Pf ; Final_Punctuation
\r
202 gc ; Pi ; Initial_Punctuation
\r
203 gc ; Po ; Other_Punctuation
\r
204 gc ; Ps ; Open_Punctuation
\r
205 gc ; S ; Symbol # Sc | Sk | Sm | So
\r
206 gc ; Sc ; Currency_Symbol
\r
207 gc ; Sk ; Modifier_Symbol
\r
208 gc ; Sm ; Math_Symbol
\r
209 gc ; So ; Other_Symbol
\r
210 gc ; Z ; Separator # Zl | Zp | Zs
\r
211 gc ; Zl ; Line_Separator
\r
212 gc ; Zp ; Paragraph_Separator
\r
213 gc ; Zs ; Space_Separator
\r
217 // each rule can be:
\r
219 // "& [" position "]"
\r
220 // "&" before chars
\r
221 // relation "[variable top]"
\r
222 // relation (chars "|")? chars ("/" chars)?
\r
223 // plus, a reset must come before a relation
\r
225 // the following reflects the above rules, plus allows whitespace.
\r
226 Pick chars = Pick.string(1, 5, Pick.codePoint(uSet)); // insert something needing quotes
\r
227 Pick s = Pick.maybe(0.8, Pick.unquoted(" ")).name("Space"); // optional space
\r
228 Pick CRLF = Pick.unquoted("\r\n");
\r
230 Pick rel1 = Pick.and(Pick.unquoted("[variable top]")).and2(s);
\r
231 Pick p1 = Pick.maybe(0.25, Pick.and(chars).and2(s).and2("|").and2(s));
\r
232 Pick p2 = Pick.maybe(0.25, Pick.and("/").and2(s).and2(chars).and2(s));
\r
233 Pick rel2 = Pick.and(p1).and2(chars).and2(s).and2(p2);
\r
234 Pick relation = Pick.and(Pick.or(relationList)).and2(s)
\r
235 .and2(Pick.or(1, rel1).or2(10, rel2))
\r
236 .and2(CRLF).name("Relation");
\r
238 Pick command = Pick.and(Pick.or(commandList)).and2(CRLF).name("Command");
\r
240 Pick reset = Pick.and("&").and2(s)
\r
241 .and2(0.1, Pick.or(beforeList)).and2(s)
\r
242 .and2(Pick.or(0.1, Pick.or(positionList)).or2(1.0, chars))
\r
243 .and2(CRLF).name("Reset");
\r
244 Pick rule = Pick.and(Pick.or(1, command).or2(5, reset).or2(25, relation)).name("Rule");
\r
245 Pick rules2 = Pick.and(Pick.repeat(0,5,command))
\r
247 .and2(Pick.repeat(1,20,rule)).name("Rules");
\r
248 rules = Pick.Target.make(rules2);
\r
250 static final String[] relationList = {" <", " <<", " <<<", " =", " ;", " ,"};
\r
252 static final String[] commandList = {
\r
253 "[alternate non-ignorable]", "[alternate shifted]",
\r
255 "[normalization off]",
\r
256 "[normalization on]",
\r
260 "[caseFirst upper]",
\r
261 "[caseFirst lower]",
\r
271 static final String[] positionList = {
\r
272 "[first tertiary ignorable]",
\r
273 "[last tertiary ignorable]",
\r
274 "[first secondary ignorable]",
\r
275 "[last secondary ignorable]",
\r
276 "[first primary ignorable]",
\r
277 "[last primary ignorable]",
\r
278 "[first variable]",
\r
282 "[first implicit]",
\r
284 "[first trailing]",
\r
288 static final String[] beforeList = {
\r