2 *******************************************************************************
\r
3 * Copyright (C) 2002-2009, International Business Machines Corporation and *
\r
4 * others. All Rights Reserved. *
\r
5 *******************************************************************************
\r
7 package com.ibm.icu.dev.test.util;
\r
9 import java.util.ArrayList;
\r
10 import java.util.HashMap;
\r
11 import java.util.HashSet;
\r
12 import java.util.Iterator;
\r
13 import java.util.Map;
\r
14 import java.util.Random;
\r
15 import java.util.Set;
\r
17 import com.ibm.icu.text.UnicodeSet;
\r
20 private Map map = new HashMap();
\r
21 private Set variables = new HashSet();
\r
22 private Pick pick = null;
\r
23 private Pick.Target target = null;
\r
24 private Tokenizer t;
\r
25 private Quoter quoter;
\r
26 private Random random;
\r
28 public String next() {
\r
29 return target.next();
\r
32 public String getInternal() {
\r
33 return pick.getInternal(0, new HashSet());
\r
37 + "weight = integer '%';"
\r
38 + "range = '{' integer (',' integer?)? '}' weight*;"
\r
40 + "star = '*' weight*;"
\r
41 + "plus = '+' weight*;"
\r
42 + "maybe = '?' weight?;"
\r
43 + "quantifier = range | star | maybe | plus;"
\r
44 + "core = string | unicodeSet | '(' alternation ')';"
\r
45 + "sequence = (core quantifier*)+;"
\r
46 + "alternation = sequence (weight? ('|' sequence weight?)+)?;"
\r
47 + "rule = string '=' alternation;";
\r
50 * Match 0 or more times
\r
51 + Match 1 or more times
\r
52 ? Match 1 or 0 times
\r
53 {n} Match exactly n times
\r
54 {n,} Match at least n times
\r
55 {n,m} Match at least n but not more than m times
\r
61 public BNF(Random random, Quoter quoter) {
\r
62 this.random = random;
\r
63 this.quoter = quoter;
\r
64 t = new Tokenizer();
\r
67 public BNF addRules(String rules) {
\r
68 t.setSource(rules);
\r
71 return this; // for chaining
\r
74 public BNF complete() {
\r
75 // check that the rules match the variables, except for $root in rules
\r
76 Set ruleSet = map.keySet();
\r
78 variables.add("$root");
\r
79 variables.addAll(t.getLookedUpItems());
\r
80 if (!ruleSet.equals(variables)) {
\r
81 String msg = showDiff(variables, ruleSet);
\r
82 if (msg.length() != 0) msg = "Error: Missing definitions for: " + msg;
\r
83 String temp = showDiff(ruleSet, variables);
\r
84 if (temp.length() != 0) temp = "Warning: Defined but not used: " + temp;
\r
85 if (msg.length() == 0) msg = temp;
\r
86 else if (temp.length() != 0) {
\r
87 msg = msg + "; " + temp;
\r
92 if (!ruleSet.equals(variables)) {
\r
93 String msg = showDiff(variables, ruleSet);
\r
94 if (msg.length() != 0) msg = "Missing definitions for: " + msg;
\r
95 String temp = showDiff(ruleSet, variables);
\r
96 if (temp.length() != 0) temp = "Defined but not used: " + temp;
\r
97 if (msg.length() == 0) msg = temp;
\r
98 else if (temp.length() != 0) {
\r
99 msg = msg + "; " + temp;
\r
104 // replace variables by definitions
\r
105 Iterator it = ruleSet.iterator();
\r
106 while (it.hasNext()) {
\r
107 String key = (String) it.next();
\r
108 Pick expression = (Pick) map.get(key);
\r
109 Iterator it2 = ruleSet.iterator();
\r
110 if (false && key.equals("$crlf")) {
\r
111 System.out.println("debug") ;
\r
113 while (it2.hasNext()) {
\r
114 Object key2 = it2.next();
\r
115 if (key.equals(key2)) continue;
\r
116 Pick expression2 = (Pick) map.get(key2);
\r
117 expression2.replace(key, expression);
\r
120 pick = (Pick) map.get("$root");
\r
121 target = Pick.Target.make(pick, random, quoter);
\r
122 // TODO remove temp collections
\r
126 String showDiff(Set a, Set b) {
\r
127 Set temp = new HashSet();
\r
130 if (temp.size() == 0) return "";
\r
131 StringBuffer buffer = new StringBuffer();
\r
132 Iterator it = temp.iterator();
\r
133 while (it.hasNext()) {
\r
134 if (buffer.length() != 0) buffer.append(", ");
\r
135 buffer.append(it.next().toString());
\r
137 return buffer.toString();
\r
140 void error(String msg) {
\r
141 throw new IllegalArgumentException(msg
\r
142 + "\r\n" + t.toString());
\r
145 private boolean addRule() {
\r
146 int type = t.next();
\r
147 if (type == Tokenizer.DONE) return false;
\r
148 if (type != Tokenizer.STRING) error("missing weight");
\r
149 String s = t.getString();
\r
150 if (s.length() == 0 || s.charAt(0) != '$') error("missing $ in variable");
\r
151 if (t.next() != '=') error("missing =");
\r
152 int startBody = t.index;
\r
153 Pick rule = getAlternation();
\r
154 if (rule == null) error("missing expression");
\r
155 t.addSymbol(s, t.getSource(), startBody, t.index);
\r
156 if (t.next() != ';') error("missing ;");
\r
157 return addPick(s, rule);
\r
160 protected boolean addPick(String s, Pick rule) {
\r
161 Object temp = map.get(s);
\r
162 if (temp != null) error("duplicate variable");
\r
163 if (rule.name == null) rule.name(s);
\r
168 public BNF addSet(String variable, UnicodeSet set) {
\r
170 String body = set.toString();
\r
171 t.addSymbol(variable, body, 0, body.length());
\r
172 addPick(variable, Pick.codePoint(set));
\r
177 int maxRepeat = 99;
\r
179 Pick qualify(Pick item) {
\r
181 int type = t.next();
\r
184 return new Pick.Quote(item);
\r
186 return new Pick.Morph(item);
\r
188 int weight = getWeight();
\r
189 if (weight == NO_WEIGHT) weight = 50;
\r
190 weights = new int[] {100-weight, weight};
\r
191 return Pick.repeat(0, 1, weights, item);
\r
193 weights = getWeights();
\r
194 return Pick.repeat(1, maxRepeat, weights, item);
\r
196 weights = getWeights();
\r
197 return Pick.repeat(1, maxRepeat, weights, item);
\r
199 if (t.next() != Tokenizer.NUMBER) error("missing number");
\r
200 int start = (int) t.getNumber();
\r
206 if (type == Tokenizer.NUMBER) {
\r
207 end = (int)t.getNumber();
\r
211 if (type != '}') error("missing }");
\r
212 weights = getWeights();
\r
213 return Pick.repeat(start, end, weights, item);
\r
220 int token = t.next();
\r
221 if (token == Tokenizer.STRING) {
\r
222 String s = t.getString();
\r
223 if (s.charAt(0) == '$') variables.add(s);
\r
224 return Pick.string(s);
\r
226 if (token == Tokenizer.UNICODESET) {
\r
227 return Pick.codePoint(t.getUnicodeSet());
\r
229 if (token != '(') {
\r
233 Pick temp = getAlternation();
\r
235 if (token != ')') error("missing )");
\r
239 Pick getSequence() {
\r
240 Pick.Sequence result = null;
\r
243 Pick item = getCore();
\r
244 if (item == null) {
\r
245 if (result != null) return result;
\r
246 if (last != null) return last;
\r
247 error("missing item");
\r
249 // qualify it as many times as possible
\r
253 item = qualify(item);
\r
254 } while (item != oldItem);
\r
256 if (last == null) {
\r
259 if (result == null) result = Pick.makeSequence().and2(last);
\r
260 result = result.and2(item);
\r
265 // for simplicity, we just use recursive descent
\r
266 Pick getAlternation() {
\r
267 Pick.Alternation result = null;
\r
269 int lastWeight = NO_WEIGHT;
\r
271 Pick temp = getSequence();
\r
272 if (temp == null) error("empty alternation");
\r
273 int weight = getWeight();
\r
274 if (weight == NO_WEIGHT) weight = 1;
\r
275 if (last == null) {
\r
277 lastWeight = weight;
\r
279 if (result == null) result = Pick.makeAlternation().or2(lastWeight, last);
\r
280 result = result.or2(weight, temp);
\r
282 int token = t.next();
\r
283 if (token != '|') {
\r
285 if (result != null) return result;
\r
286 if (last != null) return last;
\r
291 private static final int NO_WEIGHT = Integer.MIN_VALUE;
\r
295 int token = t.next();
\r
296 if (token != Tokenizer.NUMBER) {
\r
300 weight = (int)t.getNumber();
\r
302 if (token != '%') error("missing %");
\r
306 int[] getWeights() {
\r
307 ArrayList list = new ArrayList();
\r
309 int weight = getWeight();
\r
310 if (weight == NO_WEIGHT) break;
\r
311 list.add(new Integer(weight));
\r
313 if (list.size() == 0) return null;
\r
314 int[] result = new int[list.size()];
\r
315 for (int i = 0; i < list.size(); ++i) {
\r
316 result[i] = ((Integer)list.get(i)).intValue();
\r
321 public int getMaxRepeat() {
\r
325 public BNF setMaxRepeat(int maxRepeat) {
\r
326 this.maxRepeat = maxRepeat;
\r