2 *******************************************************************************
3 * Copyright (C) 2002-2012, International Business Machines Corporation and *
4 * others. All Rights Reserved. *
5 *******************************************************************************
7 package com.ibm.icu.dev.util;
9 import java.util.ArrayList;
10 import java.util.HashMap;
11 import java.util.HashSet;
12 import java.util.Iterator;
14 import java.util.Random;
17 import com.ibm.icu.text.UnicodeSet;
20 private Map map = new HashMap();
21 private Set variables = new HashSet();
22 private Pick pick = null;
23 private Pick.Target target = null;
25 private Quoter quoter;
26 private Random random;
28 public String next() {
32 public String getInternal() {
33 return pick.getInternal(0, new HashSet());
37 + "weight = integer '%';"
38 + "range = '{' integer (',' integer?)? '}' weight*;"
40 + "star = '*' weight*;"
41 + "plus = '+' weight*;"
42 + "maybe = '?' weight?;"
43 + "quantifier = range | star | maybe | plus;"
44 + "core = string | unicodeSet | '(' alternation ')';"
45 + "sequence = (core quantifier*)+;"
46 + "alternation = sequence (weight? ('|' sequence weight?)+)?;"
47 + "rule = string '=' alternation;";
50 * Match 0 or more times
51 + Match 1 or more times
53 {n} Match exactly n times
54 {n,} Match at least n times
55 {n,m} Match at least n but not more than m times
61 public BNF(Random random, Quoter quoter) {
67 public BNF addRules(String rules) {
71 return this; // for chaining
74 public BNF complete() {
75 // check that the rules match the variables, except for $root in rules
76 Set ruleSet = map.keySet();
78 variables.add("$root");
79 variables.addAll(t.getLookedUpItems());
80 if (!ruleSet.equals(variables)) {
81 String msg = showDiff(variables, ruleSet);
82 if (msg.length() != 0) msg = "Error: Missing definitions for: " + msg;
83 String temp = showDiff(ruleSet, variables);
84 if (temp.length() != 0) temp = "Warning: Defined but not used: " + temp;
85 if (msg.length() == 0) msg = temp;
86 else if (temp.length() != 0) {
87 msg = msg + "; " + temp;
92 if (!ruleSet.equals(variables)) {
93 String msg = showDiff(variables, ruleSet);
94 if (msg.length() != 0) msg = "Missing definitions for: " + msg;
95 String temp = showDiff(ruleSet, variables);
96 if (temp.length() != 0) temp = "Defined but not used: " + temp;
97 if (msg.length() == 0) msg = temp;
98 else if (temp.length() != 0) {
99 msg = msg + "; " + temp;
104 // replace variables by definitions
105 Iterator it = ruleSet.iterator();
106 while (it.hasNext()) {
107 String key = (String) it.next();
108 Pick expression = (Pick) map.get(key);
109 Iterator it2 = ruleSet.iterator();
110 if (false && key.equals("$crlf")) {
111 System.out.println("debug") ;
113 while (it2.hasNext()) {
114 Object key2 = it2.next();
115 if (key.equals(key2)) continue;
116 Pick expression2 = (Pick) map.get(key2);
117 expression2.replace(key, expression);
120 pick = (Pick) map.get("$root");
121 target = Pick.Target.make(pick, random, quoter);
122 // TODO remove temp collections
126 String showDiff(Set a, Set b) {
127 Set temp = new HashSet();
130 if (temp.size() == 0) return "";
131 StringBuffer buffer = new StringBuffer();
132 Iterator it = temp.iterator();
133 while (it.hasNext()) {
134 if (buffer.length() != 0) buffer.append(", ");
135 buffer.append(it.next().toString());
137 return buffer.toString();
140 void error(String msg) {
141 throw new IllegalArgumentException(msg
142 + "\r\n" + t.toString());
145 private boolean addRule() {
147 if (type == Tokenizer.DONE) return false;
148 if (type != Tokenizer.STRING) error("missing weight");
149 String s = t.getString();
150 if (s.length() == 0 || s.charAt(0) != '$') error("missing $ in variable");
151 if (t.next() != '=') error("missing =");
152 int startBody = t.index;
153 Pick rule = getAlternation();
154 if (rule == null) error("missing expression");
155 t.addSymbol(s, t.getSource(), startBody, t.index);
156 if (t.next() != ';') error("missing ;");
157 return addPick(s, rule);
160 protected boolean addPick(String s, Pick rule) {
161 Object temp = map.get(s);
162 if (temp != null) error("duplicate variable");
163 if (rule.name == null) rule.name(s);
168 public BNF addSet(String variable, UnicodeSet set) {
170 String body = set.toString();
171 t.addSymbol(variable, body, 0, body.length());
172 addPick(variable, Pick.codePoint(set));
179 Pick qualify(Pick item) {
184 return new Pick.Quote(item);
186 return new Pick.Morph(item);
188 int weight = getWeight();
189 if (weight == NO_WEIGHT) weight = 50;
190 weights = new int[] {100-weight, weight};
191 return Pick.repeat(0, 1, weights, item);
193 weights = getWeights();
194 return Pick.repeat(1, maxRepeat, weights, item);
196 weights = getWeights();
197 return Pick.repeat(1, maxRepeat, weights, item);
199 if (t.next() != Tokenizer.NUMBER) error("missing number");
200 int start = (int) t.getNumber();
206 if (type == Tokenizer.NUMBER) {
207 end = (int)t.getNumber();
211 if (type != '}') error("missing }");
212 weights = getWeights();
213 return Pick.repeat(start, end, weights, item);
220 int token = t.next();
221 if (token == Tokenizer.STRING) {
222 String s = t.getString();
223 if (s.charAt(0) == '$') variables.add(s);
224 return Pick.string(s);
226 if (token == Tokenizer.UNICODESET) {
227 return Pick.codePoint(t.getUnicodeSet());
233 Pick temp = getAlternation();
235 if (token != ')') error("missing )");
240 Pick.Sequence result = null;
243 Pick item = getCore();
245 if (result != null) return result;
246 if (last != null) return last;
247 error("missing item");
249 // qualify it as many times as possible
253 item = qualify(item);
254 } while (item != oldItem);
259 if (result == null) result = Pick.makeSequence().and2(last);
260 result = result.and2(item);
265 // for simplicity, we just use recursive descent
266 Pick getAlternation() {
267 Pick.Alternation result = null;
269 int lastWeight = NO_WEIGHT;
271 Pick temp = getSequence();
272 if (temp == null) error("empty alternation");
273 int weight = getWeight();
274 if (weight == NO_WEIGHT) weight = 1;
279 if (result == null) result = Pick.makeAlternation().or2(lastWeight, last);
280 result = result.or2(weight, temp);
282 int token = t.next();
285 if (result != null) return result;
286 if (last != null) return last;
291 private static final int NO_WEIGHT = Integer.MIN_VALUE;
295 int token = t.next();
296 if (token != Tokenizer.NUMBER) {
300 weight = (int)t.getNumber();
302 if (token != '%') error("missing %");
307 ArrayList list = new ArrayList();
309 int weight = getWeight();
310 if (weight == NO_WEIGHT) break;
311 list.add(new Integer(weight));
313 if (list.size() == 0) return null;
314 int[] result = new int[list.size()];
315 for (int i = 0; i < list.size(); ++i) {
316 result[i] = ((Integer)list.get(i)).intValue();
321 public int getMaxRepeat() {
325 public BNF setMaxRepeat(int maxRepeat) {
326 this.maxRepeat = maxRepeat;