/* ******************************************************************************* * Copyright (C) 2002-2009, International Business Machines Corporation and * * others. All Rights Reserved. * ******************************************************************************* */ package com.ibm.icu.dev.test.util; import java.util.ArrayList; import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; import java.util.Map; import java.util.Random; import java.util.Set; import com.ibm.icu.text.UnicodeSet; public class BNF { private Map map = new HashMap(); private Set variables = new HashSet(); private Pick pick = null; private Pick.Target target = null; private Tokenizer t; private Quoter quoter; private Random random; public String next() { return target.next(); } public String getInternal() { return pick.getInternal(0, new HashSet()); } /* + "weight = integer '%';" + "range = '{' integer (',' integer?)? '}' weight*;" + "quote = '@';" + "star = '*' weight*;" + "plus = '+' weight*;" + "maybe = '?' weight?;" + "quantifier = range | star | maybe | plus;" + "core = string | unicodeSet | '(' alternation ')';" + "sequence = (core quantifier*)+;" + "alternation = sequence (weight? ('|' sequence weight?)+)?;" + "rule = string '=' alternation;"; * Match 0 or more times + Match 1 or more times ? Match 1 or 0 times {n} Match exactly n times {n,} Match at least n times {n,m} Match at least n but not more than m times */ public BNF(Random random, Quoter quoter) { this.random = random; this.quoter = quoter; t = new Tokenizer(); } public BNF addRules(String rules) { t.setSource(rules); while (addRule()) { } return this; // for chaining } public BNF complete() { // check that the rules match the variables, except for $root in rules Set ruleSet = map.keySet(); // add also variables.add("$root"); variables.addAll(t.getLookedUpItems()); if (!ruleSet.equals(variables)) { String msg = showDiff(variables, ruleSet); if (msg.length() != 0) msg = "Error: Missing definitions for: " + msg; String temp = showDiff(ruleSet, variables); if (temp.length() != 0) temp = "Warning: Defined but not used: " + temp; if (msg.length() == 0) msg = temp; else if (temp.length() != 0) { msg = msg + "; " + temp; } error(msg); } if (!ruleSet.equals(variables)) { String msg = showDiff(variables, ruleSet); if (msg.length() != 0) msg = "Missing definitions for: " + msg; String temp = showDiff(ruleSet, variables); if (temp.length() != 0) temp = "Defined but not used: " + temp; if (msg.length() == 0) msg = temp; else if (temp.length() != 0) { msg = msg + "; " + temp; } error(msg); } // replace variables by definitions Iterator it = ruleSet.iterator(); while (it.hasNext()) { String key = (String) it.next(); Pick expression = (Pick) map.get(key); Iterator it2 = ruleSet.iterator(); if (false && key.equals("$crlf")) { System.out.println("debug") ; } while (it2.hasNext()) { Object key2 = it2.next(); if (key.equals(key2)) continue; Pick expression2 = (Pick) map.get(key2); expression2.replace(key, expression); } } pick = (Pick) map.get("$root"); target = Pick.Target.make(pick, random, quoter); // TODO remove temp collections return this; } String showDiff(Set a, Set b) { Set temp = new HashSet(); temp.addAll(a); temp.removeAll(b); if (temp.size() == 0) return ""; StringBuffer buffer = new StringBuffer(); Iterator it = temp.iterator(); while (it.hasNext()) { if (buffer.length() != 0) buffer.append(", "); buffer.append(it.next().toString()); } return buffer.toString(); } void error(String msg) { throw new IllegalArgumentException(msg + "\r\n" + t.toString()); } private boolean addRule() { int type = t.next(); if (type == Tokenizer.DONE) return false; if (type != Tokenizer.STRING) error("missing weight"); String s = t.getString(); if (s.length() == 0 || s.charAt(0) != '$') error("missing $ in variable"); if (t.next() != '=') error("missing ="); int startBody = t.index; Pick rule = getAlternation(); if (rule == null) error("missing expression"); t.addSymbol(s, t.getSource(), startBody, t.index); if (t.next() != ';') error("missing ;"); return addPick(s, rule); } protected boolean addPick(String s, Pick rule) { Object temp = map.get(s); if (temp != null) error("duplicate variable"); if (rule.name == null) rule.name(s); map.put(s, rule); return true; } public BNF addSet(String variable, UnicodeSet set) { if (set != null) { String body = set.toString(); t.addSymbol(variable, body, 0, body.length()); addPick(variable, Pick.codePoint(set)); } return this; } int maxRepeat = 99; Pick qualify(Pick item) { int[] weights; int type = t.next(); switch(type) { case '@': return new Pick.Quote(item); case '~': return new Pick.Morph(item); case '?': int weight = getWeight(); if (weight == NO_WEIGHT) weight = 50; weights = new int[] {100-weight, weight}; return Pick.repeat(0, 1, weights, item); case '*': weights = getWeights(); return Pick.repeat(1, maxRepeat, weights, item); case '+': weights = getWeights(); return Pick.repeat(1, maxRepeat, weights, item); case '{': if (t.next() != Tokenizer.NUMBER) error("missing number"); int start = (int) t.getNumber(); int end = start; type = t.next(); if (type == ',') { end = maxRepeat; type = t.next(); if (type == Tokenizer.NUMBER) { end = (int)t.getNumber(); type = t.next(); } } if (type != '}') error("missing }"); weights = getWeights(); return Pick.repeat(start, end, weights, item); } t.backup(); return item; } Pick getCore() { int token = t.next(); if (token == Tokenizer.STRING) { String s = t.getString(); if (s.charAt(0) == '$') variables.add(s); return Pick.string(s); } if (token == Tokenizer.UNICODESET) { return Pick.codePoint(t.getUnicodeSet()); } if (token != '(') { t.backup(); return null; } Pick temp = getAlternation(); token = t.next(); if (token != ')') error("missing )"); return temp; } Pick getSequence() { Pick.Sequence result = null; Pick last = null; while (true) { Pick item = getCore(); if (item == null) { if (result != null) return result; if (last != null) return last; error("missing item"); } // qualify it as many times as possible Pick oldItem; do { oldItem = item; item = qualify(item); } while (item != oldItem); // add it in if (last == null) { last = item; } else { if (result == null) result = Pick.makeSequence().and2(last); result = result.and2(item); } } } // for simplicity, we just use recursive descent Pick getAlternation() { Pick.Alternation result = null; Pick last = null; int lastWeight = NO_WEIGHT; while (true) { Pick temp = getSequence(); if (temp == null) error("empty alternation"); int weight = getWeight(); if (weight == NO_WEIGHT) weight = 1; if (last == null) { last = temp; lastWeight = weight; } else { if (result == null) result = Pick.makeAlternation().or2(lastWeight, last); result = result.or2(weight, temp); } int token = t.next(); if (token != '|') { t.backup(); if (result != null) return result; if (last != null) return last; } } } private static final int NO_WEIGHT = Integer.MIN_VALUE; int getWeight() { int weight; int token = t.next(); if (token != Tokenizer.NUMBER) { t.backup(); return NO_WEIGHT; } weight = (int)t.getNumber(); token = t.next(); if (token != '%') error("missing %"); return weight; } int[] getWeights() { ArrayList list = new ArrayList(); while (true) { int weight = getWeight(); if (weight == NO_WEIGHT) break; list.add(new Integer(weight)); } if (list.size() == 0) return null; int[] result = new int[list.size()]; for (int i = 0; i < list.size(); ++i) { result[i] = ((Integer)list.get(i)).intValue(); } return result; } public int getMaxRepeat() { return maxRepeat; } public BNF setMaxRepeat(int maxRepeat) { this.maxRepeat = maxRepeat; return this; } }