]> gitweb.fperrin.net Git - Dictionary.git/blob - jars/icu4j-4_2_1-src/src/com/ibm/icu/dev/test/util/TestBNF.java
go
[Dictionary.git] / jars / icu4j-4_2_1-src / src / com / ibm / icu / dev / test / util / TestBNF.java
1 //##header J2SE15
2 //#if defined(FOUNDATION10) || defined(J2SE13)
3 //#else
4 /*
5  *******************************************************************************
6  * Copyright (C) 2002-2009, International Business Machines Corporation and    *
7  * others. All Rights Reserved.                                                *
8  *******************************************************************************
9  */
10 package com.ibm.icu.dev.test.util;
11
12 import java.util.Random;
13
14 //TODO integrate this into the test framework
15
16 import com.ibm.icu.text.UnicodeSet;
17
18 public class TestBNF {
19     
20     static final String[] testRules = {
21         "$root = [ab]{3};",
22         
23         "$root = [ab]{3,};",
24         
25         "$root = [ab]{3,5};",
26         
27         "$root = [ab]*;",
28         
29         "$root = [ab]?;",
30         
31         "$root = [ab]+;",
32         
33         "$us = [a-z];" +
34         "$root = [0-9$us];",
35         
36         "$root = a $foo b? 25% end 30% | $foo 50%;\r\n" +
37         "$foo = c{1,5} 20%;",
38         
39         "$root = [a-z]{1,5}~;",
40         
41         "$root = [a-z]{5}~;",
42         
43         "$root = '\\' (u | U0010 | U000 $hex) $hex{4} ;\r\n" +
44         "$hex = [0-9A-Fa-f];",
45     };
46         
47     static String unicodeSetBNF = "" +
48     "$root = $leaf | '[' $s $root2 $s ']' ;\r\n" +
49     "$root2 = $leaf | '[' $s $root3 $s ']' | ($root3 $s ($op $root3 $s){0,3}) ;\r\n" +
50     "$root3 = $leaf | '[' $s $root4 $s ']' | ($root4 $s ($op $root4 $s){0,3}) ;\r\n" +
51     "$root4 = $leaf | ($leaf $s ($op $leaf $s){0,3}) ;\r\n" +
52     "$op = (('&' | '-') $s)? 70%;" +
53     "$leaf = '[' $s $list $s ']' | $prop;\r\n" +
54     "$list = ($char $s ('-' $s $char $s)? 30%){1,5} ;\r\n" +
55     "$prop = '\\' (p | P) '{' $s $propName $s '}' | '[:' '^'? $s $propName $s ':]';\r\n" +
56     "$needsQuote = [\\-\\][:whitespace:][:control:]] ;\r\n" +
57     "$char = [[\\u0000-\\U00010FFFF]-$needsQuote] | $quoted ;\r\n" +
58     "$quoted = '\\' ('u' | 'U0010' | 'U000' $hex) $hex{4} ;\r\n" +
59     "$hex = [0-9A-Fa-f];\r\n" +
60     "$s = ' '? 20%;\r\n" +
61     "$propName = (whitespace | ws) | (uppercase | uc) | (lowercase | lc) | $category;\r\n" +
62     "$category = ((general | gc) $s '=' $s)? $catvalue;\r\n" +
63     "$catvalue = (C | Other | Cc | Control | Cf | Format | Cn | Unassigned | L | Letter);\r\n";
64
65     public static void main (String[] args) {
66         testTokenizer();
67         for (int i = 0; i < testRules.length; ++i) {
68             testBNF(testRules[i], null, 20);          
69         }
70         
71         testBNF(unicodeSetBNF, null, 20);
72         //testParser();
73     }
74     
75     static void testBNF(String rules, UnicodeSet chars, int count) {
76         BNF bnf = new BNF(new Random(0), new Quoter.RuleQuoter())
77         .addSet("$chars", chars)
78         .addRules(rules)
79         .complete();
80
81         System.out.println("====================================");
82         System.out.println("BNF");
83         System.out.println(rules);
84         System.out.println(bnf.getInternal());
85         for (int i = 0; i < count; ++i) {
86             System.out.println(i + ": " + bnf.next());
87         }
88     }
89     
90     /*
91     public static testManual() {
92         Pick p = Pick.maybe(75,Pick.unquoted("a"));
93         testOr(p, 1);
94         p = Pick.or(new String[]{"", "a", "bb", "ccc"});
95         testOr(p, 3);
96         p = Pick.repeat(3, 5, new int[]{20, 30, 20}, "a");
97         testOr(p, 5);        
98         p = Pick.codePoint("[a-ce]");
99         testCodePoints(p);        
100         p = Pick.codePoint("[a-ce]");
101         testCodePoints(p);        
102         p = Pick.string(2, 8, p);
103         testOr(p,10);
104         
105         p = Pick.or(new String[]{"", "a", "bb", "ccc"});
106         p = Pick.and(p).and2(p).and2("&");
107         testMatch(p, "abb&");
108         testMatch(p, "bba");
109         
110         // testEnglish();        
111     }
112     */
113     
114     static void testMatch(Pick p, String source) {
115         Pick.Position pp = new Pick.Position();
116         boolean value = p.match(source, pp);
117         System.out.println("Match: " + value + ", " + pp);      
118     }
119     /*
120     static void testParser() {
121         try {
122             Pick.Target target = new Pick.Target();
123             for (int i = 0; i < rules.length; ++i) {
124                 target.addRule(rules[i]);
125             }
126         } catch (ParseException e) {
127             // TODO Auto-generated catch block
128             e.printStackTrace();
129         }
130     }
131     */
132     
133     static class Counts {
134         int[] counts;       
135         Counts(int max) {
136             counts = new int[max+1];
137         }
138         void inc(int index) {
139             counts[index]++;
140         }
141         void show() {
142             System.out.println("Printing Counts");
143             for (int i = 0; i < counts.length; ++i) {
144                 if (counts[i] == 0) continue;
145                 System.out.println(i + ": " + counts[i]);
146             }
147             System.out.println();
148         }
149     }
150     
151 /*    static final String[] rules = {
152         "$s = ' ';",
153         "$noun = dog | house | government | wall | street | zebra;",
154         "$adjective = red | glorious | simple | nasty | heavy | clean;",
155         "$article = quickly | oddly | silently | boldly;",
156         "$adjectivePhrase = ($adverb $s)? 50% $adjective* 0% 30% 20% 10%;",
157         "$nounPhrase = $articles $s ($adjectivePhrase $s)? 30% $noun;",
158         "$verb = goes | fishes | walks | sleeps;",
159         "$tverb = carries | lifts | overturns | hits | jumps on;",
160         "$copula = is 30% | seems 10%;",
161         "$sentence1 = $nounPhrase $s $verb $s ($s $adverb)? 30%;",
162         "$sentence2 = $nounPhrase $s $tverb $s $nounPhrase ($s $adverb)? 30%;",
163         "$sentence3 = $nounPhrase $s $copula $s $adjectivePhrase;",
164         "$conj = but | and | or;",
165         "$sentence4 = $sentence1 | $sentence2 | $sentence3 20% | $sentence4 $conj $sentence4 20%;",
166         "$sentence = $sentence4 '.';"};
167  */
168     /*
169     private static void testEnglish() {
170         Pick s = Pick.unquoted(" ");
171         Pick verbs = Pick.or(new String[]{"goes", "fishes", "walks", "sleeps"});
172         Pick transitive = Pick.or(new String[]{"carries", "lifts", "overturns", "hits", "jumps on"});
173         Pick nouns = Pick.or(new String[]{"dog", "house", "government", "wall", "street", "zebra"});
174         Pick adjectives = Pick.or(new String[]{"red", "glorious", "simple", "nasty", "heavy", "clean"});
175         Pick articles = Pick.or(new String[]{"the", "a"});
176         Pick adverbs = Pick.or(new String[]{"quickly", "oddly", "silently", "boldly"});
177         Pick adjectivePhrase = Pick.and(0.5, Pick.and(adverbs).and2(s)).and2(adjectives);
178         Pick nounPhrase = Pick.and(articles).and2(s)
179             .and2(0.3, Pick.and(adjectivePhrase).and2(s))
180             .and2(nouns);
181         Pick copula = Pick.or(new String[]{"is", "seems"});
182         Pick sentence1 = Pick.and(nounPhrase).and2(s).and2(verbs)
183             .and2(0.3, Pick.and(s).and2(adverbs)).name("s1");
184         Pick sentence2 = Pick.and(nounPhrase).and2(s).and2(transitive).and2(s).and2(nounPhrase)
185             .and2(0.3, Pick.and(s).and2(adverbs)).name("s2");
186         Pick sentence3 = Pick.and(nounPhrase).and2(s).and2(copula).and2(s).and2(adjectivePhrase).name("s3");
187         Pick conj = Pick.or(new String[]{", but", ", and", ", or"});
188         Pick forward = Pick.unquoted("forward");
189         Pick pair = Pick.and(forward).and2(conj).and2(s).and2(forward).name("part");
190         Pick sentenceBase = Pick.or(sentence1).or2(sentence2).or2(sentence3).or2(0.6666, pair).name("sentence");
191         sentenceBase.replace(forward, sentenceBase);
192         Pick sentence = Pick.and(sentenceBase).and2(Pick.unquoted("."));
193         Pick.Target target = Pick.Target.make(sentence);
194         for (int i = 0; i < 50; ++i) {
195             System.out.println(i + ": " + target.next());
196         }
197     }
198     private static void testOr(Pick p, int count) {
199         Pick.Target target = Pick.Target.make(p);
200         Counts counts = new Counts(count + 10);
201         for (int i = 0; i < 1000; ++i) {
202             String s = target.next();
203             counts.inc(s.length());
204         }
205         counts.show();
206     }
207     private static void testCodePoints(Pick p) {
208         Pick.Target target = Pick.Target.make(p);
209         Counts counts = new Counts(128);
210         for (int i = 0; i < 10000; ++i) {
211             String s = target.next();
212             counts.inc(s.charAt(0));
213         }
214         counts.show();
215     }
216     */
217     public static void printRandoms() {
218         BNF bnf = new BNF(new Random(0), new Quoter.RuleQuoter())
219         .addRules("[a-z]{2,5}").complete();
220         System.out.println("Start");
221         for (int i = 0; i < 100; ++i) {
222             String temp = bnf.next();
223             System.out.println(i + ")\t" + temp);
224         }
225     }
226     
227     public static void testTokenizer() {
228         Tokenizer t = new Tokenizer();
229         
230         String[] samples = {"a'b'c d #abc\r e", "'a '123 321", 
231             "\\\\", "a'b", "a'", "abc def%?ghi", "%", "a", "\\ a", "a''''b"};
232         for (int i = 0; i < samples.length; ++i) {
233             t.setSource(samples[i]);
234             System.out.println();
235             System.out.println("Input: " + t.getSource());
236             int type = 0;
237             while (type != Tokenizer.DONE) {
238                 type = t.next();
239                 System.out.println(t.toString(type, false));
240             }
241         }
242     }
243
244 }
245
246 //#endif