]> gitweb.fperrin.net Git - Dictionary.git/blob - jars/icu4j-4_2_1-src/src/com/ibm/icu/dev/test/translit/RegexUtilitiesTest.java
icu4jsrc
[Dictionary.git] / jars / icu4j-4_2_1-src / src / com / ibm / icu / dev / test / translit / RegexUtilitiesTest.java
1 //##header\r
2 //#if defined(FOUNDATION10) || defined(J2SE13) || defined(J2SE14)\r
3 //#else\r
4 /*\r
5  *******************************************************************************\r
6  * Copyright (C) 2009, International Business Machines Corporation and         *\r
7  * others. All Rights Reserved.                                                *\r
8  *******************************************************************************\r
9  */\r
10 package com.ibm.icu.dev.test.translit;\r
11 \r
12 import java.util.ArrayList;\r
13 import java.util.List;\r
14 import java.util.regex.Matcher;\r
15 import java.util.regex.Pattern;\r
16 \r
17 import com.ibm.icu.dev.test.TestFmwk;\r
18 import com.ibm.icu.impl.UnicodeRegex;\r
19 import com.ibm.icu.lang.UCharacter;\r
20 import com.ibm.icu.lang.UProperty;\r
21 import com.ibm.icu.lang.UProperty.NameChoice;\r
22 import com.ibm.icu.text.Transliterator;\r
23 import com.ibm.icu.text.UTF16;\r
24 import com.ibm.icu.text.UnicodeSet;\r
25 \r
26 /**\r
27  * @author markdavis\r
28  */\r
29 public class RegexUtilitiesTest extends TestFmwk {\r
30 \r
31     public static void main(String[] args) throws Exception {\r
32         new RegexUtilitiesTest().run(args);\r
33     }\r
34 \r
35     /**\r
36      * Check basic construction.\r
37      */\r
38     public void TestConstruction() {\r
39         String[][] tests = {\r
40                 {"a"},\r
41                 {"a[a-z]b"},\r
42                 {"[ba-z]", "[a-z]"},\r
43                 {"q[ba-z]", "q[a-z]"},\r
44                 {"[ba-z]q", "[a-z]q"},\r
45                 {"a\\p{joincontrol}b", "a[\u200C\u200D]b"},\r
46                 {"a\\P{joincontrol}b", "a[^\u200C\u200D]b"},\r
47                 {"a[[:whitespace:]&[:Zl:]]b", "a[\\\u2028]b"},\r
48                 {"a [[:bc=cs:]&[:wspace:]] b", "a [\u00A0\u202F] b"},\r
49         };\r
50         for (int i = 0; i < tests.length; ++i) {\r
51             final String source = tests[i][0];\r
52             String expected = tests[i].length == 1 ? source : tests[i][1];\r
53             String actual = UnicodeRegex.fix(source);\r
54             assertEquals(source, expected, actual);\r
55         } \r
56     }\r
57 \r
58     Transliterator hex = Transliterator.getInstance("hex");\r
59 \r
60     /**\r
61      * Perform an exhaustive test on all Unicode characters to make sure that the UnicodeSet with each\r
62      * character works.\r
63      */\r
64     public void TestCharacters() {\r
65         UnicodeSet requiresQuote = new UnicodeSet("[\\$\\&\\-\\:\\[\\\\\\]\\^\\{\\}[:pattern_whitespace:]]");\r
66         boolean skip = getInclusion() < 10;\r
67         for (int cp = 0; cp < 0x110000; ++cp) {\r
68             if (cp > 0xFF && skip && (cp % 37 != 0)) {\r
69                 continue;\r
70             }\r
71             String cpString = UTF16.valueOf(cp);\r
72             String s = requiresQuote.contains(cp) ? "\\" + cpString : cpString;\r
73             String pattern = null;\r
74             final String rawPattern = "[" + s + s + "]";\r
75             try {\r
76                 pattern = UnicodeRegex.fix(rawPattern);\r
77             } catch (Exception e) {\r
78                 errln(e.getMessage());\r
79                 continue;\r
80             }\r
81             final String expected = "[" + s + "]";\r
82             assertEquals("Doubled character works" + hex.transform(s), expected, pattern);\r
83 \r
84             // verify that we can create a regex pattern and use as expected\r
85             String shouldNotMatch = UTF16.valueOf((cp + 1) % 0x110000);\r
86             checkCharPattern(Pattern.compile(pattern), pattern, cpString, shouldNotMatch);\r
87 \r
88             // verify that the Pattern.compile works\r
89             checkCharPattern(UnicodeRegex.compile(rawPattern), pattern, cpString, shouldNotMatch);\r
90         }\r
91     }\r
92 \r
93     /**\r
94      * Check all integer Unicode properties to make sure they work.\r
95      */\r
96     public void TestUnicodeProperties() {\r
97         final boolean skip = getInclusion() < 10;\r
98         UnicodeSet temp = new UnicodeSet();\r
99         for (int propNum = UProperty.INT_START; propNum < UProperty.INT_LIMIT; ++propNum) {\r
100             if (skip && (propNum % 5 != 0)) {\r
101                 continue;\r
102             }\r
103             String propName = UCharacter.getPropertyName(propNum, NameChoice.LONG);\r
104             final int intPropertyMinValue = UCharacter.getIntPropertyMinValue(propNum);\r
105             int intPropertyMaxValue = UCharacter.getIntPropertyMaxValue(propNum);\r
106             if (skip) { // only test first if not exhaustive\r
107                 intPropertyMaxValue = intPropertyMinValue;\r
108             }\r
109             for (int valueNum = intPropertyMinValue; valueNum <= intPropertyMaxValue; ++valueNum) {\r
110                 // hack for getting property value name\r
111                 String valueName = UCharacter.getPropertyValueName(propNum, valueNum, NameChoice.LONG);\r
112                 if (valueName == null) {\r
113                     valueName = UCharacter.getPropertyValueName(propNum, valueNum, NameChoice.SHORT);\r
114                     if (valueName == null) {\r
115                         valueName = Integer.toString(valueNum);\r
116                     }\r
117                 }\r
118                 temp.applyIntPropertyValue(propNum, valueNum);\r
119                 if (temp.size() == 0) {\r
120                     continue;\r
121                 }\r
122                 final String prefix = "a";\r
123                 final String suffix = "b";\r
124                 String shouldMatch = prefix + UTF16.valueOf(temp.charAt(0)) + suffix;\r
125                 temp.complement();\r
126                 String shouldNotMatch = prefix + UTF16.valueOf(temp.charAt(0)) + suffix;\r
127 \r
128                 // posix style pattern\r
129                 String rawPattern = prefix + "[:" + propName + "=" + valueName + ":]" + suffix;\r
130                 String rawNegativePattern = prefix + "[:^" + propName + "=" + valueName + ":]" + suffix;\r
131                 checkCharPattern(UnicodeRegex.compile(rawPattern), rawPattern, shouldMatch, shouldNotMatch);\r
132                 checkCharPattern(UnicodeRegex.compile(rawNegativePattern), rawNegativePattern, shouldNotMatch, shouldMatch);\r
133 \r
134                 // perl style pattern\r
135                 rawPattern = prefix + "\\p{" + propName + "=" + valueName + "}" + suffix;\r
136                 rawNegativePattern = prefix + "\\P{" + propName + "=" + valueName + "}" + suffix;\r
137                 checkCharPattern(UnicodeRegex.compile(rawPattern), rawPattern, shouldMatch, shouldNotMatch);\r
138                 checkCharPattern(UnicodeRegex.compile(rawNegativePattern), rawNegativePattern, shouldNotMatch, shouldMatch);\r
139             }\r
140         }\r
141     }\r
142 \r
143     public void TestBnf() {\r
144         UnicodeRegex regex = new UnicodeRegex();\r
145         final String[][] tests = {\r
146                 {\r
147                     "c = a wq;\n" +\r
148                     "a = xyz;\n" +\r
149                     "b = a a c;\n"\r
150                 },\r
151                 {\r
152                     "c = a b;\n" +\r
153                     "a = xyz;\n" +\r
154                     "b = a a c;\n",\r
155                     "Exception"\r
156                 },\r
157                 {\r
158                     "uri = (?: (scheme) \\:)? (host) (?: \\? (query))? (?: \\u0023 (fragment))?;\n" +\r
159                     "scheme = reserved+;\n" +\r
160                     "host = // reserved+;\n" +\r
161                     "query = [\\=reserved]+;\n" +\r
162                     "fragment = reserved+;\n" +\r
163                     "reserved = [[:ascii:][:sc=grek:]&[:alphabetic:]];\n",\r
164                 "http://\u03B1\u03B2\u03B3?huh=hi#there"},\r
165                 {\r
166                     "langtagRegex.txt"\r
167                 }\r
168         };\r
169         for (int i = 0; i < tests.length; ++i) {\r
170             String test = tests[i][0];\r
171             final boolean expectException = tests[i].length < 2 ? false : tests[i][1].equals("Exception");\r
172             try {\r
173                 String result;\r
174                 if (test.endsWith(".txt")) {\r
175                     java.io.InputStream is = RegexUtilitiesTest.class.getResourceAsStream(test);\r
176                     List lines = UnicodeRegex.appendLines(new ArrayList(), is, "UTF-8");\r
177                     result = regex.compileBnf(lines);\r
178                 } else {\r
179                     result = regex.compileBnf(test);\r
180                 }\r
181                 if (expectException) {\r
182                     errln("Expected exception for " + test);\r
183                     continue;\r
184                 }\r
185                 result = result.replaceAll("[0-9]+%", ""); // just so we can use the language subtag stuff\r
186                 String resolved = regex.transform(result);\r
187                 logln(resolved);\r
188                 Matcher m = Pattern.compile(resolved, Pattern.COMMENTS).matcher("");\r
189                 String checks = "";\r
190                 for (int j = 1; j < tests[i].length; ++j) {\r
191                     String check = tests[i][j];\r
192                     if (!m.reset(check).matches()) {\r
193                         checks = checks + "Fails " + check + "\n";\r
194                     } else {\r
195                         for (int k = 1; k <= m.groupCount(); ++k) {\r
196                             checks += "(" + m.group(k) + ")";\r
197                         }\r
198                         checks += "\n";\r
199                     }\r
200                 }\r
201                 logln("Result: " + result + "\n" + checks + "\n" + test);\r
202             } catch (Exception e) {\r
203                 if (!expectException) {\r
204                     errln(e.getClass().getName() + ": " + e.getMessage());\r
205                 }\r
206                 continue;\r
207             }\r
208         }\r
209     }\r
210 \r
211     /**\r
212      * Utility for checking patterns\r
213      */\r
214     private void checkCharPattern(Pattern pat, String matchTitle, String shouldMatch, String shouldNotMatch) {\r
215         Matcher matcher = pat.matcher(shouldMatch);\r
216         assertTrue(matchTitle + " and " + shouldMatch, matcher.matches());\r
217         matcher.reset(shouldNotMatch);\r
218         assertFalse(matchTitle + " and " + shouldNotMatch, matcher.matches());\r
219     }\r
220 }\r
221 //#endif\r