-//##header\r
-//#if defined(FOUNDATION10) || defined(J2SE13) || defined(J2SE14)\r
-//#else\r
-/*\r
- *******************************************************************************\r
- * Copyright (C) 2009, International Business Machines Corporation and *\r
- * others. All Rights Reserved. *\r
- *******************************************************************************\r
- */\r
-package com.ibm.icu.dev.test.translit;\r
-\r
-import java.util.ArrayList;\r
-import java.util.List;\r
-import java.util.regex.Matcher;\r
-import java.util.regex.Pattern;\r
-\r
-import com.ibm.icu.dev.test.TestFmwk;\r
-import com.ibm.icu.impl.UnicodeRegex;\r
-import com.ibm.icu.lang.UCharacter;\r
-import com.ibm.icu.lang.UProperty;\r
-import com.ibm.icu.lang.UProperty.NameChoice;\r
-import com.ibm.icu.text.Transliterator;\r
-import com.ibm.icu.text.UTF16;\r
-import com.ibm.icu.text.UnicodeSet;\r
-\r
-/**\r
- * @author markdavis\r
- */\r
-public class RegexUtilitiesTest extends TestFmwk {\r
-\r
- public static void main(String[] args) throws Exception {\r
- new RegexUtilitiesTest().run(args);\r
- }\r
-\r
- /**\r
- * Check basic construction.\r
- */\r
- public void TestConstruction() {\r
- String[][] tests = {\r
- {"a"},\r
- {"a[a-z]b"},\r
- {"[ba-z]", "[a-z]"},\r
- {"q[ba-z]", "q[a-z]"},\r
- {"[ba-z]q", "[a-z]q"},\r
- {"a\\p{joincontrol}b", "a[\u200C\u200D]b"},\r
- {"a\\P{joincontrol}b", "a[^\u200C\u200D]b"},\r
- {"a[[:whitespace:]&[:Zl:]]b", "a[\\\u2028]b"},\r
- {"a [[:bc=cs:]&[:wspace:]] b", "a [\u00A0\u202F] b"},\r
- };\r
- for (int i = 0; i < tests.length; ++i) {\r
- final String source = tests[i][0];\r
- String expected = tests[i].length == 1 ? source : tests[i][1];\r
- String actual = UnicodeRegex.fix(source);\r
- assertEquals(source, expected, actual);\r
- } \r
- }\r
-\r
- Transliterator hex = Transliterator.getInstance("hex");\r
-\r
- /**\r
- * Perform an exhaustive test on all Unicode characters to make sure that the UnicodeSet with each\r
- * character works.\r
- */\r
- public void TestCharacters() {\r
- UnicodeSet requiresQuote = new UnicodeSet("[\\$\\&\\-\\:\\[\\\\\\]\\^\\{\\}[:pattern_whitespace:]]");\r
- boolean skip = getInclusion() < 10;\r
- for (int cp = 0; cp < 0x110000; ++cp) {\r
- if (cp > 0xFF && skip && (cp % 37 != 0)) {\r
- continue;\r
- }\r
- String cpString = UTF16.valueOf(cp);\r
- String s = requiresQuote.contains(cp) ? "\\" + cpString : cpString;\r
- String pattern = null;\r
- final String rawPattern = "[" + s + s + "]";\r
- try {\r
- pattern = UnicodeRegex.fix(rawPattern);\r
- } catch (Exception e) {\r
- errln(e.getMessage());\r
- continue;\r
- }\r
- final String expected = "[" + s + "]";\r
- assertEquals("Doubled character works" + hex.transform(s), expected, pattern);\r
-\r
- // verify that we can create a regex pattern and use as expected\r
- String shouldNotMatch = UTF16.valueOf((cp + 1) % 0x110000);\r
- checkCharPattern(Pattern.compile(pattern), pattern, cpString, shouldNotMatch);\r
-\r
- // verify that the Pattern.compile works\r
- checkCharPattern(UnicodeRegex.compile(rawPattern), pattern, cpString, shouldNotMatch);\r
- }\r
- }\r
-\r
- /**\r
- * Check all integer Unicode properties to make sure they work.\r
- */\r
- public void TestUnicodeProperties() {\r
- final boolean skip = getInclusion() < 10;\r
- UnicodeSet temp = new UnicodeSet();\r
- for (int propNum = UProperty.INT_START; propNum < UProperty.INT_LIMIT; ++propNum) {\r
- if (skip && (propNum % 5 != 0)) {\r
- continue;\r
- }\r
- String propName = UCharacter.getPropertyName(propNum, NameChoice.LONG);\r
- final int intPropertyMinValue = UCharacter.getIntPropertyMinValue(propNum);\r
- int intPropertyMaxValue = UCharacter.getIntPropertyMaxValue(propNum);\r
- if (skip) { // only test first if not exhaustive\r
- intPropertyMaxValue = intPropertyMinValue;\r
- }\r
- for (int valueNum = intPropertyMinValue; valueNum <= intPropertyMaxValue; ++valueNum) {\r
- // hack for getting property value name\r
- String valueName = UCharacter.getPropertyValueName(propNum, valueNum, NameChoice.LONG);\r
- if (valueName == null) {\r
- valueName = UCharacter.getPropertyValueName(propNum, valueNum, NameChoice.SHORT);\r
- if (valueName == null) {\r
- valueName = Integer.toString(valueNum);\r
- }\r
- }\r
- temp.applyIntPropertyValue(propNum, valueNum);\r
- if (temp.size() == 0) {\r
- continue;\r
- }\r
- final String prefix = "a";\r
- final String suffix = "b";\r
- String shouldMatch = prefix + UTF16.valueOf(temp.charAt(0)) + suffix;\r
- temp.complement();\r
- String shouldNotMatch = prefix + UTF16.valueOf(temp.charAt(0)) + suffix;\r
-\r
- // posix style pattern\r
- String rawPattern = prefix + "[:" + propName + "=" + valueName + ":]" + suffix;\r
- String rawNegativePattern = prefix + "[:^" + propName + "=" + valueName + ":]" + suffix;\r
- checkCharPattern(UnicodeRegex.compile(rawPattern), rawPattern, shouldMatch, shouldNotMatch);\r
- checkCharPattern(UnicodeRegex.compile(rawNegativePattern), rawNegativePattern, shouldNotMatch, shouldMatch);\r
-\r
- // perl style pattern\r
- rawPattern = prefix + "\\p{" + propName + "=" + valueName + "}" + suffix;\r
- rawNegativePattern = prefix + "\\P{" + propName + "=" + valueName + "}" + suffix;\r
- checkCharPattern(UnicodeRegex.compile(rawPattern), rawPattern, shouldMatch, shouldNotMatch);\r
- checkCharPattern(UnicodeRegex.compile(rawNegativePattern), rawNegativePattern, shouldNotMatch, shouldMatch);\r
- }\r
- }\r
- }\r
-\r
- public void TestBnf() {\r
- UnicodeRegex regex = new UnicodeRegex();\r
- final String[][] tests = {\r
- {\r
- "c = a wq;\n" +\r
- "a = xyz;\n" +\r
- "b = a a c;\n"\r
- },\r
- {\r
- "c = a b;\n" +\r
- "a = xyz;\n" +\r
- "b = a a c;\n",\r
- "Exception"\r
- },\r
- {\r
- "uri = (?: (scheme) \\:)? (host) (?: \\? (query))? (?: \\u0023 (fragment))?;\n" +\r
- "scheme = reserved+;\n" +\r
- "host = // reserved+;\n" +\r
- "query = [\\=reserved]+;\n" +\r
- "fragment = reserved+;\n" +\r
- "reserved = [[:ascii:][:sc=grek:]&[:alphabetic:]];\n",\r
- "http://\u03B1\u03B2\u03B3?huh=hi#there"},\r
- {\r
- "langtagRegex.txt"\r
- }\r
- };\r
- for (int i = 0; i < tests.length; ++i) {\r
- String test = tests[i][0];\r
- final boolean expectException = tests[i].length < 2 ? false : tests[i][1].equals("Exception");\r
- try {\r
- String result;\r
- if (test.endsWith(".txt")) {\r
- java.io.InputStream is = RegexUtilitiesTest.class.getResourceAsStream(test);\r
- List lines = UnicodeRegex.appendLines(new ArrayList(), is, "UTF-8");\r
- result = regex.compileBnf(lines);\r
- } else {\r
- result = regex.compileBnf(test);\r
- }\r
- if (expectException) {\r
- errln("Expected exception for " + test);\r
- continue;\r
- }\r
- result = result.replaceAll("[0-9]+%", ""); // just so we can use the language subtag stuff\r
- String resolved = regex.transform(result);\r
- logln(resolved);\r
- Matcher m = Pattern.compile(resolved, Pattern.COMMENTS).matcher("");\r
- String checks = "";\r
- for (int j = 1; j < tests[i].length; ++j) {\r
- String check = tests[i][j];\r
- if (!m.reset(check).matches()) {\r
- checks = checks + "Fails " + check + "\n";\r
- } else {\r
- for (int k = 1; k <= m.groupCount(); ++k) {\r
- checks += "(" + m.group(k) + ")";\r
- }\r
- checks += "\n";\r
- }\r
- }\r
- logln("Result: " + result + "\n" + checks + "\n" + test);\r
- } catch (Exception e) {\r
- if (!expectException) {\r
- errln(e.getClass().getName() + ": " + e.getMessage());\r
- }\r
- continue;\r
- }\r
- }\r
- }\r
-\r
- /**\r
- * Utility for checking patterns\r
- */\r
- private void checkCharPattern(Pattern pat, String matchTitle, String shouldMatch, String shouldNotMatch) {\r
- Matcher matcher = pat.matcher(shouldMatch);\r
- assertTrue(matchTitle + " and " + shouldMatch, matcher.matches());\r
- matcher.reset(shouldNotMatch);\r
- assertFalse(matchTitle + " and " + shouldNotMatch, matcher.matches());\r
- }\r
-}\r
-//#endif\r
+//##header J2SE15
+//#if defined(FOUNDATION10) || defined(J2SE13) || defined(J2SE14)
+//#else
+/*
+ *******************************************************************************
+ * Copyright (C) 2009, International Business Machines Corporation and *
+ * others. All Rights Reserved. *
+ *******************************************************************************
+ */
+package com.ibm.icu.dev.test.translit;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import com.ibm.icu.dev.test.TestFmwk;
+import com.ibm.icu.impl.UnicodeRegex;
+import com.ibm.icu.lang.UCharacter;
+import com.ibm.icu.lang.UProperty;
+import com.ibm.icu.lang.UProperty.NameChoice;
+import com.ibm.icu.text.Transliterator;
+import com.ibm.icu.text.UTF16;
+import com.ibm.icu.text.UnicodeSet;
+
+/**
+ * @author markdavis
+ */
+public class RegexUtilitiesTest extends TestFmwk {
+
+ /**
+  * Command-line entry point: creates an instance of this test class and passes the
+  * arguments on to its run method.
+  */
+ public static void main(String[] args) throws Exception {
+     RegexUtilitiesTest test = new RegexUtilitiesTest();
+     test.run(args);
+ }
+
+ /**
+  * Runs UnicodeRegex.fix over a table of patterns and compares each result with the
+  * expected rewritten pattern. A row with a single element means the pattern is expected
+  * to come back unchanged.
+  */
+ public void TestConstruction() {
+     // Each row: { source pattern [, expected output of UnicodeRegex.fix] }
+     final String[][] cases = {
+         {"a"},
+         {"a[a-z]b"},
+         {"[ba-z]", "[a-z]"},
+         {"q[ba-z]", "q[a-z]"},
+         {"[ba-z]q", "[a-z]q"},
+         {"a\\p{joincontrol}b", "a[\u200C\u200D]b"},
+         {"a\\P{joincontrol}b", "a[^\u200C\u200D]b"},
+         {"a[[:whitespace:]&[:Zl:]]b", "a[\\\u2028]b"},
+         {"a [[:bc=cs:]&[:wspace:]] b", "a [\u00A0\u202F] b"},
+     };
+     int index = 0;
+     while (index < cases.length) {
+         final String[] row = cases[index++];
+         final String source = row[0];
+         String expected;
+         if (row.length == 1) {
+             expected = source;
+         } else {
+             expected = row[1];
+         }
+         // The source pattern doubles as the assertion message.
+         assertEquals(source, expected, UnicodeRegex.fix(source));
+     }
+ }
+
+ Transliterator hex = Transliterator.getInstance("hex"); // transliterator with ID "hex"; TestCharacters applies it to the tested string when building its assertion message
+
+ /**
+  * Checks, code point by code point, that a set pattern listing the same character twice
+  * is rewritten by UnicodeRegex.fix to a set listing it once, and that the resulting
+  * pattern matches that character and rejects the following code point.
+  * When getInclusion() is below 10, only every 37th code point above U+00FF is tested.
+  */
+ public void TestCharacters() {
+     // Characters in this set are written with a leading backslash inside the set pattern.
+     UnicodeSet requiresQuote = new UnicodeSet("[\\$\\&\\-\\:\\[\\\\\\]\\^\\{\\}[:pattern_whitespace:]]");
+     boolean skip = getInclusion() < 10;
+     for (int cp = 0; cp < 0x110000; ++cp) {
+         if (cp > 0xFF && skip && (cp % 37 != 0)) {
+             continue;
+         }
+         String cpString = UTF16.valueOf(cp);
+         String s = requiresQuote.contains(cp) ? "\\" + cpString : cpString;
+         String pattern = null;
+         final String rawPattern = "[" + s + s + "]";
+         try {
+             pattern = UnicodeRegex.fix(rawPattern);
+         } catch (Exception e) {
+             // Name the failing code point and the exception type: the bare message may be
+             // null and, on its own, does not say which of the many iterations failed.
+             errln("UnicodeRegex.fix failed for U+" + Integer.toHexString(cp) + ": "
+                     + e.getClass().getName() + ": " + e.getMessage());
+             continue;
+         }
+         final String expected = "[" + s + "]";
+         assertEquals("Doubled character works: " + hex.transform(s), expected, pattern);
+
+         // verify that the fixed pattern compiles as a java.util.regex pattern and behaves as
+         // expected; the modulo wraps the last code point around to U+0000
+         String shouldNotMatch = UTF16.valueOf((cp + 1) % 0x110000);
+         checkCharPattern(Pattern.compile(pattern), pattern, cpString, shouldNotMatch);
+
+         // verify that UnicodeRegex.compile, given the raw pattern, behaves the same way
+         checkCharPattern(UnicodeRegex.compile(rawPattern), pattern, cpString, shouldNotMatch);
+     }
+ }
+
+ /**
+  * Walks the integer-valued properties from UProperty.INT_START up to (not including)
+  * UProperty.INT_LIMIT. For each tested property value it builds POSIX-style and
+  * Perl-style patterns, positive and negated, compiles them with UnicodeRegex.compile,
+  * and checks them against one sample string inside the value's set and one outside it.
+  * When getInclusion() is below 10, only every fifth property and only its minimum value
+  * are tested.
+  */
+ public void TestUnicodeProperties() {
+     final boolean abbreviated = getInclusion() < 10;
+     // Literal text wrapped around both the sample characters and the patterns.
+     final String head = "a";
+     final String tail = "b";
+     UnicodeSet scratch = new UnicodeSet();
+     for (int prop = UProperty.INT_START; prop < UProperty.INT_LIMIT; ++prop) {
+         if (abbreviated && (prop % 5 != 0)) {
+             continue;
+         }
+         final String propName = UCharacter.getPropertyName(prop, NameChoice.LONG);
+         final int first = UCharacter.getIntPropertyMinValue(prop);
+         final int max = UCharacter.getIntPropertyMaxValue(prop);
+         // only test the first value if not exhaustive
+         final int last = abbreviated ? first : max;
+         for (int value = first; value <= last; ++value) {
+             // Name lookup falls back from long name to short name to the plain number.
+             String valueName = UCharacter.getPropertyValueName(prop, value, NameChoice.LONG);
+             if (valueName == null) {
+                 valueName = UCharacter.getPropertyValueName(prop, value, NameChoice.SHORT);
+             }
+             if (valueName == null) {
+                 valueName = Integer.toString(value);
+             }
+
+             scratch.applyIntPropertyValue(prop, value);
+             if (scratch.size() == 0) {
+                 continue;
+             }
+             // The member sample must be taken before the set is complemented in place.
+             final String member = head + UTF16.valueOf(scratch.charAt(0)) + tail;
+             scratch.complement();
+             final String nonMember = head + UTF16.valueOf(scratch.charAt(0)) + tail;
+
+             final String spec = propName + "=" + valueName;
+
+             // posix style pattern
+             String positive = head + "[:" + spec + ":]" + tail;
+             String negative = head + "[:^" + spec + ":]" + tail;
+             checkCharPattern(UnicodeRegex.compile(positive), positive, member, nonMember);
+             checkCharPattern(UnicodeRegex.compile(negative), negative, nonMember, member);
+
+             // perl style pattern
+             positive = head + "\\p{" + spec + "}" + tail;
+             negative = head + "\\P{" + spec + "}" + tail;
+             checkCharPattern(UnicodeRegex.compile(positive), positive, member, nonMember);
+             checkCharPattern(UnicodeRegex.compile(negative), negative, nonMember, member);
+         }
+     }
+ }
+
+ /**
+  * Compiles a table of BNF-style grammars with UnicodeRegex.compileBnf.
+  * The first element of each row is the grammar text, or the name of a ".txt" class
+  * resource holding it. A second element equal to "Exception" marks a grammar whose
+  * compilation is expected to throw. Any other following elements are strings matched
+  * against the compiled pattern; their match results are only logged, not asserted.
+  */
+ public void TestBnf() {
+     UnicodeRegex regex = new UnicodeRegex();
+     final String[][] tests = {
+         {
+             "c = a wq;\n" +
+             "a = xyz;\n" +
+             "b = a a c;\n"
+         },
+         {
+             "c = a b;\n" +
+             "a = xyz;\n" +
+             "b = a a c;\n",
+             "Exception"
+         },
+         {
+             "uri = (?: (scheme) \\:)? (host) (?: \\? (query))? (?: \\u0023 (fragment))?;\n" +
+             "scheme = reserved+;\n" +
+             "host = // reserved+;\n" +
+             "query = [\\=reserved]+;\n" +
+             "fragment = reserved+;\n" +
+             "reserved = [[:ascii:][:sc=grek:]&[:alphabetic:]];\n",
+             "http://\u03B1\u03B2\u03B3?huh=hi#there"},
+         {
+             "langtagRegex.txt"
+         }
+     };
+     for (int i = 0; i < tests.length; ++i) {
+         String test = tests[i][0];
+         final boolean expectException = tests[i].length >= 2 && tests[i][1].equals("Exception");
+         try {
+             String result;
+             if (test.endsWith(".txt")) {
+                 java.io.InputStream is = RegexUtilitiesTest.class.getResourceAsStream(test);
+                 if (is == null) {
+                     // getResourceAsStream returns null when the resource is missing; report
+                     // that directly instead of letting a NullPointerException surface later.
+                     errln("Could not find resource " + test);
+                     continue;
+                 }
+                 List lines;
+                 try {
+                     lines = UnicodeRegex.appendLines(new ArrayList(), is, "UTF-8");
+                 } finally {
+                     // Release the stream whether or not reading succeeded.
+                     is.close();
+                 }
+                 result = regex.compileBnf(lines);
+             } else {
+                 result = regex.compileBnf(test);
+             }
+             if (expectException) {
+                 errln("Expected exception for " + test);
+                 continue;
+             }
+             result = result.replaceAll("[0-9]+%", ""); // just so we can use the language subtag stuff
+             String resolved = regex.transform(result);
+             logln(resolved);
+             Matcher m = Pattern.compile(resolved, Pattern.COMMENTS).matcher("");
+             String checks = "";
+             for (int j = 1; j < tests[i].length; ++j) {
+                 String check = tests[i][j];
+                 if (!m.reset(check).matches()) {
+                     checks = checks + "Fails " + check + "\n";
+                 } else {
+                     // On a match, record the text of every capturing group.
+                     for (int k = 1; k <= m.groupCount(); ++k) {
+                         checks += "(" + m.group(k) + ")";
+                     }
+                     checks += "\n";
+                 }
+             }
+             logln("Result: " + result + "\n" + checks + "\n" + test);
+         } catch (Exception e) {
+             // An exception is a failure unless this row was marked as expecting one.
+             if (!expectException) {
+                 errln(e.getClass().getName() + ": " + e.getMessage());
+             }
+         }
+     }
+ }
+
+ /**
+  * Asserts that the pattern matches the whole of one string and does not match the whole
+  * of another.
+  *
+  * @param pat the compiled pattern under test
+  * @param matchTitle text placed at the start of both assertion messages
+  * @param shouldMatch input that the pattern is expected to match in full
+  * @param shouldNotMatch input that the pattern is expected not to match in full
+  */
+ private void checkCharPattern(Pattern pat, String matchTitle, String shouldMatch, String shouldNotMatch) {
+     final boolean acceptsPositive = pat.matcher(shouldMatch).matches();
+     assertTrue(matchTitle + " and " + shouldMatch, acceptsPositive);
+
+     final boolean acceptsNegative = pat.matcher(shouldNotMatch).matches();
+     assertFalse(matchTitle + " and " + shouldNotMatch, acceptsNegative);
+ }
+}
+//#endif