]> gitweb.fperrin.net Git - Dictionary.git/blobdiff - jars/icu4j-4_2_1-src/src/com/ibm/icu/dev/test/translit/RegexUtilitiesTest.java
go
[Dictionary.git] / jars / icu4j-4_2_1-src / src / com / ibm / icu / dev / test / translit / RegexUtilitiesTest.java
old mode 100755 (executable)
new mode 100644 (file)
index 3da4e19..12aa671
-//##header\r
-//#if defined(FOUNDATION10) || defined(J2SE13) || defined(J2SE14)\r
-//#else\r
-/*\r
- *******************************************************************************\r
- * Copyright (C) 2009, International Business Machines Corporation and         *\r
- * others. All Rights Reserved.                                                *\r
- *******************************************************************************\r
- */\r
-package com.ibm.icu.dev.test.translit;\r
-\r
-import java.util.ArrayList;\r
-import java.util.List;\r
-import java.util.regex.Matcher;\r
-import java.util.regex.Pattern;\r
-\r
-import com.ibm.icu.dev.test.TestFmwk;\r
-import com.ibm.icu.impl.UnicodeRegex;\r
-import com.ibm.icu.lang.UCharacter;\r
-import com.ibm.icu.lang.UProperty;\r
-import com.ibm.icu.lang.UProperty.NameChoice;\r
-import com.ibm.icu.text.Transliterator;\r
-import com.ibm.icu.text.UTF16;\r
-import com.ibm.icu.text.UnicodeSet;\r
-\r
-/**\r
- * @author markdavis\r
- */\r
-public class RegexUtilitiesTest extends TestFmwk {\r
-\r
-    public static void main(String[] args) throws Exception {\r
-        new RegexUtilitiesTest().run(args);\r
-    }\r
-\r
-    /**\r
-     * Check basic construction.\r
-     */\r
-    public void TestConstruction() {\r
-        String[][] tests = {\r
-                {"a"},\r
-                {"a[a-z]b"},\r
-                {"[ba-z]", "[a-z]"},\r
-                {"q[ba-z]", "q[a-z]"},\r
-                {"[ba-z]q", "[a-z]q"},\r
-                {"a\\p{joincontrol}b", "a[\u200C\u200D]b"},\r
-                {"a\\P{joincontrol}b", "a[^\u200C\u200D]b"},\r
-                {"a[[:whitespace:]&[:Zl:]]b", "a[\\\u2028]b"},\r
-                {"a [[:bc=cs:]&[:wspace:]] b", "a [\u00A0\u202F] b"},\r
-        };\r
-        for (int i = 0; i < tests.length; ++i) {\r
-            final String source = tests[i][0];\r
-            String expected = tests[i].length == 1 ? source : tests[i][1];\r
-            String actual = UnicodeRegex.fix(source);\r
-            assertEquals(source, expected, actual);\r
-        } \r
-    }\r
-\r
-    Transliterator hex = Transliterator.getInstance("hex");\r
-\r
-    /**\r
-     * Perform an exhaustive test on all Unicode characters to make sure that the UnicodeSet with each\r
-     * character works.\r
-     */\r
-    public void TestCharacters() {\r
-        UnicodeSet requiresQuote = new UnicodeSet("[\\$\\&\\-\\:\\[\\\\\\]\\^\\{\\}[:pattern_whitespace:]]");\r
-        boolean skip = getInclusion() < 10;\r
-        for (int cp = 0; cp < 0x110000; ++cp) {\r
-            if (cp > 0xFF && skip && (cp % 37 != 0)) {\r
-                continue;\r
-            }\r
-            String cpString = UTF16.valueOf(cp);\r
-            String s = requiresQuote.contains(cp) ? "\\" + cpString : cpString;\r
-            String pattern = null;\r
-            final String rawPattern = "[" + s + s + "]";\r
-            try {\r
-                pattern = UnicodeRegex.fix(rawPattern);\r
-            } catch (Exception e) {\r
-                errln(e.getMessage());\r
-                continue;\r
-            }\r
-            final String expected = "[" + s + "]";\r
-            assertEquals("Doubled character works" + hex.transform(s), expected, pattern);\r
-\r
-            // verify that we can create a regex pattern and use as expected\r
-            String shouldNotMatch = UTF16.valueOf((cp + 1) % 0x110000);\r
-            checkCharPattern(Pattern.compile(pattern), pattern, cpString, shouldNotMatch);\r
-\r
-            // verify that the Pattern.compile works\r
-            checkCharPattern(UnicodeRegex.compile(rawPattern), pattern, cpString, shouldNotMatch);\r
-        }\r
-    }\r
-\r
-    /**\r
-     * Check all integer Unicode properties to make sure they work.\r
-     */\r
-    public void TestUnicodeProperties() {\r
-        final boolean skip = getInclusion() < 10;\r
-        UnicodeSet temp = new UnicodeSet();\r
-        for (int propNum = UProperty.INT_START; propNum < UProperty.INT_LIMIT; ++propNum) {\r
-            if (skip && (propNum % 5 != 0)) {\r
-                continue;\r
-            }\r
-            String propName = UCharacter.getPropertyName(propNum, NameChoice.LONG);\r
-            final int intPropertyMinValue = UCharacter.getIntPropertyMinValue(propNum);\r
-            int intPropertyMaxValue = UCharacter.getIntPropertyMaxValue(propNum);\r
-            if (skip) { // only test first if not exhaustive\r
-                intPropertyMaxValue = intPropertyMinValue;\r
-            }\r
-            for (int valueNum = intPropertyMinValue; valueNum <= intPropertyMaxValue; ++valueNum) {\r
-                // hack for getting property value name\r
-                String valueName = UCharacter.getPropertyValueName(propNum, valueNum, NameChoice.LONG);\r
-                if (valueName == null) {\r
-                    valueName = UCharacter.getPropertyValueName(propNum, valueNum, NameChoice.SHORT);\r
-                    if (valueName == null) {\r
-                        valueName = Integer.toString(valueNum);\r
-                    }\r
-                }\r
-                temp.applyIntPropertyValue(propNum, valueNum);\r
-                if (temp.size() == 0) {\r
-                    continue;\r
-                }\r
-                final String prefix = "a";\r
-                final String suffix = "b";\r
-                String shouldMatch = prefix + UTF16.valueOf(temp.charAt(0)) + suffix;\r
-                temp.complement();\r
-                String shouldNotMatch = prefix + UTF16.valueOf(temp.charAt(0)) + suffix;\r
-\r
-                // posix style pattern\r
-                String rawPattern = prefix + "[:" + propName + "=" + valueName + ":]" + suffix;\r
-                String rawNegativePattern = prefix + "[:^" + propName + "=" + valueName + ":]" + suffix;\r
-                checkCharPattern(UnicodeRegex.compile(rawPattern), rawPattern, shouldMatch, shouldNotMatch);\r
-                checkCharPattern(UnicodeRegex.compile(rawNegativePattern), rawNegativePattern, shouldNotMatch, shouldMatch);\r
-\r
-                // perl style pattern\r
-                rawPattern = prefix + "\\p{" + propName + "=" + valueName + "}" + suffix;\r
-                rawNegativePattern = prefix + "\\P{" + propName + "=" + valueName + "}" + suffix;\r
-                checkCharPattern(UnicodeRegex.compile(rawPattern), rawPattern, shouldMatch, shouldNotMatch);\r
-                checkCharPattern(UnicodeRegex.compile(rawNegativePattern), rawNegativePattern, shouldNotMatch, shouldMatch);\r
-            }\r
-        }\r
-    }\r
-\r
-    public void TestBnf() {\r
-        UnicodeRegex regex = new UnicodeRegex();\r
-        final String[][] tests = {\r
-                {\r
-                    "c = a wq;\n" +\r
-                    "a = xyz;\n" +\r
-                    "b = a a c;\n"\r
-                },\r
-                {\r
-                    "c = a b;\n" +\r
-                    "a = xyz;\n" +\r
-                    "b = a a c;\n",\r
-                    "Exception"\r
-                },\r
-                {\r
-                    "uri = (?: (scheme) \\:)? (host) (?: \\? (query))? (?: \\u0023 (fragment))?;\n" +\r
-                    "scheme = reserved+;\n" +\r
-                    "host = // reserved+;\n" +\r
-                    "query = [\\=reserved]+;\n" +\r
-                    "fragment = reserved+;\n" +\r
-                    "reserved = [[:ascii:][:sc=grek:]&[:alphabetic:]];\n",\r
-                "http://\u03B1\u03B2\u03B3?huh=hi#there"},\r
-                {\r
-                    "langtagRegex.txt"\r
-                }\r
-        };\r
-        for (int i = 0; i < tests.length; ++i) {\r
-            String test = tests[i][0];\r
-            final boolean expectException = tests[i].length < 2 ? false : tests[i][1].equals("Exception");\r
-            try {\r
-                String result;\r
-                if (test.endsWith(".txt")) {\r
-                    java.io.InputStream is = RegexUtilitiesTest.class.getResourceAsStream(test);\r
-                    List lines = UnicodeRegex.appendLines(new ArrayList(), is, "UTF-8");\r
-                    result = regex.compileBnf(lines);\r
-                } else {\r
-                    result = regex.compileBnf(test);\r
-                }\r
-                if (expectException) {\r
-                    errln("Expected exception for " + test);\r
-                    continue;\r
-                }\r
-                result = result.replaceAll("[0-9]+%", ""); // just so we can use the language subtag stuff\r
-                String resolved = regex.transform(result);\r
-                logln(resolved);\r
-                Matcher m = Pattern.compile(resolved, Pattern.COMMENTS).matcher("");\r
-                String checks = "";\r
-                for (int j = 1; j < tests[i].length; ++j) {\r
-                    String check = tests[i][j];\r
-                    if (!m.reset(check).matches()) {\r
-                        checks = checks + "Fails " + check + "\n";\r
-                    } else {\r
-                        for (int k = 1; k <= m.groupCount(); ++k) {\r
-                            checks += "(" + m.group(k) + ")";\r
-                        }\r
-                        checks += "\n";\r
-                    }\r
-                }\r
-                logln("Result: " + result + "\n" + checks + "\n" + test);\r
-            } catch (Exception e) {\r
-                if (!expectException) {\r
-                    errln(e.getClass().getName() + ": " + e.getMessage());\r
-                }\r
-                continue;\r
-            }\r
-        }\r
-    }\r
-\r
-    /**\r
-     * Utility for checking patterns\r
-     */\r
-    private void checkCharPattern(Pattern pat, String matchTitle, String shouldMatch, String shouldNotMatch) {\r
-        Matcher matcher = pat.matcher(shouldMatch);\r
-        assertTrue(matchTitle + " and " + shouldMatch, matcher.matches());\r
-        matcher.reset(shouldNotMatch);\r
-        assertFalse(matchTitle + " and " + shouldNotMatch, matcher.matches());\r
-    }\r
-}\r
-//#endif\r
+//##header J2SE15
+//#if defined(FOUNDATION10) || defined(J2SE13) || defined(J2SE14)
+//#else
+/*
+ *******************************************************************************
+ * Copyright (C) 2009, International Business Machines Corporation and         *
+ * others. All Rights Reserved.                                                *
+ *******************************************************************************
+ */
+package com.ibm.icu.dev.test.translit;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import com.ibm.icu.dev.test.TestFmwk;
+import com.ibm.icu.impl.UnicodeRegex;
+import com.ibm.icu.lang.UCharacter;
+import com.ibm.icu.lang.UProperty;
+import com.ibm.icu.lang.UProperty.NameChoice;
+import com.ibm.icu.text.Transliterator;
+import com.ibm.icu.text.UTF16;
+import com.ibm.icu.text.UnicodeSet;
+
+/**
+ * @author markdavis
+ */
+public class RegexUtilitiesTest extends TestFmwk {
+
+    public static void main(String[] args) throws Exception {
+        new RegexUtilitiesTest().run(args);
+    }
+
+    /**
+     * Check basic construction.
+     */
+    public void TestConstruction() {
+        String[][] tests = {
+                {"a"},
+                {"a[a-z]b"},
+                {"[ba-z]", "[a-z]"},
+                {"q[ba-z]", "q[a-z]"},
+                {"[ba-z]q", "[a-z]q"},
+                {"a\\p{joincontrol}b", "a[\u200C\u200D]b"},
+                {"a\\P{joincontrol}b", "a[^\u200C\u200D]b"},
+                {"a[[:whitespace:]&[:Zl:]]b", "a[\\\u2028]b"},
+                {"a [[:bc=cs:]&[:wspace:]] b", "a [\u00A0\u202F] b"},
+        };
+        for (int i = 0; i < tests.length; ++i) {
+            final String source = tests[i][0];
+            String expected = tests[i].length == 1 ? source : tests[i][1];
+            String actual = UnicodeRegex.fix(source);
+            assertEquals(source, expected, actual);
+        } 
+    }
+
+    // Transliterator obtained for the ID "hex". TestCharacters runs the tested character string
+    // through it when building an assertion message (presumably to render it as hex escapes
+    // so that unprintable characters stay readable -- confirm against the ICU "hex" transform).
+    Transliterator hex = Transliterator.getInstance("hex");
+
+    /**
+     * Perform an exhaustive test on all Unicode characters to make sure that the UnicodeSet with each
+     * character works.
+     */
+    public void TestCharacters() {
+        UnicodeSet requiresQuote = new UnicodeSet("[\\$\\&\\-\\:\\[\\\\\\]\\^\\{\\}[:pattern_whitespace:]]");
+        boolean skip = getInclusion() < 10;
+        for (int cp = 0; cp < 0x110000; ++cp) {
+            if (cp > 0xFF && skip && (cp % 37 != 0)) {
+                continue;
+            }
+            String cpString = UTF16.valueOf(cp);
+            String s = requiresQuote.contains(cp) ? "\\" + cpString : cpString;
+            String pattern = null;
+            final String rawPattern = "[" + s + s + "]";
+            try {
+                pattern = UnicodeRegex.fix(rawPattern);
+            } catch (Exception e) {
+                errln(e.getMessage());
+                continue;
+            }
+            final String expected = "[" + s + "]";
+            assertEquals("Doubled character works" + hex.transform(s), expected, pattern);
+
+            // verify that we can create a regex pattern and use as expected
+            String shouldNotMatch = UTF16.valueOf((cp + 1) % 0x110000);
+            checkCharPattern(Pattern.compile(pattern), pattern, cpString, shouldNotMatch);
+
+            // verify that the Pattern.compile works
+            checkCharPattern(UnicodeRegex.compile(rawPattern), pattern, cpString, shouldNotMatch);
+        }
+    }
+
+    /**
+     * Check all integer Unicode properties to make sure they work.
+     */
+    public void TestUnicodeProperties() {
+        final boolean skip = getInclusion() < 10;
+        UnicodeSet temp = new UnicodeSet();
+        for (int propNum = UProperty.INT_START; propNum < UProperty.INT_LIMIT; ++propNum) {
+            if (skip && (propNum % 5 != 0)) {
+                continue;
+            }
+            String propName = UCharacter.getPropertyName(propNum, NameChoice.LONG);
+            final int intPropertyMinValue = UCharacter.getIntPropertyMinValue(propNum);
+            int intPropertyMaxValue = UCharacter.getIntPropertyMaxValue(propNum);
+            if (skip) { // only test first if not exhaustive
+                intPropertyMaxValue = intPropertyMinValue;
+            }
+            for (int valueNum = intPropertyMinValue; valueNum <= intPropertyMaxValue; ++valueNum) {
+                // hack for getting property value name
+                String valueName = UCharacter.getPropertyValueName(propNum, valueNum, NameChoice.LONG);
+                if (valueName == null) {
+                    valueName = UCharacter.getPropertyValueName(propNum, valueNum, NameChoice.SHORT);
+                    if (valueName == null) {
+                        valueName = Integer.toString(valueNum);
+                    }
+                }
+                temp.applyIntPropertyValue(propNum, valueNum);
+                if (temp.size() == 0) {
+                    continue;
+                }
+                final String prefix = "a";
+                final String suffix = "b";
+                String shouldMatch = prefix + UTF16.valueOf(temp.charAt(0)) + suffix;
+                temp.complement();
+                String shouldNotMatch = prefix + UTF16.valueOf(temp.charAt(0)) + suffix;
+
+                // posix style pattern
+                String rawPattern = prefix + "[:" + propName + "=" + valueName + ":]" + suffix;
+                String rawNegativePattern = prefix + "[:^" + propName + "=" + valueName + ":]" + suffix;
+                checkCharPattern(UnicodeRegex.compile(rawPattern), rawPattern, shouldMatch, shouldNotMatch);
+                checkCharPattern(UnicodeRegex.compile(rawNegativePattern), rawNegativePattern, shouldNotMatch, shouldMatch);
+
+                // perl style pattern
+                rawPattern = prefix + "\\p{" + propName + "=" + valueName + "}" + suffix;
+                rawNegativePattern = prefix + "\\P{" + propName + "=" + valueName + "}" + suffix;
+                checkCharPattern(UnicodeRegex.compile(rawPattern), rawPattern, shouldMatch, shouldNotMatch);
+                checkCharPattern(UnicodeRegex.compile(rawNegativePattern), rawNegativePattern, shouldNotMatch, shouldMatch);
+            }
+        }
+    }
+
+    public void TestBnf() {
+        UnicodeRegex regex = new UnicodeRegex();
+        final String[][] tests = {
+                {
+                    "c = a wq;\n" +
+                    "a = xyz;\n" +
+                    "b = a a c;\n"
+                },
+                {
+                    "c = a b;\n" +
+                    "a = xyz;\n" +
+                    "b = a a c;\n",
+                    "Exception"
+                },
+                {
+                    "uri = (?: (scheme) \\:)? (host) (?: \\? (query))? (?: \\u0023 (fragment))?;\n" +
+                    "scheme = reserved+;\n" +
+                    "host = // reserved+;\n" +
+                    "query = [\\=reserved]+;\n" +
+                    "fragment = reserved+;\n" +
+                    "reserved = [[:ascii:][:sc=grek:]&[:alphabetic:]];\n",
+                "http://\u03B1\u03B2\u03B3?huh=hi#there"},
+                {
+                    "langtagRegex.txt"
+                }
+        };
+        for (int i = 0; i < tests.length; ++i) {
+            String test = tests[i][0];
+            final boolean expectException = tests[i].length < 2 ? false : tests[i][1].equals("Exception");
+            try {
+                String result;
+                if (test.endsWith(".txt")) {
+                    java.io.InputStream is = RegexUtilitiesTest.class.getResourceAsStream(test);
+                    List lines = UnicodeRegex.appendLines(new ArrayList(), is, "UTF-8");
+                    result = regex.compileBnf(lines);
+                } else {
+                    result = regex.compileBnf(test);
+                }
+                if (expectException) {
+                    errln("Expected exception for " + test);
+                    continue;
+                }
+                result = result.replaceAll("[0-9]+%", ""); // just so we can use the language subtag stuff
+                String resolved = regex.transform(result);
+                logln(resolved);
+                Matcher m = Pattern.compile(resolved, Pattern.COMMENTS).matcher("");
+                String checks = "";
+                for (int j = 1; j < tests[i].length; ++j) {
+                    String check = tests[i][j];
+                    if (!m.reset(check).matches()) {
+                        checks = checks + "Fails " + check + "\n";
+                    } else {
+                        for (int k = 1; k <= m.groupCount(); ++k) {
+                            checks += "(" + m.group(k) + ")";
+                        }
+                        checks += "\n";
+                    }
+                }
+                logln("Result: " + result + "\n" + checks + "\n" + test);
+            } catch (Exception e) {
+                if (!expectException) {
+                    errln(e.getClass().getName() + ": " + e.getMessage());
+                }
+                continue;
+            }
+        }
+    }
+
+    /**
+     * Utility for checking patterns
+     */
+    private void checkCharPattern(Pattern pat, String matchTitle, String shouldMatch, String shouldNotMatch) {
+        Matcher matcher = pat.matcher(shouldMatch);
+        assertTrue(matchTitle + " and " + shouldMatch, matcher.matches());
+        matcher.reset(shouldNotMatch);
+        assertFalse(matchTitle + " and " + shouldNotMatch, matcher.matches());
+    }
+}
+//#endif