]> gitweb.fperrin.net Git - Dictionary.git/blob - jars/icu4j-4_4_2-src/tools/misc/src/com/ibm/icu/dev/tool/translit/UnicodeSetClosure.java
go
[Dictionary.git] / jars / icu4j-4_4_2-src / tools / misc / src / com / ibm / icu / dev / tool / translit / UnicodeSetClosure.java
1 /*\r
2  *******************************************************************************\r
3  * Copyright (C) 1996-2010, International Business Machines Corporation and    *\r
4  * others. All Rights Reserved.                                                *\r
5  *******************************************************************************\r
6  */\r
7 package com.ibm.icu.dev.tool.translit;\r
8 import java.io.File;\r
9 import java.io.FileOutputStream;\r
10 import java.io.OutputStreamWriter;\r
11 import java.io.PrintWriter;\r
12 import java.io.StringWriter;\r
13 import java.util.Locale;\r
14 \r
15 import com.ibm.icu.lang.UCharacter;\r
16 import com.ibm.icu.text.Normalizer;\r
17 import com.ibm.icu.text.UTF16;\r
18 import com.ibm.icu.text.UnicodeSet;\r
19 \r
20 // com.ibm.icu.dev.tool.translit.UnicodeSetClosure\r
21 // com.ibm.icu.dev.test.translit.TransliteratorTest\r
22 \r
23 public class UnicodeSetClosure {\r
24     public static void main(String[] args) throws Exception {\r
25         \r
26         UnicodeSet foo =         new UnicodeSet("[\u1FF6-\u1FFD\u2000-\u2001\u2126]");\r
27         \r
28         test();\r
29         if(foo==null){}\r
30         \r
31         /* The following is superceded by Alan's tool\r
32         \r
33         File f = new File("UnicodeSetClosure.txt");\r
34         String filename = f.getCanonicalFile().toString();\r
35         out = new PrintWriter(\r
36             new OutputStreamWriter(\r
37                 new FileOutputStream(filename), "UTF-8"));\r
38         System.out.println("Writing " + filename);\r
39         out.print('\uFEFF'); // BOM\r
40         \r
41         generateSets("Latin-Katakana", true, Normalizer.DECOMP_COMPAT, true,\r
42 "[',.a-z~\u00DF\u00E6\u00F0\u00F8\u00FE\u02BE\u0300-\u034E\u0360-\u0362\u0483-\u0486\u0591-\u05A1\u05A3-\u05B9\u05BB-\u05BD\u05BF\u05C1-\u05C2\u05C4\u064B-\u0655\u0670\u06D6-\u06DC\u06DF-\u06E4\u06E7-\u06E8\u06EA-\u06ED\u0711\u0730-\u074A\u07A6-\u07B0\u0901-\u0902\u093C\u0941-\u0948\u094D\u0951-\u0954\u0962-\u0963\u0981\u09BC\u09C1-\u09C4\u09CD\u09E2-\u09E3\u0A02\u0A3C\u0A41-\u0A42\u0A47-\u0A48\u0A4B-\u0A4D\u0A70-\u0A71\u0A81-\u0A82\u0ABC\u0AC1-\u0AC5\u0AC7-\u0AC8\u0ACD\u0B01\u0B3C\u0B3F\u0B41-\u0B43\u0B4D\u0B56\u0B82\u0BC0\u0BCD\u0C3E-\u0C40\u0C46-\u0C48\u0C4A-\u0C4D\u0C55-\u0C56\u0CBF\u0CC6\u0CCC-\u0CCD\u0D41-\u0D43\u0D4D\u0DCA\u0DD2-\u0DD4\u0DD6\u0E31\u0E34-\u0E3A\u0E47-\u0E4E\u0EB1\u0EB4-\u0EB9\u0EBB-\u0EBC\u0EC8-\u0ECD\u0F18-\u0F19\u0F35\u0F37\u0F39\u0F71-\u0F7E\u0F80-\u0F84\u0F86-\u0F87\u0F90-\u0F97\u0F99-\u0FBC\u0FC6\u102D-\u1030\u1032\u1036-\u1037\u1039\u1058-\u1059\u17B7-\u17BD\u17C6\u17C9-\u17D3\u18A9\u20D0-\u20DC\u20E1\u302A-\u302F\uFB1E\uFE20-\uFE23\\U0001D167-\\U0001D169\\U0001D17B-\\U0001D182\\U0001D185-\\U0001D18B\\U0001D1AA-\\U0001D1AD]"\r
43         );\r
44         generateSets("Latin-Katakana", false, Normalizer.DECOMP_COMPAT, false,\r
45 "[~\u3001-\u3002\u30A1-\u30AB\u30AD\u30AF\u30B1\u30B3\u30B5\u30B7\u30B9\u30BB\u30BD\u30BF\u30C1\u30C3-\u30C4\u30C6\u30C8\u30CA-\u30CF\u30D2\u30D5\u30D8\u30DB\u30DE-\u30F3\u30F5-\u30F6\u30FC-\u30FD]"\r
46         );\r
47         \r
48         out.close();\r
49         \r
50         /////////////////////////////////////////////////\r
51         if (true) return; // skip the stuff we've done already\r
52         \r
53         generateSets("Cyrillic-Latin", true, Normalizer.DECOMP, false,\r
54             "[\u0402\u0404-\u0406\u0408-\u040B\u040F-\u0418\u041A-\u0438\u043A-\u044F\u0452\u0454-\u0456\u0458-\u045B\u045F\u0490-\u0495\u0498-\u0499\u04D4-\u04D5\u04D8-\u04D9]"\r
55         );\r
56         generateSets("Latin-Cyrillic", false, Normalizer.DECOMP, false,\r
57             "[A-Za-z\u00C6\u00E6\u0110-\u0111\u018F\u0259\u02B9-\u02BA]"\r
58         );\r
59         */\r
60     }\r
61     \r
62     public static void generateSets(String label, boolean forward, \r
63             Normalizer.Mode m, boolean lowerFirst, String rules) {\r
64         UnicodeSet s = new UnicodeSet(rules);\r
65         System.out.println("Generating " + label + (forward ? "" : " BACKWARD"));\r
66         close(s, m, lowerFirst);\r
67         out.println("# MINIMAL FILTER GENERATED FOR: " + label + (forward ? "" : " BACKWARD"));\r
68         out.println(":: " \r
69             + (forward ? "" : "( ") \r
70             + s.toPattern(true) \r
71             + (forward ? "" : " )")\r
72             + " ;");\r
73         out.println();\r
74         out.println("Unicode: " + s.toPattern(false));\r
75     }\r
76     \r
77     static boolean GENERATE = false;\r
78     \r
79     public static void test() throws Exception {\r
80         File f = new File("TestUnicodeSetClosure.txt");\r
81         String filename = f.getCanonicalFile().toString();\r
82         out = new PrintWriter(\r
83             new OutputStreamWriter(\r
84                 new FileOutputStream(filename), "UTF-8"));\r
85         System.out.println("Writing " + filename);\r
86         out.print('\uFEFF'); // BOM\r
87         \r
88         GENERATE = true;\r
89         test("[:Devanagari:]");\r
90         /*\r
91         test("[\u00E0Bc]");\r
92         test("[m]");\r
93         */\r
94         \r
95         out.close();\r
96     }\r
97     \r
98     static final Normalizer.Mode[] testModes = {\r
99         Normalizer.NONE, Normalizer.NFD, Normalizer.NFC, Normalizer.NFKD, Normalizer.NFKC};\r
100     static final String[] modeNames = {\r
101         "NoNF", "NFD", "NFC", "NFKD", "NFKC"};\r
102         \r
103     static final boolean[] testCases = {\r
104         false, true};\r
105     static final String[] caseNames = {\r
106         "noLower", "lower"};\r
107     \r
108     public static void test(String testStr) throws Exception {\r
109         UnicodeSet original = new UnicodeSet(testStr);\r
110         \r
111         System.out.println("Testing Closure of: " + original.toPattern(true));\r
112         out.println("Testing Closure of: " + original.toPattern(false));\r
113         UnicodeSet raw = new UnicodeSet(original);\r
114         original.complement();\r
115         original.complement();\r
116         if (!raw.equals(original)) {\r
117             out.println("Equals:" + original.toPattern(false));\r
118         }\r
119         StringWriter swLog = null;\r
120         PrintWriter log = null;\r
121         \r
122         if (GENERATE) {\r
123             swLog = new StringWriter();\r
124             log = new PrintWriter(swLog);\r
125             log.println("static UnicodeSet[][] UNCHANGED = {");\r
126         }\r
127         for (int i = 0; i < testCases.length; ++i) {\r
128             if (GENERATE) log.println("    {");\r
129             for (int j = 0; j < testModes.length; ++j) {\r
130                 UnicodeSet test = new UnicodeSet(original);\r
131                 close(test, testModes[j], testCases[i]);\r
132                 if (GENERATE) {\r
133                     log.println("\tnew UnicodeSet(\"" + generatedSet.toPattern(true) + "\"),");\r
134                 }\r
135                 String label = caseNames[i]  + ", " + modeNames[j] ;\r
136                 System.out.println(label);\r
137                 out.println(label + ": " + test.toPattern(false));\r
138                 test.removeAll(original);\r
139                 if (test.isEmpty()) {\r
140                     out.println("\tNo Difference from original");\r
141                 } else {\r
142                     out.println("\tDifference = " + test.toPattern(false));\r
143                 }\r
144                 out.flush();\r
145             }\r
146             if (GENERATE) log.println("    },");\r
147             out.println();\r
148         }\r
149         \r
150         if (GENERATE) {\r
151             log.println("};");\r
152             out.print(swLog.getBuffer().toString());\r
153         }\r
154         \r
155         /*\r
156             close(test, Normalizer.DECOMP, false);\r
157             print("NFD", test);\r
158             \r
159             test = new UnicodeSet(testStr);\r
160             close(test, Normalizer.NO_OP, true);\r
161             print("Lower", test);\r
162 \r
163             test = new UnicodeSet(testStr);\r
164             close(test, Normalizer.COMPOSE, false);\r
165             print("NFC", test);\r
166 \r
167             test = new UnicodeSet(testStr);\r
168             close(test, Normalizer.DECOMP_COMPAT, false);\r
169             print("NFKD", test);\r
170 \r
171             test = new UnicodeSet(testStr);\r
172             close(test, Normalizer.COMPOSE_COMPAT, false);\r
173             print("NFKC", test);\r
174         */\r
175     }\r
176     \r
177     static PrintWriter out;\r
178     \r
179     /*\r
180     public static void print(String label, UnicodeSet test) {\r
181         System.out.println(label);\r
182         out.println(label + ": " + test.toPattern(false));\r
183         out.println();\r
184     }\r
185     */\r
186     \r
187     // dumb, slow implementations\r
188     public static class NFToString implements Char32ToString {\r
189         Normalizer.Mode mode;\r
190         boolean lowerFirst;\r
191         \r
192         NFToString(Normalizer.Mode m, boolean lowerFirst) {\r
193             mode = m;\r
194             this.lowerFirst = lowerFirst;\r
195         }\r
196         \r
197         public String get(int cp) {\r
198             String source = UTF16.valueOf(cp);\r
199             String result = source;\r
200             if (lowerFirst) result = UCharacter.toLowerCase(Locale.US, result);\r
201             result = Normalizer.normalize(result, mode);\r
202             if (lowerFirst) result = UCharacter.toLowerCase(Locale.US, result);\r
203             if (result.equals(source)) return null;\r
204             return result;\r
205         }\r
206     }\r
207         \r
208     \r
209     /** Returns a mapping from char32 to a string. If there is no change,\r
210      * null is returned.\r
211      */\r
212      \r
213     interface Char32ToString {\r
214         public String get(int cp);\r
215     }\r
216     \r
217     static boolean FAST = true;\r
218     \r
219     public static void close(UnicodeSet s, Normalizer.Mode m, boolean lowerFirst) {\r
220         Char32ToString f = new NFToString(m, lowerFirst);\r
221         if (FAST) {\r
222             int mm;\r
223             for (mm = 0; ; ++mm) if (m == testModes[mm]) break; // find mode\r
224             close2(s, f, lowerFirst ? 1 : 0, mm);\r
225             return;\r
226         }\r
227         close(s, f);\r
228     }\r
229     \r
230     public static void close(UnicodeSet s, Char32ToString f) {\r
231         if (GENERATE) generatedSet = new UnicodeSet();\r
232         \r
233         for (int cp = 0; cp <= 0x10FFFF; ++cp) {\r
234             int type = UCharacter.getType(cp);\r
235             if (type == Character.UNASSIGNED) continue;\r
236             \r
237             //if (cp == '\u00e7') {\r
238               //  System.out.println("debug");\r
239             //}\r
240             String result = f.get(cp);\r
241             if (result == null) continue;\r
242             if (GENERATE) {\r
243                 generatedSet.add(cp);\r
244             }\r
245             if (!containsSome(s, result)) continue;\r
246             s.add(cp);\r
247         }\r
248     }\r
249     \r
250     public static void close2(UnicodeSet s, Char32ToString f, int lc, int mode) {\r
251         UnicodeSet unchanged = new UnicodeSet(); // UNCHANGED[lc][mode];\r
252         int count = unchanged.getRangeCount();\r
253         for (int i = 0; i < count; ++i) {\r
254             int start = unchanged.getRangeStart(i);\r
255             int end = unchanged.getRangeEnd(i);\r
256             for (int cp = start; cp <= end; ++cp) {\r
257                 String result = f.get(cp);\r
258                 if (result == null) throw new IllegalArgumentException("Something wrong -- should never happen");\r
259                 if (!containsSome(s, result)) continue;\r
260                 s.add(cp);\r
261             }\r
262         }\r
263     }\r
264     \r
265     /*\r
266     static final UnicodeSet[][] UNCHANGED = {\r
267         { // \u1F80-\u1FB4\u1FB6-\u1FBC\u1FBE\u1FC1-\u1FC4\u1FC6-\u1FD3\u1FD6-\u1FDB\u1FDD-\u1FEF\u1FF2-\u1FF4\r
268         new UnicodeSet("[\u1FF6-\u1FFD\u2000-\u2001\u2126]"),\r
269         // \u212A-\u212B\u219A-\u219B\u21AE\u21CD-\u21CF\u2204\u2209\u220C\u2224\u2226\u2241\u2244\u2247\u2249\u2260\u2262\u226D-\u2271\u2274-\u2275\u2278\r
270         new UnicodeSet("[\u00C0-\u00C5\u00C7-\u00CF\u00D1-\u00D6\u00D9-\u00DD\u00E0-\u00E5\u00E7-\u00EF\u00F1-\u00F6\u00F9-\u00FD\u00FF-\u010F\u0112-\u0125\u0128-\u0130\u0134-\u0137\u0139-\u013E\u0143-\u0148\u014C-\u0151\u0154-\u0165\u0168-\u017E\u01A0-\u01A1\u01AF-\u01B0\u01CD-\u01DC\u01DE-\u01E3\u01E6-\u01F0\u01F4-\u01F5\u01F8-\u021B\u021E-\u021F\u0226-\u0233\u0340-\u0341\u0343-\u0344\u0374\u037E\u0385-\u038A\u038C\u038E-\u0390\u03AA-\u03B0\u03CA-\u03CE\u03D3-\u03D4\u0400-\u0401\u0403\u0407\u040C-\u040E\u0419\u0439\u0450-\u0451\u0453\u0457\u045C-\u045E\u0476-\u0477\u04C1-\u04C2\u04D0-\u04D3\u04D6-\u04D7\u04DA-\u04DF\u04E2-\u04E7\u04EA-\u04F5\u04F8-\u04F9\u0622-\u0626\u06C0\u06C2\u06D3\u0929\u0931\u0934\u0958-\u095F\u09CB-\u09CC\u09DC-\u09DD\u09DF\u0A33\u0A36\u0A59-\u0A5B\u0A5E\u0B48\u0B4B-\u0B4C\u0B5C-\u0B5D\u0B94\u0BCA-\u0BCC\u0C48\u0CC0\u0CC7-\u0CC8\u0CCA-\u0CCB\u0D4A-\u0D4C\u0DDA\u0DDC-\u0DDE\u0F43\u0F4D\u0F52\u0F57\u0F5C\u0F69\u0F73\u0F75-\u0F76\u0F78\u0F81\u0F93\u0F9D\u0FA2\u0FA7\u0FAC\u0FB9\u1026\u1E00-\u1E99\u1E9B\u1EA0-\u1EF9\u1F00-\u1F15\u1F18-\u1F1D\u1F20-\u1F45\u1F48-\u1F4D\u1F50-\u1F57\u1F59\u1F5B\u1F5D\u1F5F-\u1F7D\u1F80-\u1FB4\u1FB6-\u1FBC\u1FBE\u1FC1-\u1FC4\u1FC6-\u1FD3\u1FD6-\u1FDB\u1FDD-\u1FEF\u1FF2-\u1FF4\u1FF6-\u1FFD\u2000-\u2001\u2126\u212A-\u212B\u219A-\u219B\u21AE\u21CD-\u21CF\u2204\u2209\u220C\u2224\u2226\u2241\u2244\u2247\u2249\u2260\u2262\u226D-\u2271\u2274-\u2275\u2278-\u2279\u2280-\u2281\u2284-\u2285\u2288-\u2289\u22AC-\u22AF\u22E0-\u22E3\u22EA-\u22ED\u2329-\u232A\u304C\u304E\u3050\u3052\u3054\u3056\u3058\u305A\u305C\u305E\u3060\u3062\u3065\u3067\u3069\u3070-\u3071\u3073-\u3074\u3076-\u3077\u3079-\u307A\u307C-\u307D\u3094\u309E\u30AC\u30AE\u30B0\u30B2\u30B4\u30B6\u30B8\u30BA\u30BC\u30BE\u30C0\u30C2\u30C5\u30C7\u30C9\u30D0-\u30D1\u30D3-\u30D4\u30D6-\u30D7\u30D9-\u30DA\u30DC-\u30DD\u30F4\u30F7-\u30FA\u30FE\uAC00-\uD7A3\uF900-\uFA0D\uFA10\uFA12\uFA15-\uFA1E\uFA20\uFA22\uFA25-\uFA26\uFA2A-\uFA2D\uFB1D\uFB1F\uFB2A-\uFB36\uFB38-\uFB3C\uFB3E\uFB40-\uFB41\uFB43-\uFB44\uFB46-\uFB4E]"),\r
271         new UnicodeSet("[\u0340-\u0341\u0343-\u0344\u0374\u037E\u0387\u0958-\u095F\u09DC-\u09DD\u09DF\u0A33\u0A36\u0A59-\u0A5B\u0A5E\u0B5C-\u0B5D\u0F43\u0F4D\u0F52\u0F57\u0F5C\u0F69\u0F73\u0F75-\u0F76\u0F78\u0F81\u0F93\u0F9D\u0FA2\u0FA7\u0FAC\u0FB9\u1F71\u1F73\u1F75\u1F77\u1F79\u1F7B\u1F7D\u1FBB\u1FBE\u1FC9\u1FCB\u1FD3\u1FDB\u1FE3\u1FEB\u1FEE-\u1FEF\u1FF9\u1FFB\u1FFD\u2000-\u2001\u2126\u212A-\u212B\u2329-\u232A\uF900-\uFA0D\uFA10\uFA12\uFA15-\uFA1E\uFA20\uFA22\uFA25-\uFA26\uFA2A-\uFA2D\uFB1F\uFB2A-\uFB36\uFB38-\uFB3C\uFB3E\uFB40-\uFB41\uFB43-\uFB44\uFB46-\uFB4E]"),\r
272         new UnicodeSet("[\u00A0\u00A8\u00AA\u00AF\u00B2-\u00B5\u00B8-\u00BA\u00BC-\u00BE\u00C0-\u00C5\u00C7-\u00CF\u00D1-\u00D6\u00D9-\u00DD\u00E0-\u00E5\u00E7-\u00EF\u00F1-\u00F6\u00F9-\u00FD\u00FF-\u010F\u0112-\u0125\u0128-\u0130\u0132-\u0137\u0139-\u0140\u0143-\u0149\u014C-\u0151\u0154-\u0165\u0168-\u017F\u01A0-\u01A1\u01AF-\u01B0\u01C4-\u01DC\u01DE-\u01E3\u01E6-\u01F5\u01F8-\u021B\u021E-\u021F\u0226-\u0233\u02B0-\u02B8\u02D8-\u02DD\u02E0-\u02E4\u0340-\u0341\u0343-\u0344\u0374\u037A\u037E\u0384-\u038A\u038C\u038E-\u0390\u03AA-\u03B0\u03CA-\u03CE\u03D0-\u03D6\u03F0-\u03F2\u0400-\u0401\u0403\u0407\u040C-\u040E\u0419\u0439\u0450-\u0451\u0453\u0457\u045C-\u045E\u0476-\u0477\u04C1-\u04C2\u04D0-\u04D3\u04D6-\u04D7\u04DA-\u04DF\u04E2-\u04E7\u04EA-\u04F5\u04F8-\u04F9\u0587\u0622-\u0626\u0675-\u0678\u06C0\u06C2\u06D3\u0929\u0931\u0934\u0958-\u095F\u09CB-\u09CC\u09DC-\u09DD\u09DF\u0A33\u0A36\u0A59-\u0A5B\u0A5E\u0B48\u0B4B-\u0B4C\u0B5C-\u0B5D\u0B94\u0BCA-\u0BCC\u0C48\u0CC0\u0CC7-\u0CC8\u0CCA-\u0CCB\u0D4A-\u0D4C\u0DDA\u0DDC-\u0DDE\u0E33\u0EB3\u0EDC-\u0EDD\u0F0C\u0F43\u0F4D\u0F52\u0F57\u0F5C\u0F69\u0F73\u0F75-\u0F79\u0F81\u0F93\u0F9D\u0FA2\u0FA7\u0FAC\u0FB9\u1026\u1E00-\u1E9B\u1EA0-\u1EF9\u1F00-\u1F15\u1F18-\u1F1D\u1F20-\u1F45\u1F48-\u1F4D\u1F50-\u1F57\u1F59\u1F5B\u1F5D\u1F5F-\u1F7D\u1F80-\u1FB4\u1FB6-\u1FC4\u1FC6-\u1FD3\u1FD6-\u1FDB\u1FDD-\u1FEF\u1FF2-\u1FF4\u1FF6-\u1FFE\u2000-\u200A\u2011\u2017\u2024-\u2026\u202F\u2033-\u2034\u2036-\u2037\u203C\u203E\u2048-\u2049\u2070\u2074-\u208E\u20A8\u2100-\u2103\u2105-\u2107\u2109-\u2113\u2115-\u2116\u2119-\u211D\u2120-\u2122\u2124\u2126\u2128\u212A-\u212D\u212F-\u2131\u2133-\u2139\u2153-\u217F\u219A-\u219B\u21AE\u21CD-\u21CF\u2204\u2209\u220C\u2224\u2226\u222C-\u222D\u222F-\u2230\u2241\u2244\u2247\u2249\u2260\u2262\u226D-\u2271\u2274-\u2275\u2278-\u2279\u2280-\u2281\u2284-\u2285\u2288-\u2289\u22AC-\u22AF\u22E0-\u22E3\u22EA-\u22ED\u2329-\u232A\u2460-\u24EA\u2E9F\u2EF3\u2F00-\u2FD5\u3000\u3036\u3038-\u303A\u304C\u304E\u3050\u3052\u3054\u3056\u3058\u305A\u305C\u305E\u3060\u3062\u3065\u3067\u3069\u3070-\u3071\u3073-\u3074\u3076-\u3077\u3079-\u307A\u307C-\u307D\u3094\u309B-\u309C\u309E\u30AC\u30AE\u30B0\u30B2\u30B4\u30B6\u30B8\u30BA\u30BC\u30BE\u30C0\u30C2\u30C5\u30C7\u30C9\u30D0-\u30D1\u30D3-\u30D4\u30D6-\u30D7\u30D9-\u30DA\u30DC-\u30DD\u30F4\u30F7-\u30FA\u30FE\u3131-\u318E\u3192-\u319F\u3200-\u321C\u3220-\u3243\u3260-\u327B\u3280-\u32B0\u32C0-\u32CB\u32D0-\u32FE\u3300-\u3376\u337B-\u33DD\u33E0-\u33FE\uAC00-\uD7A3\uF900-\uFA0D\uFA10\uFA12\uFA15-\uFA1E\uFA20\uFA22\uFA25-\uFA26\uFA2A-\uFA2D\uFB00-\uFB06\uFB13-\uFB17\uFB1D\uFB1F-\uFB36\uFB38-\uFB3C\uFB3E\uFB40-\uFB41\uFB43-\uFB44\uFB46-\uFBB1\uFBD3-\uFD3D\uFD50-\uFD8F\uFD92-\uFDC7\uFDF0-\uFDFB\uFE30-\uFE44\uFE49-\uFE52\uFE54-\uFE66\uFE68-\uFE6B\uFE70-\uFE72\uFE74\uFE76-\uFEFC\uFF01-\uFF5E\uFF61-\uFFBE\uFFC2-\uFFC7\uFFCA-\uFFCF\uFFD2-\uFFD7\uFFDA-\uFFDC\uFFE0-\uFFE6\uFFE8-\uFFEE]"),\r
273         new UnicodeSet("[\u00A0\u00A8\u00AA\u00AF\u00B2-\u00B5\u00B8-\u00BA\u00BC-\u00BE\u0132-\u0133\u013F-\u0140\u0149\u017F\u01C4-\u01CC\u01F1-\u01F3\u02B0-\u02B8\u02D8-\u02DD\u02E0-\u02E4\u0340-\u0341\u0343-\u0344\u0374\u037A\u037E\u0384-\u0385\u0387\u03D0-\u03D6\u03F0-\u03F2\u0587\u0675-\u0678\u0958-\u095F\u09DC-\u09DD\u09DF\u0A33\u0A36\u0A59-\u0A5B\u0A5E\u0B5C-\u0B5D\u0E33\u0EB3\u0EDC-\u0EDD\u0F0C\u0F43\u0F4D\u0F52\u0F57\u0F5C\u0F69\u0F73\u0F75-\u0F79\u0F81\u0F93\u0F9D\u0FA2\u0FA7\u0FAC\u0FB9\u1E9A-\u1E9B\u1F71\u1F73\u1F75\u1F77\u1F79\u1F7B\u1F7D\u1FBB\u1FBD-\u1FC1\u1FC9\u1FCB\u1FCD-\u1FCF\u1FD3\u1FDB\u1FDD-\u1FDF\u1FE3\u1FEB\u1FED-\u1FEF\u1FF9\u1FFB\u1FFD-\u1FFE\u2000-\u200A\u2011\u2017\u2024-\u2026\u202F\u2033-\u2034\u2036-\u2037\u203C\u203E\u2048-\u2049\u2070\u2074-\u208E\u20A8\u2100-\u2103\u2105-\u2107\u2109-\u2113\u2115-\u2116\u2119-\u211D\u2120-\u2122\u2124\u2126\u2128\u212A-\u212D\u212F-\u2131\u2133-\u2139\u2153-\u217F\u222C-\u222D\u222F-\u2230\u2329-\u232A\u2460-\u24EA\u2E9F\u2EF3\u2F00-\u2FD5\u3000\u3036\u3038-\u303A\u309B-\u309C\u3131-\u318E\u3192-\u319F\u3200-\u321C\u3220-\u3243\u3260-\u327B\u3280-\u32B0\u32C0-\u32CB\u32D0-\u32FE\u3300-\u3376\u337B-\u33DD\u33E0-\u33FE\uF900-\uFA0D\uFA10\uFA12\uFA15-\uFA1E\uFA20\uFA22\uFA25-\uFA26\uFA2A-\uFA2D\uFB00-\uFB06\uFB13-\uFB17\uFB1F-\uFB36\uFB38-\uFB3C\uFB3E\uFB40-\uFB41\uFB43-\uFB44\uFB46-\uFBB1\uFBD3-\uFD3D\uFD50-\uFD8F\uFD92-\uFDC7\uFDF0-\uFDFB\uFE30-\uFE44\uFE49-\uFE52\uFE54-\uFE66\uFE68-\uFE6B\uFE70-\uFE72\uFE74\uFE76-\uFEFC\uFF01-\uFF5E\uFF61-\uFFBE\uFFC2-\uFFC7\uFFCA-\uFFCF\uFFD2-\uFFD7\uFFDA-\uFFDC\uFFE0-\uFFE6\uFFE8-\uFFEE]"),\r
274         },\r
275         {\r
276         new UnicodeSet("[A-Z\u00B2-\u00B3\u00B5\u00B9\u00BC-\u00BE\u00C0-\u00D6\u00D8-\u00DE\u0100\u0102\u0104\u0106\u0108\u010A\u010C\u010E\u0110\u0112\u0114\u0116\u0118\u011A\u011C\u011E\u0120\u0122\u0124\u0126\u0128\u012A\u012C\u012E\u0130-\u0132\u0134\u0136\u0139\u013B\u013D\u013F\u0141\u0143\u0145\u0147\u014A\u014C\u014E\u0150\u0152\u0154\u0156\u0158\u015A\u015C\u015E\u0160\u0162\u0164\u0166\u0168\u016A\u016C\u016E\u0170\u0172\u0174\u0176\u0178-\u0179\u017B\u017D\u017F\u0181-\u0182\u0184\u0186-\u0187\u0189-\u018B\u018E-\u0191\u0193-\u0194\u0196-\u0198\u019C-\u019D\u019F-\u01A0\u01A2\u01A4\u01A6-\u01A7\u01A9\u01AC\u01AE-\u01AF\u01B1-\u01B3\u01B5\u01B7-\u01B8\u01BC\u01C4-\u01CD\u01CF\u01D1\u01D3\u01D5\u01D7\u01D9\u01DB\u01DE\u01E0\u01E2\u01E4\u01E6\u01E8\u01EA\u01EC\u01EE\u01F1-\u01F4\u01F6-\u01F8\u01FA\u01FC\u01FE\u0200\u0202\u0204\u0206\u0208\u020A\u020C\u020E\u0210\u0212\u0214\u0216\u0218\u021A\u021C\u021E\u0222\u0224\u0226\u0228\u022A\u022C\u022E\u0230\u0232\u0345\u0386\u0388-\u038A\u038C\u038E-\u038F\u0391-\u03A1\u03A3-\u03AB\u03C2\u03D0-\u03D1\u03D5-\u03D6\u03DA\u03DC\u03DE\u03E0\u03E2\u03E4\u03E6\u03E8\u03EA\u03EC\u03EE\u03F0-\u03F2\u03F4-\u03F5\u0400-\u042F\u0460\u0462\u0464\u0466\u0468\u046A\u046C\u046E\u0470\u0472\u0474\u0476\u0478\u047A\u047C\u047E\u0480\u048C\u048E\u0490\u0492\u0494\u0496\u0498\u049A\u049C\u049E\u04A0\u04A2\u04A4\u04A6\u04A8\u04AA\u04AC\u04AE\u04B0\u04B2\u04B4\u04B6\u04B8\u04BA\u04BC\u04BE\u04C1\u04C3\u04C7\u04CB\u04D0\u04D2\u04D4\u04D6\u04D8\u04DA\u04DC\u04DE\u04E0\u04E2\u04E4\u04E6\u04E8\u04EA\u04EC\u04EE\u04F0\u04F2\u04F4\u04F8\u0531-\u0556\u09F8\u0F2A-\u0F33\u137C\u1E00\u1E02\u1E04\u1E06\u1E08\u1E0A\u1E0C\u1E0E\u1E10\u1E12\u1E14\u1E16\u1E18\u1E1A\u1E1C\u1E1E\u1E20\u1E22\u1E24\u1E26\u1E28\u1E2A\u1E2C\u1E2E\u1E30\u1E32\u1E34\u1E36\u1E38\u1E3A\u1E3C\u1E3E\u1E40\u1E42\u1E44\u1E46\u1E48\u1E4A\u1E4C\u1E4E\u1E50\u1E52\u1E54\u1E56\u1E58\u1E5A\u1E5C\u1E5E\u1E60\u1E62\u1E64\u1E66\u1E68\u1E6A\u1E6C\u1E6E\u1E70\u1E72\u1E74\u1E76\u1E78\u1E7A\u1E7C\u1E7E\u1E80\u1E82\u1E84\u1E86\u1E88\u1E8A\u1E8C\u1E8E\u1E90\u1E92\u1E94\u1E9B\u1EA0\u1EA2\u1EA4\u1EA6\u1EA8\u1EAA\u1EAC\u1EAE\u1EB0\u1EB2\u1EB4\u1EB6\u1EB8\u1EBA\u1EBC\u1EBE\u1EC0\u1EC2\u1EC4\u1EC6\u1EC8\u1ECA\u1ECC\u1ECE\u1ED0\u1ED2\u1ED4\u1ED6\u1ED8\u1EDA\u1EDC\u1EDE\u1EE0\u1EE2\u1EE4\u1EE6\u1EE8\u1EEA\u1EEC\u1EEE\u1EF0\u1EF2\u1EF4\u1EF6\u1EF8\u1F08-\u1F0F\u1F18-\u1F1D\u1F28-\u1F2F\u1F38-\u1F3F\u1F48-\u1F4D\u1F59\u1F5B\u1F5D\u1F5F\u1F68-\u1F6F\u1F88-\u1F8F\u1F98-\u1F9F\u1FA8-\u1FAF\u1FB8-\u1FBC\u1FBE\u1FC8-\u1FCC\u1FD8-\u1FDB\u1FE8-\u1FEC\u1FF8-\u1FFC\u2070\u2074-\u2079\u2080-\u2089\u2126\u212A-\u212B\u2153-\u215E\u2160-\u217F\u2181-\u2183\u2460-\u2468\u2474-\u247C\u2488-\u2490\u24B6-\u24EA\u2776-\u277E\u2780-\u2788\u278A-\u2792\uFF21-\uFF3A\\U0001034A\\U00010400-\\U00010425\\U0001D165-\\U0001D166\\U0001D16D-\\U0001D172]"),\r
277         new UnicodeSet("[A-Z\u00B2-\u00B3\u00B5\u00B9\u00BC-\u00BE\u00C0-\u00D6\u00D8-\u00DE\u00E0-\u00E5\u00E7-\u00EF\u00F1-\u00F6\u00F9-\u00FD\u00FF-\u0110\u0112-\u0126\u0128-\u0132\u0134-\u0137\u0139-\u013F\u0141\u0143-\u0148\u014A\u014C-\u0152\u0154-\u0166\u0168-\u017F\u0181-\u0182\u0184\u0186-\u0187\u0189-\u018B\u018E-\u0191\u0193-\u0194\u0196-\u0198\u019C-\u019D\u019F-\u01A2\u01A4\u01A6-\u01A7\u01A9\u01AC\u01AE-\u01B3\u01B5\u01B7-\u01B8\u01BC\u01C4-\u01DC\u01DE-\u01E4\u01E6-\u021C\u021E-\u021F\u0222\u0224\u0226-\u0233\u0340-\u0341\u0343-\u0345\u0374\u037E\u0385-\u038A\u038C\u038E-\u03A1\u03A3-\u03B0\u03C2\u03CA-\u03CE\u03D0-\u03D1\u03D3-\u03D6\u03DA\u03DC\u03DE\u03E0\u03E2\u03E4\u03E6\u03E8\u03EA\u03EC\u03EE\u03F0-\u03F2\u03F4-\u03F5\u0400-\u042F\u0439\u0450-\u0451\u0453\u0457\u045C-\u045E\u0460\u0462\u0464\u0466\u0468\u046A\u046C\u046E\u0470\u0472\u0474\u0476-\u0478\u047A\u047C\u047E\u0480\u048C\u048E\u0490\u0492\u0494\u0496\u0498\u049A\u049C\u049E\u04A0\u04A2\u04A4\u04A6\u04A8\u04AA\u04AC\u04AE\u04B0\u04B2\u04B4\u04B6\u04B8\u04BA\u04BC\u04BE\u04C1-\u04C3\u04C7\u04CB\u04D0-\u04D4\u04D6-\u04D8\u04DA-\u04E0\u04E2-\u04E8\u04EA-\u04F5\u04F8-\u04F9\u0531-\u0556\u0622-\u0626\u06C0\u06C2\u06D3\u0929\u0931\u0934\u0958-\u095F\u09CB-\u09CC\u09DC-\u09DD\u09DF\u09F8\u0A33\u0A36\u0A59-\u0A5B\u0A5E\u0B48\u0B4B-\u0B4C\u0B5C-\u0B5D\u0B94\u0BCA-\u0BCC\u0C48\u0CC0\u0CC7-\u0CC8\u0CCA-\u0CCB\u0D4A-\u0D4C\u0DDA\u0DDC-\u0DDE\u0F2A-\u0F33\u0F43\u0F4D\u0F52\u0F57\u0F5C\u0F69\u0F73\u0F75-\u0F76\u0F78\u0F81\u0F93\u0F9D\u0FA2\u0FA7\u0FAC\u0FB9\u1026\u137C\u1E00-\u1E99\u1E9B\u1EA0-\u1EF9\u1F00-\u1F15\u1F18-\u1F1D\u1F20-\u1F45\u1F48-\u1F4D\u1F50-\u1F57\u1F59\u1F5B\u1F5D\u1F5F-\u1F7D\u1F80-\u1FB4\u1FB6-\u1FBC\u1FBE\u1FC1-\u1FC4\u1FC6-\u1FD3\u1FD6-\u1FDB\u1FDD-\u1FEF\u1FF2-\u1FF4\u1FF6-\u1FFD\u2000-\u2001\u2070\u2074-\u2079\u2080-\u2089\u2126\u212A-\u212B\u2153-\u215E\u2160-\u217F\u2181-\u2183\u219A-\u219B\u21AE\u21CD-\u21CF\u2204\u2209\u220C\u2224\u2226\u2241\u2244\u2247\u2249\u2260\u2262\u226D-\u2271\u2274-\u2275\u2278-\u2279\u2280-\u2281\u2284-\u2285\u2288-\u2289\u22AC-\u22AF\u22E0-\u22E3\u22EA-\u22ED\u2329-\u232A\u2460-\u2468\u2474-\u247C\u2488-\u2490\u24B6-\u24EA\u2776-\u277E\u2780-\u2788\u278A-\u2792\u304C\u304E\u3050\u3052\u3054\u3056\u3058\u305A\u305C\u305E\u3060\u3062\u3065\u3067\u3069\u3070-\u3071\u3073-\u3074\u3076-\u3077\u3079-\u307A\u307C-\u307D\u3094\u309E\u30AC\u30AE\u30B0\u30B2\u30B4\u30B6\u30B8\u30BA\u30BC\u30BE\u30C0\u30C2\u30C5\u30C7\u30C9\u30D0-\u30D1\u30D3-\u30D4\u30D6-\u30D7\u30D9-\u30DA\u30DC-\u30DD\u30F4\u30F7-\u30FA\u30FE\uAC00-\uD7A3\uF900-\uFA0D\uFA10\uFA12\uFA15-\uFA1E\uFA20\uFA22\uFA25-\uFA26\uFA2A-\uFA2D\uFB1D\uFB1F\uFB2A-\uFB36\uFB38-\uFB3C\uFB3E\uFB40-\uFB41\uFB43-\uFB44\uFB46-\uFB4E\uFF21-\uFF3A\\U0001034A\\U00010400-\\U00010425\\U0001D165-\\U0001D166\\U0001D16D-\\U0001D172]"),\r
278         new UnicodeSet("[A-Z\u00B2-\u00B3\u00B5\u00B9\u00BC-\u00BE\u00C0-\u00D6\u00D8-\u00DE\u0100\u0102\u0104\u0106\u0108\u010A\u010C\u010E\u0110\u0112\u0114\u0116\u0118\u011A\u011C\u011E\u0120\u0122\u0124\u0126\u0128\u012A\u012C\u012E\u0130-\u0132\u0134\u0136\u0139\u013B\u013D\u013F\u0141\u0143\u0145\u0147\u014A\u014C\u014E\u0150\u0152\u0154\u0156\u0158\u015A\u015C\u015E\u0160\u0162\u0164\u0166\u0168\u016A\u016C\u016E\u0170\u0172\u0174\u0176\u0178-\u0179\u017B\u017D\u017F\u0181-\u0182\u0184\u0186-\u0187\u0189-\u018B\u018E-\u0191\u0193-\u0194\u0196-\u0198\u019C-\u019D\u019F-\u01A0\u01A2\u01A4\u01A6-\u01A7\u01A9\u01AC\u01AE-\u01AF\u01B1-\u01B3\u01B5\u01B7-\u01B8\u01BC\u01C4-\u01CD\u01CF\u01D1\u01D3\u01D5\u01D7\u01D9\u01DB\u01DE\u01E0\u01E2\u01E4\u01E6\u01E8\u01EA\u01EC\u01EE\u01F1-\u01F4\u01F6-\u01F8\u01FA\u01FC\u01FE\u0200\u0202\u0204\u0206\u0208\u020A\u020C\u020E\u0210\u0212\u0214\u0216\u0218\u021A\u021C\u021E\u0222\u0224\u0226\u0228\u022A\u022C\u022E\u0230\u0232\u0340-\u0341\u0343-\u0345\u0374\u037E\u0386-\u038A\u038C\u038E-\u038F\u0391-\u03A1\u03A3-\u03AB\u03C2\u03D0-\u03D1\u03D5-\u03D6\u03DA\u03DC\u03DE\u03E0\u03E2\u03E4\u03E6\u03E8\u03EA\u03EC\u03EE\u03F0-\u03F2\u03F4-\u03F5\u0400-\u042F\u0460\u0462\u0464\u0466\u0468\u046A\u046C\u046E\u0470\u0472\u0474\u0476\u0478\u047A\u047C\u047E\u0480\u048C\u048E\u0490\u0492\u0494\u0496\u0498\u049A\u049C\u049E\u04A0\u04A2\u04A4\u04A6\u04A8\u04AA\u04AC\u04AE\u04B0\u04B2\u04B4\u04B6\u04B8\u04BA\u04BC\u04BE\u04C1\u04C3\u04C7\u04CB\u04D0\u04D2\u04D4\u04D6\u04D8\u04DA\u04DC\u04DE\u04E0\u04E2\u04E4\u04E6\u04E8\u04EA\u04EC\u04EE\u04F0\u04F2\u04F4\u04F8\u0531-\u0556\u0958-\u095F\u09DC-\u09DD\u09DF\u09F8\u0A33\u0A36\u0A59-\u0A5B\u0A5E\u0B5C-\u0B5D\u0F2A-\u0F33\u0F43\u0F4D\u0F52\u0F57\u0F5C\u0F69\u0F73\u0F75-\u0F76\u0F78\u0F81\u0F93\u0F9D\u0FA2\u0FA7\u0FAC\u0FB9\u137C\u1E00\u1E02\u1E04\u1E06\u1E08\u1E0A\u1E0C\u1E0E\u1E10\u1E12\u1E14\u1E16\u1E18\u1E1A\u1E1C\u1E1E\u1E20\u1E22\u1E24\u1E26\u1E28\u1E2A\u1E2C\u1E2E\u1E30\u1E32\u1E34\u1E36\u1E38\u1E3A\u1E3C\u1E3E\u1E40\u1E42\u1E44\u1E46\u1E48\u1E4A\u1E4C\u1E4E\u1E50\u1E52\u1E54\u1E56\u1E58\u1E5A\u1E5C\u1E5E\u1E60\u1E62\u1E64\u1E66\u1E68\u1E6A\u1E6C\u1E6E\u1E70\u1E72\u1E74\u1E76\u1E78\u1E7A\u1E7C\u1E7E\u1E80\u1E82\u1E84\u1E86\u1E88\u1E8A\u1E8C\u1E8E\u1E90\u1E92\u1E94\u1E9B\u1EA0\u1EA2\u1EA4\u1EA6\u1EA8\u1EAA\u1EAC\u1EAE\u1EB0\u1EB2\u1EB4\u1EB6\u1EB8\u1EBA\u1EBC\u1EBE\u1EC0\u1EC2\u1EC4\u1EC6\u1EC8\u1ECA\u1ECC\u1ECE\u1ED0\u1ED2\u1ED4\u1ED6\u1ED8\u1EDA\u1EDC\u1EDE\u1EE0\u1EE2\u1EE4\u1EE6\u1EE8\u1EEA\u1EEC\u1EEE\u1EF0\u1EF2\u1EF4\u1EF6\u1EF8\u1F08-\u1F0F\u1F18-\u1F1D\u1F28-\u1F2F\u1F38-\u1F3F\u1F48-\u1F4D\u1F59\u1F5B\u1F5D\u1F5F\u1F68-\u1F6F\u1F71\u1F73\u1F75\u1F77\u1F79\u1F7B\u1F7D\u1F88-\u1F8F\u1F98-\u1F9F\u1FA8-\u1FAF\u1FB8-\u1FBC\u1FBE\u1FC8-\u1FCC\u1FD3\u1FD8-\u1FDB\u1FE3\u1FE8-\u1FEC\u1FEE-\u1FEF\u1FF8-\u1FFD\u2000-\u2001\u2070\u2074-\u2079\u2080-\u2089\u2126\u212A-\u212B\u2153-\u215E\u2160-\u217F\u2181-\u2183\u2329-\u232A\u2460-\u2468\u2474-\u247C\u2488-\u2490\u24B6-\u24EA\u2776-\u277E\u2780-\u2788\u278A-\u2792\uF900-\uFA0D\uFA10\uFA12\uFA15-\uFA1E\uFA20\uFA22\uFA25-\uFA26\uFA2A-\uFA2D\uFB1F\uFB2A-\uFB36\uFB38-\uFB3C\uFB3E\uFB40-\uFB41\uFB43-\uFB44\uFB46-\uFB4E\uFF21-\uFF3A\\U0001034A\\U00010400-\\U00010425\\U0001D165-\\U0001D166\\U0001D16D-\\U0001D172]"),\r
279         new UnicodeSet("[A-Z\u00A0\u00A8\u00AA\u00AF\u00B2-\u00B5\u00B8-\u00BA\u00BC-\u00BE\u00C0-\u00D6\u00D8-\u00DE\u00E0-\u00E5\u00E7-\u00EF\u00F1-\u00F6\u00F9-\u00FD\u00FF-\u0110\u0112-\u0126\u0128-\u0137\u0139-\u0141\u0143-\u014A\u014C-\u0152\u0154-\u0166\u0168-\u017F\u0181-\u0182\u0184\u0186-\u0187\u0189-\u018B\u018E-\u0191\u0193-\u0194\u0196-\u0198\u019C-\u019D\u019F-\u01A2\u01A4\u01A6-\u01A7\u01A9\u01AC\u01AE-\u01B3\u01B5\u01B7-\u01B8\u01BC\u01C4-\u01DC\u01DE-\u01E4\u01E6-\u021C\u021E-\u021F\u0222\u0224\u0226-\u0233\u02B0-\u02B8\u02D8-\u02DD\u02E0-\u02E4\u0340-\u0341\u0343-\u0345\u0374\u037A\u037E\u0384-\u038A\u038C\u038E-\u03A1\u03A3-\u03B0\u03C2\u03CA-\u03CE\u03D0-\u03D6\u03DA\u03DC\u03DE\u03E0\u03E2\u03E4\u03E6\u03E8\u03EA\u03EC\u03EE\u03F0-\u03F2\u03F4-\u03F5\u0400-\u042F\u0439\u0450-\u0451\u0453\u0457\u045C-\u045E\u0460\u0462\u0464\u0466\u0468\u046A\u046C\u046E\u0470\u0472\u0474\u0476-\u0478\u047A\u047C\u047E\u0480\u048C\u048E\u0490\u0492\u0494\u0496\u0498\u049A\u049C\u049E\u04A0\u04A2\u04A4\u04A6\u04A8\u04AA\u04AC\u04AE\u04B0\u04B2\u04B4\u04B6\u04B8\u04BA\u04BC\u04BE\u04C1-\u04C3\u04C7\u04CB\u04D0-\u04D4\u04D6-\u04D8\u04DA-\u04E0\u04E2-\u04E8\u04EA-\u04F5\u04F8-\u04F9\u0531-\u0556\u0587\u0622-\u0626\u0675-\u0678\u06C0\u06C2\u06D3\u0929\u0931\u0934\u0958-\u095F\u09CB-\u09CC\u09DC-\u09DD\u09DF\u09F8\u0A33\u0A36\u0A59-\u0A5B\u0A5E\u0B48\u0B4B-\u0B4C\u0B5C-\u0B5D\u0B94\u0BCA-\u0BCC\u0C48\u0CC0\u0CC7-\u0CC8\u0CCA-\u0CCB\u0D4A-\u0D4C\u0DDA\u0DDC-\u0DDE\u0E33\u0EB3\u0EDC-\u0EDD\u0F0C\u0F2A-\u0F33\u0F43\u0F4D\u0F52\u0F57\u0F5C\u0F69\u0F73\u0F75-\u0F79\u0F81\u0F93\u0F9D\u0FA2\u0FA7\u0FAC\u0FB9\u1026\u137C\u1E00-\u1E9B\u1EA0-\u1EF9\u1F00-\u1F15\u1F18-\u1F1D\u1F20-\u1F45\u1F48-\u1F4D\u1F50-\u1F57\u1F59\u1F5B\u1F5D\u1F5F-\u1F7D\u1F80-\u1FB4\u1FB6-\u1FC4\u1FC6-\u1FD3\u1FD6-\u1FDB\u1FDD-\u1FEF\u1FF2-\u1FF4\u1FF6-\u1FFE\u2000-\u200A\u2011\u2017\u2024-\u2026\u202F\u2033-\u2034\u2036-\u2037\u203C\u203E\u2048-\u2049\u2070\u2074-\u208E\u20A8\u2100-\u2103\u2105-\u2107\u2109-\u2113\u2115-\u2116\u2119-\u211D\u2120-\u2122\u2124\u2126\u2128\u212A-\u212D\u212F-\u2131\u2133-\u2139\u2153-\u217F\u2181-\u2183\u219A-\u219B\u21AE\u21CD-\u21CF\u2204\u2209\u220C\u2224\u2226\u222C-\u222D\u222F-\u2230\u2241\u2244\u2247\u2249\u2260\u2262\u226D-\u2271\u2274-\u2275\u2278-\u2279\u2280-\u2281\u2284-\u2285\u2288-\u2289\u22AC-\u22AF\u22E0-\u22E3\u22EA-\u22ED\u2329-\u232A\u2460-\u24EA\u2776-\u277E\u2780-\u2788\u278A-\u2792\u2E9F\u2EF3\u2F00-\u2FD5\u3000\u3036\u3038-\u303A\u304C\u304E\u3050\u3052\u3054\u3056\u3058\u305A\u305C\u305E\u3060\u3062\u3065\u3067\u3069\u3070-\u3071\u3073-\u3074\u3076-\u3077\u3079-\u307A\u307C-\u307D\u3094\u309B-\u309C\u309E\u30AC\u30AE\u30B0\u30B2\u30B4\u30B6\u30B8\u30BA\u30BC\u30BE\u30C0\u30C2\u30C5\u30C7\u30C9\u30D0-\u30D1\u30D3-\u30D4\u30D6-\u30D7\u30D9-\u30DA\u30DC-\u30DD\u30F4\u30F7-\u30FA\u30FE\u3131-\u318E\u3192-\u319F\u3200-\u321C\u3220-\u3243\u3260-\u327B\u3280-\u32B0\u32C0-\u32CB\u32D0-\u32FE\u3300-\u3376\u337B-\u33DD\u33E0-\u33FE\uAC00-\uD7A3\uF900-\uFA0D\uFA10\uFA12\uFA15-\uFA1E\uFA20\uFA22\uFA25-\uFA26\uFA2A-\uFA2D\uFB00-\uFB06\uFB13-\uFB17\uFB1D\uFB1F-\uFB36\uFB38-\uFB3C\uFB3E\uFB40-\uFB41\uFB43-\uFB44\uFB46-\uFBB1\uFBD3-\uFD3D\uFD50-\uFD8F\uFD92-\uFDC7\uFDF0-\uFDFB\uFE30-\uFE44\uFE49-\uFE52\uFE54-\uFE66\uFE68-\uFE6B\uFE70-\uFE72\uFE74\uFE76-\uFEFC\uFF01-\uFF5E\uFF61-\uFFBE\uFFC2-\uFFC7\uFFCA-\uFFCF\uFFD2-\uFFD7\uFFDA-\uFFDC\uFFE0-\uFFE6\uFFE8-\uFFEE\\U0001034A\\U00010400-\\U00010425\\U0001D165-\\U0001D166\\U0001D16D-\\U0001D172]"),\r
280         new UnicodeSet("[A-Z\u00A0\u00A8\u00AA\u00AF\u00B2-\u00B5\u00B8-\u00BA\u00BC-\u00BE\u00C0-\u00D6\u00D8-\u00DE\u0100\u0102\u0104\u0106\u0108\u010A\u010C\u010E\u0110\u0112\u0114\u0116\u0118\u011A\u011C\u011E\u0120\u0122\u0124\u0126\u0128\u012A\u012C\u012E\u0130-\u0134\u0136\u0139\u013B\u013D\u013F-\u0141\u0143\u0145\u0147\u0149-\u014A\u014C\u014E\u0150\u0152\u0154\u0156\u0158\u015A\u015C\u015E\u0160\u0162\u0164\u0166\u0168\u016A\u016C\u016E\u0170\u0172\u0174\u0176\u0178-\u0179\u017B\u017D\u017F\u0181-\u0182\u0184\u0186-\u0187\u0189-\u018B\u018E-\u0191\u0193-\u0194\u0196-\u0198\u019C-\u019D\u019F-\u01A0\u01A2\u01A4\u01A6-\u01A7\u01A9\u01AC\u01AE-\u01AF\u01B1-\u01B3\u01B5\u01B7-\u01B8\u01BC\u01C4-\u01CD\u01CF\u01D1\u01D3\u01D5\u01D7\u01D9\u01DB\u01DE\u01E0\u01E2\u01E4\u01E6\u01E8\u01EA\u01EC\u01EE\u01F1-\u01F4\u01F6-\u01F8\u01FA\u01FC\u01FE\u0200\u0202\u0204\u0206\u0208\u020A\u020C\u020E\u0210\u0212\u0214\u0216\u0218\u021A\u021C\u021E\u0222\u0224\u0226\u0228\u022A\u022C\u022E\u0230\u0232\u02B0-\u02B8\u02D8-\u02DD\u02E0-\u02E4\u0340-\u0341\u0343-\u0345\u0374\u037A\u037E\u0384-\u038A\u038C\u038E-\u038F\u0391-\u03A1\u03A3-\u03AB\u03C2\u03D0-\u03D6\u03DA\u03DC\u03DE\u03E0\u03E2\u03E4\u03E6\u03E8\u03EA\u03EC\u03EE\u03F0-\u03F2\u03F4-\u03F5\u0400-\u042F\u0460\u0462\u0464\u0466\u0468\u046A\u046C\u046E\u0470\u0472\u0474\u0476\u0478\u047A\u047C\u047E\u0480\u048C\u048E\u0490\u0492\u0494\u0496\u0498\u049A\u049C\u049E\u04A0\u04A2\u04A4\u04A6\u04A8\u04AA\u04AC\u04AE\u04B0\u04B2\u04B4\u04B6\u04B8\u04BA\u04BC\u04BE\u04C1\u04C3\u04C7\u04CB\u04D0\u04D2\u04D4\u04D6\u04D8\u04DA\u04DC\u04DE\u04E0\u04E2\u04E4\u04E6\u04E8\u04EA\u04EC\u04EE\u04F0\u04F2\u04F4\u04F8\u0531-\u0556\u0587\u0675-\u0678\u0958-\u095F\u09DC-\u09DD\u09DF\u09F8\u0A33\u0A36\u0A59-\u0A5B\u0A5E\u0B5C-\u0B5D\u0E33\u0EB3\u0EDC-\u0EDD\u0F0C\u0F2A-\u0F33\u0F43\u0F4D\u0F52\u0F57\u0F5C\u0F69\u0F73\u0F75-\u0F79\u0F81\u0F93\u0F9D\u0FA2\u0FA7\u0FAC\u0FB9\u137C\u1E00\u1E02\u1E04\u1E06\u1E08\u1E0A\u1E0C\u1E0E\u1E10\u1E12\u1E14\u1E16\u1E18\u1E1A\u1E1C\u1E1E\u1E20\u1E22\u1E24\u1E26\u1E28\u1E2A\u1E2C\u1E2E\u1E30\u1E32\u1E34\u1E36\u1E38\u1E3A\u1E3C\u1E3E\u1E40\u1E42\u1E44\u1E46\u1E48\u1E4A\u1E4C\u1E4E\u1E50\u1E52\u1E54\u1E56\u1E58\u1E5A\u1E5C\u1E5E\u1E60\u1E62\u1E64\u1E66\u1E68\u1E6A\u1E6C\u1E6E\u1E70\u1E72\u1E74\u1E76\u1E78\u1E7A\u1E7C\u1E7E\u1E80\u1E82\u1E84\u1E86\u1E88\u1E8A\u1E8C\u1E8E\u1E90\u1E92\u1E94\u1E9A-\u1E9B\u1EA0\u1EA2\u1EA4\u1EA6\u1EA8\u1EAA\u1EAC\u1EAE\u1EB0\u1EB2\u1EB4\u1EB6\u1EB8\u1EBA\u1EBC\u1EBE\u1EC0\u1EC2\u1EC4\u1EC6\u1EC8\u1ECA\u1ECC\u1ECE\u1ED0\u1ED2\u1ED4\u1ED6\u1ED8\u1EDA\u1EDC\u1EDE\u1EE0\u1EE2\u1EE4\u1EE6\u1EE8\u1EEA\u1EEC\u1EEE\u1EF0\u1EF2\u1EF4\u1EF6\u1EF8\u1F08-\u1F0F\u1F18-\u1F1D\u1F28-\u1F2F\u1F38-\u1F3F\u1F48-\u1F4D\u1F59\u1F5B\u1F5D\u1F5F\u1F68-\u1F6F\u1F71\u1F73\u1F75\u1F77\u1F79\u1F7B\u1F7D\u1F88-\u1F8F\u1F98-\u1F9F\u1FA8-\u1FAF\u1FB8-\u1FC1\u1FC8-\u1FCF\u1FD3\u1FD8-\u1FDB\u1FDD-\u1FDF\u1FE3\u1FE8-\u1FEF\u1FF8-\u1FFE\u2000-\u200A\u2011\u2017\u2024-\u2026\u202F\u2033-\u2034\u2036-\u2037\u203C\u203E\u2048-\u2049\u2070\u2074-\u208E\u20A8\u2100-\u2103\u2105-\u2107\u2109-\u2113\u2115-\u2116\u2119-\u211D\u2120-\u2122\u2124\u2126\u2128\u212A-\u212D\u212F-\u2131\u2133-\u2139\u2153-\u217F\u2181-\u2183\u222C-\u222D\u222F-\u2230\u2329-\u232A\u2460-\u24EA\u2776-\u277E\u2780-\u2788\u278A-\u2792\u2E9F\u2EF3\u2F00-\u2FD5\u3000\u3036\u3038-\u303A\u309B-\u309C\u3131-\u318E\u3192-\u319F\u3200-\u321C\u3220-\u3243\u3260-\u327B\u3280-\u32B0\u32C0-\u32CB\u32D0-\u32FE\u3300-\u3376\u337B-\u33DD\u33E0-\u33FE\uF900-\uFA0D\uFA10\uFA12\uFA15-\uFA1E\uFA20\uFA22\uFA25-\uFA26\uFA2A-\uFA2D\uFB00-\uFB06\uFB13-\uFB17\uFB1F-\uFB36\uFB38-\uFB3C\uFB3E\uFB40-\uFB41\uFB43-\uFB44\uFB46-\uFBB1\uFBD3-\uFD3D\uFD50-\uFD8F\uFD92-\uFDC7\uFDF0-\uFDFB\uFE30-\uFE44\uFE49-\uFE52\uFE54-\uFE66\uFE68-\uFE6B\uFE70-\uFE72\uFE74\uFE76-\uFEFC\uFF01-\uFF5E\uFF61-\uFFBE\uFFC2-\uFFC7\uFFCA-\uFFCF\uFFD2-\uFFD7\uFFDA-\uFFDC\uFFE0-\uFFE6\uFFE8-\uFFEE\\U0001034A\\U00010400-\\U00010425\\U0001D165-\\U0001D166\\U0001D16D-\\U0001D172]"),\r
281         },\r
282     };\r
283     */\r
284 \r
285     \r
286     static UnicodeSet generatedSet;\r
287     \r
288     // These should both be public, and on the respective classes\r
289     \r
290     public static void addAll(UnicodeSet s, String str) {\r
291         int cp;\r
292         for (int i = 0; i < str.length(); i += UTF16.getCharCount(cp)) {\r
293             cp = UTF16.charAt(str,i);\r
294             s.add(cp);\r
295         }\r
296     }\r
297     \r
298     public static boolean containsSome(UnicodeSet s, String str) {\r
299         int cp;\r
300         for (int i = 0; i < str.length(); i += UTF16.getCharCount(cp)) {\r
301             cp = UTF16.charAt(str,i);\r
302             if (s.contains(cp)) return true;\r
303         }\r
304         return false;\r
305     }\r
306 }