]> gitweb.fperrin.net Git - Dictionary.git/blob - jars/icu4j-4_2_1-src/src/com/ibm/icu/dev/tool/translit/UnicodeSetClosure.java
icu4jsrc
[Dictionary.git] / jars / icu4j-4_2_1-src / src / com / ibm / icu / dev / tool / translit / UnicodeSetClosure.java
1 /*\r
2  *******************************************************************************\r
3  * Copyright (C) 1996-2004, International Business Machines Corporation and    *\r
4  * others. All Rights Reserved.                                                *\r
5  *******************************************************************************\r
6  */\r
7 package com.ibm.icu.dev.tool.translit;\r
8 import com.ibm.icu.lang.*;\r
9 import com.ibm.icu.text.*;\r
10 //import java.text.*;\r
11 import java.io.*;\r
12 import java.util.Locale;\r
13 \r
14 // com.ibm.icu.dev.tool.translit.UnicodeSetClosure\r
15 // com.ibm.icu.dev.test.translit.TransliteratorTest\r
16 \r
17 public class UnicodeSetClosure {\r
18     public static void main(String[] args) throws Exception {\r
19         \r
20         UnicodeSet foo =         new UnicodeSet("[\u1FF6-\u1FFD\u2000-\u2001\u2126]");\r
21         \r
22         test();\r
23         if(foo==null){}\r
24         \r
25         /* The following is superceded by Alan's tool\r
26         \r
27         File f = new File("UnicodeSetClosure.txt");\r
28         String filename = f.getCanonicalFile().toString();\r
29         out = new PrintWriter(\r
30             new OutputStreamWriter(\r
31                 new FileOutputStream(filename), "UTF-8"));\r
32         System.out.println("Writing " + filename);\r
33         out.print('\uFEFF'); // BOM\r
34         \r
35         generateSets("Latin-Katakana", true, Normalizer.DECOMP_COMPAT, true,\r
36 "[',.a-z~\u00DF\u00E6\u00F0\u00F8\u00FE\u02BE\u0300-\u034E\u0360-\u0362\u0483-\u0486\u0591-\u05A1\u05A3-\u05B9\u05BB-\u05BD\u05BF\u05C1-\u05C2\u05C4\u064B-\u0655\u0670\u06D6-\u06DC\u06DF-\u06E4\u06E7-\u06E8\u06EA-\u06ED\u0711\u0730-\u074A\u07A6-\u07B0\u0901-\u0902\u093C\u0941-\u0948\u094D\u0951-\u0954\u0962-\u0963\u0981\u09BC\u09C1-\u09C4\u09CD\u09E2-\u09E3\u0A02\u0A3C\u0A41-\u0A42\u0A47-\u0A48\u0A4B-\u0A4D\u0A70-\u0A71\u0A81-\u0A82\u0ABC\u0AC1-\u0AC5\u0AC7-\u0AC8\u0ACD\u0B01\u0B3C\u0B3F\u0B41-\u0B43\u0B4D\u0B56\u0B82\u0BC0\u0BCD\u0C3E-\u0C40\u0C46-\u0C48\u0C4A-\u0C4D\u0C55-\u0C56\u0CBF\u0CC6\u0CCC-\u0CCD\u0D41-\u0D43\u0D4D\u0DCA\u0DD2-\u0DD4\u0DD6\u0E31\u0E34-\u0E3A\u0E47-\u0E4E\u0EB1\u0EB4-\u0EB9\u0EBB-\u0EBC\u0EC8-\u0ECD\u0F18-\u0F19\u0F35\u0F37\u0F39\u0F71-\u0F7E\u0F80-\u0F84\u0F86-\u0F87\u0F90-\u0F97\u0F99-\u0FBC\u0FC6\u102D-\u1030\u1032\u1036-\u1037\u1039\u1058-\u1059\u17B7-\u17BD\u17C6\u17C9-\u17D3\u18A9\u20D0-\u20DC\u20E1\u302A-\u302F\uFB1E\uFE20-\uFE23\\U0001D167-\\U0001D169\\U0001D17B-\\U0001D182\\U0001D185-\\U0001D18B\\U0001D1AA-\\U0001D1AD]"\r
37         );\r
38         generateSets("Latin-Katakana", false, Normalizer.DECOMP_COMPAT, false,\r
39 "[~\u3001-\u3002\u30A1-\u30AB\u30AD\u30AF\u30B1\u30B3\u30B5\u30B7\u30B9\u30BB\u30BD\u30BF\u30C1\u30C3-\u30C4\u30C6\u30C8\u30CA-\u30CF\u30D2\u30D5\u30D8\u30DB\u30DE-\u30F3\u30F5-\u30F6\u30FC-\u30FD]"\r
40         );\r
41         \r
42         out.close();\r
43         \r
44         /////////////////////////////////////////////////\r
45         if (true) return; // skip the stuff we've done already\r
46         \r
47         generateSets("Cyrillic-Latin", true, Normalizer.DECOMP, false,\r
48             "[\u0402\u0404-\u0406\u0408-\u040B\u040F-\u0418\u041A-\u0438\u043A-\u044F\u0452\u0454-\u0456\u0458-\u045B\u045F\u0490-\u0495\u0498-\u0499\u04D4-\u04D5\u04D8-\u04D9]"\r
49         );\r
50         generateSets("Latin-Cyrillic", false, Normalizer.DECOMP, false,\r
51             "[A-Za-z\u00C6\u00E6\u0110-\u0111\u018F\u0259\u02B9-\u02BA]"\r
52         );\r
53         */\r
54     }\r
55     \r
56     public static void generateSets(String label, boolean forward, \r
57             Normalizer.Mode m, boolean lowerFirst, String rules) {\r
58         UnicodeSet s = new UnicodeSet(rules);\r
59         System.out.println("Generating " + label + (forward ? "" : " BACKWARD"));\r
60         close(s, m, lowerFirst);\r
61         out.println("# MINIMAL FILTER GENERATED FOR: " + label + (forward ? "" : " BACKWARD"));\r
62         out.println(":: " \r
63             + (forward ? "" : "( ") \r
64             + s.toPattern(true) \r
65             + (forward ? "" : " )")\r
66             + " ;");\r
67         out.println();\r
68         out.println("Unicode: " + s.toPattern(false));\r
69     }\r
70     \r
71     static boolean GENERATE = false;\r
72     \r
73     public static void test() throws Exception {\r
74         File f = new File("TestUnicodeSetClosure.txt");\r
75         String filename = f.getCanonicalFile().toString();\r
76         out = new PrintWriter(\r
77             new OutputStreamWriter(\r
78                 new FileOutputStream(filename), "UTF-8"));\r
79         System.out.println("Writing " + filename);\r
80         out.print('\uFEFF'); // BOM\r
81         \r
82         GENERATE = true;\r
83         test("[:Devanagari:]");\r
84         /*\r
85         test("[\u00E0Bc]");\r
86         test("[m]");\r
87         */\r
88         \r
89         out.close();\r
90     }\r
91     \r
92     static final Normalizer.Mode[] testModes = {\r
93         Normalizer.NONE, Normalizer.NFD, Normalizer.NFC, Normalizer.NFKD, Normalizer.NFKC};\r
94     static final String[] modeNames = {\r
95         "NoNF", "NFD", "NFC", "NFKD", "NFKC"};\r
96         \r
97     static final boolean[] testCases = {\r
98         false, true};\r
99     static final String[] caseNames = {\r
100         "noLower", "lower"};\r
101     \r
102     public static void test(String testStr) throws Exception {\r
103         UnicodeSet original = new UnicodeSet(testStr);\r
104         \r
105         System.out.println("Testing Closure of: " + original.toPattern(true));\r
106         out.println("Testing Closure of: " + original.toPattern(false));\r
107         UnicodeSet raw = new UnicodeSet(original);\r
108         original.complement();\r
109         original.complement();\r
110         if (!raw.equals(original)) {\r
111             out.println("Equals:" + original.toPattern(false));\r
112         }\r
113         StringWriter swLog = null;\r
114         PrintWriter log = null;\r
115         \r
116         if (GENERATE) {\r
117             swLog = new StringWriter();\r
118             log = new PrintWriter(swLog);\r
119             log.println("static UnicodeSet[][] UNCHANGED = {");\r
120         }\r
121         for (int i = 0; i < testCases.length; ++i) {\r
122             if (GENERATE) log.println("    {");\r
123             for (int j = 0; j < testModes.length; ++j) {\r
124                 UnicodeSet test = new UnicodeSet(original);\r
125                 close(test, testModes[j], testCases[i]);\r
126                 if (GENERATE) {\r
127                     log.println("\tnew UnicodeSet(\"" + generatedSet.toPattern(true) + "\"),");\r
128                 }\r
129                 String label = caseNames[i]  + ", " + modeNames[j] ;\r
130                 System.out.println(label);\r
131                 out.println(label + ": " + test.toPattern(false));\r
132                 test.removeAll(original);\r
133                 if (test.isEmpty()) {\r
134                     out.println("\tNo Difference from original");\r
135                 } else {\r
136                     out.println("\tDifference = " + test.toPattern(false));\r
137                 }\r
138                 out.flush();\r
139             }\r
140             if (GENERATE) log.println("    },");\r
141             out.println();\r
142         }\r
143         \r
144         if (GENERATE) {\r
145             log.println("};");\r
146             out.print(swLog.getBuffer().toString());\r
147         }\r
148         \r
149         /*\r
150             close(test, Normalizer.DECOMP, false);\r
151             print("NFD", test);\r
152             \r
153             test = new UnicodeSet(testStr);\r
154             close(test, Normalizer.NO_OP, true);\r
155             print("Lower", test);\r
156 \r
157             test = new UnicodeSet(testStr);\r
158             close(test, Normalizer.COMPOSE, false);\r
159             print("NFC", test);\r
160 \r
161             test = new UnicodeSet(testStr);\r
162             close(test, Normalizer.DECOMP_COMPAT, false);\r
163             print("NFKD", test);\r
164 \r
165             test = new UnicodeSet(testStr);\r
166             close(test, Normalizer.COMPOSE_COMPAT, false);\r
167             print("NFKC", test);\r
168         */\r
169     }\r
170     \r
171     static PrintWriter out;\r
172     \r
173     /*\r
174     public static void print(String label, UnicodeSet test) {\r
175         System.out.println(label);\r
176         out.println(label + ": " + test.toPattern(false));\r
177         out.println();\r
178     }\r
179     */\r
180     \r
181     // dumb, slow implementations\r
182     public static class NFToString implements Char32ToString {\r
183         Normalizer.Mode mode;\r
184         boolean lowerFirst;\r
185         \r
186         NFToString(Normalizer.Mode m, boolean lowerFirst) {\r
187             mode = m;\r
188             this.lowerFirst = lowerFirst;\r
189         }\r
190         \r
191         public String get(int cp) {\r
192             String source = UTF16.valueOf(cp);\r
193             String result = source;\r
194             if (lowerFirst) result = UCharacter.toLowerCase(Locale.US, result);\r
195             result = Normalizer.normalize(result, mode);\r
196             if (lowerFirst) result = UCharacter.toLowerCase(Locale.US, result);\r
197             if (result.equals(source)) return null;\r
198             return result;\r
199         }\r
200     }\r
201         \r
202     \r
203     /** Returns a mapping from char32 to a string. If there is no change,\r
204      * null is returned.\r
205      */\r
206      \r
207     interface Char32ToString {\r
208         public String get(int cp);\r
209     }\r
210     \r
211     static boolean FAST = true;\r
212     \r
213     public static void close(UnicodeSet s, Normalizer.Mode m, boolean lowerFirst) {\r
214         Char32ToString f = new NFToString(m, lowerFirst);\r
215         if (FAST) {\r
216             int mm;\r
217             for (mm = 0; ; ++mm) if (m == testModes[mm]) break; // find mode\r
218             close2(s, f, lowerFirst ? 1 : 0, mm);\r
219             return;\r
220         }\r
221         close(s, f);\r
222     }\r
223     \r
224     public static void close(UnicodeSet s, Char32ToString f) {\r
225         if (GENERATE) generatedSet = new UnicodeSet();\r
226         \r
227         for (int cp = 0; cp <= 0x10FFFF; ++cp) {\r
228             int type = UCharacter.getType(cp);\r
229             if (type == Character.UNASSIGNED) continue;\r
230             \r
231             //if (cp == '\u00e7') {\r
232               //  System.out.println("debug");\r
233             //}\r
234             String result = f.get(cp);\r
235             if (result == null) continue;\r
236             if (GENERATE) {\r
237                 generatedSet.add(cp);\r
238             }\r
239             if (!containsSome(s, result)) continue;\r
240             s.add(cp);\r
241         }\r
242     }\r
243     \r
244     public static void close2(UnicodeSet s, Char32ToString f, int lc, int mode) {\r
245         UnicodeSet unchanged = new UnicodeSet(); // UNCHANGED[lc][mode];\r
246         int count = unchanged.getRangeCount();\r
247         for (int i = 0; i < count; ++i) {\r
248             int start = unchanged.getRangeStart(i);\r
249             int end = unchanged.getRangeEnd(i);\r
250             for (int cp = start; cp <= end; ++cp) {\r
251                 String result = f.get(cp);\r
252                 if (result == null) throw new IllegalArgumentException("Something wrong -- should never happen");\r
253                 if (!containsSome(s, result)) continue;\r
254                 s.add(cp);\r
255             }\r
256         }\r
257     }\r
258     \r
259     /*\r
260     static final UnicodeSet[][] UNCHANGED = {\r
261         { // \u1F80-\u1FB4\u1FB6-\u1FBC\u1FBE\u1FC1-\u1FC4\u1FC6-\u1FD3\u1FD6-\u1FDB\u1FDD-\u1FEF\u1FF2-\u1FF4\r
262         new UnicodeSet("[\u1FF6-\u1FFD\u2000-\u2001\u2126]"),\r
263         // \u212A-\u212B\u219A-\u219B\u21AE\u21CD-\u21CF\u2204\u2209\u220C\u2224\u2226\u2241\u2244\u2247\u2249\u2260\u2262\u226D-\u2271\u2274-\u2275\u2278\r
264         new UnicodeSet("[\u00C0-\u00C5\u00C7-\u00CF\u00D1-\u00D6\u00D9-\u00DD\u00E0-\u00E5\u00E7-\u00EF\u00F1-\u00F6\u00F9-\u00FD\u00FF-\u010F\u0112-\u0125\u0128-\u0130\u0134-\u0137\u0139-\u013E\u0143-\u0148\u014C-\u0151\u0154-\u0165\u0168-\u017E\u01A0-\u01A1\u01AF-\u01B0\u01CD-\u01DC\u01DE-\u01E3\u01E6-\u01F0\u01F4-\u01F5\u01F8-\u021B\u021E-\u021F\u0226-\u0233\u0340-\u0341\u0343-\u0344\u0374\u037E\u0385-\u038A\u038C\u038E-\u0390\u03AA-\u03B0\u03CA-\u03CE\u03D3-\u03D4\u0400-\u0401\u0403\u0407\u040C-\u040E\u0419\u0439\u0450-\u0451\u0453\u0457\u045C-\u045E\u0476-\u0477\u04C1-\u04C2\u04D0-\u04D3\u04D6-\u04D7\u04DA-\u04DF\u04E2-\u04E7\u04EA-\u04F5\u04F8-\u04F9\u0622-\u0626\u06C0\u06C2\u06D3\u0929\u0931\u0934\u0958-\u095F\u09CB-\u09CC\u09DC-\u09DD\u09DF\u0A33\u0A36\u0A59-\u0A5B\u0A5E\u0B48\u0B4B-\u0B4C\u0B5C-\u0B5D\u0B94\u0BCA-\u0BCC\u0C48\u0CC0\u0CC7-\u0CC8\u0CCA-\u0CCB\u0D4A-\u0D4C\u0DDA\u0DDC-\u0DDE\u0F43\u0F4D\u0F52\u0F57\u0F5C\u0F69\u0F73\u0F75-\u0F76\u0F78\u0F81\u0F93\u0F9D\u0FA2\u0FA7\u0FAC\u0FB9\u1026\u1E00-\u1E99\u1E9B\u1EA0-\u1EF9\u1F00-\u1F15\u1F18-\u1F1D\u1F20-\u1F45\u1F48-\u1F4D\u1F50-\u1F57\u1F59\u1F5B\u1F5D\u1F5F-\u1F7D\u1F80-\u1FB4\u1FB6-\u1FBC\u1FBE\u1FC1-\u1FC4\u1FC6-\u1FD3\u1FD6-\u1FDB\u1FDD-\u1FEF\u1FF2-\u1FF4\u1FF6-\u1FFD\u2000-\u2001\u2126\u212A-\u212B\u219A-\u219B\u21AE\u21CD-\u21CF\u2204\u2209\u220C\u2224\u2226\u2241\u2244\u2247\u2249\u2260\u2262\u226D-\u2271\u2274-\u2275\u2278-\u2279\u2280-\u2281\u2284-\u2285\u2288-\u2289\u22AC-\u22AF\u22E0-\u22E3\u22EA-\u22ED\u2329-\u232A\u304C\u304E\u3050\u3052\u3054\u3056\u3058\u305A\u305C\u305E\u3060\u3062\u3065\u3067\u3069\u3070-\u3071\u3073-\u3074\u3076-\u3077\u3079-\u307A\u307C-\u307D\u3094\u309E\u30AC\u30AE\u30B0\u30B2\u30B4\u30B6\u30B8\u30BA\u30BC\u30BE\u30C0\u30C2\u30C5\u30C7\u30C9\u30D0-\u30D1\u30D3-\u30D4\u30D6-\u30D7\u30D9-\u30DA\u30DC-\u30DD\u30F4\u30F7-\u30FA\u30FE\uAC00-\uD7A3\uF900-\uFA0D\uFA10\uFA12\uFA15-\uFA1E\uFA20\uFA22\uFA25-\uFA26\uFA2A-\uFA2D\uFB1D\uFB1F\uFB2A-\uFB36\uFB38-\uFB3C\uFB3E\uFB40-\uFB41\uFB43-\uFB44\uFB46-\uFB4E]"),\r
265         new UnicodeSet("[\u0340-\u0341\u0343-\u0344\u0374\u037E\u0387\u0958-\u095F\u09DC-\u09DD\u09DF\u0A33\u0A36\u0A59-\u0A5B\u0A5E\u0B5C-\u0B5D\u0F43\u0F4D\u0F52\u0F57\u0F5C\u0F69\u0F73\u0F75-\u0F76\u0F78\u0F81\u0F93\u0F9D\u0FA2\u0FA7\u0FAC\u0FB9\u1F71\u1F73\u1F75\u1F77\u1F79\u1F7B\u1F7D\u1FBB\u1FBE\u1FC9\u1FCB\u1FD3\u1FDB\u1FE3\u1FEB\u1FEE-\u1FEF\u1FF9\u1FFB\u1FFD\u2000-\u2001\u2126\u212A-\u212B\u2329-\u232A\uF900-\uFA0D\uFA10\uFA12\uFA15-\uFA1E\uFA20\uFA22\uFA25-\uFA26\uFA2A-\uFA2D\uFB1F\uFB2A-\uFB36\uFB38-\uFB3C\uFB3E\uFB40-\uFB41\uFB43-\uFB44\uFB46-\uFB4E]"),\r
266         new UnicodeSet("[\u00A0\u00A8\u00AA\u00AF\u00B2-\u00B5\u00B8-\u00BA\u00BC-\u00BE\u00C0-\u00C5\u00C7-\u00CF\u00D1-\u00D6\u00D9-\u00DD\u00E0-\u00E5\u00E7-\u00EF\u00F1-\u00F6\u00F9-\u00FD\u00FF-\u010F\u0112-\u0125\u0128-\u0130\u0132-\u0137\u0139-\u0140\u0143-\u0149\u014C-\u0151\u0154-\u0165\u0168-\u017F\u01A0-\u01A1\u01AF-\u01B0\u01C4-\u01DC\u01DE-\u01E3\u01E6-\u01F5\u01F8-\u021B\u021E-\u021F\u0226-\u0233\u02B0-\u02B8\u02D8-\u02DD\u02E0-\u02E4\u0340-\u0341\u0343-\u0344\u0374\u037A\u037E\u0384-\u038A\u038C\u038E-\u0390\u03AA-\u03B0\u03CA-\u03CE\u03D0-\u03D6\u03F0-\u03F2\u0400-\u0401\u0403\u0407\u040C-\u040E\u0419\u0439\u0450-\u0451\u0453\u0457\u045C-\u045E\u0476-\u0477\u04C1-\u04C2\u04D0-\u04D3\u04D6-\u04D7\u04DA-\u04DF\u04E2-\u04E7\u04EA-\u04F5\u04F8-\u04F9\u0587\u0622-\u0626\u0675-\u0678\u06C0\u06C2\u06D3\u0929\u0931\u0934\u0958-\u095F\u09CB-\u09CC\u09DC-\u09DD\u09DF\u0A33\u0A36\u0A59-\u0A5B\u0A5E\u0B48\u0B4B-\u0B4C\u0B5C-\u0B5D\u0B94\u0BCA-\u0BCC\u0C48\u0CC0\u0CC7-\u0CC8\u0CCA-\u0CCB\u0D4A-\u0D4C\u0DDA\u0DDC-\u0DDE\u0E33\u0EB3\u0EDC-\u0EDD\u0F0C\u0F43\u0F4D\u0F52\u0F57\u0F5C\u0F69\u0F73\u0F75-\u0F79\u0F81\u0F93\u0F9D\u0FA2\u0FA7\u0FAC\u0FB9\u1026\u1E00-\u1E9B\u1EA0-\u1EF9\u1F00-\u1F15\u1F18-\u1F1D\u1F20-\u1F45\u1F48-\u1F4D\u1F50-\u1F57\u1F59\u1F5B\u1F5D\u1F5F-\u1F7D\u1F80-\u1FB4\u1FB6-\u1FC4\u1FC6-\u1FD3\u1FD6-\u1FDB\u1FDD-\u1FEF\u1FF2-\u1FF4\u1FF6-\u1FFE\u2000-\u200A\u2011\u2017\u2024-\u2026\u202F\u2033-\u2034\u2036-\u2037\u203C\u203E\u2048-\u2049\u2070\u2074-\u208E\u20A8\u2100-\u2103\u2105-\u2107\u2109-\u2113\u2115-\u2116\u2119-\u211D\u2120-\u2122\u2124\u2126\u2128\u212A-\u212D\u212F-\u2131\u2133-\u2139\u2153-\u217F\u219A-\u219B\u21AE\u21CD-\u21CF\u2204\u2209\u220C\u2224\u2226\u222C-\u222D\u222F-\u2230\u2241\u2244\u2247\u2249\u2260\u2262\u226D-\u2271\u2274-\u2275\u2278-\u2279\u2280-\u2281\u2284-\u2285\u2288-\u2289\u22AC-\u22AF\u22E0-\u22E3\u22EA-\u22ED\u2329-\u232A\u2460-\u24EA\u2E9F\u2EF3\u2F00-\u2FD5\u3000\u3036\u3038-\u303A\u304C\u304E\u3050\u3052\u3054\u3056\u3058\u305A\u305C\u305E\u3060\u3062\u3065\u3067\u3069\u3070-\u3071\u3073-\u3074\u3076-\u3077\u3079-\u307A\u307C-\u307D\u3094\u309B-\u309C\u309E\u30AC\u30AE\u30B0\u30B2\u30B4\u30B6\u30B8\u30BA\u30BC\u30BE\u30C0\u30C2\u30C5\u30C7\u30C9\u30D0-\u30D1\u30D3-\u30D4\u30D6-\u30D7\u30D9-\u30DA\u30DC-\u30DD\u30F4\u30F7-\u30FA\u30FE\u3131-\u318E\u3192-\u319F\u3200-\u321C\u3220-\u3243\u3260-\u327B\u3280-\u32B0\u32C0-\u32CB\u32D0-\u32FE\u3300-\u3376\u337B-\u33DD\u33E0-\u33FE\uAC00-\uD7A3\uF900-\uFA0D\uFA10\uFA12\uFA15-\uFA1E\uFA20\uFA22\uFA25-\uFA26\uFA2A-\uFA2D\uFB00-\uFB06\uFB13-\uFB17\uFB1D\uFB1F-\uFB36\uFB38-\uFB3C\uFB3E\uFB40-\uFB41\uFB43-\uFB44\uFB46-\uFBB1\uFBD3-\uFD3D\uFD50-\uFD8F\uFD92-\uFDC7\uFDF0-\uFDFB\uFE30-\uFE44\uFE49-\uFE52\uFE54-\uFE66\uFE68-\uFE6B\uFE70-\uFE72\uFE74\uFE76-\uFEFC\uFF01-\uFF5E\uFF61-\uFFBE\uFFC2-\uFFC7\uFFCA-\uFFCF\uFFD2-\uFFD7\uFFDA-\uFFDC\uFFE0-\uFFE6\uFFE8-\uFFEE]"),\r
267         new UnicodeSet("[\u00A0\u00A8\u00AA\u00AF\u00B2-\u00B5\u00B8-\u00BA\u00BC-\u00BE\u0132-\u0133\u013F-\u0140\u0149\u017F\u01C4-\u01CC\u01F1-\u01F3\u02B0-\u02B8\u02D8-\u02DD\u02E0-\u02E4\u0340-\u0341\u0343-\u0344\u0374\u037A\u037E\u0384-\u0385\u0387\u03D0-\u03D6\u03F0-\u03F2\u0587\u0675-\u0678\u0958-\u095F\u09DC-\u09DD\u09DF\u0A33\u0A36\u0A59-\u0A5B\u0A5E\u0B5C-\u0B5D\u0E33\u0EB3\u0EDC-\u0EDD\u0F0C\u0F43\u0F4D\u0F52\u0F57\u0F5C\u0F69\u0F73\u0F75-\u0F79\u0F81\u0F93\u0F9D\u0FA2\u0FA7\u0FAC\u0FB9\u1E9A-\u1E9B\u1F71\u1F73\u1F75\u1F77\u1F79\u1F7B\u1F7D\u1FBB\u1FBD-\u1FC1\u1FC9\u1FCB\u1FCD-\u1FCF\u1FD3\u1FDB\u1FDD-\u1FDF\u1FE3\u1FEB\u1FED-\u1FEF\u1FF9\u1FFB\u1FFD-\u1FFE\u2000-\u200A\u2011\u2017\u2024-\u2026\u202F\u2033-\u2034\u2036-\u2037\u203C\u203E\u2048-\u2049\u2070\u2074-\u208E\u20A8\u2100-\u2103\u2105-\u2107\u2109-\u2113\u2115-\u2116\u2119-\u211D\u2120-\u2122\u2124\u2126\u2128\u212A-\u212D\u212F-\u2131\u2133-\u2139\u2153-\u217F\u222C-\u222D\u222F-\u2230\u2329-\u232A\u2460-\u24EA\u2E9F\u2EF3\u2F00-\u2FD5\u3000\u3036\u3038-\u303A\u309B-\u309C\u3131-\u318E\u3192-\u319F\u3200-\u321C\u3220-\u3243\u3260-\u327B\u3280-\u32B0\u32C0-\u32CB\u32D0-\u32FE\u3300-\u3376\u337B-\u33DD\u33E0-\u33FE\uF900-\uFA0D\uFA10\uFA12\uFA15-\uFA1E\uFA20\uFA22\uFA25-\uFA26\uFA2A-\uFA2D\uFB00-\uFB06\uFB13-\uFB17\uFB1F-\uFB36\uFB38-\uFB3C\uFB3E\uFB40-\uFB41\uFB43-\uFB44\uFB46-\uFBB1\uFBD3-\uFD3D\uFD50-\uFD8F\uFD92-\uFDC7\uFDF0-\uFDFB\uFE30-\uFE44\uFE49-\uFE52\uFE54-\uFE66\uFE68-\uFE6B\uFE70-\uFE72\uFE74\uFE76-\uFEFC\uFF01-\uFF5E\uFF61-\uFFBE\uFFC2-\uFFC7\uFFCA-\uFFCF\uFFD2-\uFFD7\uFFDA-\uFFDC\uFFE0-\uFFE6\uFFE8-\uFFEE]"),\r
268         },\r
269         {\r
270         new UnicodeSet("[A-Z\u00B2-\u00B3\u00B5\u00B9\u00BC-\u00BE\u00C0-\u00D6\u00D8-\u00DE\u0100\u0102\u0104\u0106\u0108\u010A\u010C\u010E\u0110\u0112\u0114\u0116\u0118\u011A\u011C\u011E\u0120\u0122\u0124\u0126\u0128\u012A\u012C\u012E\u0130-\u0132\u0134\u0136\u0139\u013B\u013D\u013F\u0141\u0143\u0145\u0147\u014A\u014C\u014E\u0150\u0152\u0154\u0156\u0158\u015A\u015C\u015E\u0160\u0162\u0164\u0166\u0168\u016A\u016C\u016E\u0170\u0172\u0174\u0176\u0178-\u0179\u017B\u017D\u017F\u0181-\u0182\u0184\u0186-\u0187\u0189-\u018B\u018E-\u0191\u0193-\u0194\u0196-\u0198\u019C-\u019D\u019F-\u01A0\u01A2\u01A4\u01A6-\u01A7\u01A9\u01AC\u01AE-\u01AF\u01B1-\u01B3\u01B5\u01B7-\u01B8\u01BC\u01C4-\u01CD\u01CF\u01D1\u01D3\u01D5\u01D7\u01D9\u01DB\u01DE\u01E0\u01E2\u01E4\u01E6\u01E8\u01EA\u01EC\u01EE\u01F1-\u01F4\u01F6-\u01F8\u01FA\u01FC\u01FE\u0200\u0202\u0204\u0206\u0208\u020A\u020C\u020E\u0210\u0212\u0214\u0216\u0218\u021A\u021C\u021E\u0222\u0224\u0226\u0228\u022A\u022C\u022E\u0230\u0232\u0345\u0386\u0388-\u038A\u038C\u038E-\u038F\u0391-\u03A1\u03A3-\u03AB\u03C2\u03D0-\u03D1\u03D5-\u03D6\u03DA\u03DC\u03DE\u03E0\u03E2\u03E4\u03E6\u03E8\u03EA\u03EC\u03EE\u03F0-\u03F2\u03F4-\u03F5\u0400-\u042F\u0460\u0462\u0464\u0466\u0468\u046A\u046C\u046E\u0470\u0472\u0474\u0476\u0478\u047A\u047C\u047E\u0480\u048C\u048E\u0490\u0492\u0494\u0496\u0498\u049A\u049C\u049E\u04A0\u04A2\u04A4\u04A6\u04A8\u04AA\u04AC\u04AE\u04B0\u04B2\u04B4\u04B6\u04B8\u04BA\u04BC\u04BE\u04C1\u04C3\u04C7\u04CB\u04D0\u04D2\u04D4\u04D6\u04D8\u04DA\u04DC\u04DE\u04E0\u04E2\u04E4\u04E6\u04E8\u04EA\u04EC\u04EE\u04F0\u04F2\u04F4\u04F8\u0531-\u0556\u09F8\u0F2A-\u0F33\u137C\u1E00\u1E02\u1E04\u1E06\u1E08\u1E0A\u1E0C\u1E0E\u1E10\u1E12\u1E14\u1E16\u1E18\u1E1A\u1E1C\u1E1E\u1E20\u1E22\u1E24\u1E26\u1E28\u1E2A\u1E2C\u1E2E\u1E30\u1E32\u1E34\u1E36\u1E38\u1E3A\u1E3C\u1E3E\u1E40\u1E42\u1E44\u1E46\u1E48\u1E4A\u1E4C\u1E4E\u1E50\u1E52\u1E54\u1E56\u1E58\u1E5A\u1E5C\u1E5E\u1E60\u1E62\u1E64\u1E66\u1E68\u1E6A\u1E6C\u1E6E\u1E70\u1E72\u1E74\u1E76\u1E78\u1E7A\u1E7C\u1E7E\u1E80\u1E82\u1E84\u1E86\u1E88\u1E8A\u1E8C\u1E8E\u1E90\u1E92\u1E94\u1E9B\u1EA0\u1EA2\u1EA4\u1EA6\u1EA8\u1EAA\u1EAC\u1EAE\u1EB0\u1EB2\u1EB4\u1EB6\u1EB8\u1EBA\u1EBC\u1EBE\u1EC0\u1EC2\u1EC4\u1EC6\u1EC8\u1ECA\u1ECC\u1ECE\u1ED0\u1ED2\u1ED4\u1ED6\u1ED8\u1EDA\u1EDC\u1EDE\u1EE0\u1EE2\u1EE4\u1EE6\u1EE8\u1EEA\u1EEC\u1EEE\u1EF0\u1EF2\u1EF4\u1EF6\u1EF8\u1F08-\u1F0F\u1F18-\u1F1D\u1F28-\u1F2F\u1F38-\u1F3F\u1F48-\u1F4D\u1F59\u1F5B\u1F5D\u1F5F\u1F68-\u1F6F\u1F88-\u1F8F\u1F98-\u1F9F\u1FA8-\u1FAF\u1FB8-\u1FBC\u1FBE\u1FC8-\u1FCC\u1FD8-\u1FDB\u1FE8-\u1FEC\u1FF8-\u1FFC\u2070\u2074-\u2079\u2080-\u2089\u2126\u212A-\u212B\u2153-\u215E\u2160-\u217F\u2181-\u2183\u2460-\u2468\u2474-\u247C\u2488-\u2490\u24B6-\u24EA\u2776-\u277E\u2780-\u2788\u278A-\u2792\uFF21-\uFF3A\\U0001034A\\U00010400-\\U00010425\\U0001D165-\\U0001D166\\U0001D16D-\\U0001D172]"),\r
271         new UnicodeSet("[A-Z\u00B2-\u00B3\u00B5\u00B9\u00BC-\u00BE\u00C0-\u00D6\u00D8-\u00DE\u00E0-\u00E5\u00E7-\u00EF\u00F1-\u00F6\u00F9-\u00FD\u00FF-\u0110\u0112-\u0126\u0128-\u0132\u0134-\u0137\u0139-\u013F\u0141\u0143-\u0148\u014A\u014C-\u0152\u0154-\u0166\u0168-\u017F\u0181-\u0182\u0184\u0186-\u0187\u0189-\u018B\u018E-\u0191\u0193-\u0194\u0196-\u0198\u019C-\u019D\u019F-\u01A2\u01A4\u01A6-\u01A7\u01A9\u01AC\u01AE-\u01B3\u01B5\u01B7-\u01B8\u01BC\u01C4-\u01DC\u01DE-\u01E4\u01E6-\u021C\u021E-\u021F\u0222\u0224\u0226-\u0233\u0340-\u0341\u0343-\u0345\u0374\u037E\u0385-\u038A\u038C\u038E-\u03A1\u03A3-\u03B0\u03C2\u03CA-\u03CE\u03D0-\u03D1\u03D3-\u03D6\u03DA\u03DC\u03DE\u03E0\u03E2\u03E4\u03E6\u03E8\u03EA\u03EC\u03EE\u03F0-\u03F2\u03F4-\u03F5\u0400-\u042F\u0439\u0450-\u0451\u0453\u0457\u045C-\u045E\u0460\u0462\u0464\u0466\u0468\u046A\u046C\u046E\u0470\u0472\u0474\u0476-\u0478\u047A\u047C\u047E\u0480\u048C\u048E\u0490\u0492\u0494\u0496\u0498\u049A\u049C\u049E\u04A0\u04A2\u04A4\u04A6\u04A8\u04AA\u04AC\u04AE\u04B0\u04B2\u04B4\u04B6\u04B8\u04BA\u04BC\u04BE\u04C1-\u04C3\u04C7\u04CB\u04D0-\u04D4\u04D6-\u04D8\u04DA-\u04E0\u04E2-\u04E8\u04EA-\u04F5\u04F8-\u04F9\u0531-\u0556\u0622-\u0626\u06C0\u06C2\u06D3\u0929\u0931\u0934\u0958-\u095F\u09CB-\u09CC\u09DC-\u09DD\u09DF\u09F8\u0A33\u0A36\u0A59-\u0A5B\u0A5E\u0B48\u0B4B-\u0B4C\u0B5C-\u0B5D\u0B94\u0BCA-\u0BCC\u0C48\u0CC0\u0CC7-\u0CC8\u0CCA-\u0CCB\u0D4A-\u0D4C\u0DDA\u0DDC-\u0DDE\u0F2A-\u0F33\u0F43\u0F4D\u0F52\u0F57\u0F5C\u0F69\u0F73\u0F75-\u0F76\u0F78\u0F81\u0F93\u0F9D\u0FA2\u0FA7\u0FAC\u0FB9\u1026\u137C\u1E00-\u1E99\u1E9B\u1EA0-\u1EF9\u1F00-\u1F15\u1F18-\u1F1D\u1F20-\u1F45\u1F48-\u1F4D\u1F50-\u1F57\u1F59\u1F5B\u1F5D\u1F5F-\u1F7D\u1F80-\u1FB4\u1FB6-\u1FBC\u1FBE\u1FC1-\u1FC4\u1FC6-\u1FD3\u1FD6-\u1FDB\u1FDD-\u1FEF\u1FF2-\u1FF4\u1FF6-\u1FFD\u2000-\u2001\u2070\u2074-\u2079\u2080-\u2089\u2126\u212A-\u212B\u2153-\u215E\u2160-\u217F\u2181-\u2183\u219A-\u219B\u21AE\u21CD-\u21CF\u2204\u2209\u220C\u2224\u2226\u2241\u2244\u2247\u2249\u2260\u2262\u226D-\u2271\u2274-\u2275\u2278-\u2279\u2280-\u2281\u2284-\u2285\u2288-\u2289\u22AC-\u22AF\u22E0-\u22E3\u22EA-\u22ED\u2329-\u232A\u2460-\u2468\u2474-\u247C\u2488-\u2490\u24B6-\u24EA\u2776-\u277E\u2780-\u2788\u278A-\u2792\u304C\u304E\u3050\u3052\u3054\u3056\u3058\u305A\u305C\u305E\u3060\u3062\u3065\u3067\u3069\u3070-\u3071\u3073-\u3074\u3076-\u3077\u3079-\u307A\u307C-\u307D\u3094\u309E\u30AC\u30AE\u30B0\u30B2\u30B4\u30B6\u30B8\u30BA\u30BC\u30BE\u30C0\u30C2\u30C5\u30C7\u30C9\u30D0-\u30D1\u30D3-\u30D4\u30D6-\u30D7\u30D9-\u30DA\u30DC-\u30DD\u30F4\u30F7-\u30FA\u30FE\uAC00-\uD7A3\uF900-\uFA0D\uFA10\uFA12\uFA15-\uFA1E\uFA20\uFA22\uFA25-\uFA26\uFA2A-\uFA2D\uFB1D\uFB1F\uFB2A-\uFB36\uFB38-\uFB3C\uFB3E\uFB40-\uFB41\uFB43-\uFB44\uFB46-\uFB4E\uFF21-\uFF3A\\U0001034A\\U00010400-\\U00010425\\U0001D165-\\U0001D166\\U0001D16D-\\U0001D172]"),\r
272         new UnicodeSet("[A-Z\u00B2-\u00B3\u00B5\u00B9\u00BC-\u00BE\u00C0-\u00D6\u00D8-\u00DE\u0100\u0102\u0104\u0106\u0108\u010A\u010C\u010E\u0110\u0112\u0114\u0116\u0118\u011A\u011C\u011E\u0120\u0122\u0124\u0126\u0128\u012A\u012C\u012E\u0130-\u0132\u0134\u0136\u0139\u013B\u013D\u013F\u0141\u0143\u0145\u0147\u014A\u014C\u014E\u0150\u0152\u0154\u0156\u0158\u015A\u015C\u015E\u0160\u0162\u0164\u0166\u0168\u016A\u016C\u016E\u0170\u0172\u0174\u0176\u0178-\u0179\u017B\u017D\u017F\u0181-\u0182\u0184\u0186-\u0187\u0189-\u018B\u018E-\u0191\u0193-\u0194\u0196-\u0198\u019C-\u019D\u019F-\u01A0\u01A2\u01A4\u01A6-\u01A7\u01A9\u01AC\u01AE-\u01AF\u01B1-\u01B3\u01B5\u01B7-\u01B8\u01BC\u01C4-\u01CD\u01CF\u01D1\u01D3\u01D5\u01D7\u01D9\u01DB\u01DE\u01E0\u01E2\u01E4\u01E6\u01E8\u01EA\u01EC\u01EE\u01F1-\u01F4\u01F6-\u01F8\u01FA\u01FC\u01FE\u0200\u0202\u0204\u0206\u0208\u020A\u020C\u020E\u0210\u0212\u0214\u0216\u0218\u021A\u021C\u021E\u0222\u0224\u0226\u0228\u022A\u022C\u022E\u0230\u0232\u0340-\u0341\u0343-\u0345\u0374\u037E\u0386-\u038A\u038C\u038E-\u038F\u0391-\u03A1\u03A3-\u03AB\u03C2\u03D0-\u03D1\u03D5-\u03D6\u03DA\u03DC\u03DE\u03E0\u03E2\u03E4\u03E6\u03E8\u03EA\u03EC\u03EE\u03F0-\u03F2\u03F4-\u03F5\u0400-\u042F\u0460\u0462\u0464\u0466\u0468\u046A\u046C\u046E\u0470\u0472\u0474\u0476\u0478\u047A\u047C\u047E\u0480\u048C\u048E\u0490\u0492\u0494\u0496\u0498\u049A\u049C\u049E\u04A0\u04A2\u04A4\u04A6\u04A8\u04AA\u04AC\u04AE\u04B0\u04B2\u04B4\u04B6\u04B8\u04BA\u04BC\u04BE\u04C1\u04C3\u04C7\u04CB\u04D0\u04D2\u04D4\u04D6\u04D8\u04DA\u04DC\u04DE\u04E0\u04E2\u04E4\u04E6\u04E8\u04EA\u04EC\u04EE\u04F0\u04F2\u04F4\u04F8\u0531-\u0556\u0958-\u095F\u09DC-\u09DD\u09DF\u09F8\u0A33\u0A36\u0A59-\u0A5B\u0A5E\u0B5C-\u0B5D\u0F2A-\u0F33\u0F43\u0F4D\u0F52\u0F57\u0F5C\u0F69\u0F73\u0F75-\u0F76\u0F78\u0F81\u0F93\u0F9D\u0FA2\u0FA7\u0FAC\u0FB9\u137C\u1E00\u1E02\u1E04\u1E06\u1E08\u1E0A\u1E0C\u1E0E\u1E10\u1E12\u1E14\u1E16\u1E18\u1E1A\u1E1C\u1E1E\u1E20\u1E22\u1E24\u1E26\u1E28\u1E2A\u1E2C\u1E2E\u1E30\u1E32\u1E34\u1E36\u1E38\u1E3A\u1E3C\u1E3E\u1E40\u1E42\u1E44\u1E46\u1E48\u1E4A\u1E4C\u1E4E\u1E50\u1E52\u1E54\u1E56\u1E58\u1E5A\u1E5C\u1E5E\u1E60\u1E62\u1E64\u1E66\u1E68\u1E6A\u1E6C\u1E6E\u1E70\u1E72\u1E74\u1E76\u1E78\u1E7A\u1E7C\u1E7E\u1E80\u1E82\u1E84\u1E86\u1E88\u1E8A\u1E8C\u1E8E\u1E90\u1E92\u1E94\u1E9B\u1EA0\u1EA2\u1EA4\u1EA6\u1EA8\u1EAA\u1EAC\u1EAE\u1EB0\u1EB2\u1EB4\u1EB6\u1EB8\u1EBA\u1EBC\u1EBE\u1EC0\u1EC2\u1EC4\u1EC6\u1EC8\u1ECA\u1ECC\u1ECE\u1ED0\u1ED2\u1ED4\u1ED6\u1ED8\u1EDA\u1EDC\u1EDE\u1EE0\u1EE2\u1EE4\u1EE6\u1EE8\u1EEA\u1EEC\u1EEE\u1EF0\u1EF2\u1EF4\u1EF6\u1EF8\u1F08-\u1F0F\u1F18-\u1F1D\u1F28-\u1F2F\u1F38-\u1F3F\u1F48-\u1F4D\u1F59\u1F5B\u1F5D\u1F5F\u1F68-\u1F6F\u1F71\u1F73\u1F75\u1F77\u1F79\u1F7B\u1F7D\u1F88-\u1F8F\u1F98-\u1F9F\u1FA8-\u1FAF\u1FB8-\u1FBC\u1FBE\u1FC8-\u1FCC\u1FD3\u1FD8-\u1FDB\u1FE3\u1FE8-\u1FEC\u1FEE-\u1FEF\u1FF8-\u1FFD\u2000-\u2001\u2070\u2074-\u2079\u2080-\u2089\u2126\u212A-\u212B\u2153-\u215E\u2160-\u217F\u2181-\u2183\u2329-\u232A\u2460-\u2468\u2474-\u247C\u2488-\u2490\u24B6-\u24EA\u2776-\u277E\u2780-\u2788\u278A-\u2792\uF900-\uFA0D\uFA10\uFA12\uFA15-\uFA1E\uFA20\uFA22\uFA25-\uFA26\uFA2A-\uFA2D\uFB1F\uFB2A-\uFB36\uFB38-\uFB3C\uFB3E\uFB40-\uFB41\uFB43-\uFB44\uFB46-\uFB4E\uFF21-\uFF3A\\U0001034A\\U00010400-\\U00010425\\U0001D165-\\U0001D166\\U0001D16D-\\U0001D172]"),\r
273         new UnicodeSet("[A-Z\u00A0\u00A8\u00AA\u00AF\u00B2-\u00B5\u00B8-\u00BA\u00BC-\u00BE\u00C0-\u00D6\u00D8-\u00DE\u00E0-\u00E5\u00E7-\u00EF\u00F1-\u00F6\u00F9-\u00FD\u00FF-\u0110\u0112-\u0126\u0128-\u0137\u0139-\u0141\u0143-\u014A\u014C-\u0152\u0154-\u0166\u0168-\u017F\u0181-\u0182\u0184\u0186-\u0187\u0189-\u018B\u018E-\u0191\u0193-\u0194\u0196-\u0198\u019C-\u019D\u019F-\u01A2\u01A4\u01A6-\u01A7\u01A9\u01AC\u01AE-\u01B3\u01B5\u01B7-\u01B8\u01BC\u01C4-\u01DC\u01DE-\u01E4\u01E6-\u021C\u021E-\u021F\u0222\u0224\u0226-\u0233\u02B0-\u02B8\u02D8-\u02DD\u02E0-\u02E4\u0340-\u0341\u0343-\u0345\u0374\u037A\u037E\u0384-\u038A\u038C\u038E-\u03A1\u03A3-\u03B0\u03C2\u03CA-\u03CE\u03D0-\u03D6\u03DA\u03DC\u03DE\u03E0\u03E2\u03E4\u03E6\u03E8\u03EA\u03EC\u03EE\u03F0-\u03F2\u03F4-\u03F5\u0400-\u042F\u0439\u0450-\u0451\u0453\u0457\u045C-\u045E\u0460\u0462\u0464\u0466\u0468\u046A\u046C\u046E\u0470\u0472\u0474\u0476-\u0478\u047A\u047C\u047E\u0480\u048C\u048E\u0490\u0492\u0494\u0496\u0498\u049A\u049C\u049E\u04A0\u04A2\u04A4\u04A6\u04A8\u04AA\u04AC\u04AE\u04B0\u04B2\u04B4\u04B6\u04B8\u04BA\u04BC\u04BE\u04C1-\u04C3\u04C7\u04CB\u04D0-\u04D4\u04D6-\u04D8\u04DA-\u04E0\u04E2-\u04E8\u04EA-\u04F5\u04F8-\u04F9\u0531-\u0556\u0587\u0622-\u0626\u0675-\u0678\u06C0\u06C2\u06D3\u0929\u0931\u0934\u0958-\u095F\u09CB-\u09CC\u09DC-\u09DD\u09DF\u09F8\u0A33\u0A36\u0A59-\u0A5B\u0A5E\u0B48\u0B4B-\u0B4C\u0B5C-\u0B5D\u0B94\u0BCA-\u0BCC\u0C48\u0CC0\u0CC7-\u0CC8\u0CCA-\u0CCB\u0D4A-\u0D4C\u0DDA\u0DDC-\u0DDE\u0E33\u0EB3\u0EDC-\u0EDD\u0F0C\u0F2A-\u0F33\u0F43\u0F4D\u0F52\u0F57\u0F5C\u0F69\u0F73\u0F75-\u0F79\u0F81\u0F93\u0F9D\u0FA2\u0FA7\u0FAC\u0FB9\u1026\u137C\u1E00-\u1E9B\u1EA0-\u1EF9\u1F00-\u1F15\u1F18-\u1F1D\u1F20-\u1F45\u1F48-\u1F4D\u1F50-\u1F57\u1F59\u1F5B\u1F5D\u1F5F-\u1F7D\u1F80-\u1FB4\u1FB6-\u1FC4\u1FC6-\u1FD3\u1FD6-\u1FDB\u1FDD-\u1FEF\u1FF2-\u1FF4\u1FF6-\u1FFE\u2000-\u200A\u2011\u2017\u2024-\u2026\u202F\u2033-\u2034\u2036-\u2037\u203C\u203E\u2048-\u2049\u2070\u2074-\u208E\u20A8\u2100-\u2103\u2105-\u2107\u2109-\u2113\u2115-\u2116\u2119-\u211D\u2120-\u2122\u2124\u2126\u2128\u212A-\u212D\u212F-\u2131\u2133-\u2139\u2153-\u217F\u2181-\u2183\u219A-\u219B\u21AE\u21CD-\u21CF\u2204\u2209\u220C\u2224\u2226\u222C-\u222D\u222F-\u2230\u2241\u2244\u2247\u2249\u2260\u2262\u226D-\u2271\u2274-\u2275\u2278-\u2279\u2280-\u2281\u2284-\u2285\u2288-\u2289\u22AC-\u22AF\u22E0-\u22E3\u22EA-\u22ED\u2329-\u232A\u2460-\u24EA\u2776-\u277E\u2780-\u2788\u278A-\u2792\u2E9F\u2EF3\u2F00-\u2FD5\u3000\u3036\u3038-\u303A\u304C\u304E\u3050\u3052\u3054\u3056\u3058\u305A\u305C\u305E\u3060\u3062\u3065\u3067\u3069\u3070-\u3071\u3073-\u3074\u3076-\u3077\u3079-\u307A\u307C-\u307D\u3094\u309B-\u309C\u309E\u30AC\u30AE\u30B0\u30B2\u30B4\u30B6\u30B8\u30BA\u30BC\u30BE\u30C0\u30C2\u30C5\u30C7\u30C9\u30D0-\u30D1\u30D3-\u30D4\u30D6-\u30D7\u30D9-\u30DA\u30DC-\u30DD\u30F4\u30F7-\u30FA\u30FE\u3131-\u318E\u3192-\u319F\u3200-\u321C\u3220-\u3243\u3260-\u327B\u3280-\u32B0\u32C0-\u32CB\u32D0-\u32FE\u3300-\u3376\u337B-\u33DD\u33E0-\u33FE\uAC00-\uD7A3\uF900-\uFA0D\uFA10\uFA12\uFA15-\uFA1E\uFA20\uFA22\uFA25-\uFA26\uFA2A-\uFA2D\uFB00-\uFB06\uFB13-\uFB17\uFB1D\uFB1F-\uFB36\uFB38-\uFB3C\uFB3E\uFB40-\uFB41\uFB43-\uFB44\uFB46-\uFBB1\uFBD3-\uFD3D\uFD50-\uFD8F\uFD92-\uFDC7\uFDF0-\uFDFB\uFE30-\uFE44\uFE49-\uFE52\uFE54-\uFE66\uFE68-\uFE6B\uFE70-\uFE72\uFE74\uFE76-\uFEFC\uFF01-\uFF5E\uFF61-\uFFBE\uFFC2-\uFFC7\uFFCA-\uFFCF\uFFD2-\uFFD7\uFFDA-\uFFDC\uFFE0-\uFFE6\uFFE8-\uFFEE\\U0001034A\\U00010400-\\U00010425\\U0001D165-\\U0001D166\\U0001D16D-\\U0001D172]"),\r
274         new UnicodeSet("[A-Z\u00A0\u00A8\u00AA\u00AF\u00B2-\u00B5\u00B8-\u00BA\u00BC-\u00BE\u00C0-\u00D6\u00D8-\u00DE\u0100\u0102\u0104\u0106\u0108\u010A\u010C\u010E\u0110\u0112\u0114\u0116\u0118\u011A\u011C\u011E\u0120\u0122\u0124\u0126\u0128\u012A\u012C\u012E\u0130-\u0134\u0136\u0139\u013B\u013D\u013F-\u0141\u0143\u0145\u0147\u0149-\u014A\u014C\u014E\u0150\u0152\u0154\u0156\u0158\u015A\u015C\u015E\u0160\u0162\u0164\u0166\u0168\u016A\u016C\u016E\u0170\u0172\u0174\u0176\u0178-\u0179\u017B\u017D\u017F\u0181-\u0182\u0184\u0186-\u0187\u0189-\u018B\u018E-\u0191\u0193-\u0194\u0196-\u0198\u019C-\u019D\u019F-\u01A0\u01A2\u01A4\u01A6-\u01A7\u01A9\u01AC\u01AE-\u01AF\u01B1-\u01B3\u01B5\u01B7-\u01B8\u01BC\u01C4-\u01CD\u01CF\u01D1\u01D3\u01D5\u01D7\u01D9\u01DB\u01DE\u01E0\u01E2\u01E4\u01E6\u01E8\u01EA\u01EC\u01EE\u01F1-\u01F4\u01F6-\u01F8\u01FA\u01FC\u01FE\u0200\u0202\u0204\u0206\u0208\u020A\u020C\u020E\u0210\u0212\u0214\u0216\u0218\u021A\u021C\u021E\u0222\u0224\u0226\u0228\u022A\u022C\u022E\u0230\u0232\u02B0-\u02B8\u02D8-\u02DD\u02E0-\u02E4\u0340-\u0341\u0343-\u0345\u0374\u037A\u037E\u0384-\u038A\u038C\u038E-\u038F\u0391-\u03A1\u03A3-\u03AB\u03C2\u03D0-\u03D6\u03DA\u03DC\u03DE\u03E0\u03E2\u03E4\u03E6\u03E8\u03EA\u03EC\u03EE\u03F0-\u03F2\u03F4-\u03F5\u0400-\u042F\u0460\u0462\u0464\u0466\u0468\u046A\u046C\u046E\u0470\u0472\u0474\u0476\u0478\u047A\u047C\u047E\u0480\u048C\u048E\u0490\u0492\u0494\u0496\u0498\u049A\u049C\u049E\u04A0\u04A2\u04A4\u04A6\u04A8\u04AA\u04AC\u04AE\u04B0\u04B2\u04B4\u04B6\u04B8\u04BA\u04BC\u04BE\u04C1\u04C3\u04C7\u04CB\u04D0\u04D2\u04D4\u04D6\u04D8\u04DA\u04DC\u04DE\u04E0\u04E2\u04E4\u04E6\u04E8\u04EA\u04EC\u04EE\u04F0\u04F2\u04F4\u04F8\u0531-\u0556\u0587\u0675-\u0678\u0958-\u095F\u09DC-\u09DD\u09DF\u09F8\u0A33\u0A36\u0A59-\u0A5B\u0A5E\u0B5C-\u0B5D\u0E33\u0EB3\u0EDC-\u0EDD\u0F0C\u0F2A-\u0F33\u0F43\u0F4D\u0F52\u0F57\u0F5C\u0F69\u0F73\u0F75-\u0F79\u0F81\u0F93\u0F9D\u0FA2\u0FA7\u0FAC\u0FB9\u137C\u1E00\u1E02\u1E04\u1E06\u1E08\u1E0A\u1E0C\u1E0E\u1E10\u1E12\u1E14\u1E16\u1E18\u1E1A\u1E1C\u1E1E\u1E20\u1E22\u1E24\u1E26\u1E28\u1E2A\u1E2C\u1E2E\u1E30\u1E32\u1E34\u1E36\u1E38\u1E3A\u1E3C\u1E3E\u1E40\u1E42\u1E44\u1E46\u1E48\u1E4A\u1E4C\u1E4E\u1E50\u1E52\u1E54\u1E56\u1E58\u1E5A\u1E5C\u1E5E\u1E60\u1E62\u1E64\u1E66\u1E68\u1E6A\u1E6C\u1E6E\u1E70\u1E72\u1E74\u1E76\u1E78\u1E7A\u1E7C\u1E7E\u1E80\u1E82\u1E84\u1E86\u1E88\u1E8A\u1E8C\u1E8E\u1E90\u1E92\u1E94\u1E9A-\u1E9B\u1EA0\u1EA2\u1EA4\u1EA6\u1EA8\u1EAA\u1EAC\u1EAE\u1EB0\u1EB2\u1EB4\u1EB6\u1EB8\u1EBA\u1EBC\u1EBE\u1EC0\u1EC2\u1EC4\u1EC6\u1EC8\u1ECA\u1ECC\u1ECE\u1ED0\u1ED2\u1ED4\u1ED6\u1ED8\u1EDA\u1EDC\u1EDE\u1EE0\u1EE2\u1EE4\u1EE6\u1EE8\u1EEA\u1EEC\u1EEE\u1EF0\u1EF2\u1EF4\u1EF6\u1EF8\u1F08-\u1F0F\u1F18-\u1F1D\u1F28-\u1F2F\u1F38-\u1F3F\u1F48-\u1F4D\u1F59\u1F5B\u1F5D\u1F5F\u1F68-\u1F6F\u1F71\u1F73\u1F75\u1F77\u1F79\u1F7B\u1F7D\u1F88-\u1F8F\u1F98-\u1F9F\u1FA8-\u1FAF\u1FB8-\u1FC1\u1FC8-\u1FCF\u1FD3\u1FD8-\u1FDB\u1FDD-\u1FDF\u1FE3\u1FE8-\u1FEF\u1FF8-\u1FFE\u2000-\u200A\u2011\u2017\u2024-\u2026\u202F\u2033-\u2034\u2036-\u2037\u203C\u203E\u2048-\u2049\u2070\u2074-\u208E\u20A8\u2100-\u2103\u2105-\u2107\u2109-\u2113\u2115-\u2116\u2119-\u211D\u2120-\u2122\u2124\u2126\u2128\u212A-\u212D\u212F-\u2131\u2133-\u2139\u2153-\u217F\u2181-\u2183\u222C-\u222D\u222F-\u2230\u2329-\u232A\u2460-\u24EA\u2776-\u277E\u2780-\u2788\u278A-\u2792\u2E9F\u2EF3\u2F00-\u2FD5\u3000\u3036\u3038-\u303A\u309B-\u309C\u3131-\u318E\u3192-\u319F\u3200-\u321C\u3220-\u3243\u3260-\u327B\u3280-\u32B0\u32C0-\u32CB\u32D0-\u32FE\u3300-\u3376\u337B-\u33DD\u33E0-\u33FE\uF900-\uFA0D\uFA10\uFA12\uFA15-\uFA1E\uFA20\uFA22\uFA25-\uFA26\uFA2A-\uFA2D\uFB00-\uFB06\uFB13-\uFB17\uFB1F-\uFB36\uFB38-\uFB3C\uFB3E\uFB40-\uFB41\uFB43-\uFB44\uFB46-\uFBB1\uFBD3-\uFD3D\uFD50-\uFD8F\uFD92-\uFDC7\uFDF0-\uFDFB\uFE30-\uFE44\uFE49-\uFE52\uFE54-\uFE66\uFE68-\uFE6B\uFE70-\uFE72\uFE74\uFE76-\uFEFC\uFF01-\uFF5E\uFF61-\uFFBE\uFFC2-\uFFC7\uFFCA-\uFFCF\uFFD2-\uFFD7\uFFDA-\uFFDC\uFFE0-\uFFE6\uFFE8-\uFFEE\\U0001034A\\U00010400-\\U00010425\\U0001D165-\\U0001D166\\U0001D16D-\\U0001D172]"),\r
275         },\r
276     };\r
277     */\r
278 \r
279     \r
280     static UnicodeSet generatedSet;\r
281     \r
282     // These should both be public, and on the respective classes\r
283     \r
284     public static void addAll(UnicodeSet s, String str) {\r
285         int cp;\r
286         for (int i = 0; i < str.length(); i += UTF16.getCharCount(cp)) {\r
287             cp = UTF16.charAt(str,i);\r
288             s.add(cp);\r
289         }\r
290     }\r
291     \r
292     public static boolean containsSome(UnicodeSet s, String str) {\r
293         int cp;\r
294         for (int i = 0; i < str.length(); i += UTF16.getCharCount(cp)) {\r
295             cp = UTF16.charAt(str,i);\r
296             if (s.contains(cp)) return true;\r
297         }\r
298         return false;\r
299     }\r
300 }