]> gitweb.fperrin.net Git - Dictionary.git/blob - jars/icu4j-4_4_2-src/main/classes/core/src/com/ibm/icu/impl/locale/LanguageTag.java
go
[Dictionary.git] / jars / icu4j-4_4_2-src / main / classes / core / src / com / ibm / icu / impl / locale / LanguageTag.java
1 /*\r
2  *******************************************************************************\r
3  * Copyright (C) 2009, International Business Machines Corporation and         *\r
4  * others. All Rights Reserved.                                                *\r
5  *******************************************************************************\r
6  */\r
7 package com.ibm.icu.impl.locale;\r
8 \r
9 import java.util.ArrayList;\r
10 import java.util.Collections;\r
11 import java.util.HashMap;\r
12 import java.util.List;\r
13 import java.util.Map;\r
14 import java.util.Set;\r
15 import java.util.SortedMap;\r
16 import java.util.TreeMap;\r
17 import java.util.Map.Entry;\r
18 \r
19 public class LanguageTag {\r
20 \r
21     private static final boolean JDKIMPL = false;\r
22 \r
23     //\r
24     // static fields\r
25     //\r
26     public static final String SEP = "-";\r
27     public static final String PRIVATEUSE = "x";\r
28     public static String UNDETERMINED = "und";\r
29 \r
30     private static final String JAVAVARIANT = "variant";\r
31     private static final String JAVASEP = "_";\r
32 \r
33     private static final SortedMap<Character, Extension> EMPTY_EXTENSION_MAP = new TreeMap<Character, Extension>();\r
34 \r
35     //\r
36     // Language tag parser instances\r
37     //\r
38     public static final Parser DEFAULT_PARSER = new Parser(false);\r
39     public static final Parser JAVA_VARIANT_COMPATIBLE_PARSER = new Parser(true);\r
40 \r
41     //\r
42     // Language subtag fields\r
43     //\r
44     private String _grandfathered = ""; // grandfathered tag\r
45     private String _language = "";      // language subtag\r
46     private String _script = "";        // script subtag\r
47     private String _region = "";        // region subtag\r
48     private String _privateuse = "";    // privateuse, not including leading "x-"\r
49     private List<String> _extlangs = Collections.emptyList();   // extlang subtags\r
50     private List<String> _variants = Collections.emptyList();   // variant subtags\r
51     private SortedMap<Character, Extension> _extensions = EMPTY_EXTENSION_MAP;  // extension key/value pairs\r
52 \r
53     private boolean _javaCompatVariants = false;\r
54 \r
55     // Map contains grandfathered tags and its preferred mappings from\r
56     // http://www.ietf.org/rfc/rfc5646.txt\r
57     private static final Map<AsciiUtil.CaseInsensitiveKey, String[]> GRANDFATHERED =\r
58         new HashMap<AsciiUtil.CaseInsensitiveKey, String[]>();\r
59 \r
60     static {\r
61         // grandfathered = irregular           ; non-redundant tags registered\r
62         //               / regular             ; during the RFC 3066 era\r
63         //\r
64         // irregular     = "en-GB-oed"         ; irregular tags do not match\r
65         //               / "i-ami"             ; the 'langtag' production and\r
66         //               / "i-bnn"             ; would not otherwise be\r
67         //               / "i-default"         ; considered 'well-formed'\r
68         //               / "i-enochian"        ; These tags are all valid,\r
69         //               / "i-hak"             ; but most are deprecated\r
70         //               / "i-klingon"         ; in favor of more modern\r
71         //               / "i-lux"             ; subtags or subtag\r
72         //               / "i-mingo"           ; combination\r
73         //               / "i-navajo"\r
74         //               / "i-pwn"\r
75         //               / "i-tao"\r
76         //               / "i-tay"\r
77         //               / "i-tsu"\r
78         //               / "sgn-BE-FR"\r
79         //               / "sgn-BE-NL"\r
80         //               / "sgn-CH-DE"\r
81         //\r
82         // regular       = "art-lojban"        ; these tags match the 'langtag'\r
83         //               / "cel-gaulish"       ; production, but their subtags\r
84         //               / "no-bok"            ; are not extended language\r
85         //               / "no-nyn"            ; or variant subtags: their meaning\r
86         //               / "zh-guoyu"          ; is defined by their registration\r
87         //               / "zh-hakka"          ; and all of these are deprecated\r
88         //               / "zh-min"            ; in favor of a more modern\r
89         //               / "zh-min-nan"        ; subtag or sequence of subtags\r
90         //               / "zh-xiang"\r
91 \r
92         final String[][] entries = {\r
93           //{"tag",         "preferred"},\r
94             {"art-lojban",  "jbo"},\r
95             {"cel-gaulish", "cel-gaulish"}, // gaulish is parsed as a variant\r
96             {"en-GB-oed",   "en-GB"},       // oed (Oxford English Dictionary spelling) is ignored\r
97             {"i-ami",       "ami"},\r
98             {"i-bnn",       "bnn"},\r
99             {"i-default",   UNDETERMINED},  // fallback\r
100             {"i-enochian",  UNDETERMINED},  // fallback\r
101             {"i-hak",       "hak"},\r
102             {"i-klingon",   "tlh"},\r
103             {"i-lux",       "lb"},\r
104             {"i-mingo",     UNDETERMINED},  // fallback\r
105             {"i-navajo",    "nv"},\r
106             {"i-pwn",       "pwn"},\r
107             {"i-tao",       "tao"},\r
108             {"i-tay",       "tay"},\r
109             {"i-tsu",       "tsu"},\r
110             {"no-bok",      "nb"},\r
111             {"no-nyn",      "nn"},\r
112             {"sgn-BE-FR",   "sfb"},\r
113             {"sgn-BE-NL",   "vgt"},\r
114             {"sgn-CH-DE",   "sgg"},\r
115             {"zh-guoyu",    "cmn"},\r
116             {"zh-hakka",    "hak"},\r
117             {"zh-min",      "zh"},          // fallback\r
118             {"zh-min-nan",  "nan"},\r
119             {"zh-xiang",    "hsn"},\r
120         };\r
121         for (String[] e : entries) {\r
122             GRANDFATHERED.put(new AsciiUtil.CaseInsensitiveKey(e[0]), e);\r
123         }\r
124     }\r
125 \r
126     private LanguageTag() {\r
127     }\r
128 \r
129     //\r
130     // Getter methods for language subtag fields\r
131     //\r
132 \r
133     public String getLanguage() {\r
134         return _language;\r
135     }\r
136 \r
137     public List<String> getExtlangs() {\r
138         return Collections.unmodifiableList(_extlangs);\r
139     }\r
140 \r
141     public String getScript() {\r
142         return _script;\r
143     }\r
144 \r
145     public String getRegion() {\r
146         return _region;\r
147     }\r
148 \r
149     public List<String> getVariants() {\r
150         return Collections.unmodifiableList(_variants);\r
151     }\r
152 \r
153     public SortedMap<Character, Extension> getExtensions() {\r
154         return Collections.unmodifiableSortedMap(_extensions);\r
155     }\r
156 \r
157     public String getPrivateuse() {\r
158         return _privateuse;\r
159     }\r
160 \r
161     public String getGrandfathered() {\r
162         return _grandfathered;\r
163     }\r
164 \r
165     private String getJavaVariant() {\r
166         StringBuilder buf = new StringBuilder();\r
167         for (String var : _variants) {\r
168             if (buf.length() > 0) {\r
169                 buf.append(JAVASEP);\r
170             }\r
171             buf.append(var);\r
172         }\r
173         if (_javaCompatVariants) {\r
174             return getJavaCompatibleVariant(buf.toString(), _privateuse);\r
175         }\r
176 \r
177         return buf.toString();\r
178     }\r
179 \r
180     private String getJavaPrivateuse() {\r
181         if (_javaCompatVariants) {\r
182             return getJavaCompatiblePrivateuse(_privateuse);\r
183         }\r
184         return _privateuse;\r
185     }\r
186 \r
187     static String getJavaCompatibleVariant(String bcpVariants, String bcpPrivuse) {\r
188         StringBuilder buf = new StringBuilder(bcpVariants);\r
189         if (bcpPrivuse.length() > 0) {\r
190             int idx = -1;\r
191             if (bcpPrivuse.startsWith(JAVAVARIANT + SEP)) {\r
192                 idx = (JAVAVARIANT + SEP).length();\r
193             } else {\r
194                 idx = bcpPrivuse.indexOf(SEP + JAVAVARIANT + SEP);\r
195                 if (idx != -1) {\r
196                     idx += (SEP + JAVAVARIANT + SEP).length();\r
197                 }\r
198             }\r
199             if (idx != -1) {\r
200                 if (buf.length() != 0) {\r
201                     buf.append(JAVASEP);\r
202                 }\r
203                 buf.append(bcpPrivuse.substring(idx).replace(SEP, JAVASEP));\r
204             }\r
205         }\r
206         return buf.toString();\r
207     }\r
208 \r
209     static String getJavaCompatiblePrivateuse(String bcpPrivuse) {\r
210         if (bcpPrivuse.length() > 0) {\r
211             int idx = -1;\r
212             if (bcpPrivuse.startsWith(JAVAVARIANT + SEP)) {\r
213                 idx = 0;\r
214             } else {\r
215                 idx = bcpPrivuse.indexOf(SEP + JAVAVARIANT + SEP);\r
216             }\r
217             if (idx != -1) {\r
218                 return bcpPrivuse.substring(0, idx);\r
219             }\r
220         }\r
221         return bcpPrivuse;\r
222     }\r
223 \r
224     public BaseLocale getBaseLocale() {\r
225         String lang = _language;\r
226         if (_extlangs.size() > 0) {\r
227             // Extended language subtags are used for various historical\r
228             // and compatibility reasons.  Each extended language subtag\r
229             // has a "Preferred-Value', that is exactly same with the extended\r
230             // language subtag itself.  For example,\r
231             //\r
232             // Type: extlang\r
233             // Subtag: aao\r
234             // Description: Algerian Saharan Arabic\r
235             // Added: 2009-07-29\r
236             // Preferred-Value: aao\r
237             // Prefix: ar\r
238             // Macrolanguage: ar\r
239             //\r
240             // For example, language tag "ar-aao-DZ" is equivalent to\r
241             // "aao-DZ".\r
242             //\r
243             // Strictly speaking, the mapping requires prefix validation \r
244             // (e.g. primary language must be "ar" in the example above).\r
245             // However, this implementation does not check the prefix\r
246             // and simply use the first extlang value as locale's language.\r
247             lang = _extlangs.get(0);\r
248         }\r
249         if (lang.equals(UNDETERMINED)) {\r
250             lang = "";\r
251         }\r
252         return BaseLocale.getInstance(lang, _script, _region, getJavaVariant());\r
253     }\r
254 \r
255     public LocaleExtensions getLocaleExtensions() {\r
256         String javaPrivuse = getJavaPrivateuse();\r
257         if (_extensions == null && javaPrivuse.length() == 0) {\r
258             return LocaleExtensions.EMPTY_EXTENSIONS;\r
259         }\r
260         SortedMap<Character, Extension> exts = new TreeMap<Character, Extension>();\r
261         if (_extensions != null) {\r
262             exts.putAll(_extensions);\r
263         }\r
264         if (javaPrivuse.length() > 0) {\r
265             PrivateuseExtension pext = new PrivateuseExtension(javaPrivuse);\r
266             exts.put(Character.valueOf(PrivateuseExtension.SINGLETON), pext);\r
267         }\r
268         return LocaleExtensions.getInstance(exts);\r
269     }\r
270 \r
271     public String getID() {\r
272         if (_grandfathered.length() > 0) {\r
273             return _grandfathered;\r
274         }\r
275         StringBuilder buf = new StringBuilder();\r
276         if (_language.length() > 0) {\r
277             buf.append(_language);\r
278             if (_extlangs.size() > 0) {\r
279                 for (String el : _extlangs) {\r
280                     buf.append(SEP);\r
281                     buf.append(el);\r
282                 }\r
283             }\r
284             if (_script.length() > 0) {\r
285                 buf.append(SEP);\r
286                 buf.append(_script);\r
287             }\r
288             if (_region.length() > 0) {\r
289                 buf.append(SEP);\r
290                 buf.append(_region);\r
291             }\r
292             if (_variants.size() > 0) {\r
293                 for (String var : _variants) {\r
294                     buf.append(SEP);\r
295                     buf.append(var);\r
296                 }\r
297             }\r
298             if (_extensions.size() > 0) {\r
299                 Set<Entry<Character, Extension>> exts = _extensions.entrySet();\r
300                 for (Entry<Character, Extension> ext : exts) {\r
301                     buf.append(SEP);\r
302                     buf.append(ext.getKey());\r
303                     buf.append(SEP);\r
304                     buf.append(ext.getValue().getValue());\r
305                 }\r
306             }\r
307         }\r
308         if (_privateuse.length() > 0) {\r
309             if (buf.length() > 0) {\r
310                 buf.append(SEP);\r
311             }\r
312             buf.append(PRIVATEUSE);\r
313             buf.append(SEP);\r
314             buf.append(_privateuse);\r
315         }\r
316         return buf.toString();\r
317     }\r
318 \r
319     public String toString() {\r
320         return getID();\r
321     }\r
322 \r
323     //\r
324     // Language subtag syntax checking methods\r
325     //\r
326 \r
327     public static boolean isLanguage(String s) {\r
328         // language      = 2*3ALPHA            ; shortest ISO 639 code\r
329         //                 ["-" extlang]       ; sometimes followed by\r
330         //                                     ;   extended language subtags\r
331         //               / 4ALPHA              ; or reserved for future use\r
332         //               / 5*8ALPHA            ; or registered language subtag\r
333         return (s.length() >= 2) && (s.length() <= 8) && AsciiUtil.isAlphaString(s);\r
334     }\r
335 \r
336     public static boolean isExtlang(String s) {\r
337         // extlang       = 3ALPHA              ; selected ISO 639 codes\r
338         //                 *2("-" 3ALPHA)      ; permanently reserved\r
339         return (s.length() == 3) && AsciiUtil.isAlphaString(s);\r
340     }\r
341 \r
342     public static boolean isScript(String s) {\r
343         // script        = 4ALPHA              ; ISO 15924 code\r
344         return (s.length() == 4) && AsciiUtil.isAlphaString(s);\r
345     }\r
346 \r
347     public static boolean isRegion(String s) {\r
348         // region        = 2ALPHA              ; ISO 3166-1 code\r
349         //               / 3DIGIT              ; UN M.49 code\r
350         return ((s.length() == 2) && AsciiUtil.isAlphaString(s))\r
351                 || ((s.length() == 3) && AsciiUtil.isNumericString(s));\r
352     }\r
353 \r
354     public static boolean isVariant(String s) {\r
355         // variant       = 5*8alphanum         ; registered variants\r
356         //               / (DIGIT 3alphanum)\r
357         int len = s.length();\r
358         if (len >= 5 && len <= 8) {\r
359             return AsciiUtil.isAlphaNumericString(s);\r
360         }\r
361         if (len == 4) {\r
362             return AsciiUtil.isNumeric(s.charAt(0))\r
363                     && AsciiUtil.isAlphaNumeric(s.charAt(1))\r
364                     && AsciiUtil.isAlphaNumeric(s.charAt(2))\r
365                     && AsciiUtil.isAlphaNumeric(s.charAt(3));\r
366         }\r
367         return false;\r
368     }\r
369 \r
370     public static boolean isExtensionSingleton(String s) {\r
371         // singleton     = DIGIT               ; 0 - 9\r
372         //               / %x41-57             ; A - W\r
373         //               / %x59-5A             ; Y - Z\r
374         //               / %x61-77             ; a - w\r
375         //               / %x79-7A             ; y - z\r
376 \r
377         return (s.length() == 1)\r
378                 && AsciiUtil.isAlphaString(s)\r
379                 && !AsciiUtil.caseIgnoreMatch(PRIVATEUSE, s);\r
380     }\r
381 \r
382     public static boolean isExtensionSubtag(String s) {\r
383         // extension     = singleton 1*("-" (2*8alphanum))\r
384         return (s.length() >= 2) && (s.length() <= 8) && AsciiUtil.isAlphaNumericString(s);\r
385     }\r
386 \r
387     public static boolean isPrivateuseSingleton(String s) {\r
388         // privateuse    = "x" 1*("-" (1*8alphanum))\r
389         return (s.length() == 1)\r
390                 && AsciiUtil.caseIgnoreMatch(PRIVATEUSE, s);\r
391     }\r
392 \r
393     public static boolean isPrivateuseSubtag(String s) {\r
394         // privateuse    = "x" 1*("-" (1*8alphanum))\r
395         return (s.length() >= 1) && (s.length() <= 8) && AsciiUtil.isAlphaNumericString(s);\r
396     }\r
397 \r
398     //\r
399     // Language subtag canonicalization methods\r
400     //\r
401 \r
402     public static String canonicalizeLanguage(String s) {\r
403         return AsciiUtil.toLowerString(s);\r
404     }\r
405 \r
406     public static String canonicalizeExtlang(String s) {\r
407         return AsciiUtil.toLowerString(s);\r
408     }\r
409 \r
410     public static String canonicalizeScript(String s) {\r
411         return AsciiUtil.toTitleString(s);\r
412     }\r
413 \r
414     public static String canonicalizeRegion(String s) {\r
415         return AsciiUtil.toUpperString(s);\r
416     }\r
417 \r
418     public static String canonicalizeVariant(String s) {\r
419         return AsciiUtil.toLowerString(s);\r
420     }\r
421 \r
422     public static String canonicalizeExtensionSingleton(String s) {\r
423         return AsciiUtil.toLowerString(s);\r
424     }\r
425 \r
426     public static String canonicalizeExtensionSubtag(String s) {\r
427         return AsciiUtil.toLowerString(s);\r
428     }\r
429 \r
430     public static String canonicalizePrivateuseSubtag(String s) {\r
431         return AsciiUtil.toLowerString(s);\r
432     }\r
433 \r
434 \r
435     public static LanguageTag parse(String str, boolean javaCompatVar) {\r
436         LanguageTag tag = new LanguageTag();\r
437         tag.parseString(str, javaCompatVar);\r
438         return tag;\r
439     }\r
440 \r
441     public static LanguageTag parseStrict(String str, boolean javaCompatVar) throws LocaleSyntaxException {\r
442         LanguageTag tag = new LanguageTag();\r
443         ParseStatus sts = tag.parseString(str, javaCompatVar);\r
444         if (sts.isError()) {\r
445             throw new LocaleSyntaxException(sts.errorMsg, sts.errorIndex);\r
446         }\r
447         return tag;\r
448     }\r
449 \r
450     public static LanguageTag parseLocale(BaseLocale base, LocaleExtensions locExts) {\r
451         LanguageTag tag = new LanguageTag();\r
452         tag._javaCompatVariants = true;\r
453 \r
454         String language = base.getLanguage();\r
455         String script = base.getScript();\r
456         String region = base.getRegion();\r
457         String variant = base.getVariant();\r
458 \r
459         String privuseVar = null;   // store ill-formed variant subtags\r
460 \r
461         if (language.length() > 0 && isLanguage(language)) {\r
462             // Convert a deprecated language code used by Java to\r
463             // a new code\r
464             language = canonicalizeLanguage(language);\r
465             if (language.equals("iw")) {\r
466                 language = "he";\r
467             } else if (language.equals("ji")) {\r
468                 language = "yi";\r
469             } else if (language.equals("in")) {\r
470                 language = "id";\r
471             }\r
472             tag._language = language;\r
473         }\r
474         if (script.length() > 0 && isScript(script)) {\r
475             tag._script = canonicalizeScript(script);\r
476         }\r
477         if (region.length() > 0 && isRegion(region)) {\r
478             tag._region = canonicalizeRegion(region);\r
479         }\r
480         if (variant.length() > 0) {\r
481             List<String> variants = null;\r
482             StringTokenIterator varitr = new StringTokenIterator(variant, JAVASEP);\r
483             while (!varitr.isDone()) {\r
484                 String var = varitr.current();\r
485                 if (!isVariant(var)) {\r
486                     break;\r
487                 }\r
488                 if (variants == null) {\r
489                     variants = new ArrayList<String>();\r
490                 }\r
491                 if (JDKIMPL) {\r
492                     variants.add(var);  // Do not canonicalize!\r
493                 } else {\r
494                     variants.add(canonicalizeVariant(var));\r
495                 }\r
496                 varitr.next();\r
497             }\r
498             if (variants != null) {\r
499                 tag._variants = variants;\r
500             }\r
501             if (!varitr.isDone()) {\r
502                 // ill-formed variant subtags\r
503                 StringBuilder buf = new StringBuilder();\r
504                 while (!varitr.isDone()) {\r
505                     String prvv = varitr.current();\r
506                     if (!isPrivateuseSubtag(prvv)) {\r
507                         // cannot use private use subtag - truncated\r
508                         break;\r
509                     }\r
510                     if (buf.length() > 0) {\r
511                         buf.append(SEP);\r
512                     }\r
513                     if (!JDKIMPL) {\r
514                         prvv = AsciiUtil.toLowerString(prvv);\r
515                     }\r
516                     buf.append(prvv);\r
517                     varitr.next();\r
518                 }\r
519                 if (buf.length() > 0) {\r
520                     privuseVar = buf.toString();\r
521                 }\r
522             }\r
523         }\r
524 \r
525         TreeMap<Character, Extension> extensions = null;\r
526         String privateuse = null;\r
527 \r
528         Set<Character> locextKeys = locExts.getKeys();\r
529         for (Character locextKey : locextKeys) {\r
530             Extension ext = locExts.getExtension(locextKey);\r
531             if (ext instanceof PrivateuseExtension) {\r
532                 privateuse = ext.getValue();\r
533             } else {\r
534                 if (extensions == null) {\r
535                     extensions = new TreeMap<Character, Extension>();\r
536                 }\r
537                 extensions.put(locextKey, ext);\r
538             }\r
539         }\r
540 \r
541         if (extensions != null) {\r
542             tag._extensions = extensions;\r
543         }\r
544 \r
545         // append ill-formed variant subtags to private use\r
546         if (privuseVar != null) {\r
547             if (privateuse == null) {\r
548                 privateuse = JAVAVARIANT + SEP + privuseVar;\r
549             } else {\r
550                 privateuse = privateuse + SEP + JAVAVARIANT + SEP + privuseVar.replace(JAVASEP, SEP);\r
551             }\r
552         }\r
553 \r
554         if (privateuse != null) {\r
555             tag._privateuse = privateuse;\r
556         } else if (tag._language.length() == 0) {\r
557             // use "und" if neither language nor privateuse is available\r
558             tag._language = UNDETERMINED;\r
559         }\r
560 \r
561         return tag;\r
562     }\r
563 \r
564     private ParseStatus parseString(String str, boolean javaCompatVar) {\r
565         // Check if the tag is grandfathered\r
566         String[] gfmap = GRANDFATHERED.get(new AsciiUtil.CaseInsensitiveKey(str));\r
567         ParseStatus sts;\r
568         if (gfmap != null) {\r
569             _grandfathered = gfmap[0];\r
570             sts = parseLanguageTag(gfmap[1], javaCompatVar);\r
571             sts.parseLength = str.length();\r
572         } else {\r
573             _grandfathered = "";\r
574             sts = parseLanguageTag(str, javaCompatVar);\r
575         }\r
576         return sts;\r
577     }\r
578 \r
579     /*\r
580      * Parse Language-Tag, except grandfathered.\r
581      * \r
582      * BNF in RFC5646\r
583      *  \r
584      * Language-Tag  = langtag             ; normal language tags\r
585      *               / privateuse          ; private use tag\r
586      *               / grandfathered       ; grandfathered tags\r
587      *\r
588      * \r
589      * langtag       = language\r
590      *                 ["-" script]\r
591      *                 ["-" region]\r
592      *                 *("-" variant)\r
593      *                 *("-" extension)\r
594      *                 ["-" privateuse]\r
595      * \r
596      * language      = 2*3ALPHA            ; shortest ISO 639 code\r
597      *                 ["-" extlang]       ; sometimes followed by\r
598      *                                     ; extended language subtags\r
599      *               / 4ALPHA              ; or reserved for future use\r
600      *               / 5*8ALPHA            ; or registered language subtag\r
601      * \r
602      * extlang       = 3ALPHA              ; selected ISO 639 codes\r
603      *                 *2("-" 3ALPHA)      ; permanently reserved\r
604      * \r
605      * script        = 4ALPHA              ; ISO 15924 code\r
606      * \r
607      * region        = 2ALPHA              ; ISO 3166-1 code\r
608      *               / 3DIGIT              ; UN M.49 code\r
609      * \r
610      * variant       = 5*8alphanum         ; registered variants\r
611      *               / (DIGIT 3alphanum)\r
612      * \r
613      * extension     = singleton 1*("-" (2*8alphanum))\r
614      * \r
615      *                                     ; Single alphanumerics\r
616      *                                     ; "x" reserved for private use\r
617      * singleton     = DIGIT               ; 0 - 9\r
618      *               / %x41-57             ; A - W\r
619      *               / %x59-5A             ; Y - Z\r
620      *               / %x61-77             ; a - w\r
621      *               / %x79-7A             ; y - z\r
622      * \r
623      * privateuse    = "x" 1*("-" (1*8alphanum))\r
624      * \r
625      */\r
626     private ParseStatus parseLanguageTag(String langtag, boolean javaCompat) {\r
627         ParseStatus sts = new ParseStatus();\r
628         StringTokenIterator itr = new StringTokenIterator(langtag, SEP);\r
629         Parser parser = javaCompat ? JAVA_VARIANT_COMPATIBLE_PARSER : DEFAULT_PARSER;\r
630 \r
631         _javaCompatVariants = javaCompat;\r
632 \r
633         // langtag must start with either language or privateuse\r
634         _language = parser.parseLanguage(itr, sts);\r
635         if (_language.length() > 0) {\r
636             _extlangs = parser.parseExtlangs(itr, sts);\r
637             _script = parser.parseScript(itr, sts);\r
638             _region = parser.parseRegion(itr, sts);\r
639             _variants = parser.parseVariants(itr, sts);\r
640             _extensions = parser.parseExtensions(itr, sts);\r
641         }\r
642         _privateuse = parser.parsePrivateuse(itr, sts);\r
643 \r
644         if (!itr.isDone() && !sts.isError()) {\r
645             String s = itr.current();\r
646             sts.errorIndex = itr.currentStart();\r
647             if (s.length() == 0) {\r
648                 sts.errorMsg = "Empty subtag";\r
649             } else {\r
650                 sts.errorMsg = "Invalid subtag: " + s; \r
651             }\r
652         }\r
653 \r
654         return sts;\r
655     }\r
656 \r
657     public static class ParseStatus {\r
658         int parseLength = 0;\r
659         int errorIndex = -1;\r
660         String errorMsg = null;\r
661 \r
662         public void reset() {\r
663             parseLength = 0;\r
664             errorIndex = -1;\r
665             errorMsg = null;\r
666         }\r
667 \r
668         boolean isError() {\r
669             return (errorIndex >= 0);\r
670         }\r
671     }\r
672 \r
673     static class Parser {\r
674         private boolean _javaCompatVar;\r
675 \r
676         Parser(boolean javaCompatVar) {\r
677             _javaCompatVar = javaCompatVar;\r
678         }\r
679 \r
680         //\r
681         // Language subtag parsers\r
682         //\r
683 \r
684         public String parseLanguage(StringTokenIterator itr, ParseStatus sts) {\r
685             String language = "";\r
686 \r
687             if (itr.isDone() || sts.isError()) {\r
688                 return language;\r
689             }\r
690 \r
691             String s = itr.current();\r
692             if (isLanguage(s)) {\r
693                 language = canonicalizeLanguage(s);\r
694                 sts.parseLength = itr.currentEnd();\r
695                 itr.next();\r
696             }\r
697             return language;\r
698         }\r
699 \r
700         public List<String> parseExtlangs(StringTokenIterator itr, ParseStatus sts) {\r
701             List<String> extlangs = null;\r
702 \r
703             if (itr.isDone() || sts.isError()) {\r
704                 return Collections.emptyList();\r
705             }\r
706 \r
707             while (!itr.isDone()) {\r
708                 String s = itr.current();\r
709                 if (!isExtlang(s)) {\r
710                     break;\r
711                 }\r
712                 if (extlangs == null) {\r
713                     extlangs = new ArrayList<String>(3);\r
714                 }\r
715                 extlangs.add(canonicalizeExtlang(s));\r
716                 sts.parseLength = itr.currentEnd();\r
717                 itr.next();\r
718 \r
719                 if (extlangs.size() == 3) {\r
720                     // Maximum 3 extlangs\r
721                     break;\r
722                 }\r
723             }\r
724 \r
725             if (extlangs == null) {\r
726                 return Collections.emptyList();\r
727             }\r
728 \r
729             return extlangs;\r
730         }\r
731 \r
732         public String parseScript(StringTokenIterator itr, ParseStatus sts) {\r
733             String script = "";\r
734 \r
735             if (itr.isDone() || sts.isError()) {\r
736                 return script;\r
737             }\r
738 \r
739             String s = itr.current();\r
740             if (isScript(s)) {\r
741                 script = canonicalizeScript(s);\r
742                 sts.parseLength = itr.currentEnd();\r
743                 itr.next();\r
744             }\r
745 \r
746             return script;\r
747         }\r
748 \r
749         public String parseRegion(StringTokenIterator itr, ParseStatus sts) {\r
750             String region = "";\r
751 \r
752             if (itr.isDone() || sts.isError()) {\r
753                 return region;\r
754             }\r
755 \r
756             String s = itr.current();\r
757             if (isRegion(s)) {\r
758                 region = canonicalizeRegion(s);\r
759                 sts.parseLength = itr.currentEnd();\r
760                 itr.next();\r
761             }\r
762 \r
763             return region;\r
764         }\r
765 \r
766         public List<String> parseVariants(StringTokenIterator itr, ParseStatus sts) {\r
767             List<String> variants = null;\r
768 \r
769             if (itr.isDone() || sts.isError()) {\r
770                 return Collections.emptyList();\r
771             }\r
772 \r
773             while (!itr.isDone()) {\r
774                 String s = itr.current();\r
775                 if (!isVariant(s)) {\r
776                     break;\r
777                 }\r
778                 if (variants == null) {\r
779                     variants = new ArrayList<String>(3);\r
780                 }\r
781                 if (_javaCompatVar) {\r
782                     // preserve casing when Java compatibility option\r
783                     // is enabled\r
784                     variants.add(s);\r
785                 } else {\r
786                     variants.add(canonicalizeVariant(s));\r
787                 }\r
788                 sts.parseLength = itr.currentEnd();\r
789                 itr.next();\r
790             }\r
791 \r
792             if (variants == null) {\r
793                 return Collections.emptyList();\r
794             }\r
795 \r
796             return variants;\r
797         }\r
798 \r
799         public SortedMap<Character, Extension> parseExtensions(StringTokenIterator itr, ParseStatus sts) {\r
800             SortedMap<Character, Extension> extensionMap = null;\r
801 \r
802             if (itr.isDone() || sts.isError()) {\r
803                 return EMPTY_EXTENSION_MAP;\r
804             }\r
805 \r
806             while (!itr.isDone()) {\r
807                 String s = itr.current();\r
808                 if (!isExtensionSingleton(s)) {\r
809                     break;\r
810                 }\r
811                 if (!itr.hasNext()) {\r
812                     sts.errorIndex = itr.currentStart();\r
813                     sts.errorMsg = "Missing extension subtag for extension :" + s;\r
814                     break;\r
815                 }\r
816 \r
817                 if (extensionMap == null) {\r
818                     extensionMap = new TreeMap<Character, Extension>();\r
819                 }\r
820 \r
821                 String singletonStr = canonicalizeExtensionSingleton(s);\r
822                 Character singleton = Character.valueOf(singletonStr.charAt(0));\r
823 \r
824                 if (extensionMap.containsKey(singleton)) {\r
825                     sts.errorIndex = itr.currentStart();\r
826                     sts.errorMsg = "Duplicated extension: " + s;\r
827                     break;\r
828                 }\r
829 \r
830                 itr.next();\r
831                 Extension ext = Extension.create(singleton.charValue(), itr, sts);\r
832                 if (ext != null) {\r
833                     extensionMap.put(singleton, ext);\r
834                 }\r
835                 if (sts.isError()) {\r
836                     break;\r
837                 }\r
838             }\r
839 \r
840             if (extensionMap == null || extensionMap.size() == 0) {\r
841                 return EMPTY_EXTENSION_MAP;\r
842             }\r
843 \r
844             return extensionMap;\r
845         }\r
846 \r
847         public String parsePrivateuse(StringTokenIterator itr, ParseStatus sts) {\r
848             String privateuse = "";\r
849 \r
850             if (itr.isDone() || sts.isError()) {\r
851                 return privateuse;\r
852             }\r
853 \r
854             String s = itr.current();\r
855             if (isPrivateuseSingleton(s)) {\r
856                 StringBuilder buf = new StringBuilder();\r
857                 int singletonOffset = itr.currentStart();\r
858                 boolean preserveCasing = false;\r
859                 itr.next();\r
860 \r
861                 while (!itr.isDone()) {\r
862                     s = itr.current();\r
863                     if (!isPrivateuseSubtag(s)) {\r
864                         break;\r
865                     }\r
866                     if (buf.length() != 0) {\r
867                          buf.append(SEP);\r
868                     }\r
869                     if (!preserveCasing) {\r
870                         s = canonicalizePrivateuseSubtag(s);\r
871                     }\r
872                     buf.append(s);\r
873                     sts.parseLength = itr.currentEnd();\r
874 \r
875                     if (_javaCompatVar && s.equals(JAVAVARIANT)) {\r
876                         // preserve casing after the special\r
877                         // java reserved private use subtag\r
878                         // when java compatibility variant option\r
879                         // is enabled.\r
880                         preserveCasing = true;\r
881                     }\r
882                     itr.next();\r
883                 }\r
884 \r
885                 if (buf.length() == 0) {\r
886                     // need at least 1 private subtag\r
887                     sts.errorIndex = singletonOffset;\r
888                     sts.errorMsg = "Incomplete privateuse";\r
889                 } else {\r
890                     privateuse = buf.toString();\r
891                 }\r
892             }\r
893 \r
894             return privateuse;\r
895         }\r
896     }\r
897 }\r