2 ******************************************************************************
\r
3 * Copyright (C) 2003-2009, International Business Machines Corporation and *
\r
4 * others. All Rights Reserved. *
\r
5 ******************************************************************************
\r
8 package com.ibm.icu.util;
\r
10 import java.io.Serializable;
\r
11 import java.text.ParseException;
\r
12 import java.util.Collections;
\r
13 import java.util.Comparator;
\r
14 import java.util.Iterator;
\r
15 import java.util.Locale;
\r
16 import java.util.Map;
\r
17 import java.util.MissingResourceException;
\r
18 import java.util.Set;
\r
19 import java.util.TreeMap;
\r
21 import com.ibm.icu.impl.ICUCache;
\r
22 import com.ibm.icu.impl.ICUResourceBundle;
\r
23 import com.ibm.icu.impl.LocaleUtility;
\r
24 import com.ibm.icu.impl.SimpleCache;
\r
25 import com.ibm.icu.impl.Utility;
\r
26 import com.ibm.icu.impl.locale.AsciiUtil;
\r
27 import com.ibm.icu.impl.locale.BaseLocale;
\r
28 import com.ibm.icu.impl.locale.InternalLocaleBuilder;
\r
29 import com.ibm.icu.impl.locale.LanguageTag;
\r
30 import com.ibm.icu.impl.locale.LocaleExtensions;
\r
31 import com.ibm.icu.impl.locale.LocaleSyntaxException;
\r
32 import com.ibm.icu.impl.locale.LanguageTag.Extension;
\r
35 * A class analogous to {@link java.util.Locale} that provides additional
\r
36 * support for ICU protocol. In ICU 3.0 this class is enhanced to support
\r
37 * RFC 3066 language identifiers.
\r
39 * <p>Many classes and services in ICU follow a factory idiom, in
\r
40 * which a factory method or object responds to a client request with
\r
41 * an object. The request includes a locale (the <i>requested</i>
\r
42 * locale), and the returned object is constructed using data for that
\r
43 * locale. The system may lack data for the requested locale, in
\r
44 * which case the locale fallback mechanism will be invoked until a
\r
45 * populated locale is found (the <i>valid</i> locale). Furthermore,
\r
46 * even when a populated locale is found (the <i>valid</i> locale),
\r
47 * further fallback may be required to reach a locale containing the
\r
48 * specific data required by the service (the <i>actual</i> locale).
\r
50 * <p>ULocale performs <b>'normalization'</b> and <b>'canonicalization'</b> of locale ids.
\r
51 * Normalization 'cleans up' ICU locale ids as follows:
\r
53 * <li>language, script, country, variant, and keywords are properly cased<br>
\r
54 * (lower, title, upper, upper, and lower case respectively)</li>
\r
55 * <li>hyphens used as separators are converted to underscores</li>
\r
56 * <li>three-letter language and country ids are converted to two-letter
\r
57 * equivalents where available</li>
\r
58 * <li>surrounding spaces are removed from keywords and values</li>
\r
59 * <li>if there are multiple keywords, they are put in sorted order</li>
\r
61 * Canonicalization additionally performs the following:
\r
63 * <li>POSIX ids are converted to ICU format IDs</li>
\r
64 * <li>'grandfathered' 3066 ids are converted to ICU standard form</li>
\r
65 * <li>'PREEURO' and 'EURO' variants are converted to currency keyword form, with the currency
\r
66 * id appropriate to the country of the locale (for PREEURO) or EUR (for EURO).</li>
\r
68 * All ULocale constructors automatically normalize the locale id. To handle
\r
69 * POSIX ids, <code>canonicalize</code> can be called to convert the id
\r
70 * to canonical form, or the <code>canonicalInstance</code> factory method
\r
71 * can be called.</p>
\r
73 * <p>This class provides selectors {@link #VALID_LOCALE} and {@link
\r
74 * #ACTUAL_LOCALE} intended for use in methods named
\r
75 * <tt>getLocale()</tt>. These methods exist in several ICU classes,
\r
76 * including {@link com.ibm.icu.util.Calendar}, {@link
\r
77 * com.ibm.icu.util.Currency}, {@link com.ibm.icu.text.UFormat},
\r
78 * {@link com.ibm.icu.text.BreakIterator}, {@link
\r
79 * com.ibm.icu.text.Collator}, {@link
\r
80 * com.ibm.icu.text.DateFormatSymbols}, and {@link
\r
81 * com.ibm.icu.text.DecimalFormatSymbols} and their subclasses, if
\r
82 * any. Once an object of one of these classes has been created,
\r
83 * <tt>getLocale()</tt> may be called on it to determine the valid and
\r
84 * actual locale arrived at during the object's construction.
\r
86 * <p>Note: The <tt>getLocale()</tt> method will be implemented in ICU
\r
87 * 3.0; ICU 2.8 contains a partial preview implementation. The
\r
88 * <i>actual</i> locale is returned correctly, but the <i>valid</i>
\r
89 * locale is not, in most cases.
\r
91 * @see java.util.Locale
\r
94 * @author Ram Viswanadha
\r
97 public final class ULocale implements Serializable {
\r
98 // using serialver from jdk1.4.2_05
\r
99 private static final long serialVersionUID = 3715177670352309217L;
\r
102 * Useful constant for language.
\r
105 public static final ULocale ENGLISH = new ULocale("en", Locale.ENGLISH);
\r
108 * Useful constant for language.
\r
111 public static final ULocale FRENCH = new ULocale("fr", Locale.FRENCH);
\r
114 * Useful constant for language.
\r
117 public static final ULocale GERMAN = new ULocale("de", Locale.GERMAN);
\r
120 * Useful constant for language.
\r
123 public static final ULocale ITALIAN = new ULocale("it", Locale.ITALIAN);
\r
126 * Useful constant for language.
\r
129 public static final ULocale JAPANESE = new ULocale("ja", Locale.JAPANESE);
\r
132 * Useful constant for language.
\r
135 public static final ULocale KOREAN = new ULocale("ko", Locale.KOREAN);
\r
138 * Useful constant for language.
\r
141 public static final ULocale CHINESE = new ULocale("zh", Locale.CHINESE);
\r
144 * Useful constant for language.
\r
147 public static final ULocale SIMPLIFIED_CHINESE = new ULocale("zh_Hans", Locale.CHINESE);
\r
150 * Useful constant for language.
\r
153 public static final ULocale TRADITIONAL_CHINESE = new ULocale("zh_Hant", Locale.CHINESE);
\r
156 * Useful constant for country/region.
\r
159 public static final ULocale FRANCE = new ULocale("fr_FR", Locale.FRANCE);
\r
162 * Useful constant for country/region.
\r
165 public static final ULocale GERMANY = new ULocale("de_DE", Locale.GERMANY);
\r
168 * Useful constant for country/region.
\r
171 public static final ULocale ITALY = new ULocale("it_IT", Locale.ITALY);
\r
174 * Useful constant for country/region.
\r
177 public static final ULocale JAPAN = new ULocale("ja_JP", Locale.JAPAN);
\r
180 * Useful constant for country/region.
\r
183 public static final ULocale KOREA = new ULocale("ko_KR", Locale.KOREA);
\r
186 * Useful constant for country/region.
\r
189 public static final ULocale CHINA = new ULocale("zh_Hans_CN", Locale.CHINA);
\r
192 * Useful constant for country/region.
\r
195 public static final ULocale PRC = CHINA;
\r
198 * Useful constant for country/region.
\r
201 public static final ULocale TAIWAN = new ULocale("zh_Hant_TW", Locale.TAIWAN);
\r
204 * Useful constant for country/region.
\r
207 public static final ULocale UK = new ULocale("en_GB", Locale.UK);
\r
210 * Useful constant for country/region.
\r
213 public static final ULocale US = new ULocale("en_US", Locale.US);
\r
216 * Useful constant for country/region.
\r
219 public static final ULocale CANADA = new ULocale("en_CA", Locale.CANADA);
\r
222 * Useful constant for country/region.
\r
225 public static final ULocale CANADA_FRENCH = new ULocale("fr_CA", Locale.CANADA_FRENCH);
\r
230 private static final String EMPTY_STRING = "";
\r
232 // Used in both ULocale and IDParser, so moved up here.
\r
233 private static final char UNDERSCORE = '_';
\r
235 // default empty locale
\r
236 private static final Locale EMPTY_LOCALE = new Locale("", "");
\r
239 * The root ULocale.
\r
242 public static final ULocale ROOT = new ULocale("root", EMPTY_LOCALE);
\r
244 private static final SimpleCache CACHE = new SimpleCache();
\r
247 * Cache the locale.
\r
249 private transient Locale locale;
\r
252 * The raw localeID that we were passed in.
\r
254 private String localeID;
\r
257 * Tables used in normalizing portions of the id.
\r
259 /* tables updated per http://lcweb.loc.gov/standards/iso639-2/
\r
260 to include the revisions up to 2001/7/27 *CWB*/
\r
261 /* The 3 character codes are the terminology codes like RFC 3066.
\r
262 This is compatible with prior ICU codes */
\r
263 /* "in" "iw" "ji" "jw" & "sh" have been withdrawn but are still in
\r
264 the table but now at the end of the table because
\r
265 3 character codes are duplicates. This avoids bad searches
\r
266 going from 3 to 2 character codes.*/
\r
267 /* The range qaa-qtz is reserved for local use. */
\r
269 private static String[] _languages;
\r
270 private static String[] _replacementLanguages;
\r
271 private static String[] _obsoleteLanguages;
\r
272 private static String[] _languages3;
\r
273 private static String[] _obsoleteLanguages3;
\r
275 // Avoid initializing languages tables unless we have to.
\r
276 private static void initLanguageTables() {
\r
277 if (_languages == null) {
\r
279 /* This list MUST be in sorted order, and MUST contain the two-letter code
\r
280 if one exists; otherwise use the three-letter code */
\r
281 String[] tempLanguages = {
\r
282 "aa", "ab", "ace", "ach", "ada", "ady", "ae", "af", "afa",
\r
283 "afh", "ak", "akk", "ale", "alg", "am", "an", "ang", "apa",
\r
284 "ar", "arc", "arn", "arp", "art", "arw", "as", "ast",
\r
285 "ath", "aus", "av", "awa", "ay", "az", "ba", "bad",
\r
286 "bai", "bal", "ban", "bas", "bat", "be", "bej",
\r
287 "bem", "ber", "bg", "bh", "bho", "bi", "bik", "bin",
\r
288 "bla", "bm", "bn", "bnt", "bo", "br", "bra", "bs",
\r
289 "btk", "bua", "bug", "byn", "ca", "cad", "cai", "car", "cau",
\r
290 "ce", "ceb", "cel", "ch", "chb", "chg", "chk", "chm",
\r
291 "chn", "cho", "chp", "chr", "chy", "cmc", "co", "cop",
\r
292 "cpe", "cpf", "cpp", "cr", "crh", "crp", "cs", "csb", "cu", "cus",
\r
293 "cv", "cy", "da", "dak", "dar", "day", "de", "del", "den",
\r
294 "dgr", "din", "doi", "dra", "dsb", "dua", "dum", "dv", "dyu",
\r
295 "dz", "ee", "efi", "egy", "eka", "el", "elx", "en",
\r
296 "enm", "eo", "es", "et", "eu", "ewo", "fa",
\r
297 "fan", "fat", "ff", "fi", "fiu", "fj", "fo", "fon",
\r
298 "fr", "frm", "fro", "fur", "fy", "ga", "gaa", "gay",
\r
299 "gba", "gd", "gem", "gez", "gil", "gl", "gmh", "gn",
\r
300 "goh", "gon", "gor", "got", "grb", "grc", "gu", "gv",
\r
301 "gwi", "ha", "hai", "haw", "he", "hi", "hil", "him",
\r
302 "hit", "hmn", "ho", "hr", "hsb", "ht", "hu", "hup", "hy", "hz",
\r
303 "ia", "iba", "id", "ie", "ig", "ii", "ijo", "ik",
\r
304 "ilo", "inc", "ine", "inh", "io", "ira", "iro", "is", "it",
\r
305 "iu", "ja", "jbo", "jpr", "jrb", "jv", "ka", "kaa", "kab",
\r
306 "kac", "kam", "kar", "kaw", "kbd", "kg", "kha", "khi",
\r
307 "kho", "ki", "kj", "kk", "kl", "km", "kmb", "kn",
\r
308 "ko", "kok", "kos", "kpe", "kr", "krc", "kro", "kru", "ks",
\r
309 "ku", "kum", "kut", "kv", "kw", "ky", "la", "lad",
\r
310 "lah", "lam", "lb", "lez", "lg", "li", "ln", "lo", "lol",
\r
311 "loz", "lt", "lu", "lua", "lui", "lun", "luo", "lus",
\r
312 "lv", "mad", "mag", "mai", "mak", "man", "map", "mas",
\r
313 "mdf", "mdr", "men", "mg", "mga", "mh", "mi", "mic", "min",
\r
314 "mis", "mk", "mkh", "ml", "mn", "mnc", "mni", "mno",
\r
315 "mo", "moh", "mos", "mr", "ms", "mt", "mul", "mun",
\r
316 "mus", "mwr", "my", "myn", "myv", "na", "nah", "nai", "nap",
\r
317 "nb", "nd", "nds", "ne", "new", "ng", "nia", "nic",
\r
318 "niu", "nl", "nn", "no", "nog", "non", "nr", "nso", "nub",
\r
319 "nv", "nwc", "ny", "nym", "nyn", "nyo", "nzi", "oc", "oj",
\r
320 "om", "or", "os", "osa", "ota", "oto", "pa", "paa",
\r
321 "pag", "pal", "pam", "pap", "pau", "peo", "phi", "phn",
\r
322 "pi", "pl", "pon", "pra", "pro", "ps", "pt", "qu",
\r
323 "raj", "rap", "rar", "rm", "rn", "ro", "roa", "rom",
\r
324 "ru", "rup", "rw", "sa", "sad", "sah", "sai", "sal", "sam",
\r
325 "sas", "sat", "sc", "sco", "sd", "se", "sel", "sem",
\r
326 "sg", "sga", "sgn", "shn", "si", "sid", "sio", "sit",
\r
327 "sk", "sl", "sla", "sm", "sma", "smi", "smj", "smn",
\r
328 "sms", "sn", "snk", "so", "sog", "son", "sq", "sr",
\r
329 "srr", "ss", "ssa", "st", "su", "suk", "sus", "sux",
\r
330 "sv", "sw", "syr", "ta", "tai", "te", "tem", "ter",
\r
331 "tet", "tg", "th", "ti", "tig", "tiv", "tk", "tkl",
\r
332 "tl", "tlh", "tli", "tmh", "tn", "to", "tog", "tpi", "tr",
\r
333 "ts", "tsi", "tt", "tum", "tup", "tut", "tvl", "tw",
\r
334 "ty", "tyv", "udm", "ug", "uga", "uk", "umb", "und", "ur",
\r
335 "uz", "vai", "ve", "vi", "vo", "vot", "wa", "wak",
\r
336 "wal", "war", "was", "wen", "wo", "xal", "xh", "yao", "yap",
\r
337 "yi", "yo", "ypk", "za", "zap", "zen", "zh", "znd",
\r
341 String[] tempReplacementLanguages = {
\r
342 "id", "he", "yi", "jv", "sr", "nb",/* replacement language codes */
\r
345 String[] tempObsoleteLanguages = {
\r
346 "in", "iw", "ji", "jw", "sh", "no", /* obsolete language codes */
\r
349 /* This list MUST contain a three-letter code for every two-letter code in the
\r
350 list above, and they MUST be in the same order (i.e., the same language must
\r
351 be in the same place in both lists)! */
\r
352 String[] tempLanguages3 = {
\r
353 /*"aa", "ab", "ace", "ach", "ada", "ady", "ae", "af", "afa", */
\r
354 "aar", "abk", "ace", "ach", "ada", "ady", "ave", "afr", "afa",
\r
355 /*"afh", "ak", "akk", "ale", "alg", "am", "an", "ang", "apa", */
\r
356 "afh", "aka", "akk", "ale", "alg", "amh", "arg", "ang", "apa",
\r
357 /*"ar", "arc", "arn", "arp", "art", "arw", "as", "ast", */
\r
358 "ara", "arc", "arn", "arp", "art", "arw", "asm", "ast",
\r
359 /*"ath", "aus", "av", "awa", "ay", "az", "ba", "bad", */
\r
360 "ath", "aus", "ava", "awa", "aym", "aze", "bak", "bad",
\r
361 /*"bai", "bal", "ban", "bas", "bat", "be", "bej", */
\r
362 "bai", "bal", "ban", "bas", "bat", "bel", "bej",
\r
363 /*"bem", "ber", "bg", "bh", "bho", "bi", "bik", "bin", */
\r
364 "bem", "ber", "bul", "bih", "bho", "bis", "bik", "bin",
\r
365 /*"bla", "bm", "bn", "bnt", "bo", "br", "bra", "bs", */
\r
366 "bla", "bam", "ben", "bnt", "bod", "bre", "bra", "bos",
\r
367 /*"btk", "bua", "bug", "byn", "ca", "cad", "cai", "car", "cau", */
\r
368 "btk", "bua", "bug", "byn", "cat", "cad", "cai", "car", "cau",
\r
369 /*"ce", "ceb", "cel", "ch", "chb", "chg", "chk", "chm", */
\r
370 "che", "ceb", "cel", "cha", "chb", "chg", "chk", "chm",
\r
371 /*"chn", "cho", "chp", "chr", "chy", "cmc", "co", "cop", */
\r
372 "chn", "cho", "chp", "chr", "chy", "cmc", "cos", "cop",
\r
373 /*"cpe", "cpf", "cpp", "cr", "crh", "crp", "cs", "csb", "cu", "cus", */
\r
374 "cpe", "cpf", "cpp", "cre", "crh", "crp", "ces", "csb", "chu", "cus",
\r
375 /*"cv", "cy", "da", "dak", "dar", "day", "de", "del", "den", */
\r
376 "chv", "cym", "dan", "dak", "dar", "day", "deu", "del", "den",
\r
377 /*"dgr", "din", "doi", "dra", "dsb", "dua", "dum", "dv", "dyu", */
\r
378 "dgr", "din", "doi", "dra", "dsb", "dua", "dum", "div", "dyu",
\r
379 /*"dz", "ee", "efi", "egy", "eka", "el", "elx", "en", */
\r
380 "dzo", "ewe", "efi", "egy", "eka", "ell", "elx", "eng",
\r
381 /*"enm", "eo", "es", "et", "eu", "ewo", "fa", */
\r
382 "enm", "epo", "spa", "est", "eus", "ewo", "fas",
\r
383 /*"fan", "fat", "ff", "fi", "fiu", "fj", "fo", "fon", */
\r
384 "fan", "fat", "ful", "fin", "fiu", "fij", "fao", "fon",
\r
385 /*"fr", "frm", "fro", "fur", "fy", "ga", "gaa", "gay", */
\r
386 "fra", "frm", "fro", "fur", "fry", "gle", "gaa", "gay",
\r
387 /*"gba", "gd", "gem", "gez", "gil", "gl", "gmh", "gn", */
\r
388 "gba", "gla", "gem", "gez", "gil", "glg", "gmh", "grn",
\r
389 /*"goh", "gon", "gor", "got", "grb", "grc", "gu", "gv", */
\r
390 "goh", "gon", "gor", "got", "grb", "grc", "guj", "glv",
\r
391 /*"gwi", "ha", "hai", "haw", "he", "hi", "hil", "him", */
\r
392 "gwi", "hau", "hai", "haw", "heb", "hin", "hil", "him",
\r
393 /*"hit", "hmn", "ho", "hr", "hsb", "ht", "hu", "hup", "hy", "hz", */
\r
394 "hit", "hmn", "hmo", "hrv", "hsb", "hat", "hun", "hup", "hye", "her",
\r
395 /*"ia", "iba", "id", "ie", "ig", "ii", "ijo", "ik", */
\r
396 "ina", "iba", "ind", "ile", "ibo", "iii", "ijo", "ipk",
\r
397 /*"ilo", "inc", "ine", "inh", "io", "ira", "iro", "is", "it", */
\r
398 "ilo", "inc", "ine", "inh", "ido", "ira", "iro", "isl", "ita",
\r
399 /*"iu", "ja", "jbo", "jpr", "jrb", "jv", "ka", "kaa", "kab", */
\r
400 "iku", "jpn", "jbo", "jpr", "jrb", "jaw", "kat", "kaa", "kab",
\r
401 /*"kac", "kam", "kar", "kaw", "kbd", "kg", "kha", "khi", */
\r
402 "kac", "kam", "kar", "kaw", "kbd", "kon", "kha", "khi",
\r
403 /*"kho", "ki", "kj", "kk", "kl", "km", "kmb", "kn", */
\r
404 "kho", "kik", "kua", "kaz", "kal", "khm", "kmb", "kan",
\r
405 /*"ko", "kok", "kos", "kpe", "kr", "krc", "kro", "kru", "ks", */
\r
406 "kor", "kok", "kos", "kpe", "kau", "krc", "kro", "kru", "kas",
\r
407 /*"ku", "kum", "kut", "kv", "kw", "ky", "la", "lad", */
\r
408 "kur", "kum", "kut", "kom", "cor", "kir", "lat", "lad",
\r
409 /*"lah", "lam", "lb", "lez", "lg", "li", "ln", "lo", "lol", */
\r
410 "lah", "lam", "ltz", "lez", "lug", "lim", "lin", "lao", "lol",
\r
411 /*"loz", "lt", "lu", "lua", "lui", "lun", "luo", "lus", */
\r
412 "loz", "lit", "lub", "lua", "lui", "lun", "luo", "lus",
\r
413 /*"lv", "mad", "mag", "mai", "mak", "man", "map", "mas", */
\r
414 "lav", "mad", "mag", "mai", "mak", "man", "map", "mas",
\r
415 /*"mdf", "mdr", "men", "mg", "mga", "mh", "mi", "mic", "min", */
\r
416 "mdf", "mdr", "men", "mlg", "mga", "mah", "mri", "mic", "min",
\r
417 /*"mis", "mk", "mkh", "ml", "mn", "mnc", "mni", "mno", */
\r
418 "mis", "mkd", "mkh", "mal", "mon", "mnc", "mni", "mno",
\r
419 /*"mo", "moh", "mos", "mr", "ms", "mt", "mul", "mun", */
\r
420 "mol", "moh", "mos", "mar", "msa", "mlt", "mul", "mun",
\r
421 /*"mus", "mwr", "my", "myn", "myv", "na", "nah", "nai", "nap", */
\r
422 "mus", "mwr", "mya", "myn", "myv", "nau", "nah", "nai", "nap",
\r
423 /*"nb", "nd", "nds", "ne", "new", "ng", "nia", "nic", */
\r
424 "nob", "nde", "nds", "nep", "new", "ndo", "nia", "nic",
\r
425 /*"niu", "nl", "nn", "no", "nog", "non", "nr", "nso", "nub", */
\r
426 "niu", "nld", "nno", "nor", "nog", "non", "nbl", "nso", "nub",
\r
427 /*"nv", "nwc", "ny", "nym", "nyn", "nyo", "nzi", "oc", "oj", */
\r
428 "nav", "nwc", "nya", "nym", "nyn", "nyo", "nzi", "oci", "oji",
\r
429 /*"om", "or", "os", "osa", "ota", "oto", "pa", "paa", */
\r
430 "orm", "ori", "oss", "osa", "ota", "oto", "pan", "paa",
\r
431 /*"pag", "pal", "pam", "pap", "pau", "peo", "phi", "phn", */
\r
432 "pag", "pal", "pam", "pap", "pau", "peo", "phi", "phn",
\r
433 /*"pi", "pl", "pon", "pra", "pro", "ps", "pt", "qu", */
\r
434 "pli", "pol", "pon", "pra", "pro", "pus", "por", "que",
\r
435 /*"raj", "rap", "rar", "rm", "rn", "ro", "roa", "rom", */
\r
436 "raj", "rap", "rar", "roh", "run", "ron", "roa", "rom",
\r
437 /*"ru", "rup", "rw", "sa", "sad", "sah", "sai", "sal", "sam", */
\r
438 "rus", "rup", "kin", "san", "sad", "sah", "sai", "sal", "sam",
\r
439 /*"sas", "sat", "sc", "sco", "sd", "se", "sel", "sem", */
\r
440 "sas", "sat", "srd", "sco", "snd", "sme", "sel", "sem",
\r
441 /*"sg", "sga", "sgn", "shn", "si", "sid", "sio", "sit", */
\r
442 "sag", "sga", "sgn", "shn", "sin", "sid", "sio", "sit",
\r
443 /*"sk", "sl", "sla", "sm", "sma", "smi", "smj", "smn", */
\r
444 "slk", "slv", "sla", "smo", "sma", "smi", "smj", "smn",
\r
445 /*"sms", "sn", "snk", "so", "sog", "son", "sq", "sr", */
\r
446 "sms", "sna", "snk", "som", "sog", "son", "sqi", "srp",
\r
447 /*"srr", "ss", "ssa", "st", "su", "suk", "sus", "sux", */
\r
448 "srr", "ssw", "ssa", "sot", "sun", "suk", "sus", "sux",
\r
449 /*"sv", "sw", "syr", "ta", "tai", "te", "tem", "ter", */
\r
450 "swe", "swa", "syr", "tam", "tai", "tel", "tem", "ter",
\r
451 /*"tet", "tg", "th", "ti", "tig", "tiv", "tk", "tkl", */
\r
452 "tet", "tgk", "tha", "tir", "tig", "tiv", "tuk", "tkl",
\r
453 /*"tl", "tlh", "tli", "tmh", "tn", "to", "tog", "tpi", "tr", */
\r
454 "tgl", "tlh", "tli", "tmh", "tsn", "ton", "tog", "tpi", "tur",
\r
455 /*"ts", "tsi", "tt", "tum", "tup", "tut", "tvl", "tw", */
\r
456 "tso", "tsi", "tat", "tum", "tup", "tut", "tvl", "twi",
\r
457 /*"ty", "tyv", "udm", "ug", "uga", "uk", "umb", "und", "ur", */
\r
458 "tah", "tyv", "udm", "uig", "uga", "ukr", "umb", "und", "urd",
\r
459 /*"uz", "vai", "ve", "vi", "vo", "vot", "wa", "wak", */
\r
460 "uzb", "vai", "ven", "vie", "vol", "vot", "wln", "wak",
\r
461 /*"wal", "war", "was", "wen", "wo", "xal", "xh", "yao", "yap", */
\r
462 "wal", "war", "was", "wen", "wol", "xal", "xho", "yao", "yap",
\r
463 /*"yi", "yo", "ypk", "za", "zap", "zen", "zh", "znd", */
\r
464 "yid", "yor", "ypk", "zha", "zap", "zen", "zho", "znd",
\r
469 String[] tempObsoleteLanguages3 = {
\r
470 /* "in", "iw", "ji", "jw", "sh", */
\r
471 "ind", "heb", "yid", "jaw", "srp",
\r
474 synchronized (ULocale.class) {
\r
475 if (_languages == null) {
\r
476 _languages = tempLanguages;
\r
477 _replacementLanguages = tempReplacementLanguages;
\r
478 _obsoleteLanguages = tempObsoleteLanguages;
\r
479 _languages3 = tempLanguages3;
\r
480 _obsoleteLanguages3 = tempObsoleteLanguages3;
\r
486 private static String[] _countries;
\r
487 private static String[] _deprecatedCountries;
\r
488 private static String[] _replacementCountries;
\r
489 private static String[] _obsoleteCountries;
\r
490 private static String[] _countries3;
\r
491 private static String[] _obsoleteCountries3;
\r
493 // Avoid initializing country tables unless we have to.
\r
494 private static void initCountryTables() {
\r
495 if (_countries == null) {
\r
496 /* ZR(ZAR) is now CD(COD) and FX(FXX) is PS(PSE) as per
\r
497 http://www.evertype.com/standards/iso3166/iso3166-1-en.html
\r
498 added new codes keeping the old ones for compatibility
\r
499 updated to include 1999/12/03 revisions *CWB*/
\r
501 /* RO(ROM) is now RO(ROU) according to
\r
502 http://www.iso.org/iso/en/prods-services/iso3166ma/03updates-on-iso-3166/nlv3e-rou.html
\r
505 /* This list MUST be in sorted order, and MUST contain only two-letter codes! */
\r
506 String[] tempCountries = {
\r
507 "AD", "AE", "AF", "AG", "AI", "AL", "AM", "AN",
\r
508 "AO", "AQ", "AR", "AS", "AT", "AU", "AW", "AX", "AZ",
\r
509 "BA", "BB", "BD", "BE", "BF", "BG", "BH", "BI",
\r
510 "BJ", "BL", "BM", "BN", "BO", "BR", "BS", "BT", "BV",
\r
511 "BW", "BY", "BZ", "CA", "CC", "CD", "CF", "CG",
\r
512 "CH", "CI", "CK", "CL", "CM", "CN", "CO", "CR",
\r
513 "CU", "CV", "CX", "CY", "CZ", "DE", "DJ", "DK",
\r
514 "DM", "DO", "DZ", "EC", "EE", "EG", "EH", "ER",
\r
515 "ES", "ET", "FI", "FJ", "FK", "FM", "FO", "FR",
\r
516 "GA", "GB", "GD", "GE", "GF", "GG", "GH", "GI", "GL",
\r
517 "GM", "GN", "GP", "GQ", "GR", "GS", "GT", "GU",
\r
518 "GW", "GY", "HK", "HM", "HN", "HR", "HT", "HU",
\r
519 "ID", "IE", "IL", "IM", "IN", "IO", "IQ", "IR", "IS",
\r
520 "IT", "JE", "JM", "JO", "JP", "KE", "KG", "KH", "KI",
\r
521 "KM", "KN", "KP", "KR", "KW", "KY", "KZ", "LA",
\r
522 "LB", "LC", "LI", "LK", "LR", "LS", "LT", "LU",
\r
523 "LV", "LY", "MA", "MC", "MD", "ME", "MF", "MG", "MH", "MK",
\r
524 "ML", "MM", "MN", "MO", "MP", "MQ", "MR", "MS",
\r
525 "MT", "MU", "MV", "MW", "MX", "MY", "MZ", "NA",
\r
526 "NC", "NE", "NF", "NG", "NI", "NL", "NO", "NP",
\r
527 "NR", "NU", "NZ", "OM", "PA", "PE", "PF", "PG",
\r
528 "PH", "PK", "PL", "PM", "PN", "PR", "PS", "PT",
\r
529 "PW", "PY", "QA", "RE", "RO", "RS", "RU", "RW", "SA",
\r
530 "SB", "SC", "SD", "SE", "SG", "SH", "SI", "SJ",
\r
531 "SK", "SL", "SM", "SN", "SO", "SR", "ST", "SV",
\r
532 "SY", "SZ", "TC", "TD", "TF", "TG", "TH", "TJ",
\r
533 "TK", "TL", "TM", "TN", "TO", "TR", "TT", "TV",
\r
534 "TW", "TZ", "UA", "UG", "UM", "US", "UY", "UZ",
\r
535 "VA", "VC", "VE", "VG", "VI", "VN", "VU", "WF",
\r
536 "WS", "YE", "YT", "ZA", "ZM", "ZW",
\r
539 /* this table is used for 3 letter codes */
\r
540 String[] tempObsoleteCountries = {
\r
541 "FX", "CS", "RO", "TP", "YU", "ZR", /* obsolete country codes */
\r
544 String[] tempDeprecatedCountries = {
\r
545 "BU", "CS", "DY", "FX", "HV", "NH", "RH", "TP", "YU", "ZR" /* deprecated country list */
\r
547 String[] tempReplacementCountries = {
\r
548 /* "BU", "CS", "DY", "FX", "HV", "NH", "RH", "TP", "YU", "ZR" */
\r
549 "MM", "RS", "BJ", "FR", "BF", "VU", "ZW", "TL", "RS", "CD", /* replacement country codes */
\r
552 /* This list MUST contain a three-letter code for every two-letter code in
\r
553 the above list, and they MUST be listed in the same order! */
\r
554 String[] tempCountries3 = {
\r
555 /* "AD", "AE", "AF", "AG", "AI", "AL", "AM", "AN", */
\r
556 "AND", "ARE", "AFG", "ATG", "AIA", "ALB", "ARM", "ANT",
\r
557 /* "AO", "AQ", "AR", "AS", "AT", "AU", "AW", "AX", "AZ", */
\r
558 "AGO", "ATA", "ARG", "ASM", "AUT", "AUS", "ABW", "ALA", "AZE",
\r
559 /* "BA", "BB", "BD", "BE", "BF", "BG", "BH", "BI", */
\r
560 "BIH", "BRB", "BGD", "BEL", "BFA", "BGR", "BHR", "BDI",
\r
561 /* "BJ", "BL", "BM", "BN", "BO", "BR", "BS", "BT", "BV", */
\r
562 "BEN", "BLM", "BMU", "BRN", "BOL", "BRA", "BHS", "BTN", "BVT",
\r
563 /* "BW", "BY", "BZ", "CA", "CC", "CD", "CF", "CG", */
\r
564 "BWA", "BLR", "BLZ", "CAN", "CCK", "COD", "CAF", "COG",
\r
565 /* "CH", "CI", "CK", "CL", "CM", "CN", "CO", "CR", */
\r
566 "CHE", "CIV", "COK", "CHL", "CMR", "CHN", "COL", "CRI",
\r
567 /* "CU", "CV", "CX", "CY", "CZ", "DE", "DJ", "DK", */
\r
568 "CUB", "CPV", "CXR", "CYP", "CZE", "DEU", "DJI", "DNK",
\r
569 /* "DM", "DO", "DZ", "EC", "EE", "EG", "EH", "ER", */
\r
570 "DMA", "DOM", "DZA", "ECU", "EST", "EGY", "ESH", "ERI",
\r
571 /* "ES", "ET", "FI", "FJ", "FK", "FM", "FO", "FR", */
\r
572 "ESP", "ETH", "FIN", "FJI", "FLK", "FSM", "FRO", "FRA",
\r
573 /* "GA", "GB", "GD", "GE", "GF", "GG", "GH", "GI", "GL", */
\r
574 "GAB", "GBR", "GRD", "GEO", "GUF", "GGY", "GHA", "GIB", "GRL",
\r
575 /* "GM", "GN", "GP", "GQ", "GR", "GS", "GT", "GU", */
\r
576 "GMB", "GIN", "GLP", "GNQ", "GRC", "SGS", "GTM", "GUM",
\r
577 /* "GW", "GY", "HK", "HM", "HN", "HR", "HT", "HU", */
\r
578 "GNB", "GUY", "HKG", "HMD", "HND", "HRV", "HTI", "HUN",
\r
579 /* "ID", "IE", "IL", "IM", "IN", "IO", "IQ", "IR", "IS" */
\r
580 "IDN", "IRL", "ISR", "IMN", "IND", "IOT", "IRQ", "IRN", "ISL",
\r
581 /* "IT", "JE", "JM", "JO", "JP", "KE", "KG", "KH", "KI", */
\r
582 "ITA", "JEY", "JAM", "JOR", "JPN", "KEN", "KGZ", "KHM", "KIR",
\r
583 /* "KM", "KN", "KP", "KR", "KW", "KY", "KZ", "LA", */
\r
584 "COM", "KNA", "PRK", "KOR", "KWT", "CYM", "KAZ", "LAO",
\r
585 /* "LB", "LC", "LI", "LK", "LR", "LS", "LT", "LU", */
\r
586 "LBN", "LCA", "LIE", "LKA", "LBR", "LSO", "LTU", "LUX",
\r
587 /* "LV", "LY", "MA", "MC", "MD", "ME", "MF", "MG", "MH", "MK", */
\r
588 "LVA", "LBY", "MAR", "MCO", "MDA", "MNE", "MAF", "MDG", "MHL", "MKD",
\r
589 /* "ML", "MM", "MN", "MO", "MP", "MQ", "MR", "MS", */
\r
590 "MLI", "MMR", "MNG", "MAC", "MNP", "MTQ", "MRT", "MSR",
\r
591 /* "MT", "MU", "MV", "MW", "MX", "MY", "MZ", "NA", */
\r
592 "MLT", "MUS", "MDV", "MWI", "MEX", "MYS", "MOZ", "NAM",
\r
593 /* "NC", "NE", "NF", "NG", "NI", "NL", "NO", "NP", */
\r
594 "NCL", "NER", "NFK", "NGA", "NIC", "NLD", "NOR", "NPL",
\r
595 /* "NR", "NU", "NZ", "OM", "PA", "PE", "PF", "PG", */
\r
596 "NRU", "NIU", "NZL", "OMN", "PAN", "PER", "PYF", "PNG",
\r
597 /* "PH", "PK", "PL", "PM", "PN", "PR", "PS", "PT", */
\r
598 "PHL", "PAK", "POL", "SPM", "PCN", "PRI", "PSE", "PRT",
\r
599 /* "PW", "PY", "QA", "RE", "RO", "RS", "RU", "RW", "SA", */
\r
600 "PLW", "PRY", "QAT", "REU", "ROU", "SRB", "RUS", "RWA", "SAU",
\r
601 /* "SB", "SC", "SD", "SE", "SG", "SH", "SI", "SJ", */
\r
602 "SLB", "SYC", "SDN", "SWE", "SGP", "SHN", "SVN", "SJM",
\r
603 /* "SK", "SL", "SM", "SN", "SO", "SR", "ST", "SV", */
\r
604 "SVK", "SLE", "SMR", "SEN", "SOM", "SUR", "STP", "SLV",
\r
605 /* "SY", "SZ", "TC", "TD", "TF", "TG", "TH", "TJ", */
\r
606 "SYR", "SWZ", "TCA", "TCD", "ATF", "TGO", "THA", "TJK",
\r
607 /* "TK", "TL", "TM", "TN", "TO", "TR", "TT", "TV", */
\r
608 "TKL", "TLS", "TKM", "TUN", "TON", "TUR", "TTO", "TUV",
\r
609 /* "TW", "TZ", "UA", "UG", "UM", "US", "UY", "UZ", */
\r
610 "TWN", "TZA", "UKR", "UGA", "UMI", "USA", "URY", "UZB",
\r
611 /* "VA", "VC", "VE", "VG", "VI", "VN", "VU", "WF", */
\r
612 "VAT", "VCT", "VEN", "VGB", "VIR", "VNM", "VUT", "WLF",
\r
613 /* "WS", "YE", "YT", "ZA", "ZM", "ZW" */
\r
614 "WSM", "YEM", "MYT", "ZAF", "ZMB", "ZWE",
\r
617 String[] tempObsoleteCountries3 = {
\r
618 /*"FX", "CS", "RO", "TP", "YU", "ZR", */
\r
619 "FXX", "SCG", "ROM", "TMP", "YUG", "ZAR",
\r
622 synchronized (ULocale.class) {
\r
623 if (_countries == null) {
\r
624 _countries = tempCountries;
\r
625 _deprecatedCountries = tempDeprecatedCountries;
\r
626 _replacementCountries = tempReplacementCountries;
\r
627 _obsoleteCountries = tempObsoleteCountries;
\r
628 _countries3 = tempCountries3;
\r
629 _obsoleteCountries3 = tempObsoleteCountries3;
\r
635 private static String[][] CANONICALIZE_MAP;
\r
636 private static String[][] variantsToKeywords;
\r
638 private static void initCANONICALIZE_MAP() {
\r
639 if (CANONICALIZE_MAP == null) {
\r
641 * This table lists pairs of locale ids for canonicalization.
\r
642 * The 1st item is the normalized id. The 2nd item is the
\r
643 * canonicalized id. The 3rd is the keyword. The 4th is the keyword value.
\r
645 String[][] tempCANONICALIZE_MAP = {
\r
646 // { EMPTY_STRING, "en_US_POSIX", null, null }, /* .NET name */
\r
647 { "C", "en_US_POSIX", null, null }, /* POSIX name */
\r
648 { "art_LOJBAN", "jbo", null, null }, /* registered name */
\r
649 { "az_AZ_CYRL", "az_Cyrl_AZ", null, null }, /* .NET name */
\r
650 { "az_AZ_LATN", "az_Latn_AZ", null, null }, /* .NET name */
\r
651 { "ca_ES_PREEURO", "ca_ES", "currency", "ESP" },
\r
652 { "cel_GAULISH", "cel__GAULISH", null, null }, /* registered name */
\r
653 { "de_1901", "de__1901", null, null }, /* registered name */
\r
654 { "de_1906", "de__1906", null, null }, /* registered name */
\r
655 { "de__PHONEBOOK", "de", "collation", "phonebook" }, /* Old ICU name */
\r
656 { "de_AT_PREEURO", "de_AT", "currency", "ATS" },
\r
657 { "de_DE_PREEURO", "de_DE", "currency", "DEM" },
\r
658 { "de_LU_PREEURO", "de_LU", "currency", "EUR" },
\r
659 { "el_GR_PREEURO", "el_GR", "currency", "GRD" },
\r
660 { "en_BOONT", "en__BOONT", null, null }, /* registered name */
\r
661 { "en_SCOUSE", "en__SCOUSE", null, null }, /* registered name */
\r
662 { "en_BE_PREEURO", "en_BE", "currency", "BEF" },
\r
663 { "en_IE_PREEURO", "en_IE", "currency", "IEP" },
\r
664 { "es__TRADITIONAL", "es", "collation", "traditional" }, /* Old ICU name */
\r
665 { "es_ES_PREEURO", "es_ES", "currency", "ESP" },
\r
666 { "eu_ES_PREEURO", "eu_ES", "currency", "ESP" },
\r
667 { "fi_FI_PREEURO", "fi_FI", "currency", "FIM" },
\r
668 { "fr_BE_PREEURO", "fr_BE", "currency", "BEF" },
\r
669 { "fr_FR_PREEURO", "fr_FR", "currency", "FRF" },
\r
670 { "fr_LU_PREEURO", "fr_LU", "currency", "LUF" },
\r
671 { "ga_IE_PREEURO", "ga_IE", "currency", "IEP" },
\r
672 { "gl_ES_PREEURO", "gl_ES", "currency", "ESP" },
\r
673 { "hi__DIRECT", "hi", "collation", "direct" }, /* Old ICU name */
\r
674 { "it_IT_PREEURO", "it_IT", "currency", "ITL" },
\r
675 { "ja_JP_TRADITIONAL", "ja_JP", "calendar", "japanese" },
\r
676 // { "nb_NO_NY", "nn_NO", null, null },
\r
677 { "nl_BE_PREEURO", "nl_BE", "currency", "BEF" },
\r
678 { "nl_NL_PREEURO", "nl_NL", "currency", "NLG" },
\r
679 { "pt_PT_PREEURO", "pt_PT", "currency", "PTE" },
\r
680 { "sl_ROZAJ", "sl__ROZAJ", null, null }, /* registered name */
\r
681 { "sr_SP_CYRL", "sr_Cyrl_RS", null, null }, /* .NET name */
\r
682 { "sr_SP_LATN", "sr_Latn_RS", null, null }, /* .NET name */
\r
683 { "sr_YU_CYRILLIC", "sr_Cyrl_RS", null, null }, /* Linux name */
\r
684 { "th_TH_TRADITIONAL", "th_TH", "calendar", "buddhist" }, /* Old ICU name */
\r
685 { "uz_UZ_CYRILLIC", "uz_Cyrl_UZ", null, null }, /* Linux name */
\r
686 { "uz_UZ_CYRL", "uz_Cyrl_UZ", null, null }, /* .NET name */
\r
687 { "uz_UZ_LATN", "uz_Latn_UZ", null, null }, /* .NET name */
\r
688 { "zh_CHS", "zh_Hans", null, null }, /* .NET name */
\r
689 { "zh_CHT", "zh_Hant", null, null }, /* .NET name */
\r
690 { "zh_GAN", "zh__GAN", null, null }, /* registered name */
\r
691 { "zh_GUOYU", "zh", null, null }, /* registered name */
\r
692 { "zh_HAKKA", "zh__HAKKA", null, null }, /* registered name */
\r
693 { "zh_MIN", "zh__MIN", null, null }, /* registered name */
\r
694 { "zh_MIN_NAN", "zh__MINNAN", null, null }, /* registered name */
\r
695 { "zh_WUU", "zh__WUU", null, null }, /* registered name */
\r
696 { "zh_XIANG", "zh__XIANG", null, null }, /* registered name */
\r
697 { "zh_YUE", "zh__YUE", null, null } /* registered name */
\r
700 synchronized (ULocale.class) {
\r
701 if (CANONICALIZE_MAP == null) {
\r
702 CANONICALIZE_MAP = tempCANONICALIZE_MAP;
\r
706 if (variantsToKeywords == null) {
\r
708 * This table lists variants that canonicalization replaces with a keyword/value pair.
\r
709 * The first item is the normalized variant id; the second and third are the keyword and its value.
\r
711 String[][] tempVariantsToKeywords = {
\r
712 { "EURO", "currency", "EUR" },
\r
713 { "PINYIN", "collation", "pinyin" }, /* Solaris variant */
\r
714 { "STROKE", "collation", "stroke" } /* Solaris variant */
\r
717 synchronized (ULocale.class) {
\r
718 if (variantsToKeywords == null) {
\r
719 variantsToKeywords = tempVariantsToKeywords;
\r
726 * This table is used for mapping between ICU and special Java
\r
727 * locales. When an ICU locale matches <minimum base> with
\r
728 * <keyword>/<value>, the ICU locale is mapped to <Java> locale.
\r
729 * For example, both ja_JP@calendar=japanese and ja@calendar=japanese
\r
730 * are mapped to Java locale "ja_JP_JP". ICU locale "nn" is mapped
\r
731 * to Java locale "no_NO_NY".
\r
733 private static final String[][] _javaLocaleMap = {
\r
734 // { <Java>, <ICU base>, <keyword>, <value>, <minimum base>
\r
735 { "ja_JP_JP", "ja_JP", "calendar", "japanese", "ja"},
\r
736 { "no_NO_NY", "nn_NO", null, null, "nn"},
\r
737 { "th_TH_TH", "th_TH", "numbers", "thai", "th"},
\r
/**
 * Private constructor used by static initializers.
 * Both arguments are stored exactly as given; the ID is not parsed or
 * normalized here.
 *
 * @param localeID the locale ID string to store
 * @param locale the {@link java.util.Locale} to store alongside the ID
 */
private ULocale(String localeID, Locale locale) {
    this.locale = locale;
    this.localeID = localeID;
}
\r
749 * Construct a ULocale object from a {@link java.util.Locale}.
\r
750 * @param loc a JDK locale
\r
754 private ULocale(Locale loc) {
\r
755 this.localeID = getName(forLocale(loc).toString());
\r
760 * Return a ULocale object for a {@link java.util.Locale}.
\r
761 * The ULocale is canonicalized.
\r
762 * @param loc a JDK locale
\r
765 public static ULocale forLocale(Locale loc) {
\r
769 ULocale result = (ULocale)CACHE.get(loc);
\r
770 if (result == null) {
\r
771 if (defaultULocale != null && loc == defaultULocale.locale) {
\r
772 result = defaultULocale;
\r
774 String locStr = loc.toString();
\r
775 if (locStr.length() == 0) {
\r
778 for (int i = 0; i < _javaLocaleMap.length; i++) {
\r
779 if (_javaLocaleMap[i][0].equals(locStr)) {
\r
780 IDParser p = new IDParser(_javaLocaleMap[i][1]);
\r
781 p.setKeywordValue(_javaLocaleMap[i][2], _javaLocaleMap[i][3]);
\r
782 locStr = p.getName();
\r
786 result = new ULocale(locStr, loc);
\r
789 CACHE.put(loc, result);
\r
795 * Construct a ULocale from a RFC 3066 locale ID. The locale ID consists
\r
796 * of optional language, script, country, and variant fields in that order,
\r
797 * separated by underscores, followed by an optional keyword list. The
\r
798 * script, if present, is four characters long-- this distinguishes it
\r
799 * from a country code, which is two characters long. Other fields
\r
800 * are distinguished by position as indicated by the underscores. The
\r
801 * start of the keyword list is indicated by '@', and consists of two
\r
802 * or more keyword/value pairs separated by semicolons(';').
\r
804 * This constructor does not canonicalize the localeID. So, for
\r
805 * example, "zh__pinyin" remains unchanged instead of converting
\r
806 * to "zh@collation=pinyin". By default ICU only recognizes the
\r
807 * latter as specifying pinyin collation. Use {@link #createCanonical}
\r
808 * or {@link #canonicalize} if you need to canonicalize the localeID.
\r
810 * @param localeID string representation of the locale, e.g:
\r
811 * "en_US", "sy_Cyrl_YU", "zh__pinyin", "es_ES@currency=EUR;collation=traditional"
\r
814 public ULocale(String localeID) {
\r
815 this.localeID = getName(localeID);
\r
819 * Convenience overload of ULocale(String, String, String) for
\r
820 * compatibility with java.util.Locale.
\r
821 * @see #ULocale(String, String, String)
\r
824 public ULocale(String a, String b) {
\r
829 * Construct a ULocale from a localeID constructed from the three 'fields' a, b, and c. These
\r
830 * fields are concatenated using underscores to form a localeID of
\r
831 * the form a_b_c, which is then handled like the localeID passed
\r
832 * to <code>ULocale(String localeID)</code>.
\r
834 * <p>Java locale strings consisting of language, country, and
\r
835 * variant will be handled by this form, since the country code
\r
836 * (being shorter than four letters long) will not be interpreted
\r
837 * as a script code. If a script code is present, the final
\r
838 * argument ('c') will be interpreted as the country code. It is
\r
839 * recommended that this constructor only be used to ease porting,
\r
840 * and that clients instead use the single-argument constructor
\r
841 * when constructing a ULocale from a localeID.
\r
842 * @param a first component of the locale id
\r
843 * @param b second component of the locale id
\r
844 * @param c third component of the locale id
\r
845 * @see #ULocale(String)
\r
848 public ULocale(String a, String b, String c) {
\r
849 localeID = getName(lscvToID(a, b, c, EMPTY_STRING));
\r
853 * Create a ULocale from the id by first canonicalizing the id.
\r
854 * @param nonCanonicalID the locale id to canonicalize
\r
855 * @return the locale created from the canonical version of the ID.
\r
858 public static ULocale createCanonical(String nonCanonicalID) {
\r
859 return new ULocale(canonicalize(nonCanonicalID), (Locale)null);
\r
862 private static String lscvToID(String lang, String script, String country, String variant) {
\r
863 StringBuffer buf = new StringBuffer();
\r
865 if (lang != null && lang.length() > 0) {
\r
868 if (script != null && script.length() > 0) {
\r
869 buf.append(UNDERSCORE);
\r
870 buf.append(script);
\r
872 if (country != null && country.length() > 0) {
\r
873 buf.append(UNDERSCORE);
\r
874 buf.append(country);
\r
876 if (variant != null && variant.length() > 0) {
\r
877 if (country == null || country.length() == 0) {
\r
878 buf.append(UNDERSCORE);
\r
880 buf.append(UNDERSCORE);
\r
881 buf.append(variant);
\r
883 return buf.toString();
\r
887 * Convert this ULocale object to a {@link java.util.Locale}.
\r
888 * @return a JDK locale that either exactly represents this object
\r
889 * or is the closest approximation.
\r
892 public Locale toLocale() {
\r
893 if (locale == null) {
\r
894 IDParser p = new IDParser(localeID);
\r
895 String base = p.getBaseName();
\r
896 for (int i = 0; i < _javaLocaleMap.length; i++) {
\r
897 if (base.equals(_javaLocaleMap[i][1]) || base.equals(_javaLocaleMap[i][4])) {
\r
898 if (_javaLocaleMap[i][2] != null) {
\r
899 String val = p.getKeywordValue(_javaLocaleMap[i][2]);
\r
900 if (val != null && val.equals(_javaLocaleMap[i][3])) {
\r
901 p = new IDParser(_javaLocaleMap[i][0]);
\r
905 p = new IDParser(_javaLocaleMap[i][0]);
\r
910 String[] names = p.getLanguageScriptCountryVariant();
\r
911 locale = new Locale(names[0], names[2], names[3]);
\r
916 private static ICUCache nameCache = new SimpleCache();
\r
918 * Keep our own default ULocale.
\r
920 private static Locale defaultLocale = Locale.getDefault();
\r
921 private static ULocale defaultULocale = new ULocale(defaultLocale);
\r
924 * Returns the current default ULocale.
\r
927 public static ULocale getDefault() {
\r
928 synchronized (ULocale.class) {
\r
929 Locale currentDefault = Locale.getDefault();
\r
930 if (!defaultLocale.equals(currentDefault)) {
\r
931 defaultLocale = currentDefault;
\r
932 defaultULocale = new ULocale(defaultLocale);
\r
934 return defaultULocale;
\r
939 * Sets the default ULocale. This also sets the default Locale.
\r
940 * If the caller does not have write permission to the
\r
941 * user.language property, a security exception will be thrown,
\r
942 * and the default ULocale will remain unchanged.
\r
943 * @param newLocale the new default locale
\r
944 * @throws SecurityException
\r
945 * if a security manager exists and its
\r
946 * <code>checkPermission</code> method doesn't allow the operation.
\r
947 * @throws NullPointerException if <code>newLocale</code> is null
\r
948 * @see SecurityManager#checkPermission(java.security.Permission)
\r
949 * @see java.util.PropertyPermission
\r
952 public static synchronized void setDefault(ULocale newLocale){
\r
953 Locale.setDefault(newLocale.toLocale());
\r
954 defaultULocale = newLocale;
\r
958 * This is for compatibility with Locale-- in actuality, since ULocale is
\r
959 * immutable, there is no reason to clone it, so this API returns 'this'.
\r
962 public Object clone() {
\r
967 * Returns the hashCode.
\r
970 public int hashCode() {
\r
971 return localeID.hashCode();
\r
975 * Returns true if the other object is another ULocale with the
\r
976 * same full name, or is a String localeID that matches the full name.
\r
977 * Note that since names are not canonicalized, two ULocales that
\r
978 * function identically might not compare equal.
\r
980 * @return true if this Locale is equal to the specified object.
\r
983 public boolean equals(Object obj) {
\r
987 if (obj instanceof String) {
\r
988 return localeID.equals((String)obj);
\r
990 if (obj instanceof ULocale) {
\r
991 return localeID.equals(((ULocale)obj).localeID);
\r
997 * Returns a list of all installed locales.
\r
1000 public static ULocale[] getAvailableLocales() {
\r
1001 return ICUResourceBundle.getAvailableULocales();
\r
1004 private static VersionInfo gCLDRVersion = null;
\r
1007 * Returns the current CLDR version
\r
1009 * @provisional This API might change or be removed in a future release.
\r
1011 public static VersionInfo getCLDRVersion() {
\r
1012 // fetching this data should be idempotent.
\r
1013 if(gCLDRVersion == null) {
\r
1014 // from ZoneMeta.java
\r
1015 UResourceBundle supplementalDataBundle = UResourceBundle.getBundleInstance(ICUResourceBundle.ICU_BASE_NAME, "supplementalData", ICUResourceBundle.ICU_DATA_CLASS_LOADER);
\r
1016 UResourceBundle cldrVersionBundle = supplementalDataBundle.get("cldrVersion");
\r
1017 gCLDRVersion = VersionInfo.getInstance(cldrVersionBundle.getString());
\r
1019 return gCLDRVersion;
\r
1023 * Returns a list of all 2-letter country codes defined in ISO 3166.
\r
1024 * Can be used to create Locales.
\r
1027 public static String[] getISOCountries() {
\r
1028 initCountryTables();
\r
1029 return (String[])_countries.clone();
\r
1033 * Returns a list of all 2-letter language codes defined in ISO 639.
\r
1034 * Can be used to create Locales.
\r
1035 * [NOTE: ISO 639 is not a stable standard-- some languages' codes have changed.
\r
1036 * The list this function returns includes both the new and the old codes for the
\r
1037 * languages whose codes have changed.]
\r
1040 public static String[] getISOLanguages() {
\r
1041 initLanguageTables();
\r
1042 return (String[])_languages.clone();
\r
1046 * Returns the language code for this locale, which will either be the empty string
\r
1047 * or a lowercase ISO 639 code.
\r
1048 * @see #getDisplayLanguage()
\r
1049 * @see #getDisplayLanguage(ULocale)
\r
1052 public String getLanguage() {
\r
1053 return getLanguage(localeID);
\r
1057 * Returns the language code for the locale ID,
\r
1058 * which will either be the empty string
\r
1059 * or a lowercase ISO 639 code.
\r
1060 * @see #getDisplayLanguage()
\r
1061 * @see #getDisplayLanguage(ULocale)
\r
1064 public static String getLanguage(String localeID) {
\r
1065 return new IDParser(localeID).getLanguage();
\r
1069 * Returns the script code for this locale, which might be the empty string.
\r
1070 * @see #getDisplayScript()
\r
1071 * @see #getDisplayScript(ULocale)
\r
1074 public String getScript() {
\r
1075 return getScript(localeID);
\r
1079 * Returns the script code for the specified locale, which might be the empty string.
\r
1080 * @see #getDisplayScript()
\r
1081 * @see #getDisplayScript(ULocale)
\r
1084 public static String getScript(String localeID) {
\r
1085 return new IDParser(localeID).getScript();
\r
1089 * Returns the country/region code for this locale, which will either be the empty string
\r
1090 * or an uppercase ISO 3166 2-letter code.
\r
1091 * @see #getDisplayCountry()
\r
1092 * @see #getDisplayCountry(ULocale)
\r
1095 public String getCountry() {
\r
1096 return getCountry(localeID);
\r
1100 * Returns the country/region code for this locale, which will either be the empty string
\r
1101 * or an uppercase ISO 3166 2-letter code.
\r
1103 * @see #getDisplayCountry()
\r
1104 * @see #getDisplayCountry(ULocale)
\r
1107 public static String getCountry(String localeID) {
\r
1108 return new IDParser(localeID).getCountry();
\r
1112 * Returns the variant code for this locale, which might be the empty string.
\r
1113 * @see #getDisplayVariant()
\r
1114 * @see #getDisplayVariant(ULocale)
\r
1117 public String getVariant() {
\r
1118 return getVariant(localeID);
\r
1122 * Returns the variant code for the specified locale, which might be the empty string.
\r
1123 * @see #getDisplayVariant()
\r
1124 * @see #getDisplayVariant(ULocale)
\r
1127 public static String getVariant(String localeID) {
\r
1128 return new IDParser(localeID).getVariant();
\r
1132 * Returns the fallback locale for the specified locale, which might be the empty string.
\r
1135 public static String getFallback(String localeID) {
\r
1136 return getFallbackString(getName(localeID));
\r
1140 * Returns the fallback locale for this locale. If this locale is root, returns null.
\r
1143 public ULocale getFallback() {
\r
1144 if (localeID.length() == 0 || localeID.charAt(0) == '@') {
\r
1147 return new ULocale(getFallbackString(localeID), (Locale)null);
\r
1151 * Return the given (canonical) locale id minus the last part before the tags.
\r
1153 private static String getFallbackString(String fallback) {
\r
1154 int limit = fallback.indexOf('@');
\r
1155 if (limit == -1) {
\r
1156 limit = fallback.length();
\r
1158 int start = fallback.lastIndexOf('_', limit);
\r
1159 if (start == -1) {
\r
1162 return fallback.substring(0, start) + fallback.substring(limit);
\r
1166 * Returns the (normalized) base name for this locale.
\r
1167 * @return the base name as a String.
\r
1170 public String getBaseName() {
\r
1171 return getBaseName(localeID);
\r
1175 * Returns the (normalized) base name for the specified locale.
\r
1176 * @param localeID the locale ID as a string
\r
1177 * @return the base name as a String.
\r
1180 public static String getBaseName(String localeID){
\r
1181 if (localeID.indexOf('@') == -1) {
\r
1184 return new IDParser(localeID).getBaseName();
\r
1188 * Returns the (normalized) full name for this locale.
\r
1190 * @return String the full name of the localeID
\r
1193 public String getName() {
\r
1194 return localeID; // always normalized
\r
1198 * Returns the (normalized) full name for the specified locale.
\r
1200 * @param localeID the localeID as a string
\r
1201 * @return String the full name of the localeID
\r
1204 public static String getName(String localeID){
\r
1205 String name = (String)nameCache.get(localeID);
\r
1206 if (name == null) {
\r
1207 name = new IDParser(localeID).getName();
\r
1208 nameCache.put(localeID, name);
\r
1214 * Returns a string representation of this object.
\r
1217 public String toString() {
\r
1222 * Returns an iterator over keywords for this locale. If there
\r
1223 * are no keywords, returns null.
\r
1224 * @return iterator over keywords, or null if there are no keywords.
\r
1227 public Iterator getKeywords() {
\r
1228 return getKeywords(localeID);
\r
1232 * Returns an iterator over keywords for the specified locale. If there
\r
1233 * are no keywords, returns null.
\r
1234 * @return an iterator over the keywords in the specified locale, or null
\r
1235 * if there are no keywords.
\r
1238 public static Iterator getKeywords(String localeID){
\r
1239 return new IDParser(localeID).getKeywords();
\r
1243 * Returns the value for a keyword in this locale. If the keyword is not defined, returns null.
\r
1244 * @param keywordName name of the keyword whose value is desired. Case insensitive.
\r
1245 * @return the value of the keyword, or null.
\r
1248 public String getKeywordValue(String keywordName){
\r
1249 return getKeywordValue(localeID, keywordName);
\r
1253 * Returns the value for a keyword in the specified locale. If the keyword is not defined, returns null.
\r
1254 * The locale name does not need to be normalized.
\r
1255 * @param keywordName name of the keyword whose value is desired. Case insensitive.
\r
1256 * @return String the value of the keyword as a string
\r
1259 public static String getKeywordValue(String localeID, String keywordName) {
\r
1260 return new IDParser(localeID).getKeywordValue(keywordName);
\r
1264 * Utility class to parse and normalize locale ids (including POSIX style)
\r
1266 private static final class IDParser {
\r
1267 private char[] id;
\r
1268 private int index;
\r
1269 private char[] buffer;
\r
1271 // POSIX-style ids are not handled unless canonicalization is requested.
\r
1272 private boolean canonicalize;
\r
1273 private boolean hadCountry;
\r
1275 // used when canonicalizing
\r
1280 * Parsing constants.
\r
1282 private static final char KEYWORD_SEPARATOR = '@';
\r
1283 private static final char HYPHEN = '-';
\r
1284 private static final char KEYWORD_ASSIGN = '=';
\r
1285 private static final char COMMA = ',';
\r
1286 private static final char ITEM_SEPARATOR = ';';
\r
1287 private static final char DOT = '.';
\r
/**
 * Creates a parser for the given locale ID with canonicalization turned off.
 *
 * @param localeID the locale ID to parse
 */
private IDParser(String localeID) {
    this(localeID, false);
}
\r
1293 private IDParser(String localeID, boolean canonicalize) {
\r
1294 id = localeID.toCharArray();
\r
1296 buffer = new char[id.length + 5];
\r
1298 this.canonicalize = canonicalize;
\r
1301 private void reset() {
\r
1305 // utilities for working on text in the buffer
\r
1308 * Append c to the buffer.
\r
1310 private void append(char c) {
\r
1314 catch (IndexOutOfBoundsException e) {
\r
1315 if (buffer.length > 512) {
\r
1316 // something is seriously wrong, let this go
\r
1319 char[] nbuffer = new char[buffer.length * 2];
\r
1320 System.arraycopy(buffer, 0, nbuffer, 0, buffer.length);
\r
1321 nbuffer[blen] = c;
\r
1327 private void addSeparator() {
\r
1328 append(UNDERSCORE);
\r
1332 * Returns the text in the buffer from start to blen as a String.
\r
1334 private String getString(int start) {
\r
1335 if (start == blen) {
\r
1336 return EMPTY_STRING;
\r
1338 return new String(buffer, start, blen-start);
\r
1342 * Set the length of the buffer to pos, then append the string.
\r
1344 private void set(int pos, String s) {
\r
1345 this.blen = pos; // no safety
\r
1350 * Append the string to the buffer.
\r
1352 private void append(String s) {
\r
1353 for (int i = 0; i < s.length(); ++i) {
\r
1354 append(s.charAt(i));
\r
1358 // utilities for parsing text out of the id
\r
1361 * Character to indicate no more text is available in the id.
\r
1363 private static final char DONE = '\uffff';
\r
1366 * Returns the character at index in the id, and advance index. The returned character
\r
1367 * is DONE if index was at the limit of the buffer. The index is advanced regardless
\r
1368 * so that decrementing the index will always 'unget' the last character returned.
\r
1370 private char next() {
\r
1371 if (index == id.length) {
\r
1376 return id[index++];
\r
1380 * Advance index until the next terminator or id separator, and leave it there.
\r
1382 private void skipUntilTerminatorOrIDSeparator() {
\r
1383 while (!isTerminatorOrIDSeparator(next())) {
\r
1389 * Returns true if the character at index in the id is a terminator.
\r
1391 private boolean atTerminator() {
\r
1392 return index >= id.length || isTerminator(id[index]);
\r
1396 * Returns true if the character is an id separator (underscore or hyphen).
\r
1398 /* private boolean isIDSeparator(char c) {
\r
1399 return c == UNDERSCORE || c == HYPHEN;
\r
1403 * Returns true if the character is a terminator (keyword separator, dot, or DONE).
\r
1404 * Dot is a terminator because of the POSIX form, where dot precedes the codepage.
\r
1406 private boolean isTerminator(char c) {
\r
1407 // always terminate at DOT, even if not handling POSIX. It's an error...
\r
1408 return c == KEYWORD_SEPARATOR || c == DONE || c == DOT;
\r
1412 * Returns true if the character is a terminator or id separator.
\r
1414 private boolean isTerminatorOrIDSeparator(char c) {
\r
1415 return c == KEYWORD_SEPARATOR || c == UNDERSCORE || c == HYPHEN ||
\r
1416 c == DONE || c == DOT;
\r
1420 * Returns true if the start of the buffer has an experimental or private language
\r
1421 * prefix, the pattern '[ixIX][-_].' shows the syntax checked.
\r
1423 private boolean haveExperimentalLanguagePrefix() {
\r
1424 if (id.length > 2) {
\r
1426 if (c == HYPHEN || c == UNDERSCORE) {
\r
1428 return c == 'x' || c == 'X' || c == 'i' || c == 'I';
\r
1435 * Returns true if a value separator occurs at or after index.
\r
1437 private boolean haveKeywordAssign() {
\r
1438 // assume it is safe to start from index
\r
1439 for (int i = index; i < id.length; ++i) {
\r
1440 if (id[i] == KEYWORD_ASSIGN) {
\r
1448 * Advance index past language, and accumulate normalized language code in buffer.
\r
1449 * Index must be at 0 when this is called. Index is left at a terminator or id
\r
1450 * separator. Returns the start of the language code in the buffer.
\r
1452 private int parseLanguage() {
\r
1453 if (haveExperimentalLanguagePrefix()) {
\r
1454 append(Character.toLowerCase(id[0]));
\r
1460 while(!isTerminatorOrIDSeparator(c = next())) {
\r
1461 append(Character.toLowerCase(c));
\r
1466 initLanguageTables();
\r
1468 /* convert 3 character code to 2 character code if possible *CWB*/
\r
1469 String lang = getString(0);
\r
1470 int offset = findIndex(_languages3, lang);
\r
1471 if (offset >= 0) {
\r
1472 set(0, _languages[offset]);
\r
1474 offset = findIndex(_obsoleteLanguages3, lang);
\r
1475 if (offset >= 0) {
\r
1476 set(0, _obsoleteLanguages[offset]);
\r
1485 * Advance index past language. Index must be at 0 when this is called. Index
\r
1486 * is left at a terminator or id separator.
\r
1488 private void skipLanguage() {
\r
1489 if (haveExperimentalLanguagePrefix()) {
\r
1492 skipUntilTerminatorOrIDSeparator();
\r
1496 * Advance index past script, and accumulate normalized script in buffer.
\r
1497 * Index must be immediately after the language.
\r
1498 * If the item at this position is not a script (is not four characters
\r
1499 * long) leave index and buffer unchanged. Otherwise index is left at
\r
1500 * a terminator or id separator. Returns the start of the script code
\r
1501 * in the buffer (this may be equal to the buffer length, if there is no
\r
1504 private int parseScript() {
\r
1505 if (!atTerminator()) {
\r
1506 int oldIndex = index; // save original index
\r
1509 int oldBlen = blen; // get before append hyphen, if we truncate everything is undone
\r
1511 while(!isTerminatorOrIDSeparator(c = next())) {
\r
1512 if (blen == oldBlen) { // first pass
\r
1514 append(Character.toUpperCase(c));
\r
1516 append(Character.toLowerCase(c));
\r
1521 /* If it's not exactly 4 characters long, then it's not a script. */
\r
1522 if (index - oldIndex != 5) { // +1 to account for separator
\r
1526 oldBlen++; // index past hyphen, for clients who want to extract just the script
\r
1535 * Advance index past script.
\r
1536 * Index must be immediately after the language and IDSeparator.
\r
1537 * If the item at this position is not a script (is not four characters
\r
1538 * long) leave index. Otherwise index is left at a terminator or
\r
1541 private void skipScript() {
\r
1542 if (!atTerminator()) {
\r
1543 int oldIndex = index;
\r
1546 skipUntilTerminatorOrIDSeparator();
\r
1547 if (index - oldIndex != 5) { // +1 to account for separator
\r
1554 * Advance index past country, and accumulate normalized country in buffer.
\r
1555 * Index must be immediately after the script (if there is one, else language)
\r
1556 * and IDSeparator. Return the start of the country code in the buffer.
\r
1558 private int parseCountry() {
\r
1559 if (!atTerminator()) {
\r
1560 int oldIndex = index;
\r
1563 int oldBlen = blen;
\r
1565 while (!isTerminatorOrIDSeparator(c = next())) {
\r
1566 if (oldBlen == blen) { // first, add hyphen
\r
1567 hadCountry = true; // we have a country, let variant parsing know
\r
1569 ++oldBlen; // increment past hyphen
\r
1571 append(Character.toUpperCase(c));
\r
1575 int charsAppended = blen - oldBlen;
\r
1577 if (charsAppended == 0) {
\r
1580 else if (charsAppended < 2 || charsAppended > 3) {
\r
1581 // It's not a country, so return index and blen to
\r
1582 // their previous values.
\r
1586 hadCountry = false;
\r
1588 else if (charsAppended == 3) {
\r
1589 initCountryTables();
\r
1591 /* convert 3 character code to 2 character code if possible *CWB*/
\r
1592 int offset = findIndex(_countries3, getString(oldBlen));
\r
1593 if (offset >= 0) {
\r
1594 set(oldBlen, _countries[offset]);
\r
1596 offset = findIndex(_obsoleteCountries3, getString(oldBlen));
\r
1597 if (offset >= 0) {
\r
1598 set(oldBlen, _obsoleteCountries[offset]);
\r
1610 * Advance index past country.
\r
1611 * Index must be immediately after the script (if there is one, else language)
\r
1612 * and IDSeparator.
\r
1614 private void skipCountry() {
\r
1615 if (!atTerminator()) {
\r
1618 * Save the index point after the separator, since the format
\r
1619 * requires two separators if the country is not present.
\r
1621 int oldIndex = index;
\r
1623 skipUntilTerminatorOrIDSeparator();
\r
1624 int charsSkipped = index - oldIndex;
\r
1625 if (charsSkipped < 2 || charsSkipped > 3) {
\r
1632 * Advance index past variant, and accumulate normalized variant in buffer. This ignores
\r
1633 * the codepage information from POSIX ids. Index must be immediately after the country
\r
1634 * or script. Index is left at the keyword separator or at the end of the text. Return
\r
1635 * the start of the variant code in the buffer.
\r
1637 * In standard form, we can have the following forms:
\r
1643 * This also handles POSIX ids, which can have the following forms (pppp is code page id):
\r
1644 * ll_CC.pppp --> ll_CC
\r
1645 * ll_CC.pppp@VVVV --> ll_CC_VVVV
\r
1646 * ll_CC@VVVV --> ll_CC_VVVV
\r
1648 * We identify this use of '@' in POSIX ids by looking for an '=' following
\r
1649 * the '@'. If there is one, we consider '@' to start a keyword list, instead of
\r
1650 * being part of a POSIX id.
\r
1652 * Note: since it was decided that we want an option to not handle POSIX ids, this
\r
1653 * becomes a bit more complex.
\r
1655 private int parseVariant() {
\r
1656 int oldBlen = blen;
\r
1658 boolean start = true;
\r
1659 boolean needSeparator = true;
\r
1660 boolean skipping = false;
\r
1662 while ((c = next()) != DONE) {
\r
1666 } else if (c == KEYWORD_SEPARATOR) {
\r
1667 if (haveKeywordAssign()) {
\r
1672 needSeparator = true; // add another underscore if we have more text
\r
1673 } else if (start) {
\r
1675 } else if (!skipping) {
\r
1676 if (needSeparator) {
\r
1677 boolean incOldBlen = blen == oldBlen; // need to skip separators
\r
1678 needSeparator = false;
\r
1679 if (incOldBlen && !hadCountry) { // no country, we'll need two
\r
1681 ++oldBlen; // for sure
\r
1684 if (incOldBlen) { // only for the first separator
\r
1688 c = Character.toUpperCase(c);
\r
1689 if (c == HYPHEN || c == COMMA) {
\r
1700 // no need for skipvariant, to get the keywords we'll just scan directly for
\r
1701 // the keyword separator
\r
1704 * Returns the normalized language id, or the empty string.
\r
1706 public String getLanguage() {
\r
1708 return getString(parseLanguage());
\r
1712 * Returns the normalized script id, or the empty string.
\r
1714 public String getScript() {
\r
1717 return getString(parseScript());
\r
1721 * return the normalized country id, or the empty string.
\r
1723 public String getCountry() {
\r
1727 return getString(parseCountry());
\r
1731 * Returns the normalized variant id, or the empty string.
\r
1733 public String getVariant() {
\r
1738 return getString(parseVariant());
\r
1742 * Returns the language, script, country, and variant as separate strings.
\r
1744 public String[] getLanguageScriptCountryVariant() {
\r
1746 return new String[] {
\r
1747 getString(parseLanguage()),
\r
1748 getString(parseScript()),
\r
1749 getString(parseCountry()),
\r
1750 getString(parseVariant())
\r
1754 public void setBaseName(String baseName) {
\r
1755 this.baseName = baseName;
\r
1758 public void parseBaseName() {
\r
1759 if (baseName != null) {
\r
1768 // catch unwanted trailing underscore after country if there was no variant
\r
1769 if (blen > 1 && buffer[blen-1] == UNDERSCORE) {
\r
1776 * Returns the normalized base form of the locale id. The base
\r
1777 * form does not include keywords.
\r
1779 public String getBaseName() {
\r
1780 if (baseName != null) {
\r
1784 return getString(0);
\r
1788 * Returns the normalized full form of the locale id. The full
\r
1789 * form includes keywords if they are present.
\r
1791 public String getName() {
\r
1794 return getString(0);
\r
1797 // keyword utilities
\r
1800 * If we have keywords, advance index to the start of the keywords and return true,
\r
1801 * otherwise return false.
\r
1803 private boolean setToKeywordStart() {
\r
1804 for (int i = index; i < id.length; ++i) {
\r
1805 if (id[i] == KEYWORD_SEPARATOR) {
\r
1806 if (canonicalize) {
\r
1807 for (int j = ++i; j < id.length; ++j) { // increment i past separator for return
\r
1808 if (id[j] == KEYWORD_ASSIGN) {
\r
1814 if (++i < id.length) {
\r
1825 private static boolean isDoneOrKeywordAssign(char c) {
\r
1826 return c == DONE || c == KEYWORD_ASSIGN;
\r
1829 private static boolean isDoneOrItemSeparator(char c) {
\r
1830 return c == DONE || c == ITEM_SEPARATOR;
\r
1833 private String getKeyword() {
\r
1834 int start = index;
\r
1835 while (!isDoneOrKeywordAssign(next())) {
\r
1838 return AsciiUtil.toLowerString(new String(id, start, index-start).trim());
\r
1841 private String getValue() {
\r
1842 int start = index;
\r
1843 while (!isDoneOrItemSeparator(next())) {
\r
1846 return new String(id, start, index-start).trim(); // leave case alone
\r
1849 private Comparator getKeyComparator() {
\r
1850 final Comparator comp = new Comparator() {
\r
1851 public int compare(Object lhs, Object rhs) {
\r
1852 return ((String)lhs).compareTo((String)rhs);
\r
1859 * Returns a map of the keywords and values, or null if there are none.
\r
1861 private Map getKeywordMap() {
\r
1862 if (keywords == null) {
\r
1864 if (setToKeywordStart()) {
\r
1865 // trim spaces from both keywords and values; only keywords are converted to lower case.
\r
1867 String key = getKeyword();
\r
1868 if (key.length() == 0) {
\r
1872 if (c != KEYWORD_ASSIGN) {
\r
1873 // throw new IllegalArgumentException("key '" + key + "' missing a value.");
\r
1880 String value = getValue();
\r
1881 if (value.length() == 0) {
\r
1882 // throw new IllegalArgumentException("key '" + key + "' missing a value.");
\r
1886 m = new TreeMap(getKeyComparator());
\r
1887 } else if (m.containsKey(key)) {
\r
1888 // throw new IllegalArgumentException("key '" + key + "' already has a value.");
\r
1891 m.put(key, value);
\r
1892 } while (next() == ITEM_SEPARATOR);
\r
1894 keywords = m != null ? m : Collections.EMPTY_MAP;
\r
1902 * Parse the keywords and return start of the string in the buffer.
\r
1904 private int parseKeywords() {
\r
1905 int oldBlen = blen;
\r
1906 Map m = getKeywordMap();
\r
1907 if (!m.isEmpty()) {
\r
1908 Iterator iter = m.entrySet().iterator();
\r
1909 boolean first = true;
\r
1910 while (iter.hasNext()) {
\r
1911 append(first ? KEYWORD_SEPARATOR : ITEM_SEPARATOR);
\r
1913 Map.Entry e = (Map.Entry)iter.next();
\r
1914 append((String)e.getKey());
\r
1915 append(KEYWORD_ASSIGN);
\r
1916 append((String)e.getValue());
\r
1918 if (blen != oldBlen) {
\r
1926 * Returns an iterator over the keywords, or null if we have an empty map.
\r
1928 public Iterator getKeywords() {
\r
1929 Map m = getKeywordMap();
\r
1930 return m.isEmpty() ? null : m.keySet().iterator();
\r
1934 * Returns the value for the named keyword, or null if the keyword is not
\r
1937 public String getKeywordValue(String keywordName) {
\r
1938 Map m = getKeywordMap();
\r
1939 return m.isEmpty() ? null : (String)m.get(AsciiUtil.toLowerString(keywordName.trim()));
\r
1943 * Set the keyword value only if it is not already set to something else.
\r
1945 public void defaultKeywordValue(String keywordName, String value) {
\r
1946 setKeywordValue(keywordName, value, false);
\r
1950 * Set the value for the named keyword, or unset it if value is null. If
\r
1951 * keywordName itself is null, unset all keywords. If keywordName is not null,
\r
1952 * value must not be null.
\r
1954 public void setKeywordValue(String keywordName, String value) {
\r
1955 setKeywordValue(keywordName, value, true);
\r
1959 * Set the value for the named keyword, or unset it if value is null. If
\r
1960 * keywordName itself is null, unset all keywords. If keywordName is not null,
\r
1961 * value must not be empty. If reset is true, ignore any previous value for
\r
1962 * the keyword, otherwise do not change the keyword (including removal of
\r
1963 * one or all keywords).
\r
1965 private void setKeywordValue(String keywordName, String value, boolean reset) {
\r
1966 if (keywordName == null) {
\r
1968 // force new map, ignore value
\r
1969 keywords = Collections.EMPTY_MAP;
\r
1972 keywordName = AsciiUtil.toLowerString(keywordName.trim());
\r
1973 if (keywordName.length() == 0) {
\r
1974 throw new IllegalArgumentException("keyword must not be empty");
\r
1976 if (value != null) {
\r
1977 value = value.trim();
\r
1978 if (value.length() == 0) {
\r
1979 throw new IllegalArgumentException("value must not be empty");
\r
1982 Map m = getKeywordMap();
\r
1983 if (m.isEmpty()) { // it is EMPTY_MAP
\r
1984 if (value != null) {
\r
1986 keywords = new TreeMap(getKeyComparator());
\r
1987 keywords.put(keywordName, value.trim());
\r
1990 if (reset || !m.containsKey(keywordName)) {
\r
1991 if (value != null) {
\r
1992 m.put(keywordName, value);
\r
1994 m.remove(keywordName);
\r
1995 if (m.isEmpty()) {
\r
1997 keywords = Collections.EMPTY_MAP;
\r
2007 * linear search of the string array. the arrays are unfortunately ordered by the
\r
2008 * two-letter target code, not the three-letter search code, which seems backwards.
\r
2010 private static int findIndex(String[] array, String target){
\r
2011 for (int i = 0; i < array.length; i++) {
\r
2012 if (target.equals(array[i])) {
\r
2020 * Returns the canonical name for the specified locale ID. This is used to convert POSIX
\r
2021 * and other grandfathered IDs to standard ICU form.
\r
2022 * @param localeID the locale id
\r
2023 * @return the canonicalized id
\r
2026 public static String canonicalize(String localeID){
\r
2027 IDParser parser = new IDParser(localeID, true);
\r
2028 String baseName = parser.getBaseName();
\r
2029 boolean foundVariant = false;
\r
2031 // formerly, we always set to en_US_POSIX if the basename was empty, but
\r
2032 // now we require that the entire id be empty, so that "@foo=bar"
\r
2033 // will pass through unchanged.
\r
2034 // {dlf} I'd rather keep "" unchanged.
\r
2035 if (localeID.equals("")) {
\r
2037 // return "en_US_POSIX";
\r
2040 // we have an ID in the form xx_Yyyy_ZZ_KKKKK
\r
2042 initCANONICALIZE_MAP();
\r
2044 /* convert the variants to appropriate ID */
\r
2045 for (int i = 0; i < variantsToKeywords.length; i++) {
\r
2046 String[] vals = variantsToKeywords[i];
\r
2047 int idx = baseName.lastIndexOf("_" + vals[0]);
\r
2049 foundVariant = true;
\r
2051 baseName = baseName.substring(0, idx);
\r
2052 if (baseName.endsWith("_")) {
\r
2053 baseName = baseName.substring(0, --idx);
\r
2055 parser.setBaseName(baseName);
\r
2056 parser.defaultKeywordValue(vals[1], vals[2]);
\r
2061 /* See if this is an already known locale */
\r
2062 for (int i = 0; i < CANONICALIZE_MAP.length; i++) {
\r
2063 if (CANONICALIZE_MAP[i][0].equals(baseName)) {
\r
2064 foundVariant = true;
\r
2066 String[] vals = CANONICALIZE_MAP[i];
\r
2067 parser.setBaseName(vals[1]);
\r
2068 if (vals[2] != null) {
\r
2069 parser.defaultKeywordValue(vals[2], vals[3]);
\r
2075 /* total mondo hack for Norwegian, fortunately the main NY case is handled earlier */
\r
2076 if (!foundVariant) {
\r
2077 if (parser.getLanguage().equals("nb") && parser.getVariant().equals("NY")) {
\r
2078 parser.setBaseName(lscvToID("nn", parser.getScript(), parser.getCountry(), null));
\r
2082 return parser.getName();
\r
2086 * Given a keyword and a value, return a new locale with an updated
\r
2087 * keyword and value. If keyword is null, this removes all keywords from the locale id.
\r
2088 * Otherwise, if the value is null, this removes the value for this keyword from the
\r
2089 * locale id. Otherwise, this adds/replaces the value for this keyword in the locale id.
\r
2090 * The keyword and value must not be empty.
\r
2091 * @param keyword the keyword to add/remove, or null to remove all keywords.
\r
2092 * @param value the value to add/set, or null to remove this particular keyword.
\r
2093 * @return the updated locale
\r
2096 public ULocale setKeywordValue(String keyword, String value) {
\r
2097 return new ULocale(setKeywordValue(localeID, keyword, value), (Locale)null);
\r
2101 * Given a locale id, a keyword, and a value, return a new locale id with an updated
\r
2102 * keyword and value. If keyword is null, this removes all keywords from the locale id.
\r
2103 * Otherwise, if the value is null, this removes the value for this keyword from the
\r
2104 * locale id. Otherwise, this adds/replaces the value for this keyword in the locale id.
\r
2105 * The keyword and value must not be empty.
\r
2106 * @param localeID the locale id to modify
\r
2107 * @param keyword the keyword to add/remove, or null to remove all keywords.
\r
2108 * @param value the value to add/set, or null to remove this particular keyword.
\r
2109 * @return the updated locale id
\r
2112 public static String setKeywordValue(String localeID, String keyword, String value) {
\r
2113 IDParser parser = new IDParser(localeID);
\r
2114 parser.setKeywordValue(keyword, value);
\r
2115 return parser.getName();
\r
2119 * Given a locale id, a keyword, and a value, return a new locale id with an updated
\r
2120 * keyword and value, if the keyword does not already have a value. The keyword and
\r
2121 * value must not be null or empty.
\r
2122 * @param localeID the locale id to modify
\r
2123 * @param keyword the keyword to add, if not already present
\r
2124 * @param value the value to add, if not already present
\r
2125 * @return the updated locale id
\r
2128 /* private static String defaultKeywordValue(String localeID, String keyword, String value) {
\r
2129 IDParser parser = new IDParser(localeID);
\r
2130 parser.defaultKeywordValue(keyword, value);
\r
2131 return parser.getName();
\r
2135 * Returns a three-letter abbreviation for this locale's language. If the locale
\r
2136 * doesn't specify a language, returns the empty string. Otherwise, returns
\r
2137 * a lowercase ISO 639-2/T language code.
\r
2138 * The ISO 639-2 language codes can be found on-line at
\r
2139 * <a href="ftp://dkuug.dk/i18n/iso-639-2.txt"><code>ftp://dkuug.dk/i18n/iso-639-2.txt</code></a>
\r
2140 * @exception MissingResourceException Throws MissingResourceException if the
\r
2141 * three-letter language abbreviation is not available for this locale.
\r
2144 public String getISO3Language(){
\r
2145 return getISO3Language(localeID);
\r
2149 * Returns a three-letter abbreviation for this locale's language. If the locale
\r
2150 * doesn't specify a language, returns the empty string. Otherwise, returns
\r
2151 * a lowercase ISO 639-2/T language code.
\r
2152 * The ISO 639-2 language codes can be found on-line at
\r
2153 * <a href="ftp://dkuug.dk/i18n/iso-639-2.txt"><code>ftp://dkuug.dk/i18n/iso-639-2.txt</code></a>
\r
2154 * @exception MissingResourceException Throws MissingResourceException if the
\r
2155 * three-letter language abbreviation is not available for this locale.
\r
2158 public static String getISO3Language(String localeID){
\r
2159 initLanguageTables();
\r
2161 String language = getLanguage(localeID);
\r
2162 int offset = findIndex(_languages, language);
\r
2164 return _languages3[offset];
\r
2166 offset = findIndex(_obsoleteLanguages, language);
\r
2167 if (offset >= 0) {
\r
2168 return _obsoleteLanguages3[offset];
\r
2171 return EMPTY_STRING;
\r
2175 * Returns a three-letter abbreviation for this locale's country/region. If the locale
\r
2176 * doesn't specify a country, returns the empty string. Otherwise, returns
\r
2177 * an uppercase ISO 3166 3-letter country code.
\r
2178 * @exception MissingResourceException Throws MissingResourceException if the
\r
2179 * three-letter country abbreviation is not available for this locale.
\r
2182 public String getISO3Country(){
\r
2183 return getISO3Country(localeID);
\r
2186 * Returns a three-letter abbreviation for this locale's country/region. If the locale
\r
2187 * doesn't specify a country, returns the empty string. Otherwise, returns
\r
2188 * an uppercase ISO 3166 3-letter country code.
\r
2189 * @exception MissingResourceException Throws MissingResourceException if the
\r
2190 * three-letter country abbreviation is not available for this locale.
\r
2193 public static String getISO3Country(String localeID){
\r
2194 initCountryTables();
\r
2196 String country = getCountry(localeID);
\r
2197 int offset = findIndex(_countries, country);
\r
2199 return _countries3[offset];
\r
2201 offset = findIndex(_obsoleteCountries, country);
\r
2203 return _obsoleteCountries3[offset];
\r
2206 return EMPTY_STRING;
\r
2212 * Utility to fetch locale display data from resource bundle tables.
\r
2214 private static String getTableString(String tableName, String subtableName, String item, String displayLocaleID) {
\r
2215 if (item.length() > 0) {
\r
2217 ICUResourceBundle bundle = (ICUResourceBundle)UResourceBundle.
\r
2218 getBundleInstance(ICUResourceBundle.ICU_BASE_NAME, displayLocaleID);
\r
2219 return getTableString(tableName, subtableName, item, bundle);
\r
2220 } catch (Exception e) {
\r
2221 // System.out.println("gtsu: " + e.getMessage());
\r
2228 * Utility to fetch locale display data from resource bundle tables.
\r
2230 private static String getTableString(String tableName, String subtableName, String item, ICUResourceBundle bundle) {
\r
2231 // System.out.println("gts table: " + tableName +
\r
2232 // " subtable: " + subtableName +
\r
2233 // " item: " + item +
\r
2234 // " bundle: " + bundle.getULocale());
\r
2237 // special case currency
\r
2238 if ("currency".equals(subtableName)) {
\r
2239 ICUResourceBundle table = bundle.getWithFallback("Currencies");
\r
2240 table = table.getWithFallback(item);
\r
2241 return table.getString(1);
\r
2243 ICUResourceBundle table = bundle.getWithFallback(tableName);
\r
2245 if (subtableName != null) {
\r
2246 table = table.getWithFallback(subtableName);
\r
2248 return table.getStringWithFallback(item);
\r
2250 catch (MissingResourceException e) {
\r
2252 if(subtableName==null){
\r
2254 // may be a deprecated code
\r
2255 String currentName = null;
\r
2256 if(tableName.equals("Countries")){
\r
2257 currentName = getCurrentCountryID(item);
\r
2258 }else if(tableName.equals("Languages")){
\r
2259 currentName = getCurrentLanguageID(item);
\r
2261 return table.getStringWithFallback(currentName);
\r
2262 }catch (MissingResourceException ex){/* fall through*/}
\r
2265 // still can't figure out ?.. try the fallback mechanism
\r
2266 String fallbackLocale = table.getWithFallback("Fallback").getString();
\r
2267 if (fallbackLocale.length() == 0) {
\r
2268 fallbackLocale = "root";
\r
2270 // System.out.println("bundle: " + bundle.getULocale() + " fallback: " + fallbackLocale);
\r
2271 if(fallbackLocale.equals(table.getULocale().localeID)){
\r
2274 bundle = (ICUResourceBundle)UResourceBundle.getBundleInstance(ICUResourceBundle.ICU_BASE_NAME,
\r
2276 // System.out.println("fallback from " + table.getULocale() + " to " + fallbackLocale +
\r
2277 // ", got bundle " + bundle.getULocale());
\r
2282 catch (Exception e) {
\r
2283 // System.out.println("gtsi: " + e.getMessage());
\r
2289 * Returns this locale's language localized for display in the default locale.
\r
2290 * @return the localized language name.
\r
2293 public String getDisplayLanguage() {
\r
2294 return getDisplayLanguageInternal(localeID, getDefault().localeID);
\r
2298 * Returns this locale's language localized for display in the provided locale.
\r
2299 * @param displayLocale the locale in which to display the name.
\r
2300 * @return the localized language name.
\r
2303 public String getDisplayLanguage(ULocale displayLocale) {
\r
2304 return getDisplayLanguageInternal(localeID, displayLocale.localeID);
\r
2308 * Returns a locale's language localized for display in the provided locale.
\r
2309 * This is a cover for the ICU4C API.
\r
2310 * @param localeID the id of the locale whose language will be displayed
\r
2311 * @param displayLocaleID the id of the locale in which to display the name.
\r
2312 * @return the localized language name.
\r
2315 public static String getDisplayLanguage(String localeID, String displayLocaleID) {
\r
2316 return getDisplayLanguageInternal(localeID, getName(displayLocaleID));
\r
2320 * Returns a locale's language localized for display in the provided locale.
\r
2321 * This is a cover for the ICU4C API.
\r
2322 * @param localeID the id of the locale whose language will be displayed.
\r
2323 * @param displayLocale the locale in which to display the name.
\r
2324 * @return the localized language name.
\r
2327 public static String getDisplayLanguage(String localeID, ULocale displayLocale) {
\r
2328 return getDisplayLanguageInternal(localeID, displayLocale.localeID);
\r
2331 static String getCurrentCountryID(String oldID){
\r
2332 initCountryTables();
\r
2333 int offset = findIndex(_deprecatedCountries, oldID);
\r
2334 if (offset >= 0) {
\r
2335 return _replacementCountries[offset];
\r
2339 static String getCurrentLanguageID(String oldID){
\r
2340 initLanguageTables();
\r
2341 int offset = findIndex(_obsoleteLanguages, oldID);
\r
2342 if (offset >= 0) {
\r
2343 return _replacementLanguages[offset];
\r
2349 // displayLocaleID is canonical, localeID need not be since parsing will fix this.
\r
2350 private static String getDisplayLanguageInternal(String localeID, String displayLocaleID) {
\r
2351 return getTableString("Languages", null, new IDParser(localeID).getLanguage(), displayLocaleID);
\r
2355 * Returns this locale's script localized for display in the default locale.
\r
2356 * @return the localized script name.
\r
2359 public String getDisplayScript() {
\r
2360 return getDisplayScriptInternal(localeID, getDefault().localeID);
\r
2364 * Returns this locale's script localized for display in the provided locale.
\r
2365 * @param displayLocale the locale in which to display the name.
\r
2366 * @return the localized script name.
\r
2369 public String getDisplayScript(ULocale displayLocale) {
\r
2370 return getDisplayScriptInternal(localeID, displayLocale.localeID);
\r
2374 * Returns a locale's script localized for display in the provided locale.
\r
2375 * This is a cover for the ICU4C API.
\r
2376 * @param localeID the id of the locale whose script will be displayed
\r
2377 * @param displayLocaleID the id of the locale in which to display the name.
\r
2378 * @return the localized script name.
\r
2381 public static String getDisplayScript(String localeID, String displayLocaleID) {
\r
2382 return getDisplayScriptInternal(localeID, getName(displayLocaleID));
\r
2386 * Returns a locale's script localized for display in the provided locale.
\r
2387 * @param localeID the id of the locale whose script will be displayed.
\r
2388 * @param displayLocale the locale in which to display the name.
\r
2389 * @return the localized script name.
\r
2392 public static String getDisplayScript(String localeID, ULocale displayLocale) {
\r
2393 return getDisplayScriptInternal(localeID, displayLocale.localeID);
\r
2396 // displayLocaleID is canonical, localeID need not be since parsing will fix this.
\r
2397 private static String getDisplayScriptInternal(String localeID, String displayLocaleID) {
\r
2398 return getTableString("Scripts", null, new IDParser(localeID).getScript(), displayLocaleID);
\r
2402 * Returns this locale's country localized for display in the default locale.
\r
2403 * @return the localized country name.
\r
2406 public String getDisplayCountry() {
\r
2407 return getDisplayCountryInternal(localeID, getDefault().localeID);
\r
2411 * Returns this locale's country localized for display in the provided locale.
\r
2412 * @param displayLocale the locale in which to display the name.
\r
2413 * @return the localized country name.
\r
2416 public String getDisplayCountry(ULocale displayLocale){
\r
2417 return getDisplayCountryInternal(localeID, displayLocale.localeID);
\r
2421 * Returns a locale's country localized for display in the provided locale.
\r
2422 * This is a cover for the ICU4C API.
\r
2423 * @param localeID the id of the locale whose country will be displayed
\r
2424 * @param displayLocaleID the id of the locale in which to display the name.
\r
2425 * @return the localized country name.
\r
2428 public static String getDisplayCountry(String localeID, String displayLocaleID) {
\r
2429 return getDisplayCountryInternal(localeID, getName(displayLocaleID));
\r
2433 * Returns a locale's country localized for display in the provided locale.
\r
2434 * This is a cover for the ICU4C API.
\r
2435 * @param localeID the id of the locale whose country will be displayed.
\r
2436 * @param displayLocale the locale in which to display the name.
\r
2437 * @return the localized country name.
\r
2440 public static String getDisplayCountry(String localeID, ULocale displayLocale) {
\r
2441 return getDisplayCountryInternal(localeID, displayLocale.localeID);
\r
2444 // displayLocaleID is canonical, localeID need not be since parsing will fix this.
\r
2445 private static String getDisplayCountryInternal(String localeID, String displayLocaleID) {
\r
2446 return getTableString("Countries", null, new IDParser(localeID).getCountry(), displayLocaleID);
\r
2450 * Returns this locale's variant localized for display in the default locale.
\r
2451 * @return the localized variant name.
\r
2454 public String getDisplayVariant() {
\r
2455 return getDisplayVariantInternal(localeID, getDefault().localeID);
\r
2459 * Returns this locale's variant localized for display in the provided locale.
\r
2460 * @param displayLocale the locale in which to display the name.
\r
2461 * @return the localized variant name.
\r
2464 public String getDisplayVariant(ULocale displayLocale) {
\r
2465 return getDisplayVariantInternal(localeID, displayLocale.localeID);
\r
2469 * Returns a locale's variant localized for display in the provided locale.
\r
2470 * This is a cover for the ICU4C API.
\r
2471 * @param localeID the id of the locale whose variant will be displayed
\r
2472 * @param displayLocaleID the id of the locale in which to display the name.
\r
2473 * @return the localized variant name.
\r
2476 public static String getDisplayVariant(String localeID, String displayLocaleID){
\r
2477 return getDisplayVariantInternal(localeID, getName(displayLocaleID));
\r
2481 * Returns a locale's variant localized for display in the provided locale.
\r
2482 * This is a cover for the ICU4C API.
\r
2483 * @param localeID the id of the locale whose variant will be displayed.
\r
2484 * @param displayLocale the locale in which to display the name.
\r
2485 * @return the localized variant name.
\r
2488 public static String getDisplayVariant(String localeID, ULocale displayLocale) {
\r
2489 return getDisplayVariantInternal(localeID, displayLocale.localeID);
\r
2492 // displayLocaleID is canonical, localeID need not be since parsing will fix this.
\r
2493 private static String getDisplayVariantInternal(String localeID, String displayLocaleID) {
\r
2494 return getTableString("Variants", null, new IDParser(localeID).getVariant(), displayLocaleID);
\r
2498 * Returns a keyword localized for display in the default locale.
\r
2499 * @param keyword the keyword to be displayed.
\r
2500 * @return the localized keyword name.
\r
2501 * @see #getKeywords()
\r
2504 public static String getDisplayKeyword(String keyword) {
\r
2505 return getDisplayKeywordInternal(keyword, getDefault().localeID);
\r
2509 * Returns a keyword localized for display in the specified locale.
\r
2510 * @param keyword the keyword to be displayed.
\r
2511 * @param displayLocaleID the id of the locale in which to display the keyword.
\r
2512 * @return the localized keyword name.
\r
2513 * @see #getKeywords(String)
\r
2516 public static String getDisplayKeyword(String keyword, String displayLocaleID) {
\r
2517 return getDisplayKeywordInternal(keyword, getName(displayLocaleID));
\r
2521 * Returns a keyword localized for display in the specified locale.
\r
2522 * @param keyword the keyword to be displayed.
\r
2523 * @param displayLocale the locale in which to display the keyword.
\r
2524 * @return the localized keyword name.
\r
2525 * @see #getKeywords(String)
\r
2528 public static String getDisplayKeyword(String keyword, ULocale displayLocale) {
\r
2529 return getDisplayKeywordInternal(keyword, displayLocale.localeID);
\r
2532 // displayLocaleID is canonical, localeID need not be since parsing will fix this.
\r
2533 private static String getDisplayKeywordInternal(String keyword, String displayLocaleID) {
\r
2534 return getTableString("Keys", null, AsciiUtil.toLowerString(keyword.trim()), displayLocaleID);
\r
2538 * Returns a keyword value localized for display in the default locale.
\r
2539 * @param keyword the keyword whose value is to be displayed.
\r
2540 * @return the localized value name.
\r
2543 public String getDisplayKeywordValue(String keyword) {
\r
2544 return getDisplayKeywordValueInternal(localeID, keyword, getDefault().localeID);
\r
2548 * Returns a keyword value localized for display in the specified locale.
\r
2549 * @param keyword the keyword whose value is to be displayed.
\r
2550 * @param displayLocale the locale in which to display the value.
\r
2551 * @return the localized value name.
\r
2554 public String getDisplayKeywordValue(String keyword, ULocale displayLocale) {
\r
2555 return getDisplayKeywordValueInternal(localeID, keyword, displayLocale.localeID);
\r
2559 * Returns a keyword value localized for display in the specified locale.
\r
2560 * This is a cover for the ICU4C API.
\r
2561 * @param localeID the id of the locale whose keyword value is to be displayed.
\r
2562 * @param keyword the keyword whose value is to be displayed.
\r
2563 * @param displayLocaleID the id of the locale in which to display the value.
\r
2564 * @return the localized value name.
\r
2567 public static String getDisplayKeywordValue(String localeID, String keyword, String displayLocaleID) {
\r
2568 return getDisplayKeywordValueInternal(localeID, keyword, getName(displayLocaleID));
\r
2572 * Returns a keyword value localized for display in the specified locale.
\r
2573 * This is a cover for the ICU4C API.
\r
2574 * @param localeID the id of the locale whose keyword value is to be displayed.
\r
2575 * @param keyword the keyword whose value is to be displayed.
\r
2576 * @param displayLocale the id of the locale in which to display the value.
\r
2577 * @return the localized value name.
\r
2580 public static String getDisplayKeywordValue(String localeID, String keyword, ULocale displayLocale) {
\r
2581 return getDisplayKeywordValueInternal(localeID, keyword, displayLocale.localeID);
\r
2584 // displayLocaleID is canonical, localeID need not be since parsing will fix this.
\r
2585 private static String getDisplayKeywordValueInternal(String localeID, String keyword, String displayLocaleID) {
\r
2586 keyword = AsciiUtil.toLowerString(keyword.trim());
\r
2587 String value = new IDParser(localeID).getKeywordValue(keyword);
\r
2588 return getTableString("Types", keyword, value, displayLocaleID);
\r
2592 * Returns this locale name localized for display in the default locale.
\r
2593 * @return the localized locale name.
\r
2596 public String getDisplayName() {
\r
2597 return getDisplayNameInternal(localeID, getDefault().localeID);
\r
2601 * Returns this locale name localized for display in the provided locale.
\r
2602 * @param displayLocale the locale in which to display the locale name.
\r
2603 * @return the localized locale name.
\r
2606 public String getDisplayName(ULocale displayLocale) {
\r
2607 return getDisplayNameInternal(localeID, displayLocale.localeID);
\r
2611 * Returns the locale ID localized for display in the provided locale.
\r
2612 * This is a cover for the ICU4C API.
\r
2613 * @param localeID the locale whose name is to be displayed.
\r
2614 * @param displayLocaleID the id of the locale in which to display the locale name.
\r
2615 * @return the localized locale name.
\r
2618 public static String getDisplayName(String localeID, String displayLocaleID) {
\r
2619 return getDisplayNameInternal(localeID, getName(displayLocaleID));
\r
2623 * Returns the locale ID localized for display in the provided locale.
\r
2624 * This is a cover for the ICU4C API.
\r
2625 * @param localeID the locale whose name is to be displayed.
\r
2626 * @param displayLocale the locale in which to display the locale name.
\r
2627 * @return the localized locale name.
\r
2630 public static String getDisplayName(String localeID, ULocale displayLocale) {
\r
2631 return getDisplayNameInternal(localeID, displayLocale.localeID);
\r
2634 // displayLocaleID is canonical, localeID need not be since parsing will fix this.
\r
2635 private static String getDisplayNameInternal(String localeID, String displayLocaleID) {
\r
2637 // lang (script, country, variant, keyword=value, ...)
\r
2638 // script, country, variant, keyword=value, ...
\r
2640 final String[] tableNames = { "Languages", "Scripts", "Countries", "Variants" };
\r
2642 ICUResourceBundle bundle = (ICUResourceBundle)UResourceBundle.getBundleInstance(ICUResourceBundle.ICU_BASE_NAME, displayLocaleID);
\r
2644 StringBuffer buf = new StringBuffer();
\r
2646 IDParser parser = new IDParser(localeID);
\r
2647 String[] names = parser.getLanguageScriptCountryVariant();
\r
2649 boolean haveLanguage = names[0].length() > 0;
\r
2650 boolean openParen = false;
\r
2651 for (int i = 0; i < names.length; ++i) {
\r
2652 String name = names[i];
\r
2653 if (name.length() > 0) {
\r
2654 name = getTableString(tableNames[i], null, name, bundle);
\r
2655 if (buf.length() > 0) { // need a separator
\r
2656 if (haveLanguage & !openParen) {
\r
2667 Map m = parser.getKeywordMap();
\r
2668 if (!m.isEmpty()) {
\r
2669 Iterator keys = m.entrySet().iterator();
\r
2670 while (keys.hasNext()) {
\r
2671 if (buf.length() > 0) {
\r
2672 if (haveLanguage & !openParen) {
\r
2679 Map.Entry e = (Map.Entry)keys.next();
\r
2680 String key = (String)e.getKey();
\r
2681 String val = (String)e.getValue();
\r
2682 buf.append(getTableString("Keys", null, key, bundle));
\r
2684 buf.append(getTableString("Types", key, val, bundle));
\r
2692 return buf.toString();
\r
2696 * Returns this locale's layout orientation for characters. The possible
\r
2697 * values are "left-to-right", "right-to-left", "top-to-bottom" or
\r
2698 * "bottom-to-top".
\r
2699 * @return The locale's layout orientation for characters.
\r
2702 public String getCharacterOrientation() {
\r
2703 return getTableString("layout", null, "characters", getName());
\r
2707 * Returns this locale's layout orientation for lines. The possible
\r
2708 * values are "left-to-right", "right-to-left", "top-to-bottom" or
\r
2709 * "bottom-to-top".
\r
2710 * @return The locale's layout orientation for lines.
\r
2713 public String getLineOrientation() {
\r
2714 return getTableString("layout", null, "lines", getName());
\r
2718 * Selector for <tt>getLocale()</tt> indicating the locale of the
\r
2719 * resource containing the data. This is always at or above the
\r
2720 * valid locale. If the valid locale does not contain the
\r
2721 * specific data being requested, then the actual locale will be
\r
2722 * above the valid locale. If the object was not constructed from
\r
2723 * locale data, then the valid locale is <i>null</i>.
\r
2725 * @draft ICU 2.8 (retain)
\r
2726 * @provisional This API might change or be removed in a future release.
\r
2728 public static Type ACTUAL_LOCALE = new Type();
\r
2731 * Selector for <tt>getLocale()</tt> indicating the most specific
\r
2732 * locale for which any data exists. This is always at or above
\r
2733 * the requested locale, and at or below the actual locale. If
\r
2734 * the requested locale does not correspond to any resource data,
\r
2735 * then the valid locale will be above the requested locale. If
\r
2736 * the object was not constructed from locale data, then the
\r
2737 * actual locale is <i>null</i>.
\r
2739 * <p>Note: The valid locale will be returned correctly in ICU
\r
2740 * 3.0 or later. In ICU 2.8, it is not returned correctly.
\r
2741 * @draft ICU 2.8 (retain)
\r
2742 * @provisional This API might change or be removed in a future release.
\r
2744 public static Type VALID_LOCALE = new Type();
\r
2747 * Opaque selector enum for <tt>getLocale()</tt>.
\r
2748 * @see com.ibm.icu.util.ULocale
\r
2749 * @see com.ibm.icu.util.ULocale#ACTUAL_LOCALE
\r
2750 * @see com.ibm.icu.util.ULocale#VALID_LOCALE
\r
2751 * @draft ICU 2.8 (retainAll)
\r
2752 * @provisional This API might change or be removed in a future release.
\r
2754 public static final class Type {
\r
2759 * Based on a HTTP formatted list of acceptable locales, determine an available locale for the user.
\r
2760 * NullPointerException is thrown if acceptLanguageList or availableLocales is
\r
2761 * null. If fallback is non-null, it will contain true if a fallback locale (one
\r
2762 * not in the acceptLanguageList) was returned. The value on entry is ignored.
\r
2763 * ULocale will be one of the locales in availableLocales, or the ROOT ULocale if
\r
2764 * a ROOT locale was used as a fallback (because nothing else in
\r
2765 * availableLocales matched). No ULocale array element should be null; behavior
\r
2766 * is undefined if this is the case.
\r
2767 * @param acceptLanguageList list in HTTP "Accept-Language:" format of acceptable locales
\r
2768 * @param availableLocales list of available locales. One of these will be returned.
\r
2769 * @param fallback if non-null, a 1-element array containing a boolean to be set with the fallback status
\r
2770 * @return one of the locales from the availableLocales list, or null if none match
\r
2774 public static ULocale acceptLanguage(String acceptLanguageList, ULocale[] availableLocales,
\r
2775 boolean[] fallback) {
\r
2776 if (acceptLanguageList == null) {
\r
2777 throw new NullPointerException();
\r
2779 ULocale acceptList[] = null;
\r
2781 acceptList = parseAcceptLanguage(acceptLanguageList, true);
\r
2782 } catch (ParseException pe) {
\r
2783 acceptList = null;
\r
2785 if (acceptList == null) {
\r
2788 return acceptLanguage(acceptList, availableLocales, fallback);
\r
2792 * Based on a list of acceptable locales, determine an available locale for the user.
\r
2793 * NullPointerException is thrown if acceptLanguageList or availableLocales is
\r
2794 * null. If fallback is non-null, it will contain true if a fallback locale (one
\r
2795 * not in the acceptLanguageList) was returned. The value on entry is ignored.
\r
2796 * ULocale will be one of the locales in availableLocales, or the ROOT ULocale if
\r
2797 * a ROOT locale was used as a fallback (because nothing else in
\r
2798 * availableLocales matched). No ULocale array element should be null; behavior
\r
2799 * is undefined if this is the case.
\r
2800 * @param acceptLanguageList list of acceptable locales
\r
2801 * @param availableLocales list of available locales. One of these will be returned.
\r
2802 * @param fallback if non-null, a 1-element array containing a boolean to be set with the fallback status
\r
2803 * @return one of the locales from the availableLocales list, or null if none match
\r
2807 public static ULocale acceptLanguage(ULocale[] acceptLanguageList, ULocale[]
\r
2808 availableLocales, boolean[] fallback) {
\r
2811 if(fallback != null) {
\r
2814 for(i=0;i<acceptLanguageList.length;i++) {
\r
2815 ULocale aLocale = acceptLanguageList[i];
\r
2816 boolean[] setFallback = fallback;
\r
2818 for(j=0;j<availableLocales.length;j++) {
\r
2819 if(availableLocales[j].equals(aLocale)) {
\r
2820 if(setFallback != null) {
\r
2821 setFallback[0]=false; // first time with this locale - not a fallback.
\r
2823 return availableLocales[j];
\r
2826 Locale loc = aLocale.toLocale();
\r
2827 Locale parent = LocaleUtility.fallback(loc);
\r
2828 if(parent != null) {
\r
2829 aLocale = new ULocale(parent);
\r
2833 setFallback = null; // Do not set fallback in later iterations
\r
2834 } while (aLocale != null);
\r
2840 * Based on a HTTP formatted list of acceptable locales, determine an available locale for the user.
\r
2841 * NullPointerException is thrown if acceptLanguageList or availableLocales is
\r
2842 * null. If fallback is non-null, it will contain true if a fallback locale (one
\r
2843 * not in the acceptLanguageList) was returned. The value on entry is ignored.
\r
2844 * ULocale will be one of the locales in availableLocales, or the ROOT ULocale if
\r
2845 * a ROOT locale was used as a fallback (because nothing else in
\r
2846 * availableLocales matched). No ULocale array element should be null; behavior
\r
2847 * is undefined if this is the case.
\r
2848 * This function will choose a locale from the ULocale.getAvailableLocales() list as available.
\r
2849 * @param acceptLanguageList list in HTTP "Accept-Language:" format of acceptable locales
\r
2850 * @param fallback if non-null, a 1-element array containing a boolean to be set with the fallback status
\r
2851 * @return one of the locales from the ULocale.getAvailableLocales() list, or null if none match
\r
2855 public static ULocale acceptLanguage(String acceptLanguageList, boolean[] fallback) {
\r
2856 return acceptLanguage(acceptLanguageList, ULocale.getAvailableLocales(),
\r
2861 * Based on an ordered array of acceptable locales, determine an available locale for the user.
\r
2862 * NullPointerException is thrown if acceptLanguageList or availableLocales is
\r
2863 * null. If fallback is non-null, it will contain true if a fallback locale (one
\r
2864 * not in the acceptLanguageList) was returned. The value on entry is ignored.
\r
2865 * ULocale will be one of the locales in availableLocales, or the ROOT ULocale if
\r
2866 * a ROOT locale was used as a fallback (because nothing else in
\r
2867 * availableLocales matched). No ULocale array element should be null; behavior
\r
2868 * is undefined if this is the case.
\r
2869 * This function will choose a locale from the ULocale.getAvailableLocales() list as available.
\r
2870 * @param acceptLanguageList ordered array of acceptable locales (preferred are listed first)
\r
2871 * @param fallback if non-null, a 1-element array containing a boolean to be set with the fallback status
\r
2872 * @return one of the locales from the ULocale.getAvailableLocales() list, or null if none match
\r
2876 public static ULocale acceptLanguage(ULocale[] acceptLanguageList, boolean[]
\r
2878 return acceptLanguage(acceptLanguageList, ULocale.getAvailableLocales(),
\r
2883 * Package local method used for parsing Accept-Language string
\r
2884 * @internal ICU 3.8
\r
2886 static ULocale[] parseAcceptLanguage(String acceptLanguage, boolean isLenient) throws ParseException {
\r
2888 * @internal ICU 3.4
\r
2890 class ULocaleAcceptLanguageQ implements Comparable {
\r
2892 private double serial;
\r
2893 public ULocaleAcceptLanguageQ(double theq, int theserial) {
\r
2895 serial = theserial;
\r
2897 public int compareTo(Object o) {
\r
2898 ULocaleAcceptLanguageQ other = (ULocaleAcceptLanguageQ) o;
\r
2899 if (q > other.q) { // reverse - to sort in descending order
\r
2901 } else if (q < other.q) {
\r
2904 if (serial < other.serial) {
\r
2906 } else if (serial > other.serial) {
\r
2909 return 0; // same object
\r
2914 // parse out the acceptLanguage into an array
\r
2915 TreeMap map = new TreeMap();
\r
2916 StringBuffer languageRangeBuf = new StringBuffer();
\r
2917 StringBuffer qvalBuf = new StringBuffer();
\r
2919 acceptLanguage += ","; // append comma to simplify the parsing code
\r
2921 boolean subTag = false;
\r
2922 boolean q1 = false;
\r
2923 for (n = 0; n < acceptLanguage.length(); n++) {
\r
2924 boolean gotLanguageQ = false;
\r
2925 char c = acceptLanguage.charAt(n);
\r
2927 case 0: // before language-range start
\r
2928 if (('A' <= c && c <= 'Z') || ('a' <= c && c <= 'z')) {
\r
2929 // in language-range
\r
2930 languageRangeBuf.append(c);
\r
2933 } else if (c == '*') {
\r
2934 languageRangeBuf.append(c);
\r
2936 } else if (c != ' ' && c != '\t') {
\r
2937 // invalid character
\r
2941 case 1: // in language-range
\r
2942 if (('A' <= c && c <= 'Z') || ('a' <= c && c <= 'z')) {
\r
2943 languageRangeBuf.append(c);
\r
2944 } else if (c == '-') {
\r
2946 languageRangeBuf.append(c);
\r
2947 } else if (c == '_') {
\r
2950 languageRangeBuf.append(c);
\r
2954 } else if ('0' <= c && c <= '9') {
\r
2956 languageRangeBuf.append(c);
\r
2958 // DIGIT is allowed only in language sub tag
\r
2961 } else if (c == ',') {
\r
2963 gotLanguageQ = true;
\r
2964 } else if (c == ' ' || c == '\t') {
\r
2965 // language-range end
\r
2967 } else if (c == ';') {
\r
2971 // invalid character for language-range
\r
2975 case 2: // saw wild card range
\r
2978 gotLanguageQ = true;
\r
2979 } else if (c == ' ' || c == '\t') {
\r
2980 // language-range end
\r
2982 } else if (c == ';') {
\r
2990 case 3: // language-range end
\r
2993 gotLanguageQ = true;
\r
2994 } else if (c == ';') {
\r
2997 } else if (c != ' ' && c != '\t') {
\r
3002 case 4: // before q
\r
3006 } else if (c != ' ' && c != '\t') {
\r
3011 case 5: // before equal
\r
3015 } else if (c != ' ' && c != '\t') {
\r
3020 case 6: // before q value
\r
3022 // q value start with 0
\r
3024 qvalBuf.append(c);
\r
3026 } else if (c == '1') {
\r
3027 // q value start with 1
\r
3028 qvalBuf.append(c);
\r
3030 } else if (c == '.') {
\r
3032 qvalBuf.append(c);
\r
3037 } else if (c != ' ' && c != '\t') {
\r
3042 case 7: // q value start
\r
3044 // before q value fraction part
\r
3045 qvalBuf.append(c);
\r
3047 } else if (c == ',') {
\r
3049 gotLanguageQ = true;
\r
3050 } else if (c == ' ' || c == '\t') {
\r
3058 case 8: // before q value fraction part
\r
3059 if ('0' <= c || c <= '9') {
\r
3060 if (q1 && c != '0' && !isLenient) {
\r
3061 // if q value starts with 1, the fraction part must be 0
\r
3064 // in q value fraction part
\r
3065 qvalBuf.append(c);
\r
3073 case 9: // in q value fraction part
\r
3074 if ('0' <= c && c <= '9') {
\r
3075 if (q1 && c != '0') {
\r
3076 // if q value starts with 1, the fraction part must be 0
\r
3079 qvalBuf.append(c);
\r
3081 } else if (c == ',') {
\r
3083 gotLanguageQ = true;
\r
3084 } else if (c == ' ' || c == '\t') {
\r
3092 case 10: // after q value
\r
3095 gotLanguageQ = true;
\r
3096 } else if (c != ' ' && c != '\t') {
\r
3102 if (state == -1) {
\r
3104 throw new ParseException("Invalid Accept-Language", n);
\r
3106 if (gotLanguageQ) {
\r
3108 if (qvalBuf.length() != 0) {
\r
3110 q = Double.parseDouble(qvalBuf.toString());
\r
3111 } catch (NumberFormatException nfe) {
\r
3112 // Already validated, so it should never happen
\r
3119 if (languageRangeBuf.charAt(0) != '*') {
\r
3120 int serial = map.size();
\r
3121 ULocaleAcceptLanguageQ entry = new ULocaleAcceptLanguageQ(q, serial);
\r
3122 map.put(entry, new ULocale(canonicalize(languageRangeBuf.toString()))); // sort in reverse order.. 1.0, 0.9, 0.8 .. etc
\r
3125 // reset buffer and parse state
\r
3126 languageRangeBuf.setLength(0);
\r
3127 qvalBuf.setLength(0);
\r
3132 // Well, the parser should handle all cases. So just in case.
\r
3133 throw new ParseException("Invalid AcceptlLanguage", n);
\r
3136 // pull out the map
\r
3137 ULocale acceptList[] = (ULocale[])map.values().toArray(new ULocale[map.size()]);
\r
3138 return acceptList;
\r
// Placeholder subtags standing for an unspecified language, script and region.
3141 private static final String UNDEFINED_LANGUAGE = "und";
\r
3142 private static final String UNDEFINED_SCRIPT = "Zzzz";
\r
3143 private static final String UNDEFINED_REGION = "ZZ";
\r
3146 * Add the likely subtags for a provided locale ID, per the algorithm described
\r
3147 * in the following CLDR technical report:
\r
3149 * http://www.unicode.org/reports/tr35/#Likely_Subtags
\r
3151 * If the provided ULocale instance is already in the maximal form, or there is no
\r
3152 * data available for maximization, it will be returned. For example,
\r
3153 * "und-Zzzz" cannot be maximized, since there is no reasonable maximization.
\r
3154 * Otherwise, a new ULocale instance with the maximal form is returned.
\r
3158 * "en" maximizes to "en_Latn_US"
\r
3160 * "de" maximizes to "de_Latn_DE"
\r
3162 * "sr" maximizes to "sr_Cyrl_RS"
\r
3164 * "sh" maximizes to "sr_Latn_RS" (Note this will not reverse.)
\r
3166 * "zh_Hani" maximizes to "zh_Hans_CN" (Note this will not reverse.)
\r
3168 * @param loc The ULocale to maximize
\r
3169 * @return The maximized ULocale instance.
\r
3172 public static ULocale
\r
3173 addLikelySubtags(ULocale loc)
\r
3175 String[] tags = new String[3];
\r
3176 String trailing = null;
\r
3178 int trailingIndex = parseTagString(
\r
3182 if (trailingIndex < loc.localeID.length()) {
\r
3183 trailing = loc.localeID.substring(trailingIndex);
\r
3186 String newLocaleID =
\r
3187 createLikelySubtagsString(
\r
3193 return newLocaleID == null ? loc : new ULocale(newLocaleID);
\r
3197 * Minimize the subtags for a provided locale ID, per the algorithm described
\r
3198 * in the following CLDR technical report:
\r
3200 * http://www.unicode.org/reports/tr35/#Likely_Subtags
\r
3202 * If the provided ULocale instance is already in the minimal form, or there
\r
3203 * is no data available for minimization, it will be returned. Since the
\r
3204 * minimization algorithm relies on proper maximization, see the comments
\r
3205 * for addLikelySubtags for reasons why there might not be any data.
\r
3209 * "en_Latn_US" minimizes to "en"
\r
3211 * "de_Latn_DE" minimizes to "de"
\r
3213 * "sr_Cyrl_RS" minimizes to "sr"
\r
3215 * "zh_Hant_TW" minimizes to "zh_TW" (The region is preferred to the
\r
3216 * script, and minimizing to "zh" would imply "zh_Hans_CN".)
\r
3218 * @param loc The ULocale to minimize
\r
3219 * @return The minimized ULocale instance.
\r
3222 public static ULocale
\r
3223 minimizeSubtags(ULocale loc)
\r
3225 String[] tags = new String[3];
\r
3227 int trailingIndex = parseTagString(
\r
3231 String originalLang = (String)tags[0];
\r
3232 String originalScript = (String)tags[1];
\r
3233 String originalRegion = (String)tags[2];
\r
3234 String originalTrailing = null;
\r
3236 if (trailingIndex < loc.localeID.length()) {
\r
3238 * Create a String that contains everything
\r
3239 * after the language, script, and region.
\r
3241 originalTrailing = loc.localeID.substring(trailingIndex);
\r
3245 * First, we need to first get the maximization
\r
3246 * by adding any likely subtags.
\r
3248 String maximizedLocaleID =
\r
3249 createLikelySubtagsString(
\r
3256 * If maximization fails, there's nothing
\r
3259 if (isEmptyString(maximizedLocaleID)) {
\r
3264 * Start first with just the language.
\r
3267 createLikelySubtagsString(
\r
3273 if (tag.equals(maximizedLocaleID)) {
\r
3274 String newLocaleID =
\r
3279 originalTrailing);
\r
3281 return new ULocale(newLocaleID);
\r
3286 * Next, try the language and region.
\r
3288 if (originalRegion.length() != 0) {
\r
3291 createLikelySubtagsString(
\r
3297 if (tag.equals(maximizedLocaleID)) {
\r
3298 String newLocaleID =
\r
3303 originalTrailing);
\r
3305 return new ULocale(newLocaleID);
\r
3310 * Finally, try the language and script. This is our last chance,
\r
3311 * since trying with all three subtags would only yield the
\r
3312 * maximal version that we already have.
\r
3314 if (originalRegion.length() != 0 &&
\r
3315 originalScript.length() != 0) {
\r
3318 createLikelySubtagsString(
\r
3324 if (tag.equals(maximizedLocaleID)) {
\r
3325 String newLocaleID =
\r
3330 originalTrailing);
\r
3332 return new ULocale(newLocaleID);
\r
3340 * A trivial utility function that checks for a null
\r
3341 * reference or checks the length of the supplied String.
\r
3343 * @param string The string to check
\r
3345 * @return true if the String is empty, or if the reference is null.
\r
3347 private static boolean isEmptyString(String string) {
\r
3348 return string == null || string.length() == 0;
\r
3352 * Append a tag to a StringBuffer, adding the separator if necessary. The tag must
\r
3353 * not be a zero-length string.
\r
3355 * @param tag The tag to add.
\r
3356 * @param buffer The output buffer.
\r
// Writes one tag into the output buffer, preceded by a separator when needed.
3358 private static void
\r
3361 StringBuffer buffer) {
\r
// A separator is only needed when the buffer already holds something.
3363 if (buffer.length() != 0) {
\r
3364 buffer.append(UNDERSCORE);
\r
3367 buffer.append(tag);
\r
3371 * Create a tag string from the supplied parameters. The lang, script and region
\r
3372 * parameters may be null references.
\r
3374 * If any of the language, script or region parameters are empty, and the alternateTags
\r
3375 * parameter is not null, it will be parsed for potential language, script and region tags
\r
3376 * to be used when constructing the new tag. If the alternateTags parameter is null, or
\r
3377 * it contains no language tag, the default tag for the unknown language is used.
\r
3379 * @param lang The language tag to use.
\r
3380 * @param script The script tag to use.
\r
3381 * @param region The region tag to use.
\r
3382 * @param trailing Any trailing data to append to the new tag.
\r
3383 * @param alternateTags A string containing any alternate tags.
\r
3384 * @return The new tag string.
\r
// Assembles the language, script and region parts, then the trailing text, into one tag string.
3386 private static String
\r
3392 String alternateTags) {
\r
// The parser over alternateTags is created only if one of the branches below needs it.
3394 IDParser parser = null;
\r
// Records whether a region was written; this decides how the trailing text is joined further down.
3395 boolean regionAppended = false;
\r
3397 StringBuffer tag = new StringBuffer();
\r
// Language: explicit value, else taken from alternateTags, else the undefined-language placeholder.
3399 if (!isEmptyString(lang)) {
\r
3404 else if (isEmptyString(alternateTags)) {
\r
3406 * Append the value for an unknown language, if
\r
3407 * we found no language.
\r
3410 UNDEFINED_LANGUAGE,
\r
3414 parser = new IDParser(alternateTags);
\r
3416 String alternateLang = parser.getLanguage();
\r
3419 * Append the value for an unknown language, if
\r
3420 * we found no language.
\r
3423 !isEmptyString(alternateLang) ? alternateLang : UNDEFINED_LANGUAGE,
\r
// Script: explicit value, else the script parsed from alternateTags when it has one.
3427 if (!isEmptyString(script)) {
\r
3432 else if (!isEmptyString(alternateTags)) {
\r
3434 * Parse the alternateTags string for the script.
\r
3436 if (parser == null) {
\r
3437 parser = new IDParser(alternateTags);
\r
3440 String alternateScript = parser.getScript();
\r
3442 if (!isEmptyString(alternateScript)) {
\r
// Region: explicit value, else the region parsed from alternateTags when it has one.
3449 if (!isEmptyString(region)) {
\r
3454 regionAppended = true;
\r
3456 else if (!isEmptyString(alternateTags)) {
\r
3458 * Parse the alternateTags string for the region.
\r
3460 if (parser == null) {
\r
3461 parser = new IDParser(alternateTags);
\r
3464 String alternateRegion = parser.getCountry();
\r
3466 if (!isEmptyString(alternateRegion)) {
\r
3471 regionAppended = true;
\r
// Trailing text shorter than two characters is ignored.
3475 if (trailing != null && trailing.length() > 1) {
\r
3477 * The current ICU format expects two underscores
\r
3478 * will separate the variant from the preceeding
\r
3479 * parts of the tag, if there is no region.
\r
// Counts the separators at the start of the trailing text.
3481 int separators = 0;
\r
3483 if (trailing.charAt(0) == UNDERSCORE) {
\r
3484 if (trailing.charAt(1) == UNDERSCORE) {
\r
3492 if (regionAppended) {
\r
3494 * If we appended a region, we may need to strip
\r
3495 * the extra separator from the variant portion.
\r
3497 if (separators == 2) {
\r
3498 tag.append(trailing.substring(1));
\r
3501 tag.append(trailing);
\r
3506 * If we did not append a region, we may need to add
\r
3507 * an extra separator to the variant portion.
\r
3509 if (separators == 1) {
\r
3510 tag.append(UNDERSCORE);
\r
3512 tag.append(trailing);
\r
3516 return tag.toString();
\r
3520 * Create a tag string from the supplied parameters. The lang, script and region
\r
3521 * parameters may be null references. If the lang parameter is an empty string, the
\r
3522 * default value for an unknown language is written to the output buffer.
\r
3524 * @param lang The language tag to use.
\r
3525 * @param script The script tag to use.
\r
3526 * @param region The region tag to use.
\r
3527 * @param trailing Any trailing data to append to the new tag.
\r
3528 * @return The new String.
\r
3535 String trailing) {
\r
// Delegates to createTagString - presumably the overload that also takes alternateTags; confirm.
3537 return createTagString(
\r
3546 * Parse the language, script, and region subtags from a tag string, and return the results.
\r
3548 * This function does not return the canonical strings for the unknown script and region.
\r
3550 * @param localeID The locale ID to parse.
\r
3551 * @param tags An array of three String references to return the subtag strings.
\r
3552 * @return The number of chars of the localeID parameter consumed.
\r
3554 private static int
\r
3559 IDParser parser = new IDParser(localeID);
\r
3561 String lang = parser.getLanguage();
\r
3562 String script = parser.getScript();
\r
3563 String region = parser.getCountry();
\r
3565 if (isEmptyString(lang)) {
\r
3566 tags[0] = UNDEFINED_LANGUAGE;
\r
3572 if (script.equals(UNDEFINED_SCRIPT)) {
\r
3579 if (region.equals(UNDEFINED_REGION)) {
\r
3587 * Search for the variant. If there is one, then return the index of
\r
3588 * the preceeding separator.
\r
3589 * If there's no variant, search for the keyword delimiter,
\r
3590 * and return its index. Otherwise, return the length of the
\r
3593 * $TOTO(dbertoni) we need to take into account that we might
\r
3594 * find a part of the language as the variant, since it can
\r
3595 * can have a variant portion that is long enough to contain
\r
3596 * the same characters as the variant.
\r
3598 String variant = parser.getVariant();
\r
3600 if (!isEmptyString(variant)){
\r
3601 int index = localeID.indexOf(variant);
\r
3604 return index > 0 ? index - 1 : index;
\r
3608 int index = localeID.indexOf('@');
\r
3610 return index == -1 ? localeID.length() : index;
\r
3614 private static String
\r
3615 lookupLikelySubtags(String localeId) {
\r
3616 UResourceBundle bundle =
\r
3617 UResourceBundle.getBundleInstance(
\r
3618 ICUResourceBundle.ICU_BASE_NAME, "likelySubtags");
\r
3620 return bundle.getString(localeId);
\r
3622 catch(MissingResourceException e) {
\r
// Looks up likely-subtags data using search tags from most to least specific:
// language+script+region, language+script, language+region, then language alone.
// At each step a non-null lookup result is turned into the returned tag string.
3627 private static String
\r
3628 createLikelySubtagsString(
\r
3632 String variants) {
\r
3635 * Try the language with the script and region first.
\r
3637 if (!isEmptyString(script) && !isEmptyString(region)) {
\r
3639 String searchTag =
\r
3646 String likelySubtags = lookupLikelySubtags(searchTag);
\r
// NOTE(review): the "Tag mismatch" checks below use likelySubtags2, which is not
// declared in the visible code - presumably commented-out debug output; confirm.
3649 if (likelySubtags == null) {
\r
3650 if (likelySubtags2 != null) {
\r
3651 System.err.println("Tag mismatch: \"(null)\" \"" + likelySubtags2 + "\"");
\r
3654 else if (likelySubtags2 == null) {
\r
3655 System.err.println("Tag mismatch: \"" + likelySubtags + "\" \"(null)\"");
\r
3657 else if (!likelySubtags.equals(likelySubtags2)) {
\r
3658 System.err.println("Tag mismatch: \"" + likelySubtags + "\" \"" + likelySubtags2 + "\"");
\r
3661 if (likelySubtags != null) {
\r
3662 // Always use the language tag from the
\r
3663 // maximal string, since it may be more
\r
3664 // specific than the one provided.
\r
3665 return createTagString(
\r
3675 * Try the language with just the script.
\r
3677 if (!isEmptyString(script)) {
\r
3679 String searchTag =
\r
3686 String likelySubtags = lookupLikelySubtags(searchTag);
\r
3687 if (likelySubtags != null) {
\r
3688 // Always use the language tag from the
\r
3689 // maximal string, since it may be more
\r
3690 // specific than the one provided.
\r
3691 return createTagString(
\r
3701 * Try the language with just the region.
\r
3703 if (!isEmptyString(region)) {
\r
3705 String searchTag =
\r
3712 String likelySubtags = lookupLikelySubtags(searchTag);
\r
3714 if (likelySubtags != null) {
\r
3715 // Always use the language tag from the
\r
3716 // maximal string, since it may be more
\r
3717 // specific than the one provided.
\r
3718 return createTagString(
\r
3728 * Finally, try just the language.
\r
3731 String searchTag =
\r
3738 String likelySubtags = lookupLikelySubtags(searchTag);
\r
3740 if (likelySubtags != null) {
\r
3741 // Always use the language tag from the
\r
3742 // maximal string, since it may be more
\r
3743 // specific than the one provided.
\r
3744 return createTagString(
\r
3756 // --------------------------------
\r
3757 // BCP47/OpenJDK APIs
\r
3758 // --------------------------------
\r
3761 * The key for private use locale extension.
\r
3762 * @see #getExtension(char)
\r
3763 * @see Builder#setExtension(char, String)
\r
3766 * @provisional This API might change or be removed in a future release.
\r
// 'x' is the extension key reserved for BCP47 private use.
3768 static public final char PRIVATE_USE_EXTENSION = 'x';
\r
3771 * The key for LDML extension.
\r
3772 * @see #getExtension(char)
\r
3773 * @see Builder#setExtension(char, String)
\r
3776 * @provisional This API might change or be removed in a future release.
\r
// 'u' is the extension key under which LDML keywords (key/type pairs) are stored.
3778 static public final char LDML_EXTENSION = 'u';
\r
3781 * Returns the extension associated with the specified extension key, or
\r
3782 * null if there is no extension associated with the key. The key must
\r
3783 * be one of <code>[0-9A-Za-z]</code>.
\r
3784 * <BR><STRONG>Note:</STRONG>Extension key 'x' and 'X' is reserved for BCP47
\r
3785 * private use. To get the private use value, use <code>PRIVATE_USE_EXTENSION</code>.
\r
3787 * @param key the extension key
\r
3788 * @return the extension, or null if this locale
\r
3789 * defines no extension for the specified key.
\r
3790 * @throws IllegalArgumentException if the key is not valid.
\r
3791 * @see #PRIVATE_USE_EXTENSION
\r
3794 * @provisional This API might change or be removed in a future release.
\r
3796 public String getExtension(char key) {
\r
3797 if (!LocaleExtensions.isValidExtensionKey(key)) {
\r
3798 throw new IllegalArgumentException("Invalid extension key: " + key);
\r
3800 return extensions().getExtensionValue(key);
\r
3804 * Returns the set of extension keys associated with this locale, or null
\r
3805 * if it has no extensions. The returned set is immutable.
\r
3806 * @return the set of extension keys, or null if this locale has
\r
3810 * @provisional This API might change or be removed in a future release.
\r
3812 public Set getExtensionKeys() {
\r
3813 return extensions().getExtensionKeys();
\r
3817 * Returns the LDML keyword value ('type') associated with
\r
3818 * the specified LDML key for this locale. LDML keywords are specified
\r
3819 * by the 'u' extension and consist of key/type pairs. The key must be
\r
3820 * two alphanumeric characters in length, or an IllegalArgumentException
\r
3822 * @param key the LDML key
\r
3823 * @return the value ('type') associated with the key, or null if the
\r
3824 * locale does not define a value for the key.
\r
3825 * @throws IllegalArgumentException if the key is not valid.
\r
3828 * @provisional This API might change or be removed in a future release.
\r
3830 public String getLDMLExtensionValue(String key) {
\r
3831 if (!LocaleExtensions.isValidLDMLKey(key)) {
\r
3832 throw new IllegalArgumentException("Invalid LDML key: " + key);
\r
3834 return extensions().getLDMLKeywordType(key);
\r
3838 * Returns the set of keys for LDML keywords defined by this locale, or
\r
3839 * null if this locale has no locale extension. The returned set is
\r
3841 * @return The set of the LDML keys, or null
\r
3844 * @provisional This API might change or be removed in a future release.
\r
3846 public Set getLDMLExtensionKeys() {
\r
3847 return extensions().getLDMLKeywordKeys();
\r
3851 * Returns a well-formed language tag representing this locale.
\r
3853 * <b>Note</b>: If the language, country, or variant fields do
\r
3854 * not satisfy BCP47 language tag syntax requirements, they are
\r
3855 * omitted from the result. For example, using the constructor it
\r
3856 * is possible to create a Locale instance with digits in the
\r
3857 * language field, or only two characters in the variant field.
\r
3858 * Since these are not well-formed BCP47 language tag syntax, they
\r
3859 * cannot be expressed in BCP47. Since such 'legacy' locales lose
\r
3860 * information when converting to BCP47, it is
\r
3861 * recommended that clients switch to conforming locales.
\r
3863 * <b>Note</b>: Underscores in the variant tag are normalized to
\r
3864 * hyphen, and all fields, keys, and values are normalized to
\r
3866 * @return a BCP47 language tag representing the locale.
\r
3869 * @provisional This API might change or be removed in a future release.
\r
3871 public String toLanguageTag() {
\r
3872 return LanguageTag.toLanguageTag(base(), extensions());
\r
3876 * Returns a locale for the specified language tag string. If the
\r
3877 * specified language tag contains any ill-formed subtags, the first
\r
3878 * such subtag and all following subtags are ignored.
\r
3880 * This implements the 'Language-Tag' production of BCP47, and so supports
\r
3881 * grandfathered (regular and irregular) as well as private use language
\r
3882 * tags. Private use tags are represented as 'und-x-whatever', and
\r
3883 * grandfathered tags are converted to their canonical replacements where
\r
3884 * they exist. Note that a few grandfathered tags have no modern replacement,
\r
3885 * these will be converted using the fallback described in the first paragraph,
\r
3886 * so some information might be lost.
\r
3888 * For a list of grandfathered tags, see
\r
3889 * <a href="http://www.ietf.org/internet-drafts/draft-ietf-ltru-4646bis-21.txt">
\r
3891 * (<span style="background-color: #00ccff; font-weight: bold">Currently Draft,
\r
3892 * remove or reference final version before release.</span>)
\r
3893 * @param langtag the language tag
\r
3894 * @return the locale that best represents the language tag
\r
3897 * @provisional This API might change or be removed in a future release.
\r
// Parses the tag and builds a locale from it; when building fails on an
// extension, the last subtag is removed and the shorter tag is tried again.
3899 public static ULocale forLanguageTag(String langtag) {
\r
3900 // ULocale locale = ULocale.ROOT;
\r
// Result used when no locale can be built from the tag.
3901 ULocale locale = new ULocale("");
\r
3902 LanguageTag tag = null;
\r
3905 tag = LanguageTag.parse(langtag);
\r
3907 Builder bldr = new Builder();
\r
// The first extlang subtag, when present, is used in place of the primary language.
3909 String extlang = tag.getExtlang(0);
\r
3910 String language = (extlang == null) ? tag.getLanguage() : extlang;
\r
3911 // do nothing with language code "und"
\r
3912 if (!language.equals("und")) {
\r
3913 bldr.setLanguage(language);
\r
3916 bldr.setScript(tag.getScript())
\r
3917 .setRegion(tag.getRegion()).setVariant(tag.getVariant());
\r
3919 // setExtension may throw an exception if
\r
3920 // it contains malformed LDML keys.
\r
3921 Set exts = tag.getExtensions();
\r
3922 if (exts != null) {
\r
3923 Iterator itr = exts.iterator();
\r
3924 while (itr.hasNext()) {
\r
3925 Extension e = (Extension)itr.next();
\r
3926 bldr.setExtension(e.getSingleton(), e.getValue());
\r
3929 bldr.setExtension(PRIVATE_USE_EXTENSION, tag.getPrivateUse());
\r
3930 locale = bldr.create();
\r
3932 } catch (LocaleSyntaxException e) {
\r
3933 // this exception was thrown by LanguageTag#parse
\r
3935 } catch (IllformedLocaleException e) {
\r
3936 // this exception was thrown by setExtension with
\r
3937 // malformed LDML keys - fall through
\r
3939 // remove the last subtag and try it again
\r
3940 int idx = langtag.lastIndexOf('-');
\r
3942 // no more subtags
\r
3945 langtag = langtag.substring(0, idx);
\r
3953 * Builder is used to build instances of Locale from values
\r
3954 * configured by the setter.
\r
3956 * Builder supports the 'langtag' production of RFC 4646.
\r
3957 * Language tags consist of the ASCII digits, upper and lower case
\r
3958 * letters, and hyphen (which appears only as a field separator).
\r
3959 * As a convenience, underscores are accepted and normalized to
\r
3960 * hyphen. Values with any other character are ill-formed. Since
\r
3961 * language tags are case-insensitive, they are normalized
\r
3962 * to lower case, case distinctions are <b>not</b>
\r
3963 * preserved by the builder.
\r
3965 * Note that since this implements 'langtag' and not 'Language-Tag',
\r
3966 * grandfathered language tags are not supported by the builder.
\r
3967 * Clients should use {@link #forLanguageTag} instead.
\r
3969 * Builders can be reused; <code>clear()</code> resets all fields
\r
3970 * to their default values.
\r
3971 * @see Builder#create
\r
3972 * @see Builder#clear
\r
3975 * @provisional This API might change or be removed in a future release.
\r
3977 public static final class Builder {
\r
3979 private InternalLocaleBuilder _locbld = new InternalLocaleBuilder();
\r
3982 * Constructs an empty Builder.
\r
3983 * The default values of all fields, extensions, and private
\r
3984 * use information are empty, the language is undefined.
\r
3987 * @provisional This API might change or be removed in a future release.
\r
3989 public Builder() {
\r
3993 * Resets the builder to match the provided locale. The previous state
\r
3994 * of the builder is discarded. Fields that do not
\r
3995 * conform to BCP47 syntax are ill-formed.
\r
3996 * @param loc the locale
\r
3997 * @return this builder
\r
3998 * @throws IllformedLocaleException if <code>loc</code> has any ill-formed
\r
4002 * @provisional This API might change or be removed in a future release.
\r
4004 public Builder setLocale(ULocale loc) {
\r
4006 setLanguage(loc.getLanguage())
\r
4007 .setScript(loc.getScript())
\r
4008 .setRegion(loc.getCountry())
\r
4009 .setVariant(loc.getVariant());
\r
4011 Set extKeys = loc.getExtensionKeys();
\r
4012 if (extKeys != null) {
\r
4013 Iterator itr = extKeys.iterator();
\r
4014 while (itr.hasNext()) {
\r
4015 char key = ((Character)itr.next()).charValue();
\r
4016 String value = loc.getExtension(key);
\r
4017 if (value != null && value.length() > 0) {
\r
4018 setExtension(key, value);
\r
4026 * Resets the builder to match the provided language tag. The previous state
\r
4027 * of the builder is discarded.
\r
4028 * @param langtag the language tag
\r
4029 * @return this builder
\r
4030 * @throws IllformedLocaleException if <code>langtag</code> is ill-formed.
\r
4031 * @see #forLanguageTag(String)
\r
4034 * @provisional This API might change or be removed in a future release.
\r
4036 public Builder setLanguageTag(String langtag) {
\r
4038 LanguageTag tag = null;
\r
4040 tag = LanguageTag.parse(langtag);
\r
4041 } catch (LocaleSyntaxException e) {
\r
4042 throw new IllformedLocaleException(e.getMessage(), e.getErrorIndex());
\r
4045 // base locale fields
\r
4046 String extlang = tag.getExtlang(0);
\r
4047 String language = extlang == null ? tag.getLanguage() : extlang;
\r
4048 setLanguage(language.equals("und") ? "": language).setScript(tag.getScript())
\r
4049 .setRegion(tag.getRegion()).setVariant(tag.getVariant());
\r
4052 Set exts = tag.getExtensions();
\r
4053 if (exts != null) {
\r
4054 Iterator itr = exts.iterator();
\r
4055 while (itr.hasNext()) {
\r
4056 Extension e = (Extension)itr.next();
\r
4057 setExtension(e.getSingleton(), e.getValue());
\r
4058 //TODO: setExtension may throw an IllformedLocaleException.
\r
4059 // In this csae, error index must be recalculated.
\r
4063 setExtension(PRIVATE_USE_EXTENSION, tag.getPrivateUse());
\r
4068 * Sets the language. If language is the empty string,
\r
4069 * the language is defaulted. Language should be a two or
\r
4070 * three-letter language code as defined in ISO639.
\r
4071 * Well-formed values are any string of two to eight ASCII letters.
\r
4072 * @param language the language
\r
4073 * @return this builder
\r
4074 * @throws IllformedLocaleException if <code>language</code> is ill-formed
\r
4077 * @provisional This API might change or be removed in a future release.
\r
4079 public Builder setLanguage(String language) {
\r
// Hand the value to the internal locale builder, which may reject it with a
// LocaleSyntaxException.
4081 _locbld.setLanguage(language);
\r
// Report the failure as the public IllformedLocaleException, keeping the
// original message and error index.
4082 } catch (LocaleSyntaxException e) {
\r
4083 throw new IllformedLocaleException(e.getMessage(), e.getErrorIndex());
\r
4089 * Sets the script. If script is
\r
4090 * the empty string, the script is defaulted. Scripts should
\r
4091 * be a four-letter script code as defined in ISO 15924.
\r
4092 * Well-formed values are any string of four ASCII letters.
\r
4093 * @param script the script
\r
4094 * @return this builder
\r
4095 * @throws IllformedLocaleException if <code>script</code> is ill-formed
\r
4098 * @provisional This API might change or be removed in a future release.
\r
4100 public Builder setScript(String script) {
\r
// The script is stored by the internal locale builder, which may reject it
// with a LocaleSyntaxException.
4102 _locbld.setScript(script);
\r
// Translate the internal syntax error into IllformedLocaleException with the
// same message and error index.
4103 } catch (LocaleSyntaxException e) {
\r
4104 throw new IllformedLocaleException(e.getMessage(), e.getErrorIndex());
\r
4110 * Sets the region. If region is
\r
4111 * the empty string, the region is defaulted. Regions should
\r
4112 * be a two-letter ISO 3166 code or a three-digit UN M.49 code.
\r
4113 * Well-formed values are any two-letter or three-digit
\r
4115 * @param region the region
\r
4116 * @return this builder
\r
4117 * @throws IllformedLocaleException if <code>region</code> is ill-formed
\r
4120 * @provisional This API might change or be removed in a future release.
\r
4122 public Builder setRegion(String region) {
\r
// The region is stored by the internal locale builder, which may reject it
// with a LocaleSyntaxException.
4124 _locbld.setRegion(region);
\r
// Translate the internal syntax error into IllformedLocaleException with the
// same message and error index.
4125 } catch (LocaleSyntaxException e) {
\r
4126 throw new IllformedLocaleException(e.getMessage(), e.getErrorIndex());
\r
4132 * Sets the variant. If variant is
\r
4133 * the empty string, the variant is defaulted. Variants
\r
4134 * should be registered variants (see
\r
4135 * <a href="http://www.iana.org/assignments/language-subtag-registry">
\r
4136 * IANA Language Subtag Registry</a>) for the prefix. Well-formed
\r
4137 * variants are any series of fields of either four characters
\r
4138 * starting with a digit, or five to eight alphanumeric
\r
4139 * characters, separated by hyphen or underscore.
\r
4140 * @param variant the variant
\r
4141 * @return this builder
\r
4142 * @throws IllformedLocaleException if <code>variant</code> is ill-formed
\r
4145 * @provisional This API might change or be removed in a future release.
\r
4147 public Builder setVariant(String variant) {
\r
// The variant is stored by the internal locale builder, which may reject it
// with a LocaleSyntaxException.
4149 _locbld.setVariant(variant);
\r
// Translate the internal syntax error into IllformedLocaleException with the
// same message and error index.
4150 } catch (LocaleSyntaxException e) {
\r
4151 throw new IllformedLocaleException(e.getMessage(), e.getErrorIndex());
\r
4157 * Sets the extension for the given key. If the value is
\r
4158 * the empty string, the extension is removed. Legal
\r
4159 * keys are the characters <code>[0-9A-WY-Za-wy-z]</code>. Well-formed
\r
4160 * values are any series of fields of two to eight
\r
4161 * alphanumeric characters, separated by hyphen or underscore.
\r
4163 * <b>Note</b>: The extension 'u' is used for LDML keywords.
\r
4164 * Setting the 'u' extension replaces any existing LDML
\r
4165 * keywords with those defined in the extension. To be
\r
4166 * well-formed, a value for the 'u' extension must meet the
\r
4167 * additional constraint that the number of fields be even
\r
4168 * (fields represent key value pairs, where the value is
\r
4169 * mandatory), and that the keys and values be legal locale
\r
4170 * extension keys and values.
\r
4171 * @param key the extension key
\r
4172 * @param value the extension value
\r
4173 * @return this builder
\r
4174 * @throws IllformedLocaleException if <code>key</code> is illegal
\r
4175 * or <code>value</code> is ill-formed
\r
4176 * @see #setLDMLExtensionValue
\r
4179 * @provisional This API might change or be removed in a future release.
\r
4181 public Builder setExtension(char key, String value) {
\r
// Key and value are passed unchanged to the internal locale builder, which
// may reject them with a LocaleSyntaxException.
4183 _locbld.setExtension(key, value);
\r
// Translate the internal syntax error into IllformedLocaleException with the
// same message and error index.
4184 } catch (LocaleSyntaxException e) {
\r
4185 throw new IllformedLocaleException(e.getMessage(), e.getErrorIndex());
\r
4191 * Sets the LDML keyword value ('type') for the given key. If the
\r
4192 * value is the empty string, the LDML keyword is removed.
\r
4193 * Well-formed keys are strings of two alphanumeric characters. Well-formed
\r
4194 * values are strings of three to eight alphanumeric characters.
\r
4196 * <b>Note</b>: Setting the 'u' extension replaces all LDML
\r
4197 * keywords with those defined in the extension.
\r
4198 * @param key the LDML extension key
\r
4199 * @param value the LDML extension value
\r
4200 * @return this builder
\r
4201 * @throws IllformedLocaleException if <code>key</code> or <code>value</code>
\r
4203 * @see #setExtension(char, String)
\r
4206 * @provisional This API might change or be removed in a future release.
\r
4208 public Builder setLDMLExtensionValue(String key, String value) {
\r
// The LDML keyword key and value are passed unchanged to the internal locale
// builder, which may reject them with a LocaleSyntaxException.
4210 _locbld.setLDMLExtensionValue(key, value);
\r
// Translate the internal syntax error into IllformedLocaleException with the
// same message and error index.
4211 } catch (LocaleSyntaxException e) {
\r
4212 throw new IllformedLocaleException(e.getMessage(), e.getErrorIndex());
\r
4218 * Resets the builder to its initial, default state.
\r
4219 * @return this builder
\r
4222 * @provisional This API might change or be removed in a future release.
\r
4224 public Builder clear() {
// Per the Javadoc contract of this method: puts the builder back into its
// initial, default state and returns this builder.
\r
4230 * Resets the extensions to their initial, default state.
\r
4231 * Language, script, region and variant are unchanged.
\r
4232 * @return this builder
\r
4233 * @see #setExtension(char, String)
\r
4236 * @provisional This API might change or be removed in a future release.
\r
4238 public Builder clearExtensions() {
\r
// Only the extensions held by the internal locale builder are removed here;
// no other builder field is touched by this call.
4239 _locbld.removeLocaleExtensions();
\r
4244 * Returns an instance of locale created from the fields set
\r
4245 * on this builder.
\r
4246 * @return a new locale
\r
4249 * @provisional This API might change or be removed in a future release.
\r
4251 public ULocale create() {
\r
// Builds the ULocale from the base locale and the locale extensions currently
// held by the internal locale builder.
4252 return getInstance(_locbld.getBaseLocale(), _locbld.getLocaleExtensions());
\r
4256 private static ULocale getInstance(BaseLocale base, LocaleExtensions ext) {
\r
// Composes a legacy locale ID string: the base locale ID followed by keywords
// derived from the extensions, and constructs a ULocale from that string.
4257 StringBuffer id = new StringBuffer(base.getID());
\r
4259 TreeMap kwds = null;
\r
4260 Set extKeys = ext.getExtensionKeys();
\r
4261 if (extKeys != null) {
\r
4262 // legacy locale IDs assume LDML keywords and
\r
4263 // other extensions are at the same level.
\r
4264 // e.g. @a=ext-for-aa;calendar=japanese;m=ext-for-mm;x=priv-use
\r
// A TreeMap keeps the collected keywords sorted by key.
4265 kwds = new TreeMap();
\r
4266 Iterator itr = extKeys.iterator();
\r
4267 boolean hasLDMLKeywords = false;
\r
4268 while (itr.hasNext()) {
\r
4269 Character key = (Character)itr.next();
\r
// The 'u' extension signals LDML keywords; they are expanded one by one
// further below.
4270 if (key.charValue() == 'u') {
\r
4272 hasLDMLKeywords = true;
\r
// An extension is stored under its single-character key, as a string.
4275 String value = ext.getExtensionValue(key.charValue());
\r
4276 kwds.put(String.valueOf(key), value);
\r
4279 if (hasLDMLKeywords) {
\r
4280 Set ldmlKeys = ext.getLDMLKeywordKeys();
\r
4281 if (ldmlKeys != null) {
\r
4282 Iterator litr = ldmlKeys.iterator();
\r
4283 while (litr.hasNext()) {
\r
4284 String bcpKey = (String)litr.next();
\r
4285 String bcpValue = ext.getLDMLKeywordType(bcpKey);
\r
4286 // transform to legacy key/type
\r
// Note that the type conversion is given the legacy key (lkey), not the
// BCP 47 key.
4287 String lkey = bcp47ToLDMLKey(bcpKey);
\r
4288 String ltype = bcp47ToLDMLType(lkey, bcpValue);
\r
4289 kwds.put(lkey, ltype);
\r
// Append the collected keywords to the ID; entries come out in sorted key
// order because kwds is a TreeMap.
4294 if (kwds.size() > 0) {
\r
4296 Set kset = kwds.entrySet();
\r
4297 Iterator kitr = kset.iterator();
\r
4298 boolean insertSep = false;
\r
4299 while (kitr.hasNext()) {
\r
4305 Map.Entry kwd = (Map.Entry)kitr.next();
\r
4306 id.append(kwd.getKey());
\r
4308 id.append(kwd.getValue());
\r
4313 return new ULocale(id.toString());
\r
4316 private BaseLocale base() {
\r
// Produces the BaseLocale for this locale's language, script, country and
// variant.
4317 String language = getLanguage();
\r
// NOTE(review): the root locale gets special handling of the language value
// here - presumably it is normalized before the lookup; confirm.
4318 if (equals(ULocale.ROOT)) {
\r
4321 return BaseLocale.getInstance(language, getScript(), getCountry(), getVariant());
\r
4324 private LocaleExtensions extensions() {
\r
// Converts this locale's keywords into a LocaleExtensions instance.
4325 Iterator kwitr = getKeywords();
\r
// No keyword iterator: use the shared empty extensions instance.
4326 if (kwitr == null) {
\r
4327 return LocaleExtensions.EMPTY_EXTENSIONS;
\r
// Both maps are created lazily, only once there is something to store.
4330 TreeMap extMap = null;
\r
4331 TreeMap ldmlKwMap = null;
\r
4333 while (kwitr.hasNext()) {
\r
4334 String key = (String)kwitr.next();
\r
4335 String value = getKeywordValue(key);
\r
4336 if (key.length() == 1) {
\r
4337 // non LDML extension or private use
\r
4338 // We want to keep only valid subtags
\r
4339 boolean isPrivUse = (key.charAt(0) == PRIVATE_USE_EXTENSION);
\r
4340 if (isPrivUse || LanguageTag.isExtensionSingleton(key)) {
\r
4341 boolean isValid = true;
\r
// The value is split on '-' and each subtag is checked individually.
4342 String[] subtags = Utility.split(value, '-');
\r
4343 for (int i = 0; i < subtags.length; i++) {
\r
// NOTE(review): presumably the private-use check applies when isPrivUse is
// true and the extension-subtag check otherwise - confirm.
4345 if (!LanguageTag.isPrivateuseValueSubtag(subtags[i])) {
\r
4350 if (!LanguageTag.isExtensionSubtag(subtags[i])) {
\r
4357 if (extMap == null) {
\r
4358 extMap = new TreeMap();
\r
// The extension value is stored under its key character.
4360 extMap.put(new Character(key.charAt(0)), value.intern());
\r
// NOTE(review): presumably the branch for multi-character (LDML) keys -
// confirm. Key and type are converted to BCP 47 form, and the pair is kept
// only when both conversions return a non-null result.
4365 String bcpKey = ldmlKeyToBCP47(key);
\r
4366 String bcpVal = ldmlTypeToBCP47(key, value);
\r
4367 if (bcpKey != null && bcpVal != null) {
\r
4368 if (ldmlKwMap == null) {
\r
4369 ldmlKwMap = new TreeMap();
\r
4371 ldmlKwMap.put(bcpKey.intern(), bcpVal.intern());
\r
// Collected LDML keywords are also serialized into one string and stored as
// the value of the 'u' extension.
4376 if (ldmlKwMap != null) {
\r
4377 // create LDML extension string
\r
4378 StringBuffer buf = new StringBuffer();
\r
4379 LocaleExtensions.keywordsToString(ldmlKwMap, buf);
\r
4380 if (extMap == null) {
\r
4381 extMap = new TreeMap();
\r
4383 extMap.put(new Character('u'), buf.toString().intern());
\r
4386 return LocaleExtensions.getInstance(extMap, ldmlKwMap);
\r
4390 // LDML legacy/BCP47 key and type mapping functions
\r
4392 private static String ldmlKeyToBCP47(String key) {
\r
// Maps a legacy LDML keyword key to a BCP 47 key using the "key" table under
// "bcp47KeywordMappings" in the supplementalData bundle.
4393 UResourceBundle supplemental = UResourceBundle.getBundleInstance(
\r
4394 ICUResourceBundle.ICU_BASE_NAME,
\r
4395 "supplementalData",
\r
4396 ICUResourceBundle.ICU_DATA_CLASS_LOADER);
\r
4397 UResourceBundle bcp47Mappings = supplemental.get("bcp47KeywordMappings");
\r
4398 UResourceBundle keyMaps = bcp47Mappings.get("key");
\r
4400 // normalize key to lowercase
\r
4401 key = AsciiUtil.toLowerString(key);
\r
4402 String bcpKey = null;
\r
// The table is indexed by the lowercased legacy key.
4404 bcpKey = keyMaps.getString(key);
\r
// A key missing from the table surfaces as MissingResourceException.
4405 } catch (MissingResourceException mre) {
\r
// No table entry: fall back to examining the key itself, which must be two
// characters long and a valid extension subtag.
// NOTE(review): presumably such a key is then used as-is - confirm.
4409 if (bcpKey == null) {
\r
4410 if (key.length() == 2 && LanguageTag.isExtensionSubtag(key)) {
\r
4418 private static String bcp47ToLDMLKey(String bcpKey) {
\r
// Reverse of the legacy-to-BCP 47 key mapping: searches the "key" table under
// "bcp47KeywordMappings" in the supplementalData bundle by value.
4419 UResourceBundle supplemental = UResourceBundle.getBundleInstance(
\r
4420 ICUResourceBundle.ICU_BASE_NAME,
\r
4421 "supplementalData",
\r
4422 ICUResourceBundle.ICU_DATA_CLASS_LOADER);
\r
4423 UResourceBundle bcp47Mappings = supplemental.get("bcp47KeywordMappings");
\r
4424 UResourceBundle keyMaps = bcp47Mappings.get("key");
\r
4426 // normalize bcp key to lowercase
\r
4427 bcpKey = AsciiUtil.toLowerString(bcpKey);
\r
4428 String key = null;
\r
// Linear scan: an entry whose string value equals the BCP 47 key supplies the
// legacy key through its resource key.
4429 for (int i = 0; i < keyMaps.getSize(); i++) {
\r
4430 UResourceBundle keyMap = keyMaps.get(i);
\r
4431 if (bcpKey.equals(keyMap.getString())) {
\r
4432 key = keyMap.getKey();
\r
// Reached with key still null when the scan found no matching entry.
4436 if (key == null) {
\r
4442 private static String ldmlTypeToBCP47(String key, String type) {
\r
// Maps a legacy LDML keyword type to a BCP 47 type using the table named after
// the (lowercased) key under "bcp47KeywordMappings" in the supplementalData
// bundle.
4443 UResourceBundle supplemental = UResourceBundle.getBundleInstance(
\r
4444 ICUResourceBundle.ICU_BASE_NAME,
\r
4445 "supplementalData",
\r
4446 ICUResourceBundle.ICU_DATA_CLASS_LOADER);
\r
4447 UResourceBundle bcp47Mappings = supplemental.get("bcp47KeywordMappings");
\r
4449 // normalize key/type to lowercase
\r
4450 key = AsciiUtil.toLowerString(key);
\r
4451 type = AsciiUtil.toLowerString(type);
\r
4452 String bcpType = null;
\r
4454 UResourceBundle typeMaps = bcp47Mappings.get(key);
\r
// For the "timezone" key, "/" in the type is replaced with ":" before the
// table lookup.
4455 String tmp = key.equals("timezone") ? Utility.replaceAll(type, "/", ":") : type;
\r
4456 bcpType = typeMaps.getString(tmp);
\r
// A missing key table or a missing type entry surfaces as
// MissingResourceException.
4457 } catch (MissingResourceException mre) {
\r
// No table entry: fall back to examining the type itself, which must be three
// to eight characters long and a valid extension subtag.
// NOTE(review): presumably such a type is then used as-is - confirm.
4461 if (bcpType == null) {
\r
4462 int typeLen = type.length();
\r
4463 if (typeLen >= 3 && typeLen <= 8 && LanguageTag.isExtensionSubtag(type)) {
\r
4471 private static String bcp47ToLDMLType(String key, String bcpType) {
\r
4472 UResourceBundle supplemental = UResourceBundle.getBundleInstance(
\r
4473 ICUResourceBundle.ICU_BASE_NAME,
\r
4474 "supplementalData",
\r
4475 ICUResourceBundle.ICU_DATA_CLASS_LOADER);
\r
4476 UResourceBundle bcp47Mappings = supplemental.get("bcp47KeywordMappings");
\r
4478 // normalize key/bcpType to lowercase
\r
4479 key = AsciiUtil.toLowerString(key);
\r
4480 bcpType = AsciiUtil.toLowerString(bcpType);
\r
4482 String type = null;
\r
4484 UResourceBundle typeMaps = bcp47Mappings.get(key);
\r
4486 // Note: Linear search for time zone ID might be too slow.
\r
4487 // ICU services do not use timezone keywords for now.
\r
4488 // In future, we may need to build the optimized inverse
\r
4491 for (int i = 0; i < typeMaps.getSize(); i++) {
\r
4492 UResourceBundle typeMap = typeMaps.get(i);
\r
4493 if (bcpType.equals(typeMap.getString())) {
\r
4494 type = typeMap.getKey();
\r
4495 if (key.equals("timezone")) {
\r
4496 type = Utility.replaceAll(type, ":", "/");
\r
4501 } catch (MissingResourceException mre) {
\r
4505 if (type == null) {
\r