2 ******************************************************************************
3 * Copyright (C) 2003-2011, International Business Machines Corporation and *
4 * others. All Rights Reserved. *
5 ******************************************************************************
8 package com.ibm.icu.util;
10 import java.io.Serializable;
11 import java.lang.reflect.InvocationTargetException;
12 import java.lang.reflect.Method;
13 import java.text.ParseException;
14 import java.util.Iterator;
15 import java.util.List;
16 import java.util.Locale;
18 import java.util.Map.Entry;
19 import java.util.MissingResourceException;
21 import java.util.TreeMap;
22 import java.util.TreeSet;
24 import com.ibm.icu.impl.ICUCache;
25 import com.ibm.icu.impl.ICUResourceBundle;
26 import com.ibm.icu.impl.ICUResourceTableAccess;
27 import com.ibm.icu.impl.LocaleIDParser;
28 import com.ibm.icu.impl.LocaleIDs;
29 import com.ibm.icu.impl.LocaleUtility;
30 import com.ibm.icu.impl.SimpleCache;
31 import com.ibm.icu.impl.locale.AsciiUtil;
32 import com.ibm.icu.impl.locale.BaseLocale;
33 import com.ibm.icu.impl.locale.Extension;
34 import com.ibm.icu.impl.locale.InternalLocaleBuilder;
35 import com.ibm.icu.impl.locale.LanguageTag;
36 import com.ibm.icu.impl.locale.LocaleExtensions;
37 import com.ibm.icu.impl.locale.LocaleSyntaxException;
38 import com.ibm.icu.impl.locale.ParseStatus;
39 import com.ibm.icu.impl.locale.UnicodeLocaleExtension;
40 import com.ibm.icu.text.LocaleDisplayNames;
41 import com.ibm.icu.text.LocaleDisplayNames.DialectHandling;
44 * {@icuenhanced java.util.Locale}.{@icu _usage_}
46 * A class analogous to {@link java.util.Locale} that provides additional
47 * support for ICU protocol. In ICU 3.0 this class is enhanced to support
48 * RFC 3066 language identifiers.
50 * <p>Many classes and services in ICU follow a factory idiom, in
51 * which a factory method or object responds to a client request with
52 * an object. The request includes a locale (the <i>requested</i>
53 * locale), and the returned object is constructed using data for that
54 * locale. The system may lack data for the requested locale, in
55 * which case the locale fallback mechanism will be invoked until a
56 * populated locale is found (the <i>valid</i> locale). Furthermore,
57 * even when a populated locale is found (the <i>valid</i> locale),
58 * further fallback may be required to reach a locale containing the
59 * specific data required by the service (the <i>actual</i> locale).
61 * <p>ULocale performs <b>'normalization'</b> and <b>'canonicalization'</b> of locale ids.
62 * Normalization 'cleans up' ICU locale ids as follows:
64 * <li>language, script, country, variant, and keywords are properly cased<br>
65 * (lower, title, upper, upper, and lower case respectively)</li>
66 * <li>hyphens used as separators are converted to underscores</li>
67 * <li>three-letter language and country ids are converted to two-letter
68 * equivalents where available</li>
69 * <li>surrounding spaces are removed from keywords and values</li>
70 * <li>if there are multiple keywords, they are put in sorted order</li>
 * Canonicalization additionally performs the following:
 * <ul>
 * <li>POSIX ids are converted to ICU format IDs</li>
 * <li>'grandfathered' 3066 ids are converted to ICU standard form</li>
 * <li>'PREEURO' and 'EURO' variants are converted to currency keyword form, with the currency
 * id appropriate to the country of the locale (for PREEURO) or EUR (for EURO).</li>
 * </ul>
 * All ULocale constructors automatically normalize the locale id. To handle
 * POSIX ids, <code>canonicalize</code> can be called to convert the id
 * to canonical form, or the <code>createCanonical</code> factory method
 * can be called.
85 * <p>This class provides selectors {@link #VALID_LOCALE} and {@link
86 * #ACTUAL_LOCALE} intended for use in methods named
87 * <tt>getLocale()</tt>. These methods exist in several ICU classes,
88 * including {@link com.ibm.icu.util.Calendar}, {@link
89 * com.ibm.icu.util.Currency}, {@link com.ibm.icu.text.UFormat},
90 * {@link com.ibm.icu.text.BreakIterator},
91 * <a href="../text/Collator.html" title="class in com.ibm.icu.text"><code>Collator</code></a>,
92 * {@link com.ibm.icu.text.DateFormatSymbols}, and {@link
93 * com.ibm.icu.text.DecimalFormatSymbols} and their subclasses, if
94 * any. Once an object of one of these classes has been created,
95 * <tt>getLocale()</tt> may be called on it to determine the valid and
96 * actual locale arrived at during the object's construction.
98 * <p>Note: The <i>actual</i> locale is returned correctly, but the <i>valid</i>
99 * locale is not, in most cases.
101 * @see java.util.Locale
104 * @author Ram Viswanadha
107 public final class ULocale implements Serializable {
108 // using serialver from jdk1.4.2_05
109 private static final long serialVersionUID = 3715177670352309217L;
112 * Useful constant for language.
115 public static final ULocale ENGLISH = new ULocale("en", Locale.ENGLISH);
118 * Useful constant for language.
121 public static final ULocale FRENCH = new ULocale("fr", Locale.FRENCH);
124 * Useful constant for language.
127 public static final ULocale GERMAN = new ULocale("de", Locale.GERMAN);
130 * Useful constant for language.
133 public static final ULocale ITALIAN = new ULocale("it", Locale.ITALIAN);
136 * Useful constant for language.
139 public static final ULocale JAPANESE = new ULocale("ja", Locale.JAPANESE);
142 * Useful constant for language.
145 public static final ULocale KOREAN = new ULocale("ko", Locale.KOREAN);
148 * Useful constant for language.
151 public static final ULocale CHINESE = new ULocale("zh", Locale.CHINESE);
154 * Useful constant for language.
157 public static final ULocale SIMPLIFIED_CHINESE = new ULocale("zh_Hans", Locale.CHINESE);
160 * Useful constant for language.
163 public static final ULocale TRADITIONAL_CHINESE = new ULocale("zh_Hant", Locale.CHINESE);
166 * Useful constant for country/region.
169 public static final ULocale FRANCE = new ULocale("fr_FR", Locale.FRANCE);
172 * Useful constant for country/region.
175 public static final ULocale GERMANY = new ULocale("de_DE", Locale.GERMANY);
178 * Useful constant for country/region.
181 public static final ULocale ITALY = new ULocale("it_IT", Locale.ITALY);
184 * Useful constant for country/region.
187 public static final ULocale JAPAN = new ULocale("ja_JP", Locale.JAPAN);
190 * Useful constant for country/region.
193 public static final ULocale KOREA = new ULocale("ko_KR", Locale.KOREA);
196 * Useful constant for country/region.
199 public static final ULocale CHINA = new ULocale("zh_Hans_CN", Locale.CHINA);
202 * Useful constant for country/region.
205 public static final ULocale PRC = CHINA;
208 * Useful constant for country/region.
211 public static final ULocale TAIWAN = new ULocale("zh_Hant_TW", Locale.TAIWAN);
214 * Useful constant for country/region.
217 public static final ULocale UK = new ULocale("en_GB", Locale.UK);
220 * Useful constant for country/region.
223 public static final ULocale US = new ULocale("en_US", Locale.US);
226 * Useful constant for country/region.
229 public static final ULocale CANADA = new ULocale("en_CA", Locale.CANADA);
232 * Useful constant for country/region.
235 public static final ULocale CANADA_FRENCH = new ULocale("fr_CA", Locale.CANADA_FRENCH);
240 private static final String EMPTY_STRING = "";
242 // Used in both ULocale and LocaleIDParser, so moved up here.
243 private static final char UNDERSCORE = '_';
245 // default empty locale
246 private static final Locale EMPTY_LOCALE = new Locale("", "");
248 // special keyword key for Unicode locale attributes
249 private static final String LOCALE_ATTRIBUTE_KEY = "attribute";
255 public static final ULocale ROOT = new ULocale("", EMPTY_LOCALE);
258 * Enum for locale categories. These locale categories are used to get/set the default locale for
259 * the specific functionality represented by the category.
262 public enum Category {
264 * Category used to represent the default locale for displaying user interfaces.
269 * Category used to represent the default locale for formatting date, number and/or currency.
275 private static final SimpleCache<Locale, ULocale> CACHE = new SimpleCache<Locale, ULocale>();
280 private transient volatile Locale locale;
283 * The raw localeID that we were passed in.
285 private String localeID;
288 * Cache the locale data container fields.
289 * In future, we want to use them as the primary locale identifier storage.
291 private transient volatile BaseLocale baseLocale;
292 private transient volatile LocaleExtensions extensions;
295 private static String[][] CANONICALIZE_MAP;
296 private static String[][] variantsToKeywords;
298 private static void initCANONICALIZE_MAP() {
299 if (CANONICALIZE_MAP == null) {
301 * This table lists pairs of locale ids for canonicalization. The
302 * The 1st item is the normalized id. The 2nd item is the
303 * canonicalized id. The 3rd is the keyword. The 4th is the keyword value.
305 String[][] tempCANONICALIZE_MAP = {
306 // { EMPTY_STRING, "en_US_POSIX", null, null }, /* .NET name */
307 { "C", "en_US_POSIX", null, null }, /* POSIX name */
308 { "art_LOJBAN", "jbo", null, null }, /* registered name */
309 { "az_AZ_CYRL", "az_Cyrl_AZ", null, null }, /* .NET name */
310 { "az_AZ_LATN", "az_Latn_AZ", null, null }, /* .NET name */
311 { "ca_ES_PREEURO", "ca_ES", "currency", "ESP" },
312 { "cel_GAULISH", "cel__GAULISH", null, null }, /* registered name */
313 { "de_1901", "de__1901", null, null }, /* registered name */
314 { "de_1906", "de__1906", null, null }, /* registered name */
315 { "de__PHONEBOOK", "de", "collation", "phonebook" }, /* Old ICU name */
316 { "de_AT_PREEURO", "de_AT", "currency", "ATS" },
317 { "de_DE_PREEURO", "de_DE", "currency", "DEM" },
318 { "de_LU_PREEURO", "de_LU", "currency", "EUR" },
319 { "el_GR_PREEURO", "el_GR", "currency", "GRD" },
320 { "en_BOONT", "en__BOONT", null, null }, /* registered name */
321 { "en_SCOUSE", "en__SCOUSE", null, null }, /* registered name */
322 { "en_BE_PREEURO", "en_BE", "currency", "BEF" },
323 { "en_IE_PREEURO", "en_IE", "currency", "IEP" },
324 { "es__TRADITIONAL", "es", "collation", "traditional" }, /* Old ICU name */
325 { "es_ES_PREEURO", "es_ES", "currency", "ESP" },
326 { "eu_ES_PREEURO", "eu_ES", "currency", "ESP" },
327 { "fi_FI_PREEURO", "fi_FI", "currency", "FIM" },
328 { "fr_BE_PREEURO", "fr_BE", "currency", "BEF" },
329 { "fr_FR_PREEURO", "fr_FR", "currency", "FRF" },
330 { "fr_LU_PREEURO", "fr_LU", "currency", "LUF" },
331 { "ga_IE_PREEURO", "ga_IE", "currency", "IEP" },
332 { "gl_ES_PREEURO", "gl_ES", "currency", "ESP" },
333 { "hi__DIRECT", "hi", "collation", "direct" }, /* Old ICU name */
334 { "it_IT_PREEURO", "it_IT", "currency", "ITL" },
335 { "ja_JP_TRADITIONAL", "ja_JP", "calendar", "japanese" },
336 // { "nb_NO_NY", "nn_NO", null, null },
337 { "nl_BE_PREEURO", "nl_BE", "currency", "BEF" },
338 { "nl_NL_PREEURO", "nl_NL", "currency", "NLG" },
339 { "pt_PT_PREEURO", "pt_PT", "currency", "PTE" },
340 { "sl_ROZAJ", "sl__ROZAJ", null, null }, /* registered name */
341 { "sr_SP_CYRL", "sr_Cyrl_RS", null, null }, /* .NET name */
342 { "sr_SP_LATN", "sr_Latn_RS", null, null }, /* .NET name */
343 { "sr_YU_CYRILLIC", "sr_Cyrl_RS", null, null }, /* Linux name */
344 { "th_TH_TRADITIONAL", "th_TH", "calendar", "buddhist" }, /* Old ICU name */
345 { "uz_UZ_CYRILLIC", "uz_Cyrl_UZ", null, null }, /* Linux name */
346 { "uz_UZ_CYRL", "uz_Cyrl_UZ", null, null }, /* .NET name */
347 { "uz_UZ_LATN", "uz_Latn_UZ", null, null }, /* .NET name */
348 { "zh_CHS", "zh_Hans", null, null }, /* .NET name */
349 { "zh_CHT", "zh_Hant", null, null }, /* .NET name */
350 { "zh_GAN", "zh__GAN", null, null }, /* registered name */
351 { "zh_GUOYU", "zh", null, null }, /* registered name */
352 { "zh_HAKKA", "zh__HAKKA", null, null }, /* registered name */
353 { "zh_MIN", "zh__MIN", null, null }, /* registered name */
354 { "zh_MIN_NAN", "zh__MINNAN", null, null }, /* registered name */
355 { "zh_WUU", "zh__WUU", null, null }, /* registered name */
356 { "zh_XIANG", "zh__XIANG", null, null }, /* registered name */
357 { "zh_YUE", "zh__YUE", null, null } /* registered name */
360 synchronized (ULocale.class) {
361 if (CANONICALIZE_MAP == null) {
362 CANONICALIZE_MAP = tempCANONICALIZE_MAP;
366 if (variantsToKeywords == null) {
368 * This table lists pairs of locale ids for canonicalization. The
369 * The first item is the normalized variant id.
371 String[][] tempVariantsToKeywords = {
372 { "EURO", "currency", "EUR" },
373 { "PINYIN", "collation", "pinyin" }, /* Solaris variant */
374 { "STROKE", "collation", "stroke" } /* Solaris variant */
377 synchronized (ULocale.class) {
378 if (variantsToKeywords == null) {
379 variantsToKeywords = tempVariantsToKeywords;
/**
 * Private constructor used by static initializers. Stores both arguments as given;
 * the locale ID is not passed through getName(String) here.
 *
 * @param localeID the locale ID to store
 * @param locale the JDK locale to cache, or null to have toLocale() compute it on demand
 */
private ULocale(String localeID, Locale locale) {
    this.localeID = localeID;
    this.locale = locale;
}
/**
 * Construct a ULocale object from a {@link java.util.Locale}.
 * The locale ID is the normalized string form of forLocale(loc), and loc itself
 * is kept as the cached JDK locale.
 * @param loc a JDK locale
 */
private ULocale(Locale loc) {
    this.localeID = getName(forLocale(loc).toString());
    this.locale = loc;
}
403 * {@icu} Returns a ULocale object for a {@link java.util.Locale}.
404 * The ULocale is canonicalized.
405 * @param loc a JDK locale
408 public static ULocale forLocale(Locale loc) {
412 ULocale result = CACHE.get(loc);
413 if (result == null) {
414 result = JDKLocaleHelper.toULocale(loc);
415 CACHE.put(loc, result);
/**
 * {@icu} Constructs a ULocale from a RFC 3066 locale ID. The locale ID consists
 * of optional language, script, country, and variant fields in that order,
 * separated by underscores, followed by an optional keyword list. The
 * script, if present, is four characters long-- this distinguishes it
 * from a country code, which is two characters long. Other fields
 * are distinguished by position as indicated by the underscores. The
 * start of the keyword list is indicated by '@', and consists of one
 * or more keyword/value pairs separated by semicolons(';').
 *
 * <p>This constructor does not canonicalize the localeID. So, for
 * example, "zh__pinyin" remains unchanged instead of converting
 * to "zh@collation=pinyin". By default ICU only recognizes the
 * latter as specifying pinyin collation. Use {@link #createCanonical}
 * or {@link #canonicalize} if you need to canonicalize the localeID.
 *
 * @param localeID string representation of the locale, e.g:
 * "en_US", "sy_Cyrl_YU", "zh__pinyin", "es_ES@currency=EUR;collation=traditional"
 */
public ULocale(String localeID) {
    this.localeID = getName(localeID);
}
/**
 * Convenience overload of ULocale(String, String, String) for
 * compatibility with java.util.Locale. Equivalent to passing null as the third field.
 * @param a first component of the locale id
 * @param b second component of the locale id
 * @see #ULocale(String, String, String)
 */
public ULocale(String a, String b) {
    this(a, b, null);
}
/**
 * Constructs a ULocale from a localeID constructed from the three 'fields' a, b, and
 * c. These fields are concatenated using underscores to form a localeID of the form
 * a_b_c, which is then handled like the localeID passed to <code>ULocale(String
 * localeID)</code>.
 *
 * <p>Java locale strings consisting of language, country, and
 * variant will be handled by this form, since the country code
 * (being shorter than four letters long) will not be interpreted
 * as a script code. If a script code is present, the final
 * argument ('c') will be interpreted as the country code. It is
 * recommended that this constructor only be used to ease porting,
 * and that clients instead use the single-argument constructor
 * when constructing a ULocale from a localeID.
 * @param a first component of the locale id
 * @param b second component of the locale id
 * @param c third component of the locale id
 * @see #ULocale(String)
 */
public ULocale(String a, String b, String c) {
    // Null or empty fields are skipped by lscvToID, so (a, b, null) yields "a_b".
    localeID = getName(lscvToID(a, b, c, EMPTY_STRING));
}
479 * {@icu} Creates a ULocale from the id by first canonicalizing the id.
480 * @param nonCanonicalID the locale id to canonicalize
481 * @return the locale created from the canonical version of the ID.
484 public static ULocale createCanonical(String nonCanonicalID) {
485 return new ULocale(canonicalize(nonCanonicalID), (Locale)null);
488 private static String lscvToID(String lang, String script, String country, String variant) {
489 StringBuilder buf = new StringBuilder();
491 if (lang != null && lang.length() > 0) {
494 if (script != null && script.length() > 0) {
495 buf.append(UNDERSCORE);
498 if (country != null && country.length() > 0) {
499 buf.append(UNDERSCORE);
502 if (variant != null && variant.length() > 0) {
503 if (country == null || country.length() == 0) {
504 buf.append(UNDERSCORE);
506 buf.append(UNDERSCORE);
509 return buf.toString();
513 * {@icu} Converts this ULocale object to a {@link java.util.Locale}.
514 * @return a JDK locale that either exactly represents this object
515 * or is the closest approximation.
518 public Locale toLocale() {
519 if (locale == null) {
520 locale = JDKLocaleHelper.toLocale(this);
525 private static ICUCache<String, String> nameCache = new SimpleCache<String, String>();
528 * Keep our own default ULocale.
530 private static Locale defaultLocale = Locale.getDefault();
531 private static ULocale defaultULocale = forLocale(defaultLocale);
533 private static Locale[] defaultCategoryLocales = new Locale[Category.values().length];
534 private static ULocale[] defaultCategoryULocales = new ULocale[Category.values().length];
537 for (Category cat: Category.values()) {
538 int idx = cat.ordinal();
539 defaultCategoryLocales[idx] = JDKLocaleHelper.getDefault(cat);
540 defaultCategoryULocales[idx] = forLocale(defaultCategoryLocales[idx]);
545 * Returns the current default ULocale.
546 * @return the default ULocale.
549 public static ULocale getDefault() {
550 synchronized (ULocale.class) {
551 Locale currentDefault = Locale.getDefault();
552 if (!defaultLocale.equals(currentDefault)) {
553 defaultLocale = currentDefault;
554 defaultULocale = forLocale(currentDefault);
556 if (!JDKLocaleHelper.isJava7orNewer()) {
557 // Detected Java default Locale change.
558 // We need to update category defaults to match the
559 // Java 7's behavior on Java 6 or older environment.
560 for (Category cat : Category.values()) {
561 int idx = cat.ordinal();
562 defaultCategoryLocales[idx] = currentDefault;
563 defaultCategoryULocales[idx] = forLocale(currentDefault);
567 return defaultULocale;
572 * {@icu} Sets the default ULocale. This also sets the default Locale.
573 * If the caller does not have write permission to the
574 * user.language property, a security exception will be thrown,
575 * and the default ULocale will remain unchanged.
577 * By setting the default ULocale with this method, all of the default categoy locales
578 * are also set to the specified default ULocale.
579 * @param newLocale the new default locale
580 * @throws SecurityException if a security manager exists and its
581 * <code>checkPermission</code> method doesn't allow the operation.
582 * @throws NullPointerException if <code>newLocale</code> is null
583 * @see SecurityManager#checkPermission(java.security.Permission)
584 * @see java.util.PropertyPermission
585 * @see ULocale#setDefault(Category, ULocale)
588 public static synchronized void setDefault(ULocale newLocale){
589 defaultLocale = newLocale.toLocale();
590 Locale.setDefault(defaultLocale);
591 defaultULocale = newLocale;
592 // This method also updates all category default locales
593 for (Category cat : Category.values()) {
594 setDefault(cat, newLocale);
599 * Returns the current default ULocale for the specified category.
601 * @param category the category
602 * @return the default ULocale for the specified category.
605 public static ULocale getDefault(Category category) {
606 synchronized (ULocale.class) {
607 int idx = category.ordinal();
608 if (defaultCategoryULocales[idx] == null) {
609 // Just in case this method is called during ULocale class
610 // initialization. Unlike getDefault(), we do not have
611 // cyclic dependency for category default.
614 if (JDKLocaleHelper.isJava7orNewer()) {
615 Locale currentCategoryDefault = JDKLocaleHelper.getDefault(category);
616 if (!defaultCategoryLocales[idx].equals(currentCategoryDefault)) {
617 defaultCategoryLocales[idx] = currentCategoryDefault;
618 defaultCategoryULocales[idx] = forLocale(currentCategoryDefault);
621 // java.util.Locale.setDefault(Locale) in Java 7 updates
622 // category locale defaults. On Java 6 or older environment,
623 // ICU4J checks if the default locale has changed and update
624 // category ULocales here if necessary.
626 // Note: When java.util.Locale.setDefault(Locale) is called
627 // with a Locale same with the previous one, Java 7 still
628 // updates category locale defaults. On Java 6 or older env,
629 // there is no good way to detect the event, ICU4J simply
630 // check if the default Java Locale has changed since last
633 Locale currentDefault = Locale.getDefault();
634 if (!defaultLocale.equals(currentDefault)) {
635 defaultLocale = currentDefault;
636 defaultULocale = forLocale(currentDefault);
638 for (Category cat : Category.values()) {
639 int tmpIdx = cat.ordinal();
640 defaultCategoryLocales[tmpIdx] = currentDefault;
641 defaultCategoryULocales[tmpIdx] = forLocale(currentDefault);
645 // No synchronization with JDK Locale, because category default
646 // is not supported in Java 6 or older versions
648 return defaultCategoryULocales[idx];
653 * Sets the default <code>ULocale</code> for the specified <code>Category</code>.
654 * This also sets the default <code>Locale</code> for the specified <code>Category</code>
655 * of the JVM. If the caller does not have write permission to the
656 * user.language property, a security exception will be thrown,
657 * and the default ULocale for the specified Category will remain unchanged.
659 * @param category the specified category to set the default locale
660 * @param newLocale the new default locale
661 * @see SecurityManager#checkPermission(java.security.Permission)
662 * @see java.util.PropertyPermission
665 public static synchronized void setDefault(Category category, ULocale newLocale) {
666 Locale newJavaDefault = newLocale.toLocale();
667 int idx = category.ordinal();
668 defaultCategoryULocales[idx] = newLocale;
669 defaultCategoryLocales[idx] = newJavaDefault;
670 JDKLocaleHelper.setDefault(category, newJavaDefault);
674 * This is for compatibility with Locale-- in actuality, since ULocale is
675 * immutable, there is no reason to clone it, so this API returns 'this'.
678 public Object clone() {
683 * Returns the hashCode.
686 public int hashCode() {
687 return localeID.hashCode();
691 * Returns true if the other object is another ULocale with the
692 * same full name, or is a String localeID that matches the full name.
693 * Note that since names are not canonicalized, two ULocales that
694 * function identically might not compare equal.
696 * @return true if this Locale is equal to the specified object.
699 public boolean equals(Object obj) {
703 if (obj instanceof String) {
704 return localeID.equals((String)obj);
706 if (obj instanceof ULocale) {
707 return localeID.equals(((ULocale)obj).localeID);
713 * {@icunote} Unlike the Locale API, this returns an array of <code>ULocale</code>,
714 * not <code>Locale</code>. Returns a list of all installed locales.
717 public static ULocale[] getAvailableLocales() {
718 return ICUResourceBundle.getAvailableULocales();
722 * Returns a list of all 2-letter country codes defined in ISO 3166.
723 * Can be used to create Locales.
726 public static String[] getISOCountries() {
727 return LocaleIDs.getISOCountries();
731 * Returns a list of all 2-letter language codes defined in ISO 639.
732 * Can be used to create Locales.
733 * [NOTE: ISO 639 is not a stable standard-- some languages' codes have changed.
734 * The list this function returns includes both the new and the old codes for the
735 * languages whose codes have changed.]
738 public static String[] getISOLanguages() {
739 return LocaleIDs.getISOLanguages();
743 * Returns the language code for this locale, which will either be the empty string
744 * or a lowercase ISO 639 code.
745 * @see #getDisplayLanguage()
746 * @see #getDisplayLanguage(ULocale)
749 public String getLanguage() {
750 return getLanguage(localeID);
754 * Returns the language code for the locale ID,
755 * which will either be the empty string
756 * or a lowercase ISO 639 code.
757 * @see #getDisplayLanguage()
758 * @see #getDisplayLanguage(ULocale)
761 public static String getLanguage(String localeID) {
762 return new LocaleIDParser(localeID).getLanguage();
766 * {@icu} Returns the script code for this locale, which might be the empty string.
767 * @see #getDisplayScript()
768 * @see #getDisplayScript(ULocale)
771 public String getScript() {
772 return getScript(localeID);
776 * {@icu} Returns the script code for the specified locale, which might be the empty
778 * @see #getDisplayScript()
779 * @see #getDisplayScript(ULocale)
782 public static String getScript(String localeID) {
783 return new LocaleIDParser(localeID).getScript();
787 * Returns the country/region code for this locale, which will either be the empty string
788 * or an uppercase ISO 3166 2-letter code.
789 * @see #getDisplayCountry()
790 * @see #getDisplayCountry(ULocale)
793 public String getCountry() {
794 return getCountry(localeID);
798 * Returns the country/region code for this locale, which will either be the empty string
799 * or an uppercase ISO 3166 2-letter code.
800 * @param localeID The locale identification string.
801 * @see #getDisplayCountry()
802 * @see #getDisplayCountry(ULocale)
805 public static String getCountry(String localeID) {
806 return new LocaleIDParser(localeID).getCountry();
810 * Returns the variant code for this locale, which might be the empty string.
811 * @see #getDisplayVariant()
812 * @see #getDisplayVariant(ULocale)
815 public String getVariant() {
816 return getVariant(localeID);
820 * Returns the variant code for the specified locale, which might be the empty string.
821 * @see #getDisplayVariant()
822 * @see #getDisplayVariant(ULocale)
825 public static String getVariant(String localeID) {
826 return new LocaleIDParser(localeID).getVariant();
830 * {@icu} Returns the fallback locale for the specified locale, which might be the
834 public static String getFallback(String localeID) {
835 return getFallbackString(getName(localeID));
839 * {@icu} Returns the fallback locale for this locale. If this locale is root,
843 public ULocale getFallback() {
844 if (localeID.length() == 0 || localeID.charAt(0) == '@') {
847 return new ULocale(getFallbackString(localeID), (Locale)null);
851 * Returns the given (canonical) locale id minus the last part before the tags.
853 private static String getFallbackString(String fallback) {
854 int extStart = fallback.indexOf('@');
855 if (extStart == -1) {
856 extStart = fallback.length();
858 int last = fallback.lastIndexOf('_', extStart);
862 // truncate empty segment
864 if (fallback.charAt(last - 1) != '_') {
870 return fallback.substring(0, last) + fallback.substring(extStart);
874 * {@icu} Returns the (normalized) base name for this locale.
875 * @return the base name as a String.
878 public String getBaseName() {
879 return getBaseName(localeID);
883 * {@icu} Returns the (normalized) base name for the specified locale.
884 * @param localeID the locale ID as a string
885 * @return the base name as a String.
888 public static String getBaseName(String localeID){
889 if (localeID.indexOf('@') == -1) {
892 return new LocaleIDParser(localeID).getBaseName();
896 * {@icu} Returns the (normalized) full name for this locale.
898 * @return String the full name of the localeID
901 public String getName() {
902 return localeID; // always normalized
906 * Gets the shortest length subtag's size.
909 * @return The size of the shortest length subtag
911 private static int getShortestSubtagLength(String localeID) {
912 int localeIDLength = localeID.length();
913 int length = localeIDLength;
914 boolean reset = true;
917 for (int i = 0; i < localeIDLength; i++) {
918 if (localeID.charAt(i) != '_' && localeID.charAt(i) != '-') {
925 if (tmpLength != 0 && tmpLength < length) {
936 * {@icu} Returns the (normalized) full name for the specified locale.
938 * @param localeID the localeID as a string
939 * @return String the full name of the localeID
942 public static String getName(String localeID){
944 // Convert BCP47 id if necessary
945 if (localeID != null && !localeID.contains("@") && getShortestSubtagLength(localeID) == 1) {
946 tmpLocaleID = forLanguageTag(localeID).getName();
947 if (tmpLocaleID.length() == 0) {
948 tmpLocaleID = localeID;
951 tmpLocaleID = localeID;
953 String name = nameCache.get(tmpLocaleID);
955 name = new LocaleIDParser(tmpLocaleID).getName();
956 nameCache.put(tmpLocaleID, name);
962 * Returns a string representation of this object.
965 public String toString() {
970 * {@icu} Returns an iterator over keywords for this locale. If there
971 * are no keywords, returns null.
972 * @return iterator over keywords, or null if there are no keywords.
975 public Iterator<String> getKeywords() {
976 return getKeywords(localeID);
980 * {@icu} Returns an iterator over keywords for the specified locale. If there
981 * are no keywords, returns null.
982 * @return an iterator over the keywords in the specified locale, or null
983 * if there are no keywords.
986 public static Iterator<String> getKeywords(String localeID){
987 return new LocaleIDParser(localeID).getKeywords();
991 * {@icu} Returns the value for a keyword in this locale. If the keyword is not
992 * defined, returns null.
993 * @param keywordName name of the keyword whose value is desired. Case insensitive.
994 * @return the value of the keyword, or null.
997 public String getKeywordValue(String keywordName){
998 return getKeywordValue(localeID, keywordName);
1002 * {@icu} Returns the value for a keyword in the specified locale. If the keyword is
1003 * not defined, returns null. The locale name does not need to be normalized.
1004 * @param keywordName name of the keyword whose value is desired. Case insensitive.
1005 * @return String the value of the keyword as a string
1008 public static String getKeywordValue(String localeID, String keywordName) {
1009 return new LocaleIDParser(localeID).getKeywordValue(keywordName);
1013 * {@icu} Returns the canonical name for the specified locale ID. This is used to
1014 * convert POSIX and other grandfathered IDs to standard ICU form.
1015 * @param localeID the locale id
1016 * @return the canonicalized id
1019 public static String canonicalize(String localeID){
// Unlike the other parsers built in this file, this one is constructed with a second
// argument of true. NOTE(review): presumably this turns on canonical parsing - confirm
// against LocaleIDParser.
1020 LocaleIDParser parser = new LocaleIDParser(localeID, true);
1021 String baseName = parser.getBaseName();
// Records whether either table lookup below rewrote the base name; the Norwegian
// special case at the end only runs when neither did.
1022 boolean foundVariant = false;
1024 // formerly, we always set to en_US_POSIX if the basename was empty, but
1025 // now we require that the entire id be empty, so that "@foo=bar"
1026 // will pass through unchanged.
1027 // {dlf} I'd rather keep "" unchanged.
1028 if (localeID.equals("")) {
1030 // return "en_US_POSIX";
1033 // we have an ID in the form xx_Yyyy_ZZ_KKKKK
1035 initCANONICALIZE_MAP();
1037 /* convert the variants to appropriate ID */
// Each variantsToKeywords row is read as { variant suffix, keyword, value }: the last
// "_<suffix>" in the base name is cut off and the keyword/value pair is handed to
// defaultKeywordValue.
// NOTE(review): the check on idx and the way this loop exits are not visible in this
// view - confirm both before changing the loop.
1038 for (int i = 0; i < variantsToKeywords.length; i++) {
1039 String[] vals = variantsToKeywords[i];
1040 int idx = baseName.lastIndexOf("_" + vals[0]);
1042 foundVariant = true;
1044 baseName = baseName.substring(0, idx);
// If cutting the suffix left a trailing underscore, drop that character as well.
1045 if (baseName.endsWith("_")) {
1046 baseName = baseName.substring(0, --idx);
1048 parser.setBaseName(baseName);
1049 parser.defaultKeywordValue(vals[1], vals[2]);
1054 /* See if this is an already known locale */
// Each CANONICALIZE_MAP row is read as { id to match, replacement base name,
// optional keyword, value }; the keyword pair is applied only when the keyword slot
// is non-null.
1055 for (int i = 0; i < CANONICALIZE_MAP.length; i++) {
1056 if (CANONICALIZE_MAP[i][0].equals(baseName)) {
1057 foundVariant = true;
1059 String[] vals = CANONICALIZE_MAP[i];
1060 parser.setBaseName(vals[1]);
1061 if (vals[2] != null) {
1062 parser.defaultKeywordValue(vals[2], vals[3]);
1068 /* total mondo hack for Norwegian, fortunately the main NY case is handled earlier */
1069 if (!foundVariant) {
// Language "nb" with variant "NY" is rebuilt as language "nn" with the same script
// and country and a null variant.
1070 if (parser.getLanguage().equals("nb") && parser.getVariant().equals("NY")) {
1071 parser.setBaseName(lscvToID("nn", parser.getScript(), parser.getCountry(), null));
// The result is whatever the parser serializes after the edits above.
1075 return parser.getName();
1079 * Given a keyword and a value, return a new locale with an updated
1080 * keyword and value. If keyword is null, this removes all keywords from the locale id.
1081 * Otherwise, if the value is null, this removes the value for this keyword from the
1082 * locale id. Otherwise, this adds/replaces the value for this keyword in the locale id.
1083 * The keyword and value must not be empty.
1084 * @param keyword the keyword to add/remove, or null to remove all keywords.
1085 * @param value the value to add/set, or null to remove this particular keyword.
1086 * @return the updated locale
1089 public ULocale setKeywordValue(String keyword, String value) {
1090 return new ULocale(setKeywordValue(localeID, keyword, value), (Locale)null);
1094 * Given a locale id, a keyword, and a value, return a new locale id with an updated
1095 * keyword and value. If keyword is null, this removes all keywords from the locale id.
1096 * Otherwise, if the value is null, this removes the value for this keyword from the
1097 * locale id. Otherwise, this adds/replaces the value for this keyword in the locale id.
1098 * The keyword and value must not be empty.
1099 * @param localeID the locale id to modify
1100 * @param keyword the keyword to add/remove, or null to remove all keywords.
1101 * @param value the value to add/set, or null to remove this particular keyword.
1102 * @return the updated locale id
1105 public static String setKeywordValue(String localeID, String keyword, String value) {
1106 LocaleIDParser parser = new LocaleIDParser(localeID);
1107 parser.setKeywordValue(keyword, value);
1108 return parser.getName();
1112 * Given a locale id, a keyword, and a value, return a new locale id with an updated
1113 * keyword and value, if the keyword does not already have a value. The keyword and
1114 * value must not be null or empty.
1115 * @param localeID the locale id to modify
1116 * @param keyword the keyword to add, if not already present
1117 * @param value the value to add, if not already present
1118 * @return the updated locale id
1120 /* private static String defaultKeywordValue(String localeID, String keyword, String value) {
1121 LocaleIDParser parser = new LocaleIDParser(localeID);
1122 parser.defaultKeywordValue(keyword, value);
1123 return parser.getName();
1127 * Returns a three-letter abbreviation for this locale's language. If the locale
1128 * doesn't specify a language, returns the empty string. Otherwise, returns
1129 * a lowercase ISO 639-2/T language code.
1130 * The ISO 639-2 language codes can be found on-line at
1131 * <a href="ftp://dkuug.dk/i18n/iso-639-2.txt"><code>ftp://dkuug.dk/i18n/iso-639-2.txt</code></a>
1132 * @exception MissingResourceException Throws MissingResourceException if the
1133 * three-letter language abbreviation is not available for this locale.
1136 public String getISO3Language(){
1137 return getISO3Language(localeID);
1141 * Returns a three-letter abbreviation for this locale's language. If the locale
1142 * doesn't specify a language, returns the empty string. Otherwise, returns
1143 * a lowercase ISO 639-2/T language code.
1144 * The ISO 639-2 language codes can be found on-line at
1145 * <a href="ftp://dkuug.dk/i18n/iso-639-2.txt"><code>ftp://dkuug.dk/i18n/iso-639-2.txt</code></a>
1146 * @exception MissingResourceException Throws MissingResourceException if the
1147 * three-letter language abbreviation is not available for this locale.
1150 public static String getISO3Language(String localeID) {
1151 return LocaleIDs.getISO3Language(getLanguage(localeID));
1155 * Returns a three-letter abbreviation for this locale's country/region. If the locale
1156 * doesn't specify a country, returns the empty string. Otherwise, returns
1157 * an uppercase ISO 3166 3-letter country code.
1158 * @exception MissingResourceException Throws MissingResourceException if the
1159 * three-letter country abbreviation is not available for this locale.
1162 public String getISO3Country() {
1163 return getISO3Country(localeID);
1167 * Returns a three-letter abbreviation for this locale's country/region. If the locale
1168 * doesn't specify a country, returns the empty string. Otherwise, returns
1169 * an uppercase ISO 3166 3-letter country code.
1170 * @exception MissingResourceException Throws MissingResourceException if the
1171 * three-letter country abbreviation is not available for this locale.
1174 public static String getISO3Country(String localeID) {
1175 return LocaleIDs.getISO3Country(getCountry(localeID));
1181 * Returns this locale's language localized for display in the default <code>DISPLAY</code> locale.
1182 * @return the localized language name.
1183 * @see Category#DISPLAY
1186 public String getDisplayLanguage() {
1187 return getDisplayLanguageInternal(this, getDefault(Category.DISPLAY), false);
1191 * {@icu} Returns this locale's language localized for display in the provided locale.
1192 * @param displayLocale the locale in which to display the name.
1193 * @return the localized language name.
1196 public String getDisplayLanguage(ULocale displayLocale) {
1197 return getDisplayLanguageInternal(this, displayLocale, false);
1201 * Returns a locale's language localized for display in the provided locale.
1202 * This is a cover for the ICU4C API.
1203 * @param localeID the id of the locale whose language will be displayed
1204 * @param displayLocaleID the id of the locale in which to display the name.
1205 * @return the localized language name.
1208 public static String getDisplayLanguage(String localeID, String displayLocaleID) {
1209 return getDisplayLanguageInternal(new ULocale(localeID), new ULocale(displayLocaleID),
1214 * Returns a locale's language localized for display in the provided locale.
1215 * This is a cover for the ICU4C API.
1216 * @param localeID the id of the locale whose language will be displayed.
1217 * @param displayLocale the locale in which to display the name.
1218 * @return the localized language name.
1221 public static String getDisplayLanguage(String localeID, ULocale displayLocale) {
1222 return getDisplayLanguageInternal(new ULocale(localeID), displayLocale, false);
1225 * {@icu} Returns this locale's language localized for display in the default <code>DISPLAY</code> locale.
1226 * If a dialect name is present in the data, then it is returned.
1227 * @return the localized language name.
1228 * @see Category#DISPLAY
1231 public String getDisplayLanguageWithDialect() {
1232 return getDisplayLanguageInternal(this, getDefault(Category.DISPLAY), true);
1236 * {@icu} Returns this locale's language localized for display in the provided locale.
1237 * If a dialect name is present in the data, then it is returned.
1238 * @param displayLocale the locale in which to display the name.
1239 * @return the localized language name.
1242 public String getDisplayLanguageWithDialect(ULocale displayLocale) {
1243 return getDisplayLanguageInternal(this, displayLocale, true);
1247 * {@icu} Returns a locale's language localized for display in the provided locale.
1248 * If a dialect name is present in the data, then it is returned.
1249 * This is a cover for the ICU4C API.
1250 * @param localeID the id of the locale whose language will be displayed
1251 * @param displayLocaleID the id of the locale in which to display the name.
1252 * @return the localized language name.
1255 public static String getDisplayLanguageWithDialect(String localeID, String displayLocaleID) {
1256 return getDisplayLanguageInternal(new ULocale(localeID), new ULocale(displayLocaleID),
1261 * {@icu} Returns a locale's language localized for display in the provided locale.
1262 * If a dialect name is present in the data, then it is returned.
1263 * This is a cover for the ICU4C API.
1264 * @param localeID the id of the locale whose language will be displayed.
1265 * @param displayLocale the locale in which to display the name.
1266 * @return the localized language name.
1269 public static String getDisplayLanguageWithDialect(String localeID, ULocale displayLocale) {
1270 return getDisplayLanguageInternal(new ULocale(localeID), displayLocale, true);
1273 private static String getDisplayLanguageInternal(ULocale locale, ULocale displayLocale,
1274 boolean useDialect) {
1275 String lang = useDialect ? locale.getBaseName() : locale.getLanguage();
1276 return LocaleDisplayNames.getInstance(displayLocale).languageDisplayName(lang);
1280 * {@icu} Returns this locale's script localized for display in the default <code>DISPLAY</code> locale.
1281 * @return the localized script name.
1282 * @see Category#DISPLAY
1285 public String getDisplayScript() {
1286 return getDisplayScriptInternal(this, getDefault(Category.DISPLAY));
1290 * {@icu} Returns this locale's script localized for display in the provided locale.
1291 * @param displayLocale the locale in which to display the name.
1292 * @return the localized script name.
1295 public String getDisplayScript(ULocale displayLocale) {
1296 return getDisplayScriptInternal(this, displayLocale);
1300 * {@icu} Returns a locale's script localized for display in the provided locale.
1301 * This is a cover for the ICU4C API.
1302 * @param localeID the id of the locale whose script will be displayed
1303 * @param displayLocaleID the id of the locale in which to display the name.
1304 * @return the localized script name.
1307 public static String getDisplayScript(String localeID, String displayLocaleID) {
1308 return getDisplayScriptInternal(new ULocale(localeID), new ULocale(displayLocaleID));
1312 * {@icu} Returns a locale's script localized for display in the provided locale.
1313 * @param localeID the id of the locale whose script will be displayed.
1314 * @param displayLocale the locale in which to display the name.
1315 * @return the localized script name.
1318 public static String getDisplayScript(String localeID, ULocale displayLocale) {
1319 return getDisplayScriptInternal(new ULocale(localeID), displayLocale);
1322 // displayLocaleID is canonical, localeID need not be since parsing will fix this.
1323 private static String getDisplayScriptInternal(ULocale locale, ULocale displayLocale) {
1324 return LocaleDisplayNames.getInstance(displayLocale)
1325 .scriptDisplayName(locale.getScript());
1329 * Returns this locale's country localized for display in the default <code>DISPLAY</code> locale.
1330 * @return the localized country name.
1331 * @see Category#DISPLAY
1334 public String getDisplayCountry() {
1335 return getDisplayCountryInternal(this, getDefault(Category.DISPLAY));
1339 * Returns this locale's country localized for display in the provided locale.
1340 * @param displayLocale the locale in which to display the name.
1341 * @return the localized country name.
1344 public String getDisplayCountry(ULocale displayLocale){
1345 return getDisplayCountryInternal(this, displayLocale);
1349 * Returns a locale's country localized for display in the provided locale.
1350 * This is a cover for the ICU4C API.
1351 * @param localeID the id of the locale whose country will be displayed
1352 * @param displayLocaleID the id of the locale in which to display the name.
1353 * @return the localized country name.
1356 public static String getDisplayCountry(String localeID, String displayLocaleID) {
1357 return getDisplayCountryInternal(new ULocale(localeID), new ULocale(displayLocaleID));
1361 * Returns a locale's country localized for display in the provided locale.
1362 * This is a cover for the ICU4C API.
1363 * @param localeID the id of the locale whose country will be displayed.
1364 * @param displayLocale the locale in which to display the name.
1365 * @return the localized country name.
1368 public static String getDisplayCountry(String localeID, ULocale displayLocale) {
1369 return getDisplayCountryInternal(new ULocale(localeID), displayLocale);
1372 // displayLocaleID is canonical, localeID need not be since parsing will fix this.
1373 private static String getDisplayCountryInternal(ULocale locale, ULocale displayLocale) {
1374 return LocaleDisplayNames.getInstance(displayLocale)
1375 .regionDisplayName(locale.getCountry());
1379 * Returns this locale's variant localized for display in the default <code>DISPLAY</code> locale.
1380 * @return the localized variant name.
1381 * @see Category#DISPLAY
1384 public String getDisplayVariant() {
1385 return getDisplayVariantInternal(this, getDefault(Category.DISPLAY));
1389 * Returns this locale's variant localized for display in the provided locale.
1390 * @param displayLocale the locale in which to display the name.
1391 * @return the localized variant name.
1394 public String getDisplayVariant(ULocale displayLocale) {
1395 return getDisplayVariantInternal(this, displayLocale);
1399 * Returns a locale's variant localized for display in the provided locale.
1400 * This is a cover for the ICU4C API.
1401 * @param localeID the id of the locale whose variant will be displayed
1402 * @param displayLocaleID the id of the locale in which to display the name.
1403 * @return the localized variant name.
1406 public static String getDisplayVariant(String localeID, String displayLocaleID){
1407 return getDisplayVariantInternal(new ULocale(localeID), new ULocale(displayLocaleID));
1411 * Returns a locale's variant localized for display in the provided locale.
1412 * This is a cover for the ICU4C API.
1413 * @param localeID the id of the locale whose variant will be displayed.
1414 * @param displayLocale the locale in which to display the name.
1415 * @return the localized variant name.
1418 public static String getDisplayVariant(String localeID, ULocale displayLocale) {
1419 return getDisplayVariantInternal(new ULocale(localeID), displayLocale);
1422 private static String getDisplayVariantInternal(ULocale locale, ULocale displayLocale) {
1423 return LocaleDisplayNames.getInstance(displayLocale)
1424 .variantDisplayName(locale.getVariant());
1428 * {@icu} Returns a keyword localized for display in the default <code>DISPLAY</code> locale.
1429 * @param keyword the keyword to be displayed.
1430 * @return the localized keyword name.
1431 * @see #getKeywords()
1432 * @see Category#DISPLAY
1435 public static String getDisplayKeyword(String keyword) {
1436 return getDisplayKeywordInternal(keyword, getDefault(Category.DISPLAY));
1440 * {@icu} Returns a keyword localized for display in the specified locale.
1441 * @param keyword the keyword to be displayed.
1442 * @param displayLocaleID the id of the locale in which to display the keyword.
1443 * @return the localized keyword name.
1444 * @see #getKeywords(String)
1447 public static String getDisplayKeyword(String keyword, String displayLocaleID) {
1448 return getDisplayKeywordInternal(keyword, new ULocale(displayLocaleID));
1452 * {@icu} Returns a keyword localized for display in the specified locale.
1453 * @param keyword the keyword to be displayed.
1454 * @param displayLocale the locale in which to display the keyword.
1455 * @return the localized keyword name.
1456 * @see #getKeywords(String)
1459 public static String getDisplayKeyword(String keyword, ULocale displayLocale) {
1460 return getDisplayKeywordInternal(keyword, displayLocale);
1463 private static String getDisplayKeywordInternal(String keyword, ULocale displayLocale) {
1464 return LocaleDisplayNames.getInstance(displayLocale).keyDisplayName(keyword);
1468 * {@icu} Returns a keyword value localized for display in the default <code>DISPLAY</code> locale.
1469 * @param keyword the keyword whose value is to be displayed.
1470 * @return the localized value name.
1471 * @see Category#DISPLAY
1474 public String getDisplayKeywordValue(String keyword) {
1475 return getDisplayKeywordValueInternal(this, keyword, getDefault(Category.DISPLAY));
1479 * {@icu} Returns a keyword value localized for display in the specified locale.
1480 * @param keyword the keyword whose value is to be displayed.
1481 * @param displayLocale the locale in which to display the value.
1482 * @return the localized value name.
1485 public String getDisplayKeywordValue(String keyword, ULocale displayLocale) {
1486 return getDisplayKeywordValueInternal(this, keyword, displayLocale);
1490 * {@icu} Returns a keyword value localized for display in the specified locale.
1491 * This is a cover for the ICU4C API.
1492 * @param localeID the id of the locale whose keyword value is to be displayed.
1493 * @param keyword the keyword whose value is to be displayed.
1494 * @param displayLocaleID the id of the locale in which to display the value.
1495 * @return the localized value name.
1498 public static String getDisplayKeywordValue(String localeID, String keyword,
1499 String displayLocaleID) {
1500 return getDisplayKeywordValueInternal(new ULocale(localeID), keyword,
1501 new ULocale(displayLocaleID));
1505 * {@icu} Returns a keyword value localized for display in the specified locale.
1506 * This is a cover for the ICU4C API.
1507 * @param localeID the id of the locale whose keyword value is to be displayed.
1508 * @param keyword the keyword whose value is to be displayed.
1509 * @param displayLocale the locale in which to display the value.
1510 * @return the localized value name.
1513 public static String getDisplayKeywordValue(String localeID, String keyword,
1514 ULocale displayLocale) {
1515 return getDisplayKeywordValueInternal(new ULocale(localeID), keyword, displayLocale);
1518 // displayLocaleID is canonical, localeID need not be since parsing will fix this.
1519 private static String getDisplayKeywordValueInternal(ULocale locale, String keyword,
1520 ULocale displayLocale) {
1521 keyword = AsciiUtil.toLowerString(keyword.trim());
1522 String value = locale.getKeywordValue(keyword);
1523 return LocaleDisplayNames.getInstance(displayLocale).keyValueDisplayName(keyword, value);
1527 * Returns this locale name localized for display in the default <code>DISPLAY</code> locale.
1528 * @return the localized locale name.
1529 * @see Category#DISPLAY
1532 public String getDisplayName() {
1533 return getDisplayNameInternal(this, getDefault(Category.DISPLAY));
1537 * Returns this locale name localized for display in the provided locale.
1538 * @param displayLocale the locale in which to display the locale name.
1539 * @return the localized locale name.
1542 public String getDisplayName(ULocale displayLocale) {
1543 return getDisplayNameInternal(this, displayLocale);
1547 * Returns the locale ID localized for display in the provided locale.
1548 * This is a cover for the ICU4C API.
1549 * @param localeID the locale whose name is to be displayed.
1550 * @param displayLocaleID the id of the locale in which to display the locale name.
1551 * @return the localized locale name.
1554 public static String getDisplayName(String localeID, String displayLocaleID) {
1555 return getDisplayNameInternal(new ULocale(localeID), new ULocale(displayLocaleID));
1559 * Returns the locale ID localized for display in the provided locale.
1560 * This is a cover for the ICU4C API.
1561 * @param localeID the locale whose name is to be displayed.
1562 * @param displayLocale the locale in which to display the locale name.
1563 * @return the localized locale name.
1566 public static String getDisplayName(String localeID, ULocale displayLocale) {
1567 return getDisplayNameInternal(new ULocale(localeID), displayLocale);
1570 private static String getDisplayNameInternal(ULocale locale, ULocale displayLocale) {
1571 return LocaleDisplayNames.getInstance(displayLocale).localeDisplayName(locale);
1575 * {@icu} Returns this locale name localized for display in the default <code>DISPLAY</code> locale.
1576 * If a dialect name is present in the locale data, then it is returned.
1577 * @return the localized locale name.
1578 * @see Category#DISPLAY
1581 public String getDisplayNameWithDialect() {
1582 return getDisplayNameWithDialectInternal(this, getDefault(Category.DISPLAY));
1586 * {@icu} Returns this locale name localized for display in the provided locale.
1587 * If a dialect name is present in the locale data, then it is returned.
1588 * @param displayLocale the locale in which to display the locale name.
1589 * @return the localized locale name.
1592 public String getDisplayNameWithDialect(ULocale displayLocale) {
1593 return getDisplayNameWithDialectInternal(this, displayLocale);
1597 * {@icu} Returns the locale ID localized for display in the provided locale.
1598 * If a dialect name is present in the locale data, then it is returned.
1599 * This is a cover for the ICU4C API.
1600 * @param localeID the locale whose name is to be displayed.
1601 * @param displayLocaleID the id of the locale in which to display the locale name.
1602 * @return the localized locale name.
1605 public static String getDisplayNameWithDialect(String localeID, String displayLocaleID) {
1606 return getDisplayNameWithDialectInternal(new ULocale(localeID),
1607 new ULocale(displayLocaleID));
1611 * {@icu} Returns the locale ID localized for display in the provided locale.
1612 * If a dialect name is present in the locale data, then it is returned.
1613 * This is a cover for the ICU4C API.
1614 * @param localeID the locale whose name is to be displayed.
1615 * @param displayLocale the locale in which to display the locale name.
1616 * @return the localized locale name.
1619 public static String getDisplayNameWithDialect(String localeID, ULocale displayLocale) {
1620 return getDisplayNameWithDialectInternal(new ULocale(localeID), displayLocale);
1623 private static String getDisplayNameWithDialectInternal(ULocale locale, ULocale displayLocale) {
1624 return LocaleDisplayNames.getInstance(displayLocale, DialectHandling.DIALECT_NAMES)
1625 .localeDisplayName(locale);
1629 * {@icu} Returns this locale's layout orientation for characters. The possible
1630 * values are "left-to-right", "right-to-left", "top-to-bottom" or
1632 * @return The locale's layout orientation for characters.
1635 public String getCharacterOrientation() {
1636 return ICUResourceTableAccess.getTableString(ICUResourceBundle.ICU_BASE_NAME, this,
1637 "layout", "characters");
1641 * {@icu} Returns this locale's layout orientation for lines. The possible
1642 * values are "left-to-right", "right-to-left", "top-to-bottom" or
1644 * @return The locale's layout orientation for lines.
1647 public String getLineOrientation() {
1648 return ICUResourceTableAccess.getTableString(ICUResourceBundle.ICU_BASE_NAME, this,
1653 * {@icu} Selector for <tt>getLocale()</tt> indicating the locale of the
1654 * resource containing the data. This is always at or above the
1655 * valid locale. If the valid locale does not contain the
1656 * specific data being requested, then the actual locale will be
1657 * above the valid locale. If the object was not constructed from
1658 * locale data, then the valid locale is <i>null</i>.
1660 * @draft ICU 2.8 (retain)
1661 * @provisional This API might change or be removed in a future release.
1663 public static Type ACTUAL_LOCALE = new Type();
1666 * {@icu} Selector for <tt>getLocale()</tt> indicating the most specific
1667 * locale for which any data exists. This is always at or above
1668 * the requested locale, and at or below the actual locale. If
1669 * the requested locale does not correspond to any resource data,
1670 * then the valid locale will be above the requested locale. If
1671 * the object was not constructed from locale data, then the
1672 * actual locale is <i>null</i>.
1674 * <p>Note: The valid locale will be returned correctly in ICU
1675 * 3.0 or later. In ICU 2.8, it is not returned correctly.
1676 * @draft ICU 2.8 (retain)
1677 * @provisional This API might change or be removed in a future release.
1679 public static Type VALID_LOCALE = new Type();
1682 * Opaque selector enum for <tt>getLocale()</tt>.
1683 * @see com.ibm.icu.util.ULocale
1684 * @see com.ibm.icu.util.ULocale#ACTUAL_LOCALE
1685 * @see com.ibm.icu.util.ULocale#VALID_LOCALE
1686 * @draft ICU 2.8 (retainAll)
1687 * @provisional This API might change or be removed in a future release.
// NOTE(review): the class body is not visible in this view. The only instances created
// nearby are the ACTUAL_LOCALE and VALID_LOCALE selectors - confirm that no others exist.
1689 public static final class Type {
1694 * {@icu} Based on a HTTP formatted list of acceptable locales, determine an available
1695 * locale for the user. NullPointerException is thrown if acceptLanguageList or
1696 * availableLocales is null. If fallback is non-null, it will contain true if a
1697 * fallback locale (one not in the acceptLanguageList) was returned. The value on
1698 * entry is ignored. ULocale will be one of the locales in availableLocales, or the
1699 * ROOT ULocale if a ROOT locale was used as a fallback (because nothing else in
1700 * availableLocales matched). No ULocale array element should be null; behavior is
1701 * undefined if this is the case.
1702 * @param acceptLanguageList list in HTTP "Accept-Language:" format of acceptable locales
1703 * @param availableLocales list of available locales. One of these will be returned.
1704 * @param fallback if non-null, a 1-element array containing a boolean to be set with
1705 * the fallback status
1706 * @return one of the locales from the availableLocales list, or null if none match
1709 public static ULocale acceptLanguage(String acceptLanguageList, ULocale[] availableLocales,
1710 boolean[] fallback) {
1711 if (acceptLanguageList == null) {
1712 throw new NullPointerException();
1714 ULocale acceptList[] = null;
1716 acceptList = parseAcceptLanguage(acceptLanguageList, true);
1717 } catch (ParseException pe) {
1720 if (acceptList == null) {
1723 return acceptLanguage(acceptList, availableLocales, fallback);
1727 * {@icu} Based on a list of acceptable locales, determine an available locale for the
1728 * user. NullPointerException is thrown if acceptLanguageList or availableLocales is
1729 * null. If fallback is non-null, it will contain true if a fallback locale (one not
1730 * in the acceptLanguageList) was returned. The value on entry is ignored. ULocale
1731 * will be one of the locales in availableLocales, or the ROOT ULocale if a ROOT
1732 * locale was used as a fallback (because nothing else in availableLocales matched).
1733 * No ULocale array element should be null; behavior is undefined if this is the case.
1734 * @param acceptLanguageList list of acceptable locales
1735 * @param availableLocales list of available locales. One of these will be returned.
1736 * @param fallback if non-null, a 1-element array containing a boolean to be set with
1737 * the fallback status
1738 * @return one of the locales from the availableLocales list, or null if none match
1742 public static ULocale acceptLanguage(ULocale[] acceptLanguageList, ULocale[]
1743 availableLocales, boolean[] fallback) {
// The fallback flag is optional: it is only touched when the caller passed an array.
1746 if(fallback != null) {
// Candidates are tried in the order given by acceptLanguageList.
1749 for(i=0;i<acceptLanguageList.length;i++) {
1750 ULocale aLocale = acceptLanguageList[i];
// setFallback starts as the caller's array and is set to null at the end of each pass
// (see below), so only a hit on the candidate itself - not on one of its parents -
// writes false into the flag.
1751 boolean[] setFallback = fallback;
1753 for(j=0;j<availableLocales.length;j++) {
// Exact match against an available locale.
1754 if(availableLocales[j].equals(aLocale)) {
1755 if(setFallback != null) {
1756 setFallback[0]=false; // first time with this locale - not a fallback.
1758 return availableLocales[j];
1760 // compare to scriptless alias, so locales such as
1761 // zh_TW, zh_CN are considered as available locales - see #7190
// Applies only when the candidate has no script, the available locale has one, and
// language, country and variant are all equal.
1762 if (aLocale.getScript().length() == 0
1763 && availableLocales[j].getScript().length() > 0
1764 && availableLocales[j].getLanguage().equals(aLocale.getLanguage())
1765 && availableLocales[j].getCountry().equals(aLocale.getCountry())
1766 && availableLocales[j].getVariant().equals(aLocale.getVariant())) {
// The alias counts as a match only if minimizing the available locale drops its script.
// NOTE(review): the value returned from this branch is not visible in this view.
1767 ULocale minAvail = ULocale.minimizeSubtags(availableLocales[j]);
1768 if (minAvail.getScript().length() == 0) {
1769 if(setFallback != null) {
1770 setFallback[0] = false; // not a fallback.
// No hit for this candidate: move up to its parent via the java.util.Locale fallback.
// NOTE(review): the handling of a null parent is not visible in this view.
1776 Locale loc = aLocale.toLocale();
1777 Locale parent = LocaleUtility.fallback(loc);
1778 if(parent != null) {
1779 aLocale = new ULocale(parent);
1783 setFallback = null; // Do not set fallback in later iterations
1784 } while (aLocale != null);
1790 * {@icu} Based on a HTTP formatted list of acceptable locales, determine an available
1791 * locale for the user. NullPointerException is thrown if acceptLanguageList or
1792 * availableLocales is null. If fallback is non-null, it will contain true if a
1793 * fallback locale (one not in the acceptLanguageList) was returned. The value on
1794 * entry is ignored. ULocale will be one of the locales in availableLocales, or the
1795 * ROOT ULocale if a ROOT locale was used as a fallback (because nothing else in
1796 * availableLocales matched). No ULocale array element should be null; behavior is
1797 * undefined if this is the case. This function will choose a locale from the
1798 * ULocale.getAvailableLocales() list as available.
1799 * @param acceptLanguageList list in HTTP "Accept-Language:" format of acceptable locales
1800 * @param fallback if non-null, a 1-element array containing a boolean to be set with
1801 * the fallback status
1802 * @return one of the locales from the ULocale.getAvailableLocales() list, or null if
1806 public static ULocale acceptLanguage(String acceptLanguageList, boolean[] fallback) {
1807 return acceptLanguage(acceptLanguageList, ULocale.getAvailableLocales(),
1812 * {@icu} Based on an ordered array of acceptable locales, determine an available
1813 * locale for the user. NullPointerException is thrown if acceptLanguageList or
1814 * availableLocales is null. If fallback is non-null, it will contain true if a
1815 * fallback locale (one not in the acceptLanguageList) was returned. The value on
1816 * entry is ignored. ULocale will be one of the locales in availableLocales, or the
1817 * ROOT ULocale if a ROOT locale was used as a fallback (because nothing else in
1818 * availableLocales matched). No ULocale array element should be null; behavior is
1819 * undefined if this is the case. This function will choose a locale from the
1820 * ULocale.getAvailableLocales() list as available.
1821 * @param acceptLanguageList ordered array of acceptable locales (preferred are listed first)
1822 * @param fallback if non-null, a 1-element array containing a boolean to be set with
1823 * the fallback status
1824 * @return one of the locales from the ULocale.getAvailableLocales() list, or null if none match
1827 public static ULocale acceptLanguage(ULocale[] acceptLanguageList, boolean[] fallback) {
1828 return acceptLanguage(acceptLanguageList, ULocale.getAvailableLocales(),
1833 * Package local method used for parsing Accept-Language string
1835 static ULocale[] parseAcceptLanguage(String acceptLanguage, boolean isLenient)
1836 throws ParseException {
1837 class ULocaleAcceptLanguageQ implements Comparable<ULocaleAcceptLanguageQ> {
1839 private double serial;
1840 public ULocaleAcceptLanguageQ(double theq, int theserial) {
1844 public int compareTo(ULocaleAcceptLanguageQ other) {
1845 if (q > other.q) { // reverse - to sort in descending order
1847 } else if (q < other.q) {
1850 if (serial < other.serial) {
1852 } else if (serial > other.serial) {
1855 return 0; // same object
1860 // parse out the acceptLanguage into an array
1861 TreeMap<ULocaleAcceptLanguageQ, ULocale> map =
1862 new TreeMap<ULocaleAcceptLanguageQ, ULocale>();
1863 StringBuilder languageRangeBuf = new StringBuilder();
1864 StringBuilder qvalBuf = new StringBuilder();
1866 acceptLanguage += ","; // append comma to simplify the parsing code
1868 boolean subTag = false;
1870 for (n = 0; n < acceptLanguage.length(); n++) {
1871 boolean gotLanguageQ = false;
1872 char c = acceptLanguage.charAt(n);
1874 case 0: // before language-range start
1875 if (('A' <= c && c <= 'Z') || ('a' <= c && c <= 'z')) {
1876 // in language-range
1877 languageRangeBuf.append(c);
1880 } else if (c == '*') {
1881 languageRangeBuf.append(c);
1883 } else if (c != ' ' && c != '\t') {
1884 // invalid character
1888 case 1: // in language-range
1889 if (('A' <= c && c <= 'Z') || ('a' <= c && c <= 'z')) {
1890 languageRangeBuf.append(c);
1891 } else if (c == '-') {
1893 languageRangeBuf.append(c);
1894 } else if (c == '_') {
1897 languageRangeBuf.append(c);
1901 } else if ('0' <= c && c <= '9') {
1903 languageRangeBuf.append(c);
1905 // DIGIT is allowed only in language sub tag
1908 } else if (c == ',') {
1910 gotLanguageQ = true;
1911 } else if (c == ' ' || c == '\t') {
1912 // language-range end
1914 } else if (c == ';') {
1918 // invalid character for language-range
1922 case 2: // saw wild card range
1925 gotLanguageQ = true;
1926 } else if (c == ' ' || c == '\t') {
1927 // language-range end
1929 } else if (c == ';') {
1937 case 3: // language-range end
1940 gotLanguageQ = true;
1941 } else if (c == ';') {
1944 } else if (c != ' ' && c != '\t') {
1953 } else if (c != ' ' && c != '\t') {
1958 case 5: // before equal
1962 } else if (c != ' ' && c != '\t') {
1967 case 6: // before q value
1969 // q value start with 0
1973 } else if (c == '1') {
1974 // q value start with 1
1977 } else if (c == '.') {
1984 } else if (c != ' ' && c != '\t') {
1989 case 7: // q value start
1991 // before q value fraction part
1994 } else if (c == ',') {
1996 gotLanguageQ = true;
1997 } else if (c == ' ' || c == '\t') {
2005 case 8: // before q value fraction part
2006 if ('0' <= c || c <= '9') {
2007 if (q1 && c != '0' && !isLenient) {
2008 // if q value starts with 1, the fraction part must be 0
2011 // in q value fraction part
2020 case 9: // in q value fraction part
2021 if ('0' <= c && c <= '9') {
2022 if (q1 && c != '0') {
2023 // if q value starts with 1, the fraction part must be 0
2028 } else if (c == ',') {
2030 gotLanguageQ = true;
2031 } else if (c == ' ' || c == '\t') {
2039 case 10: // after q value
2042 gotLanguageQ = true;
2043 } else if (c != ' ' && c != '\t') {
2051 throw new ParseException("Invalid Accept-Language", n);
2055 if (qvalBuf.length() != 0) {
2057 q = Double.parseDouble(qvalBuf.toString());
2058 } catch (NumberFormatException nfe) {
2059 // Already validated, so it should never happen
2066 if (languageRangeBuf.charAt(0) != '*') {
2067 int serial = map.size();
2068 ULocaleAcceptLanguageQ entry = new ULocaleAcceptLanguageQ(q, serial);
2069 // sort in reverse order.. 1.0, 0.9, 0.8 .. etc
2070 map.put(entry, new ULocale(canonicalize(languageRangeBuf.toString())));
2073 // reset buffer and parse state
2074 languageRangeBuf.setLength(0);
2075 qvalBuf.setLength(0);
2080 // Well, the parser should handle all cases. So just in case.
2081 throw new ParseException("Invalid AcceptlLanguage", n);
2085 ULocale acceptList[] = map.values().toArray(new ULocale[map.size()]);
// Placeholder subtags for an unspecified language, script and region. The likely-subtags
// helpers below use UNDEFINED_LANGUAGE when no language is available and compare parsed
// script/region values against the other two.
private static final String UNDEFINED_LANGUAGE = "und";
private static final String UNDEFINED_SCRIPT = "Zzzz";
private static final String UNDEFINED_REGION = "ZZ";
2094 * {@icu} Adds the likely subtags for a provided locale ID, per the algorithm
2095 * described in the following CLDR technical report:
2097 * http://www.unicode.org/reports/tr35/#Likely_Subtags
2099 * If the provided ULocale instance is already in the maximal form, or there is no
2100 * data available for maximization, it will be returned. For example,
2101 * "und-Zzzz" cannot be maximized, since there is no reasonable maximization.
2102 * Otherwise, a new ULocale instance with the maximal form is returned.
2106 * "en" maximizes to "en_Latn_US"
2108 * "de" maximizes to "de_Latn_DE"
2110 * "sr" maximizes to "sr_Cyrl_RS"
2112 * "sh" maximizes to "sr_Latn_RS" (Note this will not reverse.)
2114 * "zh_Hani" maximizes to "zh_Hans_CN" (Note this will not reverse.)
2116 * @param loc The ULocale to maximize
2117 * @return The maximized ULocale instance.
2120 public static ULocale addLikelySubtags(ULocale loc) {
2121 String[] tags = new String[3];
2122 String trailing = null;
2124 int trailingIndex = parseTagString(
2128 if (trailingIndex < loc.localeID.length()) {
2129 trailing = loc.localeID.substring(trailingIndex);
2132 String newLocaleID =
2133 createLikelySubtagsString(
2139 return newLocaleID == null ? loc : new ULocale(newLocaleID);
2143 * {@icu} Minimizes the subtags for a provided locale ID, per the algorithm described
2144 * in the following CLDR technical report:<blockquote>
2146 * <a href="http://www.unicode.org/reports/tr35/#Likely_Subtags"
2147 *>http://www.unicode.org/reports/tr35/#Likely_Subtags</a></blockquote>
2149 * If the provided ULocale instance is already in the minimal form, or there
2150 * is no data available for minimization, it will be returned. Since the
2151 * minimization algorithm relies on proper maximization, see the comments
2152 * for addLikelySubtags for reasons why there might not be any data.
2156 * "en_Latn_US" minimizes to "en"
2158 * "de_Latn_DE" minimizes to "de"
2160 * "sr_Cyrl_RS" minimizes to "sr"
2162 * "zh_Hant_TW" minimizes to "zh_TW" (The region is preferred to the
2163 * script, and minimizing to "zh" would imply "zh_Hans_CN".) </pre>
2165 * @param loc The ULocale to minimize
2166 * @return The minimized ULocale instance.
2169 public static ULocale minimizeSubtags(ULocale loc) {
2170 String[] tags = new String[3];
2172 int trailingIndex = parseTagString(
2176 String originalLang = tags[0];
2177 String originalScript = tags[1];
2178 String originalRegion = tags[2];
2179 String originalTrailing = null;
2181 if (trailingIndex < loc.localeID.length()) {
2183 * Create a String that contains everything
2184 * after the language, script, and region.
2186 originalTrailing = loc.localeID.substring(trailingIndex);
2190 * First, we need to first get the maximization
2191 * by adding any likely subtags.
2193 String maximizedLocaleID =
2194 createLikelySubtagsString(
2201 * If maximization fails, there's nothing
2204 if (isEmptyString(maximizedLocaleID)) {
2209 * Start first with just the language.
2212 createLikelySubtagsString(
2218 if (tag.equals(maximizedLocaleID)) {
2219 String newLocaleID =
2226 return new ULocale(newLocaleID);
2231 * Next, try the language and region.
2233 if (originalRegion.length() != 0) {
2236 createLikelySubtagsString(
2242 if (tag.equals(maximizedLocaleID)) {
2243 String newLocaleID =
2250 return new ULocale(newLocaleID);
2255 * Finally, try the language and script. This is our last chance,
2256 * since trying with all three subtags would only yield the
2257 * maximal version that we already have.
2259 if (originalRegion.length() != 0 &&
2260 originalScript.length() != 0) {
2263 createLikelySubtagsString(
2269 if (tag.equals(maximizedLocaleID)) {
2270 String newLocaleID =
2277 return new ULocale(newLocaleID);
2285 * A trivial utility function that checks for a null
2286 * reference or checks the length of the supplied String.
2288 * @param string The string to check
2290 * @return true if the String is empty, or if the reference is null.
2292 private static boolean isEmptyString(String string) {
2293 return string == null || string.length() == 0;
2297 * Append a tag to a StringBuilder, adding the separator if necessary.The tag must
2298 * not be a zero-length string.
2300 * @param tag The tag to add.
2301 * @param buffer The output buffer.
2303 private static void appendTag(String tag, StringBuilder buffer) {
2304 if (buffer.length() != 0) {
2305 buffer.append(UNDERSCORE);
2312 * Create a tag string from the supplied parameters. The lang, script and region
2313 * parameters may be null references.
2315 * If any of the language, script or region parameters are empty, and the alternateTags
2316 * parameter is not null, it will be parsed for potential language, script and region tags
2317 * to be used when constructing the new tag. If the alternateTags parameter is null, or
2318 * it contains no language tag, the default tag for the unknown language is used.
2320 * @param lang The language tag to use.
2321 * @param script The script tag to use.
2322 * @param region The region tag to use.
2323 * @param trailing Any trailing data to append to the new tag.
2324 * @param alternateTags A string containing any alternate tags.
2325 * @return The new tag string.
2327 private static String createTagString(String lang, String script, String region,
2328 String trailing, String alternateTags) {
2330 LocaleIDParser parser = null;
2331 boolean regionAppended = false;
2333 StringBuilder tag = new StringBuilder();
2335 if (!isEmptyString(lang)) {
2340 else if (isEmptyString(alternateTags)) {
2342 * Append the value for an unknown language, if
2343 * we found no language.
2350 parser = new LocaleIDParser(alternateTags);
2352 String alternateLang = parser.getLanguage();
2355 * Append the value for an unknown language, if
2356 * we found no language.
2359 !isEmptyString(alternateLang) ? alternateLang : UNDEFINED_LANGUAGE,
2363 if (!isEmptyString(script)) {
2368 else if (!isEmptyString(alternateTags)) {
2370 * Parse the alternateTags string for the script.
2372 if (parser == null) {
2373 parser = new LocaleIDParser(alternateTags);
2376 String alternateScript = parser.getScript();
2378 if (!isEmptyString(alternateScript)) {
2385 if (!isEmptyString(region)) {
2390 regionAppended = true;
2392 else if (!isEmptyString(alternateTags)) {
2394 * Parse the alternateTags string for the region.
2396 if (parser == null) {
2397 parser = new LocaleIDParser(alternateTags);
2400 String alternateRegion = parser.getCountry();
2402 if (!isEmptyString(alternateRegion)) {
2407 regionAppended = true;
2411 if (trailing != null && trailing.length() > 1) {
2413 * The current ICU format expects two underscores
2414 * will separate the variant from the preceding
2415 * parts of the tag, if there is no region.
2419 if (trailing.charAt(0) == UNDERSCORE) {
2420 if (trailing.charAt(1) == UNDERSCORE) {
2428 if (regionAppended) {
2430 * If we appended a region, we may need to strip
2431 * the extra separator from the variant portion.
2433 if (separators == 2) {
2434 tag.append(trailing.substring(1));
2437 tag.append(trailing);
2442 * If we did not append a region, we may need to add
2443 * an extra separator to the variant portion.
2445 if (separators == 1) {
2446 tag.append(UNDERSCORE);
2448 tag.append(trailing);
2452 return tag.toString();
2456 * Create a tag string from the supplied parameters. The lang, script and region
2457 * parameters may be null references.If the lang parameter is an empty string, the
2458 * default value for an unknown language is written to the output buffer.
2460 * @param lang The language tag to use.
2461 * @param script The script tag to use.
2462 * @param region The region tag to use.
2463 * @param trailing Any trailing data to append to the new tag.
2464 * @return The new String.
2466 static String createTagString(String lang, String script, String region, String trailing) {
2467 return createTagString(lang, script, region, trailing, null);
2471 * Parse the language, script, and region subtags from a tag string, and return the results.
2473 * This function does not return the canonical strings for the unknown script and region.
2475 * @param localeID The locale ID to parse.
2476 * @param tags An array of three String references to return the subtag strings.
2477 * @return The number of chars of the localeID parameter consumed.
2479 private static int parseTagString(String localeID, String tags[]) {
2480 LocaleIDParser parser = new LocaleIDParser(localeID);
2482 String lang = parser.getLanguage();
2483 String script = parser.getScript();
2484 String region = parser.getCountry();
2486 if (isEmptyString(lang)) {
2487 tags[0] = UNDEFINED_LANGUAGE;
2493 if (script.equals(UNDEFINED_SCRIPT)) {
2500 if (region.equals(UNDEFINED_REGION)) {
2508 * Search for the variant. If there is one, then return the index of
2509 * the preceeding separator.
2510 * If there's no variant, search for the keyword delimiter,
2511 * and return its index. Otherwise, return the length of the
2514 * $TOTO(dbertoni) we need to take into account that we might
2515 * find a part of the language as the variant, since it can
2516 * can have a variant portion that is long enough to contain
2517 * the same characters as the variant.
2519 String variant = parser.getVariant();
2521 if (!isEmptyString(variant)){
2522 int index = localeID.indexOf(variant);
2525 return index > 0 ? index - 1 : index;
2529 int index = localeID.indexOf('@');
2531 return index == -1 ? localeID.length() : index;
2535 private static String lookupLikelySubtags(String localeId) {
2536 UResourceBundle bundle =
2537 UResourceBundle.getBundleInstance(
2538 ICUResourceBundle.ICU_BASE_NAME, "likelySubtags");
2540 return bundle.getString(localeId);
2542 catch(MissingResourceException e) {
2547 private static String createLikelySubtagsString(String lang, String script, String region,
2551 * Try the language with the script and region first.
2553 if (!isEmptyString(script) && !isEmptyString(region)) {
2562 String likelySubtags = lookupLikelySubtags(searchTag);
2565 if (likelySubtags == null) {
2566 if (likelySubtags2 != null) {
2567 System.err.println("Tag mismatch: \"(null)\" \"" + likelySubtags2 + "\"");
2570 else if (likelySubtags2 == null) {
2571 System.err.println("Tag mismatch: \"" + likelySubtags + "\" \"(null)\"");
2573 else if (!likelySubtags.equals(likelySubtags2)) {
2574 System.err.println("Tag mismatch: \"" + likelySubtags + "\" \"" + likelySubtags2
2578 if (likelySubtags != null) {
2579 // Always use the language tag from the
2580 // maximal string, since it may be more
2581 // specific than the one provided.
2582 return createTagString(
2592 * Try the language with just the script.
2594 if (!isEmptyString(script)) {
2603 String likelySubtags = lookupLikelySubtags(searchTag);
2604 if (likelySubtags != null) {
2605 // Always use the language tag from the
2606 // maximal string, since it may be more
2607 // specific than the one provided.
2608 return createTagString(
2618 * Try the language with just the region.
2620 if (!isEmptyString(region)) {
2629 String likelySubtags = lookupLikelySubtags(searchTag);
2631 if (likelySubtags != null) {
2632 // Always use the language tag from the
2633 // maximal string, since it may be more
2634 // specific than the one provided.
2635 return createTagString(
2645 * Finally, try just the language.
2655 String likelySubtags = lookupLikelySubtags(searchTag);
2657 if (likelySubtags != null) {
2658 // Always use the language tag from the
2659 // maximal string, since it may be more
2660 // specific than the one provided.
2661 return createTagString(
2673 // --------------------------------
2674 // BCP47/OpenJDK APIs
2675 // --------------------------------
2678 * {@icu} The key for the private use locale extension ('x').
2680 * @see #getExtension(char)
2681 * @see Builder#setExtension(char, String)
2684 * @provisional This API might change or be removed in a future release.
2686 public static final char PRIVATE_USE_EXTENSION = 'x';
2689 * {@icu} The key for Unicode locale extension ('u').
2691 * @see #getExtension(char)
2692 * @see Builder#setExtension(char, String)
2695 * @provisional This API might change or be removed in a future release.
2697 public static final char UNICODE_LOCALE_EXTENSION = 'u';
2700 * {@icu} Returns the extension (or private use) value associated with
2701 * the specified key, or null if there is no extension
2702 * associated with the key. To be well-formed, the key must be one
2703 * of <code>[0-9A-Za-z]</code>. Keys are case-insensitive, so
2704 * for example 'z' and 'Z' represent the same extension.
2706 * @param key the extension key
2707 * @return The extension, or null if this locale defines no
2708 * extension for the specified key.
2709 * @throws IllegalArgumentException if key is not well-formed
2710 * @see #PRIVATE_USE_EXTENSION
2711 * @see #UNICODE_LOCALE_EXTENSION
2714 * @provisional This API might change or be removed in a future release.
2716 public String getExtension(char key) {
2717 if (!LocaleExtensions.isValidKey(key)) {
2718 throw new IllegalArgumentException("Invalid extension key: " + key);
2720 return extensions().getExtensionValue(key);
2724 * {@icu} Returns the set of extension keys associated with this locale, or the
2725 * empty set if it has no extensions. The returned set is unmodifiable.
2726 * The keys will all be lower-case.
2728 * @return the set of extension keys, or the empty set if this locale has
2731 * @provisional This API might change or be removed in a future release.
2733 public Set<Character> getExtensionKeys() {
2734 return extensions().getKeys();
2738 * {@icu} Returns the set of unicode locale attributes associated with
2739 * this locale, or the empty set if it has no attributes. The
2740 * returned set is unmodifiable.
2742 * @return The set of attributes.
2744 * @provisional This API might change or be removed in a future release.
2746 public Set<String> getUnicodeLocaleAttributes() {
2747 return extensions().getUnicodeLocaleAttributes();
2751 * {@icu} Returns the Unicode locale type associated with the specified Unicode locale key
2752 * for this locale. Returns the empty string for keys that are defined with no type.
2753 * Returns null if the key is not defined. Keys are case-insensitive. The key must
2754 * be two alphanumeric characters ([0-9a-zA-Z]), or an IllegalArgumentException is
2757 * @param key the Unicode locale key
2758 * @return The Unicode locale type associated with the key, or null if the
2759 * locale does not define the key.
2760 * @throws IllegalArgumentException if the key is not well-formed
2761 * @throws NullPointerException if <code>key</code> is null
2764 * @provisional This API might change or be removed in a future release.
2766 public String getUnicodeLocaleType(String key) {
2767 if (!LocaleExtensions.isValidUnicodeLocaleKey(key)) {
2768 throw new IllegalArgumentException("Invalid Unicode locale key: " + key);
2770 return extensions().getUnicodeLocaleType(key);
2774 * {@icu} Returns the set of Unicode locale keys defined by this locale, or the empty set if
2775 * this locale has none. The returned set is immutable. Keys are all lower case.
2777 * @return The set of Unicode locale keys, or the empty set if this locale has
2778 * no Unicode locale keywords.
2781 * @provisional This API might change or be removed in a future release.
2783 public Set<String> getUnicodeLocaleKeys() {
2784 return extensions().getUnicodeLocaleKeys();
2788 * {@icu} Returns a well-formed IETF BCP 47 language tag representing
2791 * <p>If this <code>ULocale</code> has a language, script, country, or
2792 * variant that does not satisfy the IETF BCP 47 language tag
2793 * syntax requirements, this method handles these fields as
2796 * <p><b>Language:</b> If language is empty, or not well-formed
2797 * (for example "a" or "e2"), it will be emitted as "und" (Undetermined).
2799 * <p><b>Script:</b> If script is not well-formed (for example "12"
2800 * or "Latin"), it will be omitted.
2802 * <p><b>Country:</b> If country is not well-formed (for example "12"
2803 * or "USA"), it will be omitted.
2805 * <p><b>Variant:</b> If variant <b>is</b> well-formed, each sub-segment
2806 * (delimited by '-' or '_') is emitted as a subtag. Otherwise:
2809 * <li>if all sub-segments match <code>[0-9a-zA-Z]{1,8}</code>
2810 * (for example "WIN" or "Oracle_JDK_Standard_Edition"), the first
2811 * ill-formed sub-segment and all following will be appended to
2812 * the private use subtag. The first appended subtag will be
2813 * "lvariant", followed by the sub-segments in order, separated by
2814 * hyphen. For example, "x-lvariant-WIN",
2815 * "Oracle-x-lvariant-JDK-Standard-Edition".
2817 * <li>if any sub-segment does not match
2818 * <code>[0-9a-zA-Z]{1,8}</code>, the variant will be truncated
2819 * and the problematic sub-segment and all following sub-segments
2820 * will be omitted. If the remainder is non-empty, it will be
2821 * emitted as a private use subtag as above (even if the remainder
2822 * turns out to be well-formed). For example,
2823 * "Solaris_isjustthecoolestthing" is emitted as
2824 * "x-lvariant-Solaris", not as "solaris".</li></ul>
2826 * <p><b>Note:</b> Although the language tag created by this
2827 * method is well-formed (satisfies the syntax requirements
2828 * defined by the IETF BCP 47 specification), it is not
2829 * necessarily a valid BCP 47 language tag. For example,
2831 * new Locale("xx", "YY").toLanguageTag();</pre>
2833 * will return "xx-YY", but the language subtag "xx" and the
2834 * region subtag "YY" are invalid because they are not registered
2835 * in the IANA Language Subtag Registry.
2837 * @return a BCP47 language tag representing the locale
2838 * @see #forLanguageTag(String)
2841 * @provisional This API might change or be removed in a future release.
2843 public String toLanguageTag() {
2844 BaseLocale base = base();
2845 LocaleExtensions exts = extensions();
2847 if (base.getVariant().equalsIgnoreCase("POSIX")) {
2848 // special handling for variant POSIX
2849 base = BaseLocale.getInstance(base.getLanguage(), base.getScript(), base.getRegion(), "");
2850 if (exts.getUnicodeLocaleType("va") == null) {
2852 InternalLocaleBuilder ilocbld = new InternalLocaleBuilder();
2854 ilocbld.setLocale(BaseLocale.ROOT, exts);
2855 ilocbld.setUnicodeLocaleKeyword("va", "posix");
2856 exts = ilocbld.getLocaleExtensions();
2857 } catch (LocaleSyntaxException e) {
2858 // this should not happen
2859 throw new RuntimeException(e);
2864 LanguageTag tag = LanguageTag.parseLocale(base, exts);
2866 StringBuilder buf = new StringBuilder();
2867 String subtag = tag.getLanguage();
2868 if (subtag.length() > 0) {
2869 buf.append(LanguageTag.canonicalizeLanguage(subtag));
2872 subtag = tag.getScript();
2873 if (subtag.length() > 0) {
2874 buf.append(LanguageTag.SEP);
2875 buf.append(LanguageTag.canonicalizeScript(subtag));
2878 subtag = tag.getRegion();
2879 if (subtag.length() > 0) {
2880 buf.append(LanguageTag.SEP);
2881 buf.append(LanguageTag.canonicalizeRegion(subtag));
2884 List<String>subtags = tag.getVariants();
2885 for (String s : subtags) {
2886 buf.append(LanguageTag.SEP);
2887 buf.append(LanguageTag.canonicalizeVariant(s));
2890 subtags = tag.getExtensions();
2891 for (String s : subtags) {
2892 buf.append(LanguageTag.SEP);
2893 buf.append(LanguageTag.canonicalizeExtension(s));
2896 subtag = tag.getPrivateuse();
2897 if (subtag.length() > 0) {
2898 if (buf.length() > 0) {
2899 buf.append(LanguageTag.SEP);
2901 buf.append(LanguageTag.PRIVATEUSE).append(LanguageTag.SEP);
2902 buf.append(LanguageTag.canonicalizePrivateuse(subtag));
2905 return buf.toString();
2909 * {@icu} Returns a locale for the specified IETF BCP 47 language tag string.
2911 * <p>If the specified language tag contains any ill-formed subtags,
2912 * the first such subtag and all following subtags are ignored. Compare
2913 * to {@link ULocale.Builder#setLanguageTag} which throws an exception
2916 * <p>The following <b>conversions</b> are performed:<ul>
2918 * <li>The language code "und" is mapped to language "".
2920 * <li>The portion of a private use subtag prefixed by "lvariant",
2921 * if any, is removed and appended to the variant field in the
2922 * result locale (without case normalization). If it is then
2923 * empty, the private use subtag is discarded:
2927 * loc = ULocale.forLanguageTag("en-US-x-lvariant-icu4j");
2928 * loc.getVariant(); // returns "ICU4J"
2929 * loc.getExtension('x'); // returns null
2931 * loc = ULocale.forLanguageTag("de-icu4j-x-URP-lvariant-Abc-Def");
2932 * loc.getVariant(); // returns "ICU4J_ABC_DEF"
2933 * loc.getExtension('x'); // returns "urp"
2936 * <li>When the languageTag argument contains an extlang subtag,
2937 * the first such subtag is used as the language, and the primary
2938 * language subtag and other extlang subtags are ignored:
2941 * ULocale.forLanguageTag("ar-aao").getLanguage(); // returns "aao"
2942 * ULocale.forLanguageTag("en-abc-def-us").toString(); // returns "abc_US"
2945 * <li>Case is normalized. Language is normalized to lower case,
2946 * script to title case, country to upper case, variant to upper case,
2947 * and extensions to lower case.
2949 * <p>This implements the 'Language-Tag' production of BCP47, and
2950 * so supports grandfathered (regular and irregular) as well as
2951 * private use language tags. Stand alone private use tags are
2952 * represented as empty language and extension 'x-whatever',
2953 * and grandfathered tags are converted to their canonical replacements
2956 * <p>Grandfathered tags with canonical replacements are as follows:
2959 * <tbody align="center">
2960 * <tr><th>grandfathered tag</th><th> </th><th>modern replacement</th></tr>
2961 * <tr><td>art-lojban</td><td> </td><td>jbo</td></tr>
2962 * <tr><td>i-ami</td><td> </td><td>ami</td></tr>
2963 * <tr><td>i-bnn</td><td> </td><td>bnn</td></tr>
2964 * <tr><td>i-hak</td><td> </td><td>hak</td></tr>
2965 * <tr><td>i-klingon</td><td> </td><td>tlh</td></tr>
2966 * <tr><td>i-lux</td><td> </td><td>lb</td></tr>
2967 * <tr><td>i-navajo</td><td> </td><td>nv</td></tr>
2968 * <tr><td>i-pwn</td><td> </td><td>pwn</td></tr>
2969 * <tr><td>i-tao</td><td> </td><td>tao</td></tr>
2970 * <tr><td>i-tay</td><td> </td><td>tay</td></tr>
2971 * <tr><td>i-tsu</td><td> </td><td>tsu</td></tr>
2972 * <tr><td>no-bok</td><td> </td><td>nb</td></tr>
2973 * <tr><td>no-nyn</td><td> </td><td>nn</td></tr>
2974 * <tr><td>sgn-BE-FR</td><td> </td><td>sfb</td></tr>
2975 * <tr><td>sgn-BE-NL</td><td> </td><td>vgt</td></tr>
2976 * <tr><td>sgn-CH-DE</td><td> </td><td>sgg</td></tr>
2977 * <tr><td>zh-guoyu</td><td> </td><td>cmn</td></tr>
2978 * <tr><td>zh-hakka</td><td> </td><td>hak</td></tr>
2979 * <tr><td>zh-min-nan</td><td> </td><td>nan</td></tr>
2980 * <tr><td>zh-xiang</td><td> </td><td>hsn</td></tr>
2984 * <p>Grandfathered tags with no modern replacement will be
2985 * converted as follows:
2988 * <tbody align="center">
2989 * <tr><th>grandfathered tag</th><th> </th><th>converts to</th></tr>
2990 * <tr><td>cel-gaulish</td><td> </td><td>xtg-x-cel-gaulish</td></tr>
2991 * <tr><td>en-GB-oed</td><td> </td><td>en-GB-x-oed</td></tr>
2992 * <tr><td>i-default</td><td> </td><td>en-x-i-default</td></tr>
2993 * <tr><td>i-enochian</td><td> </td><td>und-x-i-enochian</td></tr>
2994 * <tr><td>i-mingo</td><td> </td><td>see-x-i-mingo</td></tr>
2995 * <tr><td>zh-min</td><td> </td><td>nan-x-zh-min</td></tr>
2999 * <p>For a list of all grandfathered tags, see the
3000 * IANA Language Subtag Registry (search for "Type: grandfathered").
3002 * <p><b>Note</b>: there is no guarantee that <code>toLanguageTag</code>
3003 * and <code>forLanguageTag</code> will round-trip.
3005 * @param languageTag the language tag
3006 * @return The locale that best represents the language tag.
3007 * @throws NullPointerException if <code>languageTag</code> is <code>null</code>
3008 * @see #toLanguageTag()
3009 * @see ULocale.Builder#setLanguageTag(String)
3012 * @provisional This API might change or be removed in a future release.
3014 public static ULocale forLanguageTag(String languageTag) {
3015 LanguageTag tag = LanguageTag.parse(languageTag, null);
3016 InternalLocaleBuilder bldr = new InternalLocaleBuilder();
3017 bldr.setLanguageTag(tag);
3018 return getInstance(bldr.getBaseLocale(), bldr.getLocaleExtensions());
3023 * <code>Builder</code> is used to build instances of <code>ULocale</code>
3024 * from values configured by the setters. Unlike the <code>ULocale</code>
3025 * constructors, the <code>Builder</code> checks if a value configured by a
3026 * setter satisfies the syntax requirements defined by the <code>ULocale</code>
3027 * class. A <code>ULocale</code> object created by a <code>Builder</code> is
3028 * well-formed and can be transformed to a well-formed IETF BCP 47 language tag
3029 * without losing information.
3031 * <p><b>Note:</b> The <code>ULocale</code> class does not provide any
3032 * syntactic restrictions on variant, while BCP 47 requires each variant
3033 * subtag to be 5 to 8 alphanumerics or a single numeric followed by 3
3034 * alphanumerics. The method <code>setVariant</code> throws
3035 * <code>IllformedLocaleException</code> for a variant that does not satisfy
3036 * this restriction. If it is necessary to support such a variant, use a
3037 * ULocale constructor. However, keep in mind that a <code>ULocale</code>
3038 * object created this way might lose the variant information when
3039 * transformed to a BCP 47 language tag.
3041 * <p>The following example shows how to create a <code>ULocale</code> object
3042 * with the <code>Builder</code>.
3045 * ULocale aLocale = new Builder().setLanguage("sr").setScript("Latn").setRegion("RS").build();
3049 * <p>Builders can be reused; <code>clear()</code> resets all
3050 * fields to their default values.
3052 * @see ULocale#toLanguageTag()
3055 * @provisional This API might change or be removed in a future release.
3057 public static final class Builder {
3059 private final InternalLocaleBuilder _locbld;
3062 * Constructs an empty Builder. The default value of all
3063 * fields, extensions, and private use information is the
3067 * @provisional This API might change or be removed in a future release.
3070 _locbld = new InternalLocaleBuilder();
3074 * Resets the <code>Builder</code> to match the provided
3075 * <code>locale</code>. Existing state is discarded.
3077 * <p>All fields of the locale must be well-formed, see {@link Locale}.
3079 * <p>Locales with any ill-formed fields cause
3080 * <code>IllformedLocaleException</code> to be thrown.
3082 * @param locale the locale
3083 * @return This builder.
3084 * @throws IllformedLocaleException if <code>locale</code> has
3085 * any ill-formed fields.
3086 * @throws NullPointerException if <code>locale</code> is null.
3089 * @provisional This API might change or be removed in a future release.
3091 public Builder setLocale(ULocale locale) {
3093 _locbld.setLocale(locale.base(), locale.extensions());
3094 } catch (LocaleSyntaxException e) {
3095 throw new IllformedLocaleException(e.getMessage(), e.getErrorIndex());
3101 * Resets the Builder to match the provided IETF BCP 47
3102 * language tag. Discards the existing state. Null and the
3103 * empty string cause the builder to be reset, like {@link
3104 * #clear}. Grandfathered tags (see {@link
3105 * ULocale#forLanguageTag}) are converted to their canonical
3106 * form before being processed. Otherwise, the language tag
3107 * must be well-formed (see {@link ULocale}) or an exception is
3108 * thrown (unlike <code>ULocale.forLanguageTag</code>, which
3109 * just discards ill-formed and following portions of the
3112 * @param languageTag the language tag
3113 * @return This builder.
3114 * @throws IllformedLocaleException if <code>languageTag</code> is ill-formed
3115 * @see ULocale#forLanguageTag(String)
3118 * @provisional This API might change or be removed in a future release.
3120 public Builder setLanguageTag(String languageTag) {
3121 ParseStatus sts = new ParseStatus();
3122 LanguageTag tag = LanguageTag.parse(languageTag, sts);
3123 if (sts.isError()) {
3124 throw new IllformedLocaleException(sts.getErrorMessage(), sts.getErrorIndex());
3126 _locbld.setLanguageTag(tag);
3132 * Sets the language. If <code>language</code> is the empty string or
3133 * null, the language in this <code>Builder</code> is removed. Otherwise,
3134 * the language must be <a href="./Locale.html#def_language">well-formed</a>
3135 * or an exception is thrown.
3137 * <p>The typical language value is a two or three-letter language
3138 * code as defined in ISO639.
3140 * @param language the language
3141 * @return This builder.
3142 * @throws IllformedLocaleException if <code>language</code> is ill-formed
3145 * @provisional This API might change or be removed in a future release.
3147 public Builder setLanguage(String language) {
3149 _locbld.setLanguage(language);
3150 } catch (LocaleSyntaxException e) {
3151 throw new IllformedLocaleException(e.getMessage(), e.getErrorIndex());
3157 * Sets the script. If <code>script</code> is null or the empty string,
3158 * the script in this <code>Builder</code> is removed.
3159 * Otherwise, the script must be well-formed or an exception is thrown.
3161 * <p>The typical script value is a four-letter script code as defined by ISO 15924.
3163 * @param script the script
3164 * @return This builder.
3165 * @throws IllformedLocaleException if <code>script</code> is ill-formed
3168 * @provisional This API might change or be removed in a future release.
3170 public Builder setScript(String script) {
3172 _locbld.setScript(script);
3173 } catch (LocaleSyntaxException e) {
3174 throw new IllformedLocaleException(e.getMessage(), e.getErrorIndex());
3180 * Sets the region. If region is null or the empty string, the region
3181 * in this <code>Builder</code> is removed. Otherwise,
3182 * the region must be well-formed or an exception is thrown.
3184 * <p>The typical region value is a two-letter ISO 3166 code or a
3185 * three-digit UN M.49 area code.
3187 * <p>The country value in the <code>Locale</code> created by the
3188 * <code>Builder</code> is always normalized to upper case.
3190 * @param region the region
3191 * @return This builder.
3192 * @throws IllformedLocaleException if <code>region</code> is ill-formed
3195 * @provisional This API might change or be removed in a future release.
3197 public Builder setRegion(String region) {
3199 _locbld.setRegion(region);
3200 } catch (LocaleSyntaxException e) {
3201 throw new IllformedLocaleException(e.getMessage(), e.getErrorIndex());
3207 * Sets the variant. If variant is null or the empty string, the
3208 * variant in this <code>Builder</code> is removed. Otherwise, it
3209 * must consist of one or more well-formed subtags, or an exception is thrown.
3211 * <p><b>Note:</b> This method checks if <code>variant</code>
3212 * satisfies the IETF BCP 47 variant subtag's syntax requirements,
3213 * and normalizes the value to lowercase letters. However,
3214 * the <code>ULocale</code> class does not impose any syntactic
3215 * restriction on variant. To set such a variant,
3216 * use a ULocale constructor.
3218 * @param variant the variant
3219 * @return This builder.
3220 * @throws IllformedLocaleException if <code>variant</code> is ill-formed
3223 * @provisional This API might change or be removed in a future release.
3225 public Builder setVariant(String variant) {
3227 _locbld.setVariant(variant);
3228 } catch (LocaleSyntaxException e) {
3229 throw new IllformedLocaleException(e.getMessage(), e.getErrorIndex());
3235 * Sets the extension for the given key. If the value is null or the
3236 * empty string, the extension is removed. Otherwise, the extension
3237 * must be well-formed or an exception is thrown.
3239 * <p><b>Note:</b> The key {@link ULocale#UNICODE_LOCALE_EXTENSION
3240 * UNICODE_LOCALE_EXTENSION} ('u') is used for the Unicode locale extension.
3241 * Setting a value for this key replaces any existing Unicode locale key/type
3242 * pairs with those defined in the extension.
3244 * <p><b>Note:</b> The key {@link ULocale#PRIVATE_USE_EXTENSION
3245 * PRIVATE_USE_EXTENSION} ('x') is used for the private use code. To be
3246 * well-formed, the value for this key needs only to have subtags of one to
3247 * eight alphanumeric characters, not two to eight as in the general case.
3249 * @param key the extension key
3250 * @param value the extension value
3251 * @return This builder.
3252 * @throws IllformedLocaleException if <code>key</code> is illegal
3253 * or <code>value</code> is ill-formed
3254 * @see #setUnicodeLocaleKeyword(String, String)
3257 * @provisional This API might change or be removed in a future release.
3259 public Builder setExtension(char key, String value) {
3261 _locbld.setExtension(key, value);
3262 } catch (LocaleSyntaxException e) {
3263 throw new IllformedLocaleException(e.getMessage(), e.getErrorIndex());
3269 * Sets the Unicode locale keyword type for the given key. If the type
3270 * is null, the Unicode keyword is removed. Otherwise, the key must be
3271 * non-null and both key and type must be well-formed or an exception
3274 * <p>Keys and types are converted to lower case.
3276 * <p><b>Note</b>:Setting the 'u' extension via {@link #setExtension}
3277 * replaces all Unicode locale keywords with those defined in the
3280 * @param key the Unicode locale key
3281 * @param type the Unicode locale type
3282 * @return This builder.
3283 * @throws IllformedLocaleException if <code>key</code> or <code>type</code>
3285 * @throws NullPointerException if <code>key</code> is null
3286 * @see #setExtension(char, String)
3289 * @provisional This API might change or be removed in a future release.
3291 public Builder setUnicodeLocaleKeyword(String key, String type) {
3293 _locbld.setUnicodeLocaleKeyword(key, type);
3294 } catch (LocaleSyntaxException e) {
3295 throw new IllformedLocaleException(e.getMessage(), e.getErrorIndex());
3301 * Adds a unicode locale attribute, if not already present, otherwise
3302 * has no effect. The attribute must not be null and must be well-formed
3303 * or an exception is thrown.
3305 * @param attribute the attribute
3306 * @return This builder.
3307 * @throws NullPointerException if <code>attribute</code> is null
3308 * @throws IllformedLocaleException if <code>attribute</code> is ill-formed
3309 * @see #setExtension(char, String)
3312 * @provisional This API might change or be removed in a future release.
3314 public Builder addUnicodeLocaleAttribute(String attribute) {
3316 _locbld.addUnicodeLocaleAttribute(attribute);
3317 } catch (LocaleSyntaxException e) {
3318 throw new IllformedLocaleException(e.getMessage(), e.getErrorIndex());
3324 * Removes a unicode locale attribute, if present, otherwise has no
3325 * effect. The attribute must not be null and must be well-formed
3326 * or an exception is thrown.
3328 * <p>Attribute comparision for removal is case-insensitive.
3330 * @param attribute the attribute
3331 * @return This builder.
3332 * @throws NullPointerException if <code>attribute</code> is null
3333 * @throws IllformedLocaleException if <code>attribute</code> is ill-formed
3334 * @see #setExtension(char, String)
3337 * @provisional This API might change or be removed in a future release.
3339 public Builder removeUnicodeLocaleAttribute(String attribute) {
3341 _locbld.removeUnicodeLocaleAttribute(attribute);
3342 } catch (LocaleSyntaxException e) {
3343 throw new IllformedLocaleException(e.getMessage(), e.getErrorIndex());
3349 * Resets the builder to its initial, empty state.
3351 * @return this builder
3354 * @provisional This API might change or be removed in a future release.
3356 public Builder clear() {
3362 * Resets the extensions to their initial, empty state.
3363 * Language, script, region and variant are unchanged.
3365 * @return this builder
3366 * @see #setExtension(char, String)
3369 * @provisional This API might change or be removed in a future release.
3371 public Builder clearExtensions() {
3372 _locbld.clearExtensions();
3377 * Returns an instance of <code>ULocale</code> created from the fields set
3380 * @return a new Locale
3383 * @provisional This API might change or be removed in a future release.
3385 public ULocale build() {
3386 return getInstance(_locbld.getBaseLocale(), _locbld.getLocaleExtensions());
3390 private static ULocale getInstance(BaseLocale base, LocaleExtensions exts) {
3391 String id = lscvToID(base.getLanguage(), base.getScript(), base.getRegion(),
3394 Set<Character> extKeys = exts.getKeys();
3395 if (!extKeys.isEmpty()) {
3396 // legacy locale ID assume Unicode locale keywords and
3397 // other extensions are at the same level.
3398 // e.g. @a=ext-for-aa;calendar=japanese;m=ext-for-mm;x=priv-use
3400 TreeMap<String, String> kwds = new TreeMap<String, String>();
3401 for (Character key : extKeys) {
3402 Extension ext = exts.getExtension(key);
3403 if (ext instanceof UnicodeLocaleExtension) {
3404 UnicodeLocaleExtension uext = (UnicodeLocaleExtension)ext;
3405 Set<String> ukeys = uext.getUnicodeLocaleKeys();
3406 for (String bcpKey : ukeys) {
3407 String bcpType = uext.getUnicodeLocaleType(bcpKey);
3408 // convert to legacy key/type
3409 String lkey = bcp47ToLDMLKey(bcpKey);
3410 String ltype = bcp47ToLDMLType(lkey, ((bcpType.length() == 0) ? "true" : bcpType)); // use "true" as the value of typeless keywords
3411 // special handling for u-va-posix, since this is a variant, not a keyword
3412 if (lkey.equals("va") && ltype.equals("posix") && base.getVariant().length() == 0) {
3415 kwds.put(lkey, ltype);
3418 // Mapping Unicode locale attribute to the special keyword, attribute=xxx-yyy
3419 Set<String> uattributes = uext.getUnicodeLocaleAttributes();
3420 if (uattributes.size() > 0) {
3421 StringBuilder attrbuf = new StringBuilder();
3422 for (String attr : uattributes) {
3423 if (attrbuf.length() > 0) {
3424 attrbuf.append('-');
3426 attrbuf.append(attr);
3428 kwds.put(LOCALE_ATTRIBUTE_KEY, attrbuf.toString());
3431 kwds.put(String.valueOf(key), ext.getValue());
3435 if (!kwds.isEmpty()) {
3436 StringBuilder buf = new StringBuilder(id);
3438 Set<Map.Entry<String, String>> kset = kwds.entrySet();
3439 boolean insertSep = false;
3440 for (Map.Entry<String, String> kwd : kset) {
3446 buf.append(kwd.getKey());
3448 buf.append(kwd.getValue());
3451 id = buf.toString();
3454 return new ULocale(id);
3457 private BaseLocale base() {
3458 if (baseLocale == null) {
3459 String language = getLanguage();
3460 if (equals(ULocale.ROOT)) {
3463 baseLocale = BaseLocale.getInstance(language, getScript(), getCountry(), getVariant());
3468 private LocaleExtensions extensions() {
3469 if (extensions == null) {
3470 Iterator<String> kwitr = getKeywords();
3471 if (kwitr == null) {
3472 extensions = LocaleExtensions.EMPTY_EXTENSIONS;
3474 InternalLocaleBuilder intbld = new InternalLocaleBuilder();
3475 while (kwitr.hasNext()) {
3476 String key = kwitr.next();
3477 if (key.equals(LOCALE_ATTRIBUTE_KEY)) {
3478 // special keyword used for representing Unicode locale attributes
3479 String[] uattributes = getKeywordValue(key).split("[-_]");
3480 for (String uattr : uattributes) {
3482 intbld.addUnicodeLocaleAttribute(uattr);
3483 } catch (LocaleSyntaxException e) {
3484 // ignore and fall through
3487 } else if (key.length() >= 2) {
3488 String bcpKey = ldmlKeyToBCP47(key);
3489 String bcpType = ldmlTypeToBCP47(key, getKeywordValue(key));
3490 if (bcpKey != null && bcpType != null) {
3492 intbld.setUnicodeLocaleKeyword(bcpKey, bcpType);
3493 } catch (LocaleSyntaxException e) {
3494 // ignore and fall through
3497 } else if (key.length() == 1 && (key.charAt(0) != UNICODE_LOCALE_EXTENSION)) {
3499 intbld.setExtension(key.charAt(0), getKeywordValue(key).replace("_",
3501 } catch (LocaleSyntaxException e) {
3502 // ignore and fall through
3506 extensions = intbld.getLocaleExtensions();
3513 // LDML legacy/BCP47 key and type mapping functions
3515 private static String ldmlKeyToBCP47(String key) {
3516 UResourceBundle keyTypeData = UResourceBundle.getBundleInstance(
3517 ICUResourceBundle.ICU_BASE_NAME,
3519 ICUResourceBundle.ICU_DATA_CLASS_LOADER);
3520 UResourceBundle keyMap = keyTypeData.get("keyMap");
3522 // normalize key to lowercase
3523 key = AsciiUtil.toLowerString(key);
3524 String bcpKey = null;
3526 bcpKey = keyMap.getString(key);
3527 } catch (MissingResourceException mre) {
3531 if (bcpKey == null) {
3532 if (key.length() == 2 && LanguageTag.isExtensionSubtag(key)) {
3540 private static String bcp47ToLDMLKey(String bcpKey) {
3541 UResourceBundle keyTypeData = UResourceBundle.getBundleInstance(
3542 ICUResourceBundle.ICU_BASE_NAME,
3544 ICUResourceBundle.ICU_DATA_CLASS_LOADER);
3545 UResourceBundle keyMap = keyTypeData.get("keyMap");
3547 // normalize bcp key to lowercase
3548 bcpKey = AsciiUtil.toLowerString(bcpKey);
3550 for (int i = 0; i < keyMap.getSize(); i++) {
3551 UResourceBundle mapData = keyMap.get(i);
3552 if (bcpKey.equals(mapData.getString())) {
3553 key = mapData.getKey();
3563 private static String ldmlTypeToBCP47(String key, String type) {
3564 UResourceBundle keyTypeData = UResourceBundle.getBundleInstance(
3565 ICUResourceBundle.ICU_BASE_NAME,
3567 ICUResourceBundle.ICU_DATA_CLASS_LOADER);
3568 UResourceBundle typeMap = keyTypeData.get("typeMap");
3570 // keys are case-insensitive, while types are case-sensitive
3571 key = AsciiUtil.toLowerString(key);
3572 UResourceBundle typeMapForKey = null;
3573 String bcpType = null;
3574 String typeResKey = key.equals("timezone") ? type.replace('/', ':') : type;
3576 typeMapForKey = typeMap.get(key);
3577 bcpType = typeMapForKey.getString(typeResKey);
3578 } catch (MissingResourceException mre) {
3582 if (bcpType == null && typeMapForKey != null) {
3583 // is this type alias?
3584 UResourceBundle typeAlias = keyTypeData.get("typeAlias");
3586 UResourceBundle typeAliasForKey = typeAlias.get(key);
3587 typeResKey = typeAliasForKey.getString(typeResKey);
3588 bcpType = typeMapForKey.getString(typeResKey.replace('/', ':'));
3589 } catch (MissingResourceException mre) {
3594 if (bcpType == null) {
3595 int typeLen = type.length();
3596 if (typeLen >= 3 && typeLen <= 8 && LanguageTag.isExtensionSubtag(type)) {
3604 private static String bcp47ToLDMLType(String key, String bcpType) {
3605 UResourceBundle keyTypeData = UResourceBundle.getBundleInstance(
3606 ICUResourceBundle.ICU_BASE_NAME,
3608 ICUResourceBundle.ICU_DATA_CLASS_LOADER);
3609 UResourceBundle typeMap = keyTypeData.get("typeMap");
3611 // normalize key/bcpType to lowercase
3612 key = AsciiUtil.toLowerString(key);
3613 bcpType = AsciiUtil.toLowerString(bcpType);
3617 UResourceBundle typeMapForKey = typeMap.get(key);
3619 // Note: Linear search for time zone ID might be too slow.
3620 // ICU services do not use timezone keywords for now.
3621 // In future, we may need to build the optimized inverse
3624 for (int i = 0; i < typeMapForKey.getSize(); i++) {
3625 UResourceBundle mapData = typeMapForKey.get(i);
3626 if (bcpType.equals(mapData.getString())) {
3627 type = mapData.getKey();
3628 if (key.equals("timezone")) {
3629 type = type.replace(':', '/');
3634 } catch (MissingResourceException mre) {
3647 private static final class JDKLocaleHelper {
3648 private static boolean isJava7orNewer = false;
3651 * New methods in Java 7 Locale class
3653 private static Method mGetScript;
3654 private static Method mGetExtensionKeys;
3655 private static Method mGetExtension;
3656 private static Method mGetUnicodeLocaleKeys;
3657 private static Method mGetUnicodeLocaleAttributes;
3658 private static Method mGetUnicodeLocaleType;
3659 private static Method mForLanguageTag;
3661 private static Method mGetDefault;
3662 private static Method mSetDefault;
3663 private static Object eDISPLAY;
3664 private static Object eFORMAT;
3667 * This table is used for mapping between ICU and special Java
3668 * 6 locales. When an ICU locale matches <minumum base> with
3669 * <keyword>/<value>, the ICU locale is mapped to <Java> locale.
3670 * For example, both ja_JP@calendar=japanese and ja@calendar=japanese
3671 * are mapped to Java locale "ja_JP_JP". ICU locale "nn" is mapped
3672 * to Java locale "no_NO_NY".
3674 private static final String[][] JAVA6_MAPDATA = {
3675 // { <Java>, <ICU base>, <keyword>, <value>, <minimum base>
3676 { "ja_JP_JP", "ja_JP", "calendar", "japanese", "ja"},
3677 { "no_NO_NY", "nn_NO", null, null, "nn"},
3678 { "th_TH_TH", "th_TH", "numbers", "thai", "th"},
3684 mGetScript = Locale.class.getMethod("getScript", (Class[]) null);
3685 mGetExtensionKeys = Locale.class.getMethod("getExtensionKeys", (Class[]) null);
3686 mGetExtension = Locale.class.getMethod("getExtension", char.class);
3687 mGetUnicodeLocaleKeys = Locale.class.getMethod("getUnicodeLocaleKeys", (Class[]) null);
3688 mGetUnicodeLocaleAttributes = Locale.class.getMethod("getUnicodeLocaleAttributes", (Class[]) null);
3689 mGetUnicodeLocaleType = Locale.class.getMethod("getUnicodeLocaleType", String.class);
3690 mForLanguageTag = Locale.class.getMethod("forLanguageTag", String.class);
3692 Class<?> cCategory = null;
3693 Class<?>[] classes = Locale.class.getDeclaredClasses();
3694 for (Class<?> c : classes) {
3695 if (c.getName().equals("java.util.Locale$Category")) {
3700 if (cCategory == null) {
3703 mGetDefault = Locale.class.getDeclaredMethod("getDefault", cCategory);
3704 mSetDefault = Locale.class.getDeclaredMethod("setDefault", cCategory, Locale.class);
3706 Method mName = cCategory.getMethod("name", (Class[]) null);
3707 Object[] enumConstants = cCategory.getEnumConstants();
3708 for (Object e : enumConstants) {
3709 String catVal = (String)mName.invoke(e, (Object[])null);
3710 if (catVal.equals("DISPLAY")) {
3712 } else if (catVal.equals("FORMAT")) {
3716 if (eDISPLAY == null || eFORMAT == null) {
3719 isJava7orNewer = true;
3720 } catch (NoSuchMethodException e) {
3721 } catch (IllegalArgumentException e) {
3722 } catch (IllegalAccessException e) {
3723 } catch (InvocationTargetException e) {
3724 } catch (SecurityException e) {
// Static helper only - not instantiable.
private JDKLocaleHelper() {
}
3733 public static boolean isJava7orNewer() {
3734 return isJava7orNewer;
3737 public static ULocale toULocale(Locale loc) {
3738 return isJava7orNewer ? toULocale7(loc) : toULocale6(loc);
3741 public static Locale toLocale(ULocale uloc) {
3742 return isJava7orNewer ? toLocale7(uloc) : toLocale6(uloc);
3745 private static ULocale toULocale7(Locale loc) {
3746 String language = loc.getLanguage();
3748 String country = loc.getCountry();
3749 String variant = loc.getVariant();
3751 Set<String> attributes = null;
3752 Map<String, String> keywords = null;
3755 script = (String) mGetScript.invoke(loc, (Object[]) null);
3756 @SuppressWarnings("unchecked")
3757 Set<Character> extKeys = (Set<Character>) mGetExtensionKeys.invoke(loc, (Object[]) null);
3758 if (!extKeys.isEmpty()) {
3759 for (Character extKey : extKeys) {
3760 if (extKey.charValue() == 'u') {
3761 // Found Unicode locale extension
3764 @SuppressWarnings("unchecked")
3765 Set<String> uAttributes = (Set<String>) mGetUnicodeLocaleAttributes.invoke(loc, (Object[]) null);
3766 if (!uAttributes.isEmpty()) {
3767 attributes = new TreeSet<String>();
3768 for (String attr : uAttributes) {
3769 attributes.add(attr);
3774 @SuppressWarnings("unchecked")
3775 Set<String> uKeys = (Set<String>) mGetUnicodeLocaleKeys.invoke(loc, (Object[]) null);
3776 for (String kwKey : uKeys) {
3777 String kwVal = (String) mGetUnicodeLocaleType.invoke(loc, kwKey);
3778 if (kwVal != null) {
3779 if (kwKey.equals("va")) {
3780 // va-* is interpreted as a variant
3781 variant = (variant.length() == 0) ? kwVal : kwVal + "_" + variant;
3783 if (keywords == null) {
3784 keywords = new TreeMap<String, String>();
3786 keywords.put(kwKey, kwVal);
3791 String extVal = (String) mGetExtension.invoke(loc, extKey);
3792 if (extVal != null) {
3793 if (keywords == null) {
3794 keywords = new TreeMap<String, String>();
3796 keywords.put(String.valueOf(extKey), extVal);
3801 } catch (IllegalAccessException e) {
3802 throw new RuntimeException(e);
3803 } catch (InvocationTargetException e) {
3804 throw new RuntimeException(e);
3807 // JDK locale no_NO_NY is not interpreted as Nynorsk by ICU,
3808 // and it should be transformed to nn_NO.
3810 // Note: JDK7+ unerstand both no_NO_NY and nn_NO. When convert
3811 // ICU locale to JDK, we do not need to map nn_NO back to no_NO_NY.
3813 if (language.equals("no") && country.equals("NO") && variant.equals("NY")) {
3819 StringBuilder buf = new StringBuilder(language);
3821 if (script.length() > 0) {
3826 if (country.length() > 0) {
3828 buf.append(country);
3831 if (variant.length() > 0) {
3832 if (country.length() == 0) {
3836 buf.append(variant);
3839 if (attributes != null) {
3840 // transform Unicode attributes into a keyword
3841 StringBuilder attrBuf = new StringBuilder();
3842 for (String attr : attributes) {
3843 if (attrBuf.length() != 0) {
3844 attrBuf.append('-');
3846 attrBuf.append(attr);
3848 if (keywords == null) {
3849 keywords = new TreeMap<String, String>();
3851 keywords.put(LOCALE_ATTRIBUTE_KEY, attrBuf.toString());
3854 if (keywords != null) {
3856 boolean addSep = false;
3857 for (Entry<String, String> kwEntry : keywords.entrySet()) {
3858 String kwKey = kwEntry.getKey();
3859 String kwVal = kwEntry.getValue();
3861 if (kwKey.length() != 1) {
3862 // Unicode locale key
3863 kwKey = bcp47ToLDMLKey(kwKey);
3864 // use "true" as the value of typeless keywords
3865 kwVal = bcp47ToLDMLType(kwKey, ((kwVal.length() == 0) ? "true" : kwVal));
3879 return new ULocale(getName(buf.toString()), loc);
3882 private static ULocale toULocale6(Locale loc) {
3883 ULocale uloc = null;
3884 String locStr = loc.toString();
3885 if (locStr.length() == 0) {
3886 uloc = ULocale.ROOT;
3888 for (int i = 0; i < JAVA6_MAPDATA.length; i++) {
3889 if (JAVA6_MAPDATA[i][0].equals(locStr)) {
3890 LocaleIDParser p = new LocaleIDParser(JAVA6_MAPDATA[i][1]);
3891 p.setKeywordValue(JAVA6_MAPDATA[i][2], JAVA6_MAPDATA[i][3]);
3892 locStr = p.getName();
3896 uloc = new ULocale(getName(locStr), loc);
3901 private static Locale toLocale7(ULocale uloc) {
3903 String ulocStr = uloc.getName();
3904 if (uloc.getScript().length() > 0 || ulocStr.contains("@")) {
3905 // With script or keywords available, the best way
3906 // to get a mapped Locale is to go through a language tag.
3907 // A Locale with script or keywords can only have variants
3908 // that is 1 to 8 alphanum. If this ULocale has a variant
3909 // subtag not satisfying the criteria, the variant subtag
3911 String tag = uloc.toLanguageTag();
3913 // Workaround for variant casing problem:
3915 // The variant field in ICU is case insensitive and normalized
3916 // to upper case letters by getVariant(), while
3917 // the variant field in JDK Locale is case sensitive.
3918 // ULocale#toLanguageTag use lower case characters for
3919 // BCP 47 variant and private use x-lvariant.
3921 // Locale#forLanguageTag in JDK preserves character casing
3922 // for variant. Because ICU always normalizes variant to
3923 // upper case, we convert language tag to upper case here.
3924 tag = AsciiUtil.toUpperString(tag);
3927 loc = (Locale)mForLanguageTag.invoke(null, tag);
3928 } catch (IllegalAccessException e) {
3929 throw new RuntimeException(e);
3930 } catch (InvocationTargetException e) {
3931 throw new RuntimeException(e);
3935 // Without script or keywords, use a Locale constructor,
3936 // so we can preserve any ill-formed variants.
3937 loc = new Locale(uloc.getLanguage(), uloc.getCountry(), uloc.getVariant());
3942 private static Locale toLocale6(ULocale uloc) {
3943 String locstr = uloc.getBaseName();
3944 for (int i = 0; i < JAVA6_MAPDATA.length; i++) {
3945 if (locstr.equals(JAVA6_MAPDATA[i][1]) || locstr.equals(JAVA6_MAPDATA[i][4])) {
3946 if (JAVA6_MAPDATA[i][2] != null) {
3947 String val = uloc.getKeywordValue(JAVA6_MAPDATA[i][2]);
3948 if (val != null && val.equals(JAVA6_MAPDATA[i][3])) {
3949 locstr = JAVA6_MAPDATA[i][0];
3953 locstr = JAVA6_MAPDATA[i][0];
3958 LocaleIDParser p = new LocaleIDParser(locstr);
3959 String[] names = p.getLanguageScriptCountryVariant();
3960 return new Locale(names[0], names[2], names[3]);
3963 public static Locale getDefault(Category category) {
3964 Locale loc = Locale.getDefault();
3965 if (isJava7orNewer) {
3977 loc = (Locale)mGetDefault.invoke(null, cat);
3978 } catch (InvocationTargetException e) {
3979 // fall through - use the base default
3980 } catch (IllegalArgumentException e) {
3981 // fall through - use the base default
3982 } catch (IllegalAccessException e) {
3983 // fall through - use the base default
3990 public static void setDefault(Category category, Locale newLocale) {
3991 if (isJava7orNewer) {
4003 mSetDefault.invoke(null, cat, newLocale);
4004 } catch (InvocationTargetException e) {
4005 // fall through - no effects
4006 } catch (IllegalArgumentException e) {
4007 // fall through - no effects
4008 } catch (IllegalAccessException e) {
4009 // fall through - no effects