2 *******************************************************************************
\r
3 * Copyright (C) 2001-2010 International Business Machines Corporation and *
\r
4 * others. All Rights Reserved. *
\r
5 *******************************************************************************
\r
8 package com.ibm.icu.lang;
\r
10 import java.util.Locale;
\r
11 import java.util.MissingResourceException;
\r
13 import com.ibm.icu.impl.ICUResourceBundle;
\r
14 import com.ibm.icu.impl.UCharacterProperty;
\r
15 import com.ibm.icu.util.ULocale;
\r
16 import com.ibm.icu.util.UResourceBundle;
\r
19 * A class to reflect UTR #24: Script Names
\r
20 * (based on ISO 15924:2000, "Code for the representation of names of
\r
21 * scripts"). UTR #24 describes the basis for a new Unicode data file,
\r
25 public final class UScript {
\r
30 public static final int INVALID_CODE = -1;
\r
35 public static final int COMMON = 0; /* Zyyy */
\r
40 public static final int INHERITED = 1; /* Zinh */ /* "Code for inherited script", for non-spacing combining marks; also Qaai */
\r
45 public static final int ARABIC = 2; /* Arab */
\r
50 public static final int ARMENIAN = 3; /* Armn */
\r
55 public static final int BENGALI = 4; /* Beng */
\r
60 public static final int BOPOMOFO = 5; /* Bopo */
\r
65 public static final int CHEROKEE = 6; /* Cher */
\r
70 public static final int COPTIC = 7; /* Qaac */
\r
75 public static final int CYRILLIC = 8; /* Cyrl (Cyrs) */
\r
80 public static final int DESERET = 9; /* Dsrt */
\r
85 public static final int DEVANAGARI = 10; /* Deva */
\r
90 public static final int ETHIOPIC = 11; /* Ethi */
\r
95 public static final int GEORGIAN = 12; /* Geor (Geon; Geoa) */
\r
100 public static final int GOTHIC = 13; /* Goth */
\r
105 public static final int GREEK = 14; /* Grek */
\r
110 public static final int GUJARATI = 15; /* Gujr */
\r
115 public static final int GURMUKHI = 16; /* Guru */
\r
120 public static final int HAN = 17; /* Hani */
\r
125 public static final int HANGUL = 18; /* Hang */
\r
130 public static final int HEBREW = 19; /* Hebr */
\r
135 public static final int HIRAGANA = 20; /* Hira */
\r
140 public static final int KANNADA = 21; /* Knda */
\r
145 public static final int KATAKANA = 22; /* Kana */
\r
150 public static final int KHMER = 23; /* Khmr */
\r
155 public static final int LAO = 24; /* Laoo */
\r
160 public static final int LATIN = 25; /* Latn (Latf; Latg) */
\r
165 public static final int MALAYALAM = 26; /* Mlym */
\r
170 public static final int MONGOLIAN = 27; /* Mong */
\r
175 public static final int MYANMAR = 28; /* Mymr */
\r
180 public static final int OGHAM = 29; /* Ogam */
\r
185 public static final int OLD_ITALIC = 30; /* Ital */
\r
190 public static final int ORIYA = 31; /* Orya */
\r
195 public static final int RUNIC = 32; /* Runr */
\r
200 public static final int SINHALA = 33; /* Sinh */
\r
205 public static final int SYRIAC = 34; /* Syrc (Syrj; Syrn; Syre) */
\r
210 public static final int TAMIL = 35; /* Taml */
\r
215 public static final int TELUGU = 36; /* Telu */
\r
220 public static final int THAANA = 37; /* Thaa */
\r
225 public static final int THAI = 38; /* Thai */
\r
230 public static final int TIBETAN = 39; /* Tibt */
\r
232 * Unified Canadian Aboriginal Symbols
\r
235 public static final int CANADIAN_ABORIGINAL = 40; /* Cans */
\r
237 * Unified Canadian Aboriginal Symbols (alias)
\r
240 public static final int UCAS = CANADIAN_ABORIGINAL; /* Cans */
\r
245 public static final int YI = 41; /* Yiii */
\r
250 public static final int TAGALOG = 42; /* Tglg */
\r
255 public static final int HANUNOO = 43; /* Hano */
\r
260 public static final int BUHID = 44; /* Buhd */
\r
265 public static final int TAGBANWA = 45; /* Tagb */
\r
268 * Script in Unicode 4
\r
272 public static final int BRAILLE = 46; /* Brai */
\r
275 * Script in Unicode 4
\r
279 public static final int CYPRIOT = 47; /* Cprt */
\r
282 * Script in Unicode 4
\r
286 public static final int LIMBU = 48; /* Limb */
\r
289 * Script in Unicode 4
\r
293 public static final int LINEAR_B = 49; /* Linb */
\r
296 * Script in Unicode 4
\r
300 public static final int OSMANYA = 50; /* Osma */
\r
303 * Script in Unicode 4
\r
307 public static final int SHAVIAN = 51; /* Shaw */
\r
310 * Script in Unicode 4
\r
314 public static final int TAI_LE = 52; /* Tale */
\r
317 * Script in Unicode 4
\r
321 public static final int UGARITIC = 53; /* Ugar */
\r
323 * Script in Unicode 4.0.1
\r
326 public static final int KATAKANA_OR_HIRAGANA = 54; /*Hrkt */
\r
329 * Script in Unicode 4.1
\r
332 public static final int BUGINESE = 55; /* Bugi */
\r
334 * Script in Unicode 4.1
\r
337 public static final int GLAGOLITIC = 56; /* Glag */
\r
339 * Script in Unicode 4.1
\r
342 public static final int KHAROSHTHI = 57; /* Khar */
\r
344 * Script in Unicode 4.1
\r
347 public static final int SYLOTI_NAGRI = 58; /* Sylo */
\r
349 * Script in Unicode 4.1
\r
352 public static final int NEW_TAI_LUE = 59; /* Talu */
\r
354 * Script in Unicode 4.1
\r
357 public static final int TIFINAGH = 60; /* Tfng */
\r
359 * Script in Unicode 4.1
\r
362 public static final int OLD_PERSIAN = 61; /* Xpeo */
\r
366 * ISO 15924 script code
\r
369 public static final int BALINESE = 62; /* Bali */
\r
371 * ISO 15924 script code
\r
374 public static final int BATAK = 63; /* Batk */
\r
376 * ISO 15924 script code
\r
379 public static final int BLISSYMBOLS = 64; /* Blis */
\r
381 * ISO 15924 script code
\r
384 public static final int BRAHMI = 65; /* Brah */
\r
386 * ISO 15924 script code
\r
389 public static final int CHAM = 66; /* Cham */
\r
391 * ISO 15924 script code
\r
394 public static final int CIRTH = 67; /* Cirt */
\r
396 * ISO 15924 script code
\r
399 public static final int OLD_CHURCH_SLAVONIC_CYRILLIC = 68; /* Cyrs */
\r
401 * ISO 15924 script code
\r
404 public static final int DEMOTIC_EGYPTIAN = 69; /* Egyd */
\r
406 * ISO 15924 script code
\r
409 public static final int HIERATIC_EGYPTIAN = 70; /* Egyh */
\r
411 * ISO 15924 script code
\r
414 public static final int EGYPTIAN_HIEROGLYPHS = 71; /* Egyp */
\r
416 * ISO 15924 script code
\r
419 public static final int KHUTSURI = 72; /* Geok */
\r
421 * ISO 15924 script code
\r
424 public static final int SIMPLIFIED_HAN = 73; /* Hans */
\r
426 * ISO 15924 script code
\r
429 public static final int TRADITIONAL_HAN = 74; /* Hant */
\r
431 * ISO 15924 script code
\r
434 public static final int PAHAWH_HMONG = 75; /* Hmng */
\r
436 * ISO 15924 script code
\r
439 public static final int OLD_HUNGARIAN = 76; /* Hung */
\r
441 * ISO 15924 script code
\r
444 public static final int HARAPPAN_INDUS = 77; /* Inds */
\r
446 * ISO 15924 script code
\r
449 public static final int JAVANESE = 78; /* Java */
\r
451 * ISO 15924 script code
\r
454 public static final int KAYAH_LI = 79; /* Kali */
\r
456 * ISO 15924 script code
\r
459 public static final int LATIN_FRAKTUR = 80; /* Latf */
\r
461 * ISO 15924 script code
\r
464 public static final int LATIN_GAELIC = 81; /* Latg */
\r
466 * ISO 15924 script code
\r
469 public static final int LEPCHA = 82; /* Lepc */
\r
471 * ISO 15924 script code
\r
474 public static final int LINEAR_A = 83; /* Lina */
\r
476 * ISO 15924 script code
\r
479 public static final int MANDAEAN = 84; /* Mand */
\r
481 * ISO 15924 script code
\r
484 public static final int MAYAN_HIEROGLYPHS = 85; /* Maya */
\r
486 * ISO 15924 script code
\r
489 public static final int MEROITIC = 86; /* Mero */
\r
491 * ISO 15924 script code
\r
494 public static final int NKO = 87; /* Nkoo */
\r
496 * ISO 15924 script code
\r
499 public static final int ORKHON = 88; /* Orkh */
\r
501 * ISO 15924 script code
\r
504 public static final int OLD_PERMIC = 89; /* Perm */
\r
506 * ISO 15924 script code
\r
509 public static final int PHAGS_PA = 90; /* Phag */
\r
511 * ISO 15924 script code
\r
514 public static final int PHOENICIAN = 91; /* Phnx */
\r
516 * ISO 15924 script code
\r
519 public static final int PHONETIC_POLLARD = 92; /* Plrd */
\r
521 * ISO 15924 script code
\r
524 public static final int RONGORONGO = 93; /* Roro */
\r
526 * ISO 15924 script code
\r
529 public static final int SARATI = 94; /* Sara */
\r
531 * ISO 15924 script code
\r
534 public static final int ESTRANGELO_SYRIAC = 95; /* Syre */
\r
536 * ISO 15924 script code
\r
539 public static final int WESTERN_SYRIAC = 96; /* Syrj */
\r
541 * ISO 15924 script code
\r
544 public static final int EASTERN_SYRIAC = 97; /* Syrn */
\r
546 * ISO 15924 script code
\r
549 public static final int TENGWAR = 98; /* Teng */
\r
551 * ISO 15924 script code
\r
554 public static final int VAI = 99; /* Vaii */
\r
556 * ISO 15924 script code
\r
559 public static final int VISIBLE_SPEECH = 100;/* Visp */
\r
561 * ISO 15924 script code
\r
564 public static final int CUNEIFORM = 101;/* Xsux */
\r
566 * ISO 15924 script code
\r
569 public static final int UNWRITTEN_LANGUAGES = 102;/* Zxxx */
\r
571 * ISO 15924 script code
\r
574 public static final int UNKNOWN = 103;/* Zzzz */ /* Unknown="Code for uncoded script", for unassigned code points */
\r
576 /* Private use codes from Qaaa - Qabx are not supported*/
\r
578 * ISO 15924 script code
\r
581 public static final int CARIAN = 104;/* Cari */
\r
583 * ISO 15924 script code
\r
586 public static final int JAPANESE = 105;/* Jpan */
\r
588 * ISO 15924 script code
\r
591 public static final int LANNA = 106;/* Lana */
\r
593 * ISO 15924 script code
\r
596 public static final int LYCIAN = 107;/* Lyci */
\r
598 * ISO 15924 script code
\r
601 public static final int LYDIAN = 108;/* Lydi */
\r
603 * ISO 15924 script code
\r
606 public static final int OL_CHIKI = 109;/* Olck */
\r
608 * ISO 15924 script code
\r
611 public static final int REJANG = 110;/* Rjng */
\r
613 * ISO 15924 script code
\r
616 public static final int SAURASHTRA = 111;/* Saur */
\r
618 * ISO 15924 script code
\r
621 public static final int SIGN_WRITING = 112;/* Sgnw */
\r
623 * ISO 15924 script code
\r
626 public static final int SUNDANESE = 113;/* Sund */
\r
628 * ISO 15924 script code
\r
631 public static final int MOON = 114;/* Moon */
\r
633 * ISO 15924 script code
\r
636 public static final int MEITEI_MAYEK = 115;/* Mtei */
\r
639 * ISO 15924 script code
\r
642 public static final int IMPERIAL_ARAMAIC = 116;/* Armi */
\r
645 * ISO 15924 script code
\r
648 public static final int AVESTAN = 117;/* Avst */
\r
651 * ISO 15924 script code
\r
654 public static final int CHAKMA = 118;/* Cakm */
\r
657 * ISO 15924 script code
\r
660 public static final int KOREAN = 119;/* Kore */
\r
663 * ISO 15924 script code
\r
666 public static final int KAITHI = 120;/* Kthi */
\r
669 * ISO 15924 script code
\r
672 public static final int MANICHAEAN = 121;/* Mani */
\r
675 * ISO 15924 script code
\r
678 public static final int INSCRIPTIONAL_PAHLAVI = 122;/* Phli */
\r
681 * ISO 15924 script code
\r
684 public static final int PSALTER_PAHLAVI = 123;/* Phlp */
\r
687 * ISO 15924 script code
\r
690 public static final int BOOK_PAHLAVI = 124;/* Phlv */
\r
693 * ISO 15924 script code
\r
696 public static final int INSCRIPTIONAL_PARTHIAN = 125;/* Prti */
\r
699 * ISO 15924 script code
\r
702 public static final int SAMARITAN = 126;/* Samr */
\r
705 * ISO 15924 script code
\r
708 public static final int TAI_VIET = 127;/* Tavt */
\r
711 * ISO 15924 script code
\r
714 public static final int MATHEMATICAL_NOTATION = 128;/* Zmth */
\r
717 * ISO 15924 script code
\r
720 public static final int SYMBOLS = 129;/* Zsym */
\r
723 * ISO 15924 script code
\r
726 public static final int BAMUM = 130;/* Bamu */
\r
728 * ISO 15924 script code
\r
731 public static final int LISU = 131;/* Lisu */
\r
733 * ISO 15924 script code
\r
736 public static final int NAKHI_GEBA = 132;/* Nkgb */
\r
738 * ISO 15924 script code
\r
741 public static final int OLD_SOUTH_ARABIAN = 133;/* Sarb */
\r
747 public static final int CODE_LIMIT = 134;
\r
749 private static final String kLocaleScript = "LocaleScript";
\r
751 //private static final String INVALID_NAME = "Invalid";
\r
753 * Helper function to find the code from locale.
\r
754 * @param locale The locale.
\r
756 private static int[] findCodeFromLocale(ULocale locale) {
\r
757 ICUResourceBundle rb;
\r
760 rb = (ICUResourceBundle)UResourceBundle.getBundleInstance(ICUResourceBundle.ICU_BASE_NAME, locale);
\r
761 } catch (MissingResourceException e) {
\r
762 /* This part seems to never be called since "UResourceBundle.getBundleInstance"
\r
763 * corrects this by setting to ICUResourceBundle.FROM_DEFAULT
\r
764 * when such an invalid locale is passed.
\r
771 rb = (ICUResourceBundle)UResourceBundle.getBundleInstance(ICUResourceBundle.ICU_BASE_NAME, locale);
\r
773 // if rb is not a strict fallback of the requested locale, return null
\r
774 //if(!LocaleUtility.isFallbackOf(rb.getULocale().toString(), locale.toString())){
\r
777 //non existent locale check
\r
778 if(rb.getLoadingStatus()==ICUResourceBundle.FROM_DEFAULT && ! locale.equals(ULocale.getDefault())){
\r
781 UResourceBundle sub = rb.get(kLocaleScript);
\r
783 int[] result = new int[sub.getSize()];
\r
785 for (int i = 0; i < result.length; ++i) {
\r
786 int code = UCharacter.getPropertyValueEnum(UProperty.SCRIPT,
\r
788 result[w++] = code;
\r
792 if (w < result.length) {
\r
793 throw new IllegalStateException("bad locale data, listed " +
\r
794 result.length + " scripts but found only " + w);
\r
801 * Gets the script codes associated with the given locale or ISO 15924 abbreviation or name.
\r
802 * Returns MALAYALAM given "Malayalam" OR "Mlym".
\r
803 * Returns LATIN given "en" OR "en_US"
\r
804 * @param locale Locale
\r
805 * @return The script codes array. null if the code cannot be found.
\r
808 public static final int[] getCode(Locale locale){
\r
809 return findCodeFromLocale(ULocale.forLocale(locale));
\r
812 * Gets the script codes associated with the given locale or ISO 15924 abbreviation or name.
\r
813 * Returns MALAYALAM given "Malayalam" OR "Mlym".
\r
814 * Returns LATIN given "en" OR "en_US"
\r
815 * @param locale ULocale
\r
816 * @return The script codes array. null if the code cannot be found.
\r
819 public static final int[] getCode(ULocale locale){
\r
820 return findCodeFromLocale(locale);
\r
823 * Gets the script codes associated with the given locale or ISO 15924 abbreviation or name.
\r
824 * Returns MALAYALAM given "Malayalam" OR "Mlym".
\r
825 * Returns LATIN given "en" OR "en_US"
\r
827 * <p>Note: To search by short or long script alias only, use
\r
828 * UCharacter.getPropertyValueEnum(UProperty.SCRIPT, alias)
\r
829 * instead. This does a fast lookup with no access of the locale
\r
831 * @param nameOrAbbrOrLocale name of the script or ISO 15924 code or locale
\r
832 * @return The script codes array. null if the code cannot be found.
\r
835 public static final int[] getCode(String nameOrAbbrOrLocale){
\r
838 UCharacter.getPropertyValueEnum(UProperty.SCRIPT,
\r
839 nameOrAbbrOrLocale)
\r
841 } catch (IllegalArgumentException e) {
\r
842 return findCodeFromLocale(new ULocale(nameOrAbbrOrLocale));
\r
847 * Gets the script code associated with the given ISO 15924 abbreviation or name.
\r
848 * Returns MALAYALAM given "Malayalam" OR "Mlym".
\r
850 * @param nameOrAbbr name of the script or ISO 15924 code
\r
851 * @return The script code value or INVALID_CODE if the code cannot be found.
\r
853 * @deprecated This API is ICU internal only.
\r
855 public static final int getCodeFromName(String nameOrAbbr) {
\r
857 return UCharacter.getPropertyValueEnum(UProperty.SCRIPT,
\r
859 } catch (IllegalArgumentException e) {
\r
860 return INVALID_CODE;
\r
865 * Gets the script code associated with the given codepoint.
\r
866 * Returns UScript.MALAYALAM given 0x0D02
\r
867 * @param codepoint UChar32 codepoint
\r
868 * @return The script code
\r
871 public static final int getScript(int codepoint){
\r
872 if (codepoint >= UCharacter.MIN_VALUE & codepoint <= UCharacter.MAX_VALUE) {
\r
873 return (UCharacterProperty.INSTANCE.getAdditional(codepoint,0) & UCharacter.SCRIPT_MASK_);
\r
875 throw new IllegalArgumentException(Integer.toString(codepoint));
\r
880 * Gets a script name associated with the given script code.
\r
881 * Returns "Malayalam" given MALAYALAM
\r
882 * @param scriptCode int script code
\r
883 * @return script name as a string in full as given in TR#24
\r
886 public static final String getName(int scriptCode){
\r
887 return UCharacter.getPropertyValueName(UProperty.SCRIPT,
\r
889 UProperty.NameChoice.LONG);
\r
893 * Gets a script name associated with the given script code.
\r
894 * Returns "Mlym" given MALAYALAM
\r
895 * @param scriptCode int script code
\r
896 * @return script abbreviated name as a string as given in TR#24
\r
899 public static final String getShortName(int scriptCode){
\r
900 return UCharacter.getPropertyValueName(UProperty.SCRIPT,
\r
902 UProperty.NameChoice.SHORT);
\r
906 * Private Constructor. Never default construct
\r
908 private UScript(){}
\r