1 // Copyright 2011 Google Inc. All Rights Reserved.
\r
3 // Licensed under the Apache License, Version 2.0 (the "License");
\r
4 // you may not use this file except in compliance with the License.
\r
5 // You may obtain a copy of the License at
\r
7 // http://www.apache.org/licenses/LICENSE-2.0
\r
9 // Unless required by applicable law or agreed to in writing, software
\r
10 // distributed under the License is distributed on an "AS IS" BASIS,
\r
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
\r
12 // See the License for the specific language governing permissions and
\r
13 // limitations under the License.
\r
15 package com.hughes.android.dictionary.engine;
\r
17 import com.hughes.android.dictionary.R;
\r
18 import com.ibm.icu.text.Collator;
\r
20 import java.util.ArrayList;
\r
21 import java.util.LinkedHashMap;
\r
22 import java.util.Locale;
\r
23 import java.util.Map;
\r
24 import java.util.regex.Pattern;
\r
26 public class Language {
\r
28 public static final class LanguageResources {
\r
29 public final String englishName;
\r
30 public final int nameId;
\r
31 public final int flagId;
\r
33 private LanguageResources(final String englishName, int nameId, int flagId) {
\r
34 this.englishName = englishName;
\r
35 this.nameId = nameId;
\r
36 this.flagId = flagId;
\r
39 private LanguageResources(final String englishName, int nameId) {
\r
40 this(englishName, nameId, 0);
\r
/**
 * Display resources for every known language, keyed by language code.
 *
 * <p>Codes are registered in the case written below (mostly upper-case two-letter codes, plus a
 * few lower-case three-letter ones); afterwards every key is additionally registered in lower
 * case, so both "EN" and "en" resolve to the same entry. The map keeps insertion order: all
 * entries as written first, followed by the lower-case aliases.
 *
 * <p>Code list: http://www.loc.gov/standards/iso639-2/php/code_list.php
 */
public static final Map<String, LanguageResources> isoCodeToResources =
        new LinkedHashMap<String, LanguageResources>();

static {
    isoCodeToResources.put("AF", new LanguageResources("Afrikaans", R.string.AF,
            R.drawable.flag_of_south_africa));
    isoCodeToResources.put("SQ", new LanguageResources("Albanian", R.string.SQ,
            R.drawable.flag_of_albania));
    isoCodeToResources.put("AR", new LanguageResources("Arabic", R.string.AR,
            R.drawable.arabic));
    isoCodeToResources.put("HY", new LanguageResources("Armenian", R.string.HY,
            R.drawable.flag_of_armenia));
    isoCodeToResources.put("BE", new LanguageResources("Belarusian", R.string.BE,
            R.drawable.flag_of_belarus));
    isoCodeToResources.put("BN", new LanguageResources("Bengali", R.string.BN));
    isoCodeToResources.put("BS", new LanguageResources("Bosnian", R.string.BS,
            R.drawable.flag_of_bosnia_and_herzegovina));
    isoCodeToResources.put("BG", new LanguageResources("Bulgarian", R.string.BG,
            R.drawable.flag_of_bulgaria));
    isoCodeToResources.put("MY", new LanguageResources("Burmese", R.string.MY,
            R.drawable.flag_of_myanmar));
    isoCodeToResources.put("ZH", new LanguageResources("Chinese", R.string.ZH,
            R.drawable.flag_of_the_peoples_republic_of_china));
    isoCodeToResources.put("cmn", new LanguageResources("Mandarin", R.string.cmn,
            R.drawable.flag_of_the_peoples_republic_of_china));
    isoCodeToResources.put("yue", new LanguageResources("Cantonese", R.string.yue,
            R.drawable.flag_of_hong_kong));
    isoCodeToResources.put("CA", new LanguageResources("Catalan", R.string.CA));
    isoCodeToResources.put("HR", new LanguageResources("Croatian", R.string.HR,
            R.drawable.flag_of_croatia));
    isoCodeToResources.put("CS", new LanguageResources("Czech", R.string.CS,
            R.drawable.flag_of_the_czech_republic));
    isoCodeToResources.put("DA", new LanguageResources("Danish", R.string.DA,
            R.drawable.flag_of_denmark));
    isoCodeToResources.put("NL", new LanguageResources("Dutch", R.string.NL,
            R.drawable.flag_of_the_netherlands));
    isoCodeToResources.put("EN", new LanguageResources("English", R.string.EN,
            R.drawable.flag_of_the_united_kingdom));
    isoCodeToResources.put("EO", new LanguageResources("Esperanto", R.string.EO,
            R.drawable.flag_of_esperanto));
    isoCodeToResources.put("ET", new LanguageResources("Estonian", R.string.ET,
            R.drawable.flag_of_estonia));
    isoCodeToResources.put("FI", new LanguageResources("Finnish", R.string.FI,
            R.drawable.flag_of_finland));
    isoCodeToResources.put("FR", new LanguageResources("French", R.string.FR,
            R.drawable.flag_of_france));
    isoCodeToResources.put("DE", new LanguageResources("German", R.string.DE,
            R.drawable.flag_of_germany));
    isoCodeToResources.put("EL", new LanguageResources("Greek", R.string.EL,
            R.drawable.flag_of_greece));
    isoCodeToResources.put("grc", new LanguageResources("Ancient Greek", R.string.grc));
    isoCodeToResources.put("haw", new LanguageResources("Hawaiian", R.string.haw,
            R.drawable.flag_of_hawaii));
    isoCodeToResources.put("HE", new LanguageResources("Hebrew", R.string.HE,
            R.drawable.flag_of_israel));
    isoCodeToResources.put("HI", new LanguageResources("Hindi", R.string.HI,
            R.drawable.hindi));
    isoCodeToResources.put("HU", new LanguageResources("Hungarian", R.string.HU,
            R.drawable.flag_of_hungary));
    isoCodeToResources.put("IS", new LanguageResources("Icelandic", R.string.IS,
            R.drawable.flag_of_iceland));
    isoCodeToResources.put("ID", new LanguageResources("Indonesian", R.string.ID,
            R.drawable.flag_of_indonesia));
    isoCodeToResources.put("GA", new LanguageResources("Irish", R.string.GA,
            R.drawable.flag_of_ireland));
    isoCodeToResources.put("GD", new LanguageResources("Scottish Gaelic", R.string.GD,
            R.drawable.flag_of_scotland));
    isoCodeToResources.put("GV", new LanguageResources("Manx", R.string.GV,
            R.drawable.flag_of_the_isle_of_man));
    isoCodeToResources.put("IT", new LanguageResources("Italian", R.string.IT,
            R.drawable.flag_of_italy));
    isoCodeToResources.put("LA", new LanguageResources("Latin", R.string.LA));
    isoCodeToResources.put("LV", new LanguageResources("Latvian", R.string.LV,
            R.drawable.flag_of_latvia));
    isoCodeToResources.put("LT", new LanguageResources("Lithuanian", R.string.LT,
            R.drawable.flag_of_lithuania));
    isoCodeToResources.put("JA", new LanguageResources("Japanese", R.string.JA,
            R.drawable.flag_of_japan));
    isoCodeToResources.put("KO", new LanguageResources("Korean", R.string.KO,
            R.drawable.flag_of_south_korea));
    isoCodeToResources.put("KU", new LanguageResources("Kurdish", R.string.KU));
    isoCodeToResources.put("MS", new LanguageResources("Malay", R.string.MS,
            R.drawable.flag_of_malaysia));
    isoCodeToResources.put("MI", new LanguageResources("Maori", R.string.MI,
            R.drawable.flag_of_new_zealand));
    isoCodeToResources.put("MN", new LanguageResources("Mongolian", R.string.MN,
            R.drawable.flag_of_mongolia));
    isoCodeToResources.put("NE", new LanguageResources("Nepali", R.string.NE,
            R.drawable.flag_of_nepal));
    isoCodeToResources.put("NO", new LanguageResources("Norwegian", R.string.NO,
            R.drawable.flag_of_norway));
    isoCodeToResources.put("FA", new LanguageResources("Persian", R.string.FA,
            R.drawable.flag_of_iran));
    isoCodeToResources.put("PL", new LanguageResources("Polish", R.string.PL,
            R.drawable.flag_of_poland));
    isoCodeToResources.put("PT", new LanguageResources("Portuguese", R.string.PT,
            R.drawable.flag_of_portugal));
    isoCodeToResources.put("PA", new LanguageResources("Punjabi", R.string.PA));
    isoCodeToResources.put("RO", new LanguageResources("Romanian", R.string.RO,
            R.drawable.flag_of_romania));
    isoCodeToResources.put("RU", new LanguageResources("Russian", R.string.RU,
            R.drawable.flag_of_russia));
    isoCodeToResources.put("SA", new LanguageResources("Sanskrit", R.string.SA));
    isoCodeToResources.put("SR", new LanguageResources("Serbian", R.string.SR,
            R.drawable.flag_of_serbia));
    isoCodeToResources.put("SK", new LanguageResources("Slovak", R.string.SK,
            R.drawable.flag_of_slovakia));
    isoCodeToResources.put("SL", new LanguageResources("Slovenian", R.string.SL,
            R.drawable.flag_of_slovenia));
    isoCodeToResources.put("SO", new LanguageResources("Somali", R.string.SO,
            R.drawable.flag_of_somalia));
    isoCodeToResources.put("ES", new LanguageResources("Spanish", R.string.ES,
            R.drawable.flag_of_spain));
    isoCodeToResources.put("SW", new LanguageResources("Swahili", R.string.SW));
    isoCodeToResources.put("SV", new LanguageResources("Swedish", R.string.SV,
            R.drawable.flag_of_sweden));
    isoCodeToResources.put("TL", new LanguageResources("Tagalog", R.string.TL));
    isoCodeToResources.put("TG", new LanguageResources("Tajik", R.string.TG,
            R.drawable.flag_of_tajikistan));
    isoCodeToResources.put("TH", new LanguageResources("Thai", R.string.TH,
            R.drawable.flag_of_thailand));
    isoCodeToResources.put("BO", new LanguageResources("Tibetan", R.string.BO));
    isoCodeToResources.put("TR", new LanguageResources("Turkish", R.string.TR,
            R.drawable.flag_of_turkey));
    isoCodeToResources.put("UK", new LanguageResources("Ukrainian", R.string.UK,
            R.drawable.flag_of_ukraine));
    isoCodeToResources.put("UR", new LanguageResources("Urdu", R.string.UR));
    isoCodeToResources.put("VI", new LanguageResources("Vietnamese", R.string.VI,
            R.drawable.flag_of_vietnam));
    // NOTE(review): ISO 639-1 lists Welsh as "cy"; the "CI" key is kept unchanged because
    // R.string.CI and existing dictionary data may depend on it - confirm before renaming.
    isoCodeToResources.put("CI", new LanguageResources("Welsh", R.string.CI,
            R.drawable.flag_of_wales_2));
    isoCodeToResources.put("YI", new LanguageResources("Yiddish", R.string.YI));
    isoCodeToResources.put("ZU", new LanguageResources("Zulu", R.string.ZU));
    isoCodeToResources.put("AZ", new LanguageResources("Azeri", R.string.AZ,
            R.drawable.flag_of_azerbaijan));
    isoCodeToResources.put("EU", new LanguageResources("Basque", R.string.EU,
            R.drawable.flag_of_the_basque_country));
    isoCodeToResources.put("BR", new LanguageResources("Breton", R.string.BR));
    isoCodeToResources.put("MR", new LanguageResources("Marathi", R.string.MR));
    isoCodeToResources.put("FO", new LanguageResources("Faroese", R.string.FO));
    isoCodeToResources.put("GL", new LanguageResources("Galician", R.string.GL,
            R.drawable.flag_of_galicia));
    isoCodeToResources.put("KA", new LanguageResources("Georgian", R.string.KA,
            R.drawable.flag_of_georgia));
    isoCodeToResources.put("HT", new LanguageResources("Haitian Creole", R.string.HT,
            R.drawable.flag_of_haiti));
    isoCodeToResources.put("LB", new LanguageResources("Luxembourgish", R.string.LB,
            R.drawable.flag_of_luxembourg));
    isoCodeToResources.put("MK", new LanguageResources("Macedonian", R.string.MK,
            R.drawable.flag_of_macedonia));
    isoCodeToResources.put("LO", new LanguageResources("Lao", R.string.LO,
            R.drawable.flag_of_laos));
    isoCodeToResources.put("ML", new LanguageResources("Malayalam", R.string.ML));
    // "SL" (Slovenian) used to be registered a second time here with identical values; the
    // redundant registration was dropped.
    isoCodeToResources.put("TA", new LanguageResources("Tamil", R.string.TA));
    isoCodeToResources.put("SH", new LanguageResources("Serbo-Croatian", R.string.SH));
    isoCodeToResources.put("SD", new LanguageResources("Sindhi", R.string.SD,
            R.drawable.flag_of_sindhi));

    // Hack to allow lower-case ISO codes to work. The key set is copied first because the map
    // is modified while iterating. Locale.US keeps the case mapping independent of the device
    // locale: with a Turkish default locale "IT".toLowerCase() would not produce "it".
    for (final String isoCode : new ArrayList<String>(isoCodeToResources.keySet())) {
        isoCodeToResources.put(isoCode.toLowerCase(Locale.US), isoCodeToResources.get(isoCode));
    }
}
\r
/**
 * Every Language instance created so far, keyed by its lower-cased ISO code. It is declared
 * ahead of the Language constants further down in this class because their constructors
 * write to it during static initialization.
 */
private static final Map<String, Language> registry = new LinkedHashMap<String, Language>();

/** ISO code exactly as passed to the constructor (case preserved). */
final String isoCode;

/** Locale associated with this language. */
final Locale locale;

/** Collator most recently created by getCollator(). */
private Collator collator;

/**
 * Creates a language and records it in the registry under its lower-cased ISO code,
 * replacing any instance previously registered under that key.
 *
 * @param locale locale associated with the language
 * @param isoCode ISO code of the language; must not be null
 */
private Language(final Locale locale, final String isoCode) {
    this.locale = locale;
    this.isoCode = isoCode;

    // Locale.US makes the key independent of the default locale; a Turkish default locale
    // would otherwise lower-case "I" to a dotless i and produce an unexpected key.
    registry.put(isoCode.toLowerCase(Locale.US), this);
}
\r
224 public String toString() {
\r
225 return locale.toString();
\r
/**
 * NOTE(review): the body of this method is not visible in this excerpt. Presumably it returns
 * the {@code isoCode} field that the constructor stores - confirm against the full file.
 */
228 public String getIsoCode() {
\r
232 public synchronized Collator getCollator() {
\r
233 // Don't think this is thread-safe...
\r
234 // if (collator == null) {
\r
235 this.collator = Collator.getInstance(locale);
\r
236 this.collator.setStrength(Collator.IDENTICAL);
\r
241 public String getDefaultNormalizerRules() {
\r
242 return ":: Any-Latin; ' ' > ; :: Lower; :: NFD; :: [:Nonspacing Mark:] Remove; :: NFC ;";
\r
246 * A practical pattern to identify strong RTL characters. This pattern is
\r
247 * not completely correct according to the Unicode standard. It is
\r
248 * simplified for performance and small code size.
\r
250 private static final String rtlChars =
\r
251 "\u0591-\u07FF\uFB1D-\uFDFD\uFE70-\uFEFC";
\r
253 private static final String puncChars =
\r
254 "\\[\\]\\(\\)\\{\\}\\=";
\r
256 private static final Pattern RTL_LEFT_BOUNDARY = Pattern.compile("([" + puncChars + "])(["
\r
257 + rtlChars + "])");
\r
258 private static final Pattern RTL_RIGHT_BOUNDARY = Pattern.compile("([" + rtlChars + "])(["
\r
259 + puncChars + "])");
\r
261 public static String fixBidiText(String text) {
\r
262 // text = RTL_LEFT_BOUNDARY.matcher(text).replaceAll("$1\u200e $2");
\r
263 // text = RTL_RIGHT_BOUNDARY.matcher(text).replaceAll("$1 \u200e$2");
\r
267 // ----------------------------------------------------------------
\r
269 public static final Language en = new Language(Locale.ENGLISH, "EN");
\r
270 public static final Language fr = new Language(Locale.FRENCH, "FR");
\r
271 public static final Language it = new Language(Locale.ITALIAN, "IT");
\r
273 public static final Language de = new Language(Locale.GERMAN, "DE") {
\r
275 public String getDefaultNormalizerRules() {
\r
276 return ":: Lower; 'ae' > 'ä'; 'oe' > 'ö'; 'ue' > 'ü'; 'ß' > 'ss'; ";
\r
280 // ----------------------------------------------------------------
\r
282 public static synchronized Language lookup(final String isoCode) {
\r
283 Language lang = registry.get(isoCode.toLowerCase());
\r
284 if (lang == null) {
\r
285 lang = new Language(new Locale(isoCode), isoCode);
\r