-// Copyright 2011 Google Inc. All Rights Reserved.\r
-//\r
-// Licensed under the Apache License, Version 2.0 (the "License");\r
-// you may not use this file except in compliance with the License.\r
-// You may obtain a copy of the License at\r
-//\r
-// http://www.apache.org/licenses/LICENSE-2.0\r
-//\r
-// Unless required by applicable law or agreed to in writing, software\r
-// distributed under the License is distributed on an "AS IS" BASIS,\r
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\r
-// See the License for the specific language governing permissions and\r
-// limitations under the License.\r
-\r
-package com.hughes.android.dictionary.engine;\r
-\r
-import java.util.ArrayList;\r
-import java.util.LinkedHashMap;\r
-import java.util.Locale;\r
-import java.util.Map;\r
-import java.util.regex.Pattern;\r
-\r
-import com.hughes.android.dictionary.R;\r
-import com.ibm.icu.text.Collator;\r
-\r
-public class Language {\r
- \r
- public static final class LanguageResources {\r
- public final String englishName;\r
- public final int nameId;\r
- public final int flagId;\r
- \r
- private LanguageResources(final String englishName, int nameId, int flagId) {\r
- this.englishName = englishName;\r
- this.nameId = nameId;\r
- this.flagId = flagId;\r
- }\r
-\r
- private LanguageResources(final String englishName, int nameId) {\r
- this(englishName, nameId, 0);\r
- }\r
-}\r
-\r
- public static final Map<String,LanguageResources> isoCodeToResources = new LinkedHashMap<String,LanguageResources>();\r
- static {\r
- isoCodeToResources.put("AF", new LanguageResources("Afrikaans", R.string.AF));\r
- isoCodeToResources.put("SQ", new LanguageResources("Albanian", R.string.SQ));\r
- isoCodeToResources.put("AR", new LanguageResources("Arabic", R.string.AR));\r
- isoCodeToResources.put("HY", new LanguageResources("Armenian", R.string.HY));\r
- isoCodeToResources.put("BE", new LanguageResources("Belarusian", R.string.BE));\r
- isoCodeToResources.put("BN", new LanguageResources("Bengali", R.string.BN));\r
- isoCodeToResources.put("BS", new LanguageResources("Bosnian", R.string.BS));\r
- isoCodeToResources.put("BG", new LanguageResources("Bulgarian", R.string.BG));\r
- isoCodeToResources.put("CA", new LanguageResources("Catalan", R.string.CA));\r
- isoCodeToResources.put("HR", new LanguageResources("Croatian", R.string.HR));\r
- isoCodeToResources.put("CS", new LanguageResources("Czech", R.string.CS));\r
- isoCodeToResources.put("ZH", new LanguageResources("Chinese", R.string.ZH));\r
- isoCodeToResources.put("DA", new LanguageResources("Danish", R.string.DA));\r
- isoCodeToResources.put("NL", new LanguageResources("Dutch", R.string.NL));\r
- isoCodeToResources.put("EN", new LanguageResources("English", R.string.EN));\r
- isoCodeToResources.put("EO", new LanguageResources("Esperanto", R.string.EO));\r
- isoCodeToResources.put("ET", new LanguageResources("Estonian", R.string.ET));\r
- isoCodeToResources.put("FI", new LanguageResources("Finnish", R.string.FI));\r
- isoCodeToResources.put("FR", new LanguageResources("French", R.string.FR));\r
- isoCodeToResources.put("DE", new LanguageResources("German", R.string.DE));\r
- isoCodeToResources.put("EL", new LanguageResources("Greek", R.string.EL));\r
- isoCodeToResources.put("haw", new LanguageResources("Hawaiian", R.string.haw));\r
- isoCodeToResources.put("HE", new LanguageResources("Hebrew", R.string.HE));\r
- isoCodeToResources.put("HI", new LanguageResources("Hindi", R.string.HI));\r
- isoCodeToResources.put("HU", new LanguageResources("Hungarian", R.string.HU));\r
- isoCodeToResources.put("IS", new LanguageResources("Icelandic", R.string.IS));\r
- isoCodeToResources.put("ID", new LanguageResources("Indonesian", R.string.ID));\r
- isoCodeToResources.put("GA", new LanguageResources("Gaelic (Irish, Scottish)", R.string.GA));\r
- isoCodeToResources.put("IT", new LanguageResources("Italian", R.string.IT));\r
- isoCodeToResources.put("LA", new LanguageResources("Latin", R.string.LA));\r
- isoCodeToResources.put("LV", new LanguageResources("Latvian", R.string.LV));\r
- isoCodeToResources.put("LT", new LanguageResources("Lithuanian", R.string.LT));\r
- isoCodeToResources.put("JA", new LanguageResources("Japanese", R.string.JA));\r
- isoCodeToResources.put("KO", new LanguageResources("Korean", R.string.KO));\r
- isoCodeToResources.put("KU", new LanguageResources("Kurdish", R.string.KU));\r
- isoCodeToResources.put("MS", new LanguageResources("Malay", R.string.MS));\r
- isoCodeToResources.put("MI", new LanguageResources("Maori", R.string.MI));\r
- isoCodeToResources.put("MN", new LanguageResources("Mongolian", R.string.MN));\r
- isoCodeToResources.put("NE", new LanguageResources("Nepali", R.string.NE));\r
- isoCodeToResources.put("NO", new LanguageResources("Norwegian", R.string.NO));\r
- isoCodeToResources.put("FA", new LanguageResources("Persian", R.string.FA));\r
- isoCodeToResources.put("PL", new LanguageResources("Polish", R.string.PL));\r
- isoCodeToResources.put("PT", new LanguageResources("Portuguese", R.string.PT));\r
- isoCodeToResources.put("PA", new LanguageResources("Punjabi", R.string.PA));\r
- isoCodeToResources.put("RO", new LanguageResources("Romanian", R.string.RO));\r
- isoCodeToResources.put("RU", new LanguageResources("Russian", R.string.RU));\r
- isoCodeToResources.put("SA", new LanguageResources("Sanskrit", R.string.SA));\r
- isoCodeToResources.put("SR", new LanguageResources("Serbian", R.string.SR));\r
- isoCodeToResources.put("SK", new LanguageResources("Slovak", R.string.SK));\r
- isoCodeToResources.put("SO", new LanguageResources("Somali", R.string.SO));\r
- isoCodeToResources.put("ES", new LanguageResources("Spanish", R.string.ES));\r
- isoCodeToResources.put("SW", new LanguageResources("Swahili", R.string.SW));\r
- isoCodeToResources.put("SV", new LanguageResources("Swedish", R.string.SV));\r
- isoCodeToResources.put("TL", new LanguageResources("Tagalog", R.string.TL));\r
- isoCodeToResources.put("TG", new LanguageResources("Tajik", R.string.TG));\r
- isoCodeToResources.put("TH", new LanguageResources("Thai", R.string.TH));\r
- isoCodeToResources.put("BO", new LanguageResources("Tibetan", R.string.BO));\r
- isoCodeToResources.put("TR", new LanguageResources("Turkish", R.string.TR));\r
- isoCodeToResources.put("UK", new LanguageResources("Ukrainian", R.string.UK));\r
- isoCodeToResources.put("UR", new LanguageResources("Urdu", R.string.UR));\r
- isoCodeToResources.put("VI", new LanguageResources("Vietnamese", R.string.VI));\r
- isoCodeToResources.put("CI", new LanguageResources("Welsh", R.string.CI));\r
- isoCodeToResources.put("YI", new LanguageResources("Yiddish", R.string.YI));\r
- isoCodeToResources.put("ZU", new LanguageResources("Zulu", R.string.ZU));\r
-\r
- isoCodeToResources.put("AZ", new LanguageResources("Azeri", R.string.AZ));\r
- isoCodeToResources.put("EU", new LanguageResources("Basque", R.string.EU));\r
- isoCodeToResources.put("BR", new LanguageResources("Breton", R.string.BR));\r
- isoCodeToResources.put("MR", new LanguageResources("Burmese", R.string.MR));\r
- isoCodeToResources.put("FO", new LanguageResources("Faroese", R.string.FO));\r
- isoCodeToResources.put("GL", new LanguageResources("Galician", R.string.GL));\r
- isoCodeToResources.put("KA", new LanguageResources("Georgian", R.string.KA));\r
- isoCodeToResources.put("HT", new LanguageResources("Haitian Creole", R.string.HT));\r
- isoCodeToResources.put("LB", new LanguageResources("Luxembourgish", R.string.LB));\r
- isoCodeToResources.put("MK", new LanguageResources("Macedonian", R.string.MK));\r
-\r
- // Hack to allow lower-case ISO codes to work:\r
- for (final String isoCode : new ArrayList<String>(isoCodeToResources.keySet())) {\r
- isoCodeToResources.put(isoCode.toLowerCase(), isoCodeToResources.get(isoCode));\r
- }\r
-\r
- }\r
-\r
-\r
- private static final Map<String, Language> registry = new LinkedHashMap<String, Language>();\r
-\r
- final String isoCode;\r
- final Locale locale;\r
- \r
- private Collator collator;\r
-\r
- private Language(final Locale locale, final String isoCode) {\r
- this.locale = locale;\r
- this.isoCode = isoCode;\r
-\r
- registry.put(isoCode.toLowerCase(), this);\r
- }\r
-\r
- @Override\r
- public String toString() {\r
- return locale.toString();\r
- }\r
- \r
- public String getIsoCode() {\r
- return isoCode;\r
- }\r
- \r
- public synchronized Collator getCollator() {\r
- if (collator == null) {\r
- this.collator = Collator.getInstance(locale);\r
- this.collator.setStrength(Collator.IDENTICAL);\r
- }\r
- return collator;\r
- }\r
- \r
- public String getDefaultNormalizerRules() {\r
- return ":: Any-Latin; ' ' > ; :: Lower; :: NFD; :: [:Nonspacing Mark:] Remove; :: NFC ;";\r
- }\r
- \r
- /**\r
- * A practical pattern to identify strong RTL characters. This pattern is not\r
- * completely correct according to the Unicode standard. It is simplified for\r
- * performance and small code size.\r
- */\r
- private static final String rtlChars =\r
- "\u0591-\u07FF\uFB1D-\uFDFD\uFE70-\uFEFC";\r
-\r
- private static final String puncChars =\r
- "\\[\\]\\(\\)\\{\\}\\=";\r
-\r
- private static final Pattern RTL_LEFT_BOUNDARY = Pattern.compile("(["+ puncChars +"])([" + rtlChars + "])");\r
- private static final Pattern RTL_RIGHT_BOUNDARY = Pattern.compile("([" + rtlChars + "])(["+ puncChars +"])");\r
- \r
- public static String fixBidiText(String text) {\r
-// text = RTL_LEFT_BOUNDARY.matcher(text).replaceAll("$1\u200e $2");\r
-// text = RTL_RIGHT_BOUNDARY.matcher(text).replaceAll("$1 \u200e$2");\r
- return text;\r
- }\r
- \r
- // ----------------------------------------------------------------\r
-\r
- public static final Language en = new Language(Locale.ENGLISH, "EN");\r
- public static final Language fr = new Language(Locale.FRENCH, "FR");\r
- public static final Language it = new Language(Locale.ITALIAN, "IT");\r
-\r
- public static final Language de = new Language(Locale.GERMAN, "DE") {\r
- @Override\r
- public String getDefaultNormalizerRules() {\r
- return ":: Lower; 'ae' > 'ä'; 'oe' > 'ö'; 'ue' > 'ü'; 'ß' > 'ss'; ";\r
- }\r
- };\r
- \r
- // ----------------------------------------------------------------\r
-\r
- public static synchronized Language lookup(final String isoCode) {\r
- Language lang = registry.get(isoCode.toLowerCase());\r
- if (lang == null) {\r
- lang = new Language(new Locale(isoCode), isoCode);\r
- }\r
- return lang;\r
- }\r
-\r
-}\r
+// Copyright 2011 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package com.hughes.android.dictionary.engine;
+
+import java.util.Comparator;
+import java.util.HashMap;
+import java.util.Locale;
+import java.util.Map;
+
+import com.hughes.android.dictionary.CollatorWrapper;
+import com.hughes.android.dictionary.DictionaryApplication;
+
+/**
+ * A language known to the dictionary engine, identified by an ISO code and
+ * backed by a {@link Locale}.
+ *
+ * <p>Every instance registers itself in a static registry keyed by its
+ * lower-cased ISO code, so {@link #lookup(String)} returns the same instance
+ * for the same code regardless of the case it is written in.
+ */
+public class Language {
+
+    /**
+     * Display resources attached to a language: its English name plus two
+     * integer ids.
+     */
+    public static final class LanguageResources {
+        /** English name of the language. */
+        final String englishName;
+        /** Id of the name resource (presumably a string resource id; confirm against callers). */
+        public final int nameId;
+        /** Id of the flag resource; 0 when the two-argument constructor was used. */
+        public final int flagId;
+
+        /**
+         * @param englishName English name of the language
+         * @param nameId id of the name resource
+         * @param flagId id of the flag resource
+         */
+        public LanguageResources(final String englishName, int nameId, int flagId) {
+            this.englishName = englishName;
+            this.nameId = nameId;
+            this.flagId = flagId;
+        }
+
+        /**
+         * Creates resources without a flag; {@link #flagId} is set to 0.
+         *
+         * @param englishName English name of the language
+         * @param nameId id of the name resource
+         */
+        public LanguageResources(final String englishName, int nameId) {
+            this(englishName, nameId, 0);
+        }
+    }
+
+    /**
+     * Fallback ordering used when the collator is disabled: compares both
+     * arguments as strings, ignoring case. The comparator is stateless, so a
+     * single shared instance is handed out instead of allocating one per call.
+     * Non-String arguments cause a {@link ClassCastException}.
+     */
+    private static final Comparator<Object> CASE_INSENSITIVE_COMPARATOR = new Comparator<Object>() {
+        @Override
+        public int compare(Object o, Object t1) {
+            return ((String) o).compareToIgnoreCase((String) t1);
+        }
+    };
+
+    /**
+     * All languages created so far, keyed by {@link #registryKey(String)}.
+     * Must be declared before the predefined language constants below, whose
+     * constructors write to it during class initialisation.
+     */
+    private static final Map<String, Language> registry = new HashMap<>();
+
+    private final String isoCode;
+    private final Locale locale;
+
+    /**
+     * Creates a language and registers it under its lower-cased ISO code,
+     * replacing any previous registration for the same key.
+     *
+     * @param locale locale backing this language
+     * @param isoCode ISO code, stored exactly as given
+     */
+    private Language(final Locale locale, final String isoCode) {
+        this.locale = locale;
+        this.isoCode = isoCode;
+
+        registry.put(registryKey(isoCode), this);
+    }
+
+    /**
+     * Builds the registry key for an ISO code.
+     *
+     * <p>Lower-casing uses {@link Locale#ROOT} rather than the default locale:
+     * under a Turkish or Azeri default locale "IT" would otherwise become
+     * "\u0131t" (dotless i), so a lookup could miss a language that was
+     * registered while a different default locale was active.
+     *
+     * @param isoCode ISO code in any case; must not be null
+     * @return the locale-independent lower-case form of {@code isoCode}
+     */
+    private static String registryKey(final String isoCode) {
+        return isoCode.toLowerCase(Locale.ROOT);
+    }
+
+    /** Returns the string form of the backing locale. */
+    @Override
+    public String toString() {
+        return locale.toString();
+    }
+
+    /** Returns the ISO code exactly as it was passed at construction. */
+    public String getIsoCode() {
+        return isoCode;
+    }
+
+    /**
+     * Returns the comparator used to order entries of this language.
+     *
+     * @return a case-insensitive string comparator when
+     *         {@code DictionaryApplication.USE_COLLATOR} is false, otherwise
+     *         the result of {@code CollatorWrapper.getInstanceStrengthIdentical}
+     *         for this language's locale
+     */
+    public synchronized Comparator<Object> getCollator() {
+        if (!DictionaryApplication.USE_COLLATOR) {
+            return CASE_INSENSITIVE_COMPARATOR;
+        }
+        // TODO: consider if this should be cached - but must be thread-safe
+        return CollatorWrapper.getInstanceStrengthIdentical(locale);
+    }
+
+    /**
+     * Returns the default normalizer rule string for this language.
+     * Individual languages may override it (see {@link #de}).
+     */
+    public String getDefaultNormalizerRules() {
+        return ":: Any-Latin; ' ' > ; :: Lower; :: NFD; :: [:Nonspacing Mark:] Remove; :: NFC ;";
+    }
+
+    /**
+     * A practical pattern to identify strong RTL characters. This pattern is
+     * not completely correct according to the Unicode standard. It is
+     * simplified for performance and small code size.
+     *
+     * <p>Not referenced anywhere in this class at the moment.
+     */
+    private static final String rtlChars =
+        "\u0591-\u07FF\uFB1D-\uFDFD\uFE70-\uFEFC";
+
+    /**
+     * Currently a no-op: returns {@code text} unchanged.
+     *
+     * @param text text to process
+     * @return the same {@code text} reference
+     */
+    @SuppressWarnings("unused")
+    public static String fixBidiText(String text) {
+        // TODO: RTL text (e.g. arabic) in parenthesis might need extra
+        // \u200e markers sometimes - check what exactly is going on there.
+        return text;
+    }
+
+    // ----------------------------------------------------------------
+
+    /** Predefined English language, registered under "en". */
+    public static final Language en = new Language(Locale.ENGLISH, "EN");
+    /** Predefined Italian language, registered under "it". */
+    public static final Language it = new Language(Locale.ITALIAN, "IT");
+
+    /** Predefined German language with its own normalizer rules, registered under "de". */
+    public static final Language de = new Language(Locale.GERMAN, "DE") {
+        @Override
+        public String getDefaultNormalizerRules() {
+            return ":: Lower; 'ae' > 'ä'; 'oe' > 'ö'; 'ue' > 'ü'; 'ß' > 'ss'; ";
+        }
+    };
+
+    // ----------------------------------------------------------------
+
+    /**
+     * Returns the language registered for the given ISO code, creating and
+     * registering a new one when none exists yet.
+     *
+     * @param isoCode ISO code in any case; must not be null
+     * @return the registered language, never null
+     * @throws NullPointerException if {@code isoCode} is null
+     */
+    public static synchronized Language lookup(final String isoCode) {
+        final Language known = registry.get(registryKey(isoCode));
+        if (known != null) {
+            return known;
+        }
+        // The constructor registers the new instance as a side effect.
+        return new Language(new Locale(isoCode), isoCode);
+    }
+
+}