gitweb.fperrin.net Git - Dictionary.git/blobdiff - src/com/hughes/android/dictionary/engine/Language.java
More flags, fixed height of dictionary names so that they don't clip.
[Dictionary.git] / src / com / hughes / android / dictionary / engine / Language.java
old mode 100755 (executable)
new mode 100644 (file)
index b4d8558..5af5496
+// Copyright 2011 Google Inc. All Rights Reserved.\r
+//\r
+// Licensed under the Apache License, Version 2.0 (the "License");\r
+// you may not use this file except in compliance with the License.\r
+// You may obtain a copy of the License at\r
+//\r
+//     http://www.apache.org/licenses/LICENSE-2.0\r
+//\r
+// Unless required by applicable law or agreed to in writing, software\r
+// distributed under the License is distributed on an "AS IS" BASIS,\r
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\r
+// See the License for the specific language governing permissions and\r
+// limitations under the License.\r
+\r
 package com.hughes.android.dictionary.engine;\r
 \r
-import java.util.Comparator;\r
+import com.hughes.android.dictionary.R;\r
+import com.ibm.icu.text.Collator;\r
+\r
+import java.util.ArrayList;\r
 import java.util.LinkedHashMap;\r
 import java.util.Locale;\r
 import java.util.Map;\r
-\r
-import com.ibm.icu.text.Collator;\r
+import java.util.regex.Pattern;\r
 \r
 public class Language {\r
+  \r
+  public static final class LanguageResources {\r
+    public final String englishName;\r
+    public final int nameId;\r
+    public final int flagId;\r
+    \r
+    private LanguageResources(final String englishName, int nameId, int flagId) {\r
+      this.englishName = englishName;\r
+      this.nameId = nameId;\r
+      this.flagId = flagId;\r
+    }\r
 \r
-  static final Map<String, Language> symbolToLangauge = new LinkedHashMap<String, Language>();\r
+    private LanguageResources(final String englishName, int nameId) {\r
+      this(englishName, nameId, 0);\r
+    }\r
+  }\r
 \r
-  final String symbol;\r
-  final Locale locale;\r
+  // Useful:\r
+  // http://www.loc.gov/standards/iso639-2/php/code_list.php\r
+  public static final Map<String,LanguageResources> isoCodeToResources = new LinkedHashMap<String,LanguageResources>();\r
+  static {\r
+    isoCodeToResources.put("AF", new LanguageResources("Afrikaans", R.string.AF, R.drawable.flag_of_south_africa));\r
+    isoCodeToResources.put("SQ", new LanguageResources("Albanian", R.string.SQ, R.drawable.flag_of_albania));\r
+    isoCodeToResources.put("AR", new LanguageResources("Arabic", R.string.AR, R.drawable.arabic));\r
+    isoCodeToResources.put("HY", new LanguageResources("Armenian", R.string.HY, R.drawable.flag_of_armenia));\r
+    isoCodeToResources.put("BE", new LanguageResources("Belarusian", R.string.BE, R.drawable.flag_of_belarus));\r
+    isoCodeToResources.put("BN", new LanguageResources("Bengali", R.string.BN));\r
+    isoCodeToResources.put("BS", new LanguageResources("Bosnian", R.string.BS, R.drawable.flag_of_bosnia_and_herzegovina));\r
+    isoCodeToResources.put("BG", new LanguageResources("Bulgarian", R.string.BG, R.drawable.flag_of_bulgaria));\r
+    isoCodeToResources.put("MY", new LanguageResources("Burmese", R.string.MY, R.drawable.flag_of_myanmar));\r
+    isoCodeToResources.put("ZH", new LanguageResources("Chinese", R.string.ZH, R.drawable.flag_of_the_peoples_republic_of_china));\r
+    isoCodeToResources.put("cmn", new LanguageResources("Mandarin", R.string.cmn, R.drawable.flag_of_the_peoples_republic_of_china));\r
+    isoCodeToResources.put("yue", new LanguageResources("Cantonese", R.string.yue, R.drawable.flag_of_hong_kong));\r
+    isoCodeToResources.put("CA", new LanguageResources("Catalan", R.string.CA));\r
+    isoCodeToResources.put("HR", new LanguageResources("Croatian", R.string.HR, R.drawable.flag_of_croatia));\r
+    isoCodeToResources.put("CS", new LanguageResources("Czech", R.string.CS, R.drawable.flag_of_the_czech_republic));\r
+    isoCodeToResources.put("DA", new LanguageResources("Danish", R.string.DA, R.drawable.flag_of_denmark));\r
+    isoCodeToResources.put("NL", new LanguageResources("Dutch", R.string.NL, R.drawable.flag_of_the_netherlands));\r
+    isoCodeToResources.put("EN", new LanguageResources("English", R.string.EN, R.drawable.flag_of_the_united_kingdom));\r
+    isoCodeToResources.put("EO", new LanguageResources("Esperanto", R.string.EO, R.drawable.flag_of_esperanto));\r
+    isoCodeToResources.put("ET", new LanguageResources("Estonian", R.string.ET, R.drawable.flag_of_estonia));\r
+    isoCodeToResources.put("FI", new LanguageResources("Finnish", R.string.FI, R.drawable.flag_of_finland));\r
+    isoCodeToResources.put("FR", new LanguageResources("French", R.string.FR, R.drawable.flag_of_france));\r
+    isoCodeToResources.put("DE", new LanguageResources("German", R.string.DE, R.drawable.flag_of_germany));\r
+    isoCodeToResources.put("EL", new LanguageResources("Greek", R.string.EL, R.drawable.flag_of_greece));\r
+    isoCodeToResources.put("grc", new LanguageResources("Ancient Greek", R.string.grc));\r
+    isoCodeToResources.put("haw", new LanguageResources("Hawaiian", R.string.haw, R.drawable.flag_of_hawaii));\r
+    isoCodeToResources.put("HE", new LanguageResources("Hebrew", R.string.HE, R.drawable.flag_of_israel));\r
+    isoCodeToResources.put("HI", new LanguageResources("Hindi", R.string.HI, R.drawable.hindi));\r
+    isoCodeToResources.put("HU", new LanguageResources("Hungarian", R.string.HU, R.drawable.flag_of_hungary));\r
+    isoCodeToResources.put("IS", new LanguageResources("Icelandic", R.string.IS, R.drawable.flag_of_iceland));\r
+    isoCodeToResources.put("ID", new LanguageResources("Indonesian", R.string.ID, R.drawable.flag_of_indonesia));\r
+    isoCodeToResources.put("GA", new LanguageResources("Irish", R.string.GA, R.drawable.flag_of_ireland));\r
+    isoCodeToResources.put("GD", new LanguageResources("Scottish Gaelic", R.string.GD));\r
+    isoCodeToResources.put("GV", new LanguageResources("Manx", R.string.GV, R.drawable.flag_of_the_isle_of_man));\r
+    isoCodeToResources.put("IT", new LanguageResources("Italian", R.string.IT, R.drawable.flag_of_italy));\r
+    isoCodeToResources.put("LA", new LanguageResources("Latin", R.string.LA));\r
+    isoCodeToResources.put("LV", new LanguageResources("Latvian", R.string.LV, R.drawable.flag_of_latvia));\r
+    isoCodeToResources.put("LT", new LanguageResources("Lithuanian", R.string.LT, R.drawable.flag_of_lithuania));\r
+    isoCodeToResources.put("JA", new LanguageResources("Japanese", R.string.JA, R.drawable.flag_of_japan));\r
+    isoCodeToResources.put("KO", new LanguageResources("Korean", R.string.KO, R.drawable.flag_of_south_korea));\r
+    isoCodeToResources.put("KU", new LanguageResources("Kurdish", R.string.KU));\r
+    isoCodeToResources.put("MS", new LanguageResources("Malay", R.string.MS, R.drawable.flag_of_malaysia));\r
+    isoCodeToResources.put("MI", new LanguageResources("Maori", R.string.MI, R.drawable.flag_of_new_zealand));\r
+    isoCodeToResources.put("MN", new LanguageResources("Mongolian", R.string.MN, R.drawable.flag_of_mongolia));\r
+    isoCodeToResources.put("NE", new LanguageResources("Nepali", R.string.NE, R.drawable.flag_of_nepal));\r
+    isoCodeToResources.put("NO", new LanguageResources("Norwegian", R.string.NO, R.drawable.flag_of_norway));\r
+    isoCodeToResources.put("FA", new LanguageResources("Persian", R.string.FA, R.drawable.flag_of_iran));\r
+    isoCodeToResources.put("PL", new LanguageResources("Polish", R.string.PL, R.drawable.flag_of_poland));\r
+    isoCodeToResources.put("PT", new LanguageResources("Portuguese", R.string.PT, R.drawable.flag_of_portugal));\r
+    isoCodeToResources.put("PA", new LanguageResources("Punjabi", R.string.PA));\r
+    isoCodeToResources.put("RO", new LanguageResources("Romanian", R.string.RO, R.drawable.flag_of_romania));\r
+    isoCodeToResources.put("RU", new LanguageResources("Russian", R.string.RU, R.drawable.flag_of_russia));\r
+    isoCodeToResources.put("SA", new LanguageResources("Sanskrit", R.string.SA));\r
+    isoCodeToResources.put("SR", new LanguageResources("Serbian", R.string.SR, R.drawable.flag_of_serbia));\r
+    isoCodeToResources.put("SK", new LanguageResources("Slovak", R.string.SK, R.drawable.flag_of_slovakia));\r
+    isoCodeToResources.put("SL", new LanguageResources("Slovenian", R.string.SL, R.drawable.flag_of_slovenia));\r
+    isoCodeToResources.put("SO", new LanguageResources("Somali", R.string.SO, R.drawable.flag_of_somalia));\r
+    isoCodeToResources.put("ES", new LanguageResources("Spanish", R.string.ES, R.drawable.flag_of_spain));\r
+    isoCodeToResources.put("SW", new LanguageResources("Swahili", R.string.SW));\r
+    isoCodeToResources.put("SV", new LanguageResources("Swedish", R.string.SV, R.drawable.flag_of_sweden));\r
+    isoCodeToResources.put("TL", new LanguageResources("Tagalog", R.string.TL));\r
+    isoCodeToResources.put("TG", new LanguageResources("Tajik", R.string.TG, R.drawable.flag_of_tajikistan));\r
+    isoCodeToResources.put("TH", new LanguageResources("Thai", R.string.TH, R.drawable.flag_of_thailand));\r
+    isoCodeToResources.put("BO", new LanguageResources("Tibetan", R.string.BO));\r
+    isoCodeToResources.put("TR", new LanguageResources("Turkish", R.string.TR, R.drawable.flag_of_turkey));\r
+    isoCodeToResources.put("UK", new LanguageResources("Ukrainian", R.string.UK, R.drawable.flag_of_ukraine));\r
+    isoCodeToResources.put("UR", new LanguageResources("Urdu", R.string.UR));\r
+    isoCodeToResources.put("VI", new LanguageResources("Vietnamese", R.string.VI, R.drawable.flag_of_vietnam));\r
+    isoCodeToResources.put("CI", new LanguageResources("Welsh", R.string.CI, R.drawable.flag_of_wales_2));\r
+    isoCodeToResources.put("YI", new LanguageResources("Yiddish", R.string.YI));\r
+    isoCodeToResources.put("ZU", new LanguageResources("Zulu", R.string.ZU));\r
+    isoCodeToResources.put("AZ", new LanguageResources("Azeri", R.string.AZ, R.drawable.flag_of_azerbaijan));\r
+    isoCodeToResources.put("EU", new LanguageResources("Basque", R.string.EU, R.drawable.flag_of_the_basque_country));\r
+    isoCodeToResources.put("BR", new LanguageResources("Breton", R.string.BR));\r
+    isoCodeToResources.put("MR", new LanguageResources("Marathi", R.string.MR));\r
+    isoCodeToResources.put("FO", new LanguageResources("Faroese", R.string.FO));\r
+    isoCodeToResources.put("GL", new LanguageResources("Galician", R.string.GL, R.drawable.flag_of_galicia));\r
+    isoCodeToResources.put("KA", new LanguageResources("Georgian", R.string.KA, R.drawable.flag_of_georgia));\r
+    isoCodeToResources.put("HT", new LanguageResources("Haitian Creole", R.string.HT, R.drawable.flag_of_haiti));\r
+    isoCodeToResources.put("LB", new LanguageResources("Luxembourgish", R.string.LB, R.drawable.flag_of_luxembourg));\r
+    isoCodeToResources.put("MK", new LanguageResources("Macedonian", R.string.MK, R.drawable.flag_of_macedonia));\r
+    isoCodeToResources.put("LO", new LanguageResources("Lao", R.string.LO, R.drawable.flag_of_laos));\r
+    isoCodeToResources.put("ML", new LanguageResources("Malayalam", R.string.ML));\r
+    isoCodeToResources.put("SL", new LanguageResources("Slovenian", R.string.SL, R.drawable.flag_of_slovenia));\r
+    isoCodeToResources.put("TA", new LanguageResources("Tamil", R.string.TA));\r
+    isoCodeToResources.put("SH", new LanguageResources("Serbo-Croatian", R.string.SH));\r
+\r
+    // Hack to allow lower-case ISO codes to work:\r
+    for (final String isoCode : new ArrayList<String>(isoCodeToResources.keySet())) {\r
+      isoCodeToResources.put(isoCode.toLowerCase(), isoCodeToResources.get(isoCode));\r
+    }\r
 \r
-  Collator sortCollator;\r
-  final Comparator<String> sortComparator;\r
+  }\r
 \r
-  private Collator findCollator;\r
-  final Comparator<String> findComparator;\r
 \r
-  public Language(final Locale locale) {\r
-    this.symbol = locale.getLanguage();\r
-    this.locale = locale;\r
+  private static final Map<String, Language> registry = new LinkedHashMap<String, Language>();\r
 \r
-    this.sortComparator = new Comparator<String>() {\r
-      public int compare(final String s1, final String s2) {\r
-        return getSortCollator().compare(textNorm(s1, false), textNorm(s2, false));\r
-      }\r
-    };\r
-\r
-    this.findComparator = new Comparator<String>() {\r
-      public int compare(final String s1, final String s2) {\r
-        return getFindCollator().compare(textNorm(s1, false), textNorm(s2, false));\r
-      }\r
-    };\r
-    \r
-    symbolToLangauge.put(symbol.toLowerCase(), this);\r
-  }\r
+  final String isoCode;\r
+  final Locale locale;\r
+  \r
+  private Collator collator;\r
+\r
+  private Language(final Locale locale, final String isoCode) {\r
+    this.locale = locale;\r
+    this.isoCode = isoCode;\r
 \r
-  public String textNorm(final String s, final boolean toLower) {\r
-    return toLower ? s.toLowerCase() : s;\r
+    registry.put(isoCode.toLowerCase(), this);\r
   }\r
 \r
   @Override\r
@@ -48,82 +159,64 @@ public class Language {
     return locale.toString();\r
   }\r
   \r
-  public String getSymbol() {\r
-    return symbol;\r
+  public String getIsoCode() {\r
+    return isoCode;\r
   }\r
   \r
-  public synchronized Collator getFindCollator() {\r
-    if (findCollator == null) {\r
-      findCollator = Collator.getInstance(locale);\r
-      findCollator.setDecomposition(Collator.CANONICAL_DECOMPOSITION);\r
-      findCollator.setStrength(Collator.SECONDARY);\r
-    }\r
-    return findCollator;\r
+  public synchronized Collator getCollator() {\r
+    // Don't think this is thread-safe...\r
+//    if (collator == null) {\r
+      this.collator = Collator.getInstance(locale);\r
+      this.collator.setStrength(Collator.IDENTICAL);\r
+//    }\r
+    return collator;\r
   }\r
-\r
-  public synchronized Collator getSortCollator() {\r
-    if (sortCollator == null) {\r
-      sortCollator = Collator.getInstance(locale);\r
-      sortCollator.setDecomposition(Collator.CANONICAL_DECOMPOSITION);\r
-      sortCollator.setStrength(Collator.IDENTICAL);\r
-    }\r
-    return sortCollator;\r
+  \r
+  public String getDefaultNormalizerRules() {\r
+    return ":: Any-Latin; ' ' > ; :: Lower; :: NFD; :: [:Nonspacing Mark:] Remove; :: NFC ;";\r
   }\r
-\r
+  \r
+  /**\r
+   * A practical pattern to identify strong RTL characters. This pattern is not\r
+   * completely correct according to the Unicode standard. It is simplified for\r
+   * performance and small code size.\r
+   */\r
+  private static final String rtlChars =\r
+      "\u0591-\u07FF\uFB1D-\uFDFD\uFE70-\uFEFC";\r
+\r
+  private static final String puncChars =\r
+      "\\[\\]\\(\\)\\{\\}\\=";\r
+\r
+  private static final Pattern RTL_LEFT_BOUNDARY = Pattern.compile("(["+ puncChars +"])([" + rtlChars + "])");\r
+  private static final Pattern RTL_RIGHT_BOUNDARY = Pattern.compile("([" + rtlChars + "])(["+ puncChars +"])");\r
+  \r
+  public static String fixBidiText(String text) {\r
+//    text = RTL_LEFT_BOUNDARY.matcher(text).replaceAll("$1\u200e $2");\r
+//    text = RTL_RIGHT_BOUNDARY.matcher(text).replaceAll("$1 \u200e$2");\r
+    return text;\r
+  }\r
+  \r
   // ----------------------------------------------------------------\r
 \r
-  public static final Language en = new Language(Locale.ENGLISH);\r
-  public static final Language fr = new Language(Locale.FRENCH);\r
-  public static final Language it = new Language(Locale.ITALIAN);\r
+  public static final Language en = new Language(Locale.ENGLISH, "EN");\r
+  public static final Language fr = new Language(Locale.FRENCH, "FR");\r
+  public static final Language it = new Language(Locale.ITALIAN, "IT");\r
 \r
-  public static final Language de = new Language(Locale.GERMAN) {\r
+  public static final Language de = new Language(Locale.GERMAN, "DE") {\r
     @Override\r
-    public String textNorm(String token, final boolean toLower) {\r
-      if (toLower) {\r
-        token = token.toLowerCase();\r
-      }\r
-      boolean sub = false;\r
-      // This is meant to be fast: occurrences of ae, oe, ue are probably rare.\r
-      for (int ePos = token.indexOf('e', 1); ePos != -1; ePos = token.indexOf(\r
-          'e', ePos + 1)) {\r
-        final char pre = Character.toLowerCase(token.charAt(ePos - 1));\r
-        if (pre == 'a' || pre == 'o' || pre == 'u') {\r
-          sub = true;\r
-          break;\r
-        }\r
-      }\r
-      if (!sub) {\r
-        return token;\r
-      }\r
-      \r
-      token = token.replaceAll("ae", "ä");\r
-      token = token.replaceAll("oe", "ö");\r
-      token = token.replaceAll("ue", "ü");\r
-\r
-      token = token.replaceAll("Ae", "Ä");\r
-      token = token.replaceAll("Oe", "Ö");\r
-      token = token.replaceAll("Ue", "Ü");\r
-\r
-      token = token.replaceAll("AE", "Ä");\r
-      token = token.replaceAll("OE", "Ö");\r
-      token = token.replaceAll("UE", "Ü");\r
-      \r
-      return token;   \r
+    public String getDefaultNormalizerRules() {\r
+      return ":: Lower; 'ae' > 'ä'; 'oe' > 'ö'; 'ue' > 'ü'; 'ß' > 'ss'; ";\r
     }\r
   };\r
   \r
-  static {\r
-    for (final String lang : Locale.getISOLanguages()) {\r
-      if (lookup(lang) == null) {\r
-        new Language(new Locale(lang));\r
-      }\r
-    }\r
-  }\r
-\r
   // ----------------------------------------------------------------\r
 \r
-  public static Language lookup(final String symbol) {\r
-    return symbolToLangauge.get(symbol.toLowerCase());\r
+  public static synchronized Language lookup(final String isoCode) {\r
+    Language lang = registry.get(isoCode.toLowerCase());\r
+    if (lang == null) {\r
+      lang = new Language(new Locale(isoCode), isoCode);\r
+    }\r
+    return lang;\r
   }\r
 \r
 }\r