gitweb.fperrin.net Git - Dictionary.git/blobdiff - src/com/hughes/android/dictionary/engine/Language.java
About dialog, added pictures, multi word search.
[Dictionary.git] / src / com / hughes / android / dictionary / engine / Language.java
old mode 100755 (executable)
new mode 100644 (file)
index b4d8558..c8c7938
+// Copyright 2011 Google Inc. All Rights Reserved.\r
+//\r
+// Licensed under the Apache License, Version 2.0 (the "License");\r
+// you may not use this file except in compliance with the License.\r
+// You may obtain a copy of the License at\r
+//\r
+//     http://www.apache.org/licenses/LICENSE-2.0\r
+//\r
+// Unless required by applicable law or agreed to in writing, software\r
+// distributed under the License is distributed on an "AS IS" BASIS,\r
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\r
+// See the License for the specific language governing permissions and\r
+// limitations under the License.\r
+\r
 package com.hughes.android.dictionary.engine;\r
 \r
-import java.util.Comparator;\r
+import java.util.ArrayList;\r
 import java.util.LinkedHashMap;\r
 import java.util.Locale;\r
 import java.util.Map;\r
+import java.util.regex.Pattern;\r
 \r
+import com.hughes.android.dictionary.R;\r
 import com.ibm.icu.text.Collator;\r
 \r
 public class Language {\r
 \r
-  static final Map<String, Language> symbolToLangauge = new LinkedHashMap<String, Language>();\r
+  public static final Map<String,Integer> isoCodeToResourceId = new LinkedHashMap<String,Integer>();\r
+  static {\r
+    isoCodeToResourceId.put("AF", R.string.AF);\r
+    isoCodeToResourceId.put("SQ", R.string.SQ);\r
+    isoCodeToResourceId.put("AR", R.string.AR);\r
+    isoCodeToResourceId.put("HY", R.string.HY);\r
+    isoCodeToResourceId.put("BE", R.string.BE);\r
+    isoCodeToResourceId.put("BN", R.string.BN);\r
+    isoCodeToResourceId.put("BS", R.string.BS);\r
+    isoCodeToResourceId.put("BG", R.string.BG);\r
+    isoCodeToResourceId.put("CA", R.string.CA);\r
+    isoCodeToResourceId.put("HR", R.string.HR);\r
+    isoCodeToResourceId.put("CS", R.string.CS);\r
+    isoCodeToResourceId.put("ZH", R.string.ZH);\r
+    isoCodeToResourceId.put("DA", R.string.DA);\r
+    isoCodeToResourceId.put("NL", R.string.NL);\r
+    isoCodeToResourceId.put("EN", R.string.EN);\r
+    isoCodeToResourceId.put("EO", R.string.EO);\r
+    isoCodeToResourceId.put("ET", R.string.ET);\r
+    isoCodeToResourceId.put("FI", R.string.FI);\r
+    isoCodeToResourceId.put("FR", R.string.FR);\r
+    isoCodeToResourceId.put("DE", R.string.DE);\r
+    isoCodeToResourceId.put("EL", R.string.EL);\r
+    isoCodeToResourceId.put("haw", R.string.haw);\r
+    isoCodeToResourceId.put("HE", R.string.HE);\r
+    isoCodeToResourceId.put("HI", R.string.HI);\r
+    isoCodeToResourceId.put("HU", R.string.HU);\r
+    isoCodeToResourceId.put("IS", R.string.IS);\r
+    isoCodeToResourceId.put("ID", R.string.ID);\r
+    isoCodeToResourceId.put("GA", R.string.GA);\r
+    isoCodeToResourceId.put("IT", R.string.IT);\r
+    isoCodeToResourceId.put("LA", R.string.LA);\r
+    isoCodeToResourceId.put("LV", R.string.LV);\r
+    isoCodeToResourceId.put("LT", R.string.LT);\r
+    isoCodeToResourceId.put("JA", R.string.JA);\r
+    isoCodeToResourceId.put("KO", R.string.KO);\r
+    isoCodeToResourceId.put("KU", R.string.KU);\r
+    isoCodeToResourceId.put("MS", R.string.MS);\r
+    isoCodeToResourceId.put("MI", R.string.MI);\r
+    isoCodeToResourceId.put("MN", R.string.MN);\r
+    isoCodeToResourceId.put("NE", R.string.NE);\r
+    isoCodeToResourceId.put("NO", R.string.NO);\r
+    isoCodeToResourceId.put("FA", R.string.FA);\r
+    isoCodeToResourceId.put("PL", R.string.PL);\r
+    isoCodeToResourceId.put("PT", R.string.PT);\r
+    isoCodeToResourceId.put("PA", R.string.PA);\r
+    isoCodeToResourceId.put("RO", R.string.RO);\r
+    isoCodeToResourceId.put("RU", R.string.RU);\r
+    isoCodeToResourceId.put("SA", R.string.SA);\r
+    isoCodeToResourceId.put("SR", R.string.SR);\r
+    isoCodeToResourceId.put("SK", R.string.SK);\r
+    isoCodeToResourceId.put("SO", R.string.SO);\r
+    isoCodeToResourceId.put("ES", R.string.ES);\r
+    isoCodeToResourceId.put("SW", R.string.SW);\r
+    isoCodeToResourceId.put("SV", R.string.SV);\r
+    isoCodeToResourceId.put("TL", R.string.TL);\r
+    isoCodeToResourceId.put("TG", R.string.TG);\r
+    isoCodeToResourceId.put("TH", R.string.TH);\r
+    isoCodeToResourceId.put("BO", R.string.BO);\r
+    isoCodeToResourceId.put("TR", R.string.TR);\r
+    isoCodeToResourceId.put("UK", R.string.UK);\r
+    isoCodeToResourceId.put("UR", R.string.UR);\r
+    isoCodeToResourceId.put("VI", R.string.VI);\r
+    isoCodeToResourceId.put("CI", R.string.CI);\r
+    isoCodeToResourceId.put("YI", R.string.YI);\r
+    isoCodeToResourceId.put("ZU", R.string.ZU);\r
+\r
+    isoCodeToResourceId.put("AZ", R.string.AZ);\r
+    isoCodeToResourceId.put("EU", R.string.EU);\r
+    isoCodeToResourceId.put("BR", R.string.BR);\r
+    isoCodeToResourceId.put("MR", R.string.MR);\r
+    isoCodeToResourceId.put("FO", R.string.FO);\r
+    isoCodeToResourceId.put("GL", R.string.GL);\r
+    isoCodeToResourceId.put("HT", R.string.HT);\r
+    isoCodeToResourceId.put("LB", R.string.LB);\r
+    isoCodeToResourceId.put("MK", R.string.MK);\r
+\r
+    // Hack to allow lower-case ISO codes to work:\r
+    for (final String isoCode : new ArrayList<String>(isoCodeToResourceId.keySet())) {\r
+      isoCodeToResourceId.put(isoCode.toLowerCase(), isoCodeToResourceId.get(isoCode));\r
+    }\r
 \r
-  final String symbol;\r
-  final Locale locale;\r
+  }\r
 \r
-  Collator sortCollator;\r
-  final Comparator<String> sortComparator;\r
 \r
-  private Collator findCollator;\r
-  final Comparator<String> findComparator;\r
+  private static final Map<String, Language> registry = new LinkedHashMap<String, Language>();\r
 \r
-  public Language(final Locale locale) {\r
-    this.symbol = locale.getLanguage();\r
-    this.locale = locale;\r
+  final String isoCode;\r
+  final Locale locale;\r
+  \r
+  private Collator collator;\r
 \r
-    this.sortComparator = new Comparator<String>() {\r
-      public int compare(final String s1, final String s2) {\r
-        return getSortCollator().compare(textNorm(s1, false), textNorm(s2, false));\r
-      }\r
-    };\r
-\r
-    this.findComparator = new Comparator<String>() {\r
-      public int compare(final String s1, final String s2) {\r
-        return getFindCollator().compare(textNorm(s1, false), textNorm(s2, false));\r
-      }\r
-    };\r
-    \r
-    symbolToLangauge.put(symbol.toLowerCase(), this);\r
-  }\r
+  private Language(final Locale locale, final String isoCode) {\r
+    this.locale = locale;\r
+    this.isoCode = isoCode;\r
 \r
-  public String textNorm(final String s, final boolean toLower) {\r
-    return toLower ? s.toLowerCase() : s;\r
+    registry.put(isoCode.toLowerCase(), this);\r
   }\r
 \r
   @Override\r
@@ -48,82 +129,63 @@ public class Language {
     return locale.toString();\r
   }\r
   \r
-  public String getSymbol() {\r
-    return symbol;\r
+  public String getIsoCode() {\r
+    return isoCode;\r
   }\r
   \r
-  public synchronized Collator getFindCollator() {\r
-    if (findCollator == null) {\r
-      findCollator = Collator.getInstance(locale);\r
-      findCollator.setDecomposition(Collator.CANONICAL_DECOMPOSITION);\r
-      findCollator.setStrength(Collator.SECONDARY);\r
+  public synchronized Collator getCollator() {\r
+    if (collator == null) {\r
+      this.collator = Collator.getInstance(locale);\r
+      this.collator.setStrength(Collator.IDENTICAL);\r
     }\r
-    return findCollator;\r
+    return collator;\r
   }\r
-\r
-  public synchronized Collator getSortCollator() {\r
-    if (sortCollator == null) {\r
-      sortCollator = Collator.getInstance(locale);\r
-      sortCollator.setDecomposition(Collator.CANONICAL_DECOMPOSITION);\r
-      sortCollator.setStrength(Collator.IDENTICAL);\r
-    }\r
-    return sortCollator;\r
+  \r
+  public String getDefaultNormalizerRules() {\r
+    return ":: Any-Latin; ' ' > ; :: Lower; :: NFD; :: [:Nonspacing Mark:] Remove; :: NFC ;";\r
   }\r
-\r
+  \r
+  /**\r
+   * A practical pattern to identify strong RTL characters. This pattern is not\r
+   * completely correct according to the Unicode standard. It is simplified for\r
+   * performance and small code size.\r
+   */\r
+  private static final String rtlChars =\r
+      "\u0591-\u07FF\uFB1D-\uFDFD\uFE70-\uFEFC";\r
+\r
+  private static final String puncChars =\r
+      "\\[\\]\\(\\)\\{\\}\\=";\r
+\r
+  private static final Pattern RTL_LEFT_BOUNDARY = Pattern.compile("(["+ puncChars +"])([" + rtlChars + "])");\r
+  private static final Pattern RTL_RIGHT_BOUNDARY = Pattern.compile("([" + rtlChars + "])(["+ puncChars +"])");\r
+  \r
+  public static String fixBidiText(String text) {\r
+//    text = RTL_LEFT_BOUNDARY.matcher(text).replaceAll("$1\u200e $2");\r
+//    text = RTL_RIGHT_BOUNDARY.matcher(text).replaceAll("$1 \u200e$2");\r
+    return text;\r
+  }\r
+  \r
   // ----------------------------------------------------------------\r
 \r
-  public static final Language en = new Language(Locale.ENGLISH);\r
-  public static final Language fr = new Language(Locale.FRENCH);\r
-  public static final Language it = new Language(Locale.ITALIAN);\r
+  public static final Language en = new Language(Locale.ENGLISH, "EN");\r
+  public static final Language fr = new Language(Locale.FRENCH, "FR");\r
+  public static final Language it = new Language(Locale.ITALIAN, "IT");\r
 \r
-  public static final Language de = new Language(Locale.GERMAN) {\r
+  public static final Language de = new Language(Locale.GERMAN, "DE") {\r
     @Override\r
-    public String textNorm(String token, final boolean toLower) {\r
-      if (toLower) {\r
-        token = token.toLowerCase();\r
-      }\r
-      boolean sub = false;\r
-      // This is meant to be fast: occurrences of ae, oe, ue are probably rare.\r
-      for (int ePos = token.indexOf('e', 1); ePos != -1; ePos = token.indexOf(\r
-          'e', ePos + 1)) {\r
-        final char pre = Character.toLowerCase(token.charAt(ePos - 1));\r
-        if (pre == 'a' || pre == 'o' || pre == 'u') {\r
-          sub = true;\r
-          break;\r
-        }\r
-      }\r
-      if (!sub) {\r
-        return token;\r
-      }\r
-      \r
-      token = token.replaceAll("ae", "ä");\r
-      token = token.replaceAll("oe", "ö");\r
-      token = token.replaceAll("ue", "ü");\r
-\r
-      token = token.replaceAll("Ae", "Ä");\r
-      token = token.replaceAll("Oe", "Ö");\r
-      token = token.replaceAll("Ue", "Ü");\r
-\r
-      token = token.replaceAll("AE", "Ä");\r
-      token = token.replaceAll("OE", "Ö");\r
-      token = token.replaceAll("UE", "Ü");\r
-      \r
-      return token;   \r
+    public String getDefaultNormalizerRules() {\r
+      return ":: Lower; 'ae' > 'ä'; 'oe' > 'ö'; 'ue' > 'ü'; 'ß' > 'ss'; ";\r
     }\r
   };\r
   \r
-  static {\r
-    for (final String lang : Locale.getISOLanguages()) {\r
-      if (lookup(lang) == null) {\r
-        new Language(new Locale(lang));\r
-      }\r
-    }\r
-  }\r
-\r
   // ----------------------------------------------------------------\r
 \r
-  public static Language lookup(final String symbol) {\r
-    return symbolToLangauge.get(symbol.toLowerCase());\r
+  public static synchronized Language lookup(final String isoCode) {\r
+    Language lang = registry.get(isoCode.toLowerCase());\r
+    if (lang == null) {\r
+      lang = new Language(new Locale(isoCode), isoCode);\r
+    }\r
+    return lang;\r
   }\r
 \r
 }\r