+// Copyright 2011 Google Inc. All Rights Reserved.\r
+//\r
+// Licensed under the Apache License, Version 2.0 (the "License");\r
+// you may not use this file except in compliance with the License.\r
+// You may obtain a copy of the License at\r
+//\r
+// http://www.apache.org/licenses/LICENSE-2.0\r
+//\r
+// Unless required by applicable law or agreed to in writing, software\r
+// distributed under the License is distributed on an "AS IS" BASIS,\r
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\r
+// See the License for the specific language governing permissions and\r
+// limitations under the License.\r
+\r
package com.hughes.android.dictionary.engine;\r
\r
-import java.util.Comparator;\r
import java.util.LinkedHashMap;\r
import java.util.Locale;\r
import java.util.Map;\r
\r
public class Language {\r
\r
+ /**
+  * Language code to language name(s), kept in insertion order. Keys are
+  * upper-case two-letter codes, except "haw" (lower-case, three letters).
+  * A value may list several alternative names separated by '|' (see "ZH").
+  * NOTE(review): the names presumably match wiki section titles -- confirm
+  * against the code that reads this map.
+  */
+ public static final Map<String,String> isoCodeToWikiName = new LinkedHashMap<String,String>();
+ static {
+// NOTE(review): the names below look like languages still to be added;
+// Armenian, Kurdish and Lithuanian already have entries further down.
+// Albanian
+// Armenian
+// Belarusian
+// Bengali
+// Bosnian
+// Bulgarian
+// Catalan
+// Esperanto
+// Estonian
+// Hungarian
+// Indonesian
+// Kurdish
+// Latin
+// Lithuanian
+// Nepali
+// Punjabi
+// Swahili
+ isoCodeToWikiName.put("AF", "Afrikaans");
+ isoCodeToWikiName.put("AR", "Arabic");
+ isoCodeToWikiName.put("HY", "Armenian");
+ isoCodeToWikiName.put("HR", "Croatian");
+ isoCodeToWikiName.put("CS", "Czech");
+ isoCodeToWikiName.put("ZH", "Chinese|Mandarin|Cantonese");
+ isoCodeToWikiName.put("DA", "Danish");
+ isoCodeToWikiName.put("NL", "Dutch");
+ isoCodeToWikiName.put("EN", "English");
+ isoCodeToWikiName.put("FI", "Finnish");
+ isoCodeToWikiName.put("FR", "French");
+ isoCodeToWikiName.put("DE", "German");
+ isoCodeToWikiName.put("EL", "Greek");
+ isoCodeToWikiName.put("haw", "Hawaiian");
+ isoCodeToWikiName.put("HE", "Hebrew");
+ isoCodeToWikiName.put("HI", "Hindi");
+ isoCodeToWikiName.put("IS", "Icelandic");
+ isoCodeToWikiName.put("GA", "Irish");
+ isoCodeToWikiName.put("IT", "Italian");
+ isoCodeToWikiName.put("LT", "Lithuanian");
+ isoCodeToWikiName.put("JA", "Japanese");
+ isoCodeToWikiName.put("KO", "Korean");
+ isoCodeToWikiName.put("KU", "Kurdish");
+ isoCodeToWikiName.put("MS", "Malay");
+ isoCodeToWikiName.put("MI", "Maori");
+ isoCodeToWikiName.put("MN", "Mongolian");
+ isoCodeToWikiName.put("NO", "Norwegian");
+ isoCodeToWikiName.put("FA", "Persian");
+ isoCodeToWikiName.put("PL", "Polish");
+ isoCodeToWikiName.put("PT", "Portuguese");
+ isoCodeToWikiName.put("RO", "Romanian");
+ isoCodeToWikiName.put("RU", "Russian");
+ isoCodeToWikiName.put("SA", "Sanskrit");
+ isoCodeToWikiName.put("SR", "Serbian");
+ isoCodeToWikiName.put("SO", "Somali");
+ isoCodeToWikiName.put("ES", "Spanish");
+ isoCodeToWikiName.put("SV", "Swedish");
+ isoCodeToWikiName.put("TG", "Tajik");
+ isoCodeToWikiName.put("TH", "Thai");
+ isoCodeToWikiName.put("BO", "Tibetan");
+ isoCodeToWikiName.put("TR", "Turkish");
+ isoCodeToWikiName.put("UK", "Ukrainian");
+ isoCodeToWikiName.put("VI", "Vietnamese");
+ // The ISO 639-1 code for Welsh (Cymraeg) is "cy".
+ isoCodeToWikiName.put("CY", "Welsh");
+ isoCodeToWikiName.put("YI", "Yiddish");
+ isoCodeToWikiName.put("ZU", "Zulu");
+ }
+\r
+ // Every Language constructed so far, keyed by its lower-cased symbol; the constructor fills it in.
static final Map<String, Language> symbolToLangauge = new LinkedHashMap<String, Language>();

+ // Language code, taken from locale.getLanguage() in the constructor.
final String symbol;
+ // Locale this language was created from; also used to obtain the collator.
final Locale locale;
-
- Collator sortCollator;
- final Comparator<String> sortComparator;
-
- private Collator findCollator;
- final Comparator<String> findComparator;
+ 
+ // Created on first use by getCollator().
+ private Collator collator;
\r
+ /**
+  * Creates the language for {@code locale} and registers it in
+  * {@code symbolToLangauge} under its lower-cased language code.
+  *
+  * @param locale locale whose language code becomes this language's symbol
+  */
public Language(final Locale locale) {
this.symbol = locale.getLanguage();
this.locale = locale;

- this.sortComparator = new Comparator<String>() {
- public int compare(final String s1, final String s2) {
- return getSortCollator().compare(textNorm(s1, false), textNorm(s2, false));
- }
- };
-
- this.findComparator = new Comparator<String>() {
- public int compare(final String s1, final String s2) {
- return getFindCollator().compare(textNorm(s1, false), textNorm(s2, false));
- }
- };
- 
+ // Publishes this instance in the shared registry; a later Language with the same symbol replaces it.
symbolToLangauge.put(symbol.toLowerCase(), this);
}
\r
- public String textNorm(final String s, final boolean toLower) {\r
- return toLower ? s.toLowerCase() : s;\r
- }\r
-\r
@Override\r
public String toString() {\r
return locale.toString();\r
return symbol;\r
}\r
\r
- public synchronized Collator getFindCollator() {
- if (findCollator == null) {
- findCollator = Collator.getInstance(locale);
- findCollator.setDecomposition(Collator.CANONICAL_DECOMPOSITION);
- findCollator.setStrength(Collator.SECONDARY);
+ /**
+  * Returns the collator for this language's locale, creating it on the
+  * first call with its strength set to {@code Collator.IDENTICAL}. The
+  * method is synchronized, so the lazy creation cannot race with another
+  * call on the same instance.
+  *
+  * @return the collator cached on this language
+  */
+ public synchronized Collator getCollator() {
+ if (collator == null) {
+ this.collator = Collator.getInstance(locale);
+ this.collator.setStrength(Collator.IDENTICAL);
}
- return findCollator;
+ return collator;
}
-\r
- public synchronized Collator getSortCollator() {
- if (sortCollator == null) {
- sortCollator = Collator.getInstance(locale);
- sortCollator.setDecomposition(Collator.CANONICAL_DECOMPOSITION);
- sortCollator.setStrength(Collator.IDENTICAL);
- }
- return sortCollator;
+ 
+ /**
+  * Returns the default text-normalization rule string for this language.
+  * Language-specific instances may override it (the German instance does).
+  * NOTE(review): the string reads like ICU Transliterator rules
+  * (to Latin, lower-case, decompose, drop nonspacing marks, recompose) --
+  * confirm against the code that consumes it.
+  *
+  * @return the rule string; always the same constant for this base implementation
+  */
+ public String getDefaultNormalizerRules() {
+ return ":: Any-Latin; :: Lower; :: NFD; :: [:Nonspacing Mark:] Remove; :: NFC ;";
}
-\r
// ----------------------------------------------------------------\r
\r
+ // English uses the base class behaviour unchanged.
public static final Language en = new Language(Locale.ENGLISH);

+ // German overrides the normalizer rules with its own rule string, which
+ // names lower-casing, the digraphs ae/oe/ue -> ä/ö/ü, and ß -> ss.
public static final Language de = new Language(Locale.GERMAN) {
@Override
- public String textNorm(String token, final boolean toLower) {
- if (toLower) {
- token = token.toLowerCase();
- }
- boolean sub = false;
- // This is meant to be fast: occurrences of ae, oe, ue are probably rare.
- for (int ePos = token.indexOf('e', 1); ePos != -1; ePos = token.indexOf(
- 'e', ePos + 1)) {
- final char pre = Character.toLowerCase(token.charAt(ePos - 1));
- if (pre == 'a' || pre == 'o' || pre == 'u') {
- sub = true;
- break;
- }
- }
- if (!sub) {
- return token;
- }
- 
- token = token.replaceAll("ae", "ä");
- token = token.replaceAll("oe", "ö");
- token = token.replaceAll("ue", "ü");
-
- token = token.replaceAll("Ae", "Ä");
- token = token.replaceAll("Oe", "Ö");
- token = token.replaceAll("Ue", "Ü");
-
- token = token.replaceAll("AE", "Ä");
- token = token.replaceAll("OE", "Ö");
- token = token.replaceAll("UE", "Ü");
- 
- return token; 
+ public String getDefaultNormalizerRules() {
+ return ":: Lower; 'ae' > 'ä'; 'oe' > 'ö'; 'ue' > 'ü'; 'ß' > 'ss'; ";
}
};
\r
- static {\r
- for (final String lang : Locale.getISOLanguages()) {\r
- if (lookup(lang) == null) {\r
- new Language(new Locale(lang));\r
- }\r
- }\r
- }\r
-\r
// ----------------------------------------------------------------\r
\r
- public static Language lookup(final String symbol) {
- return symbolToLangauge.get(symbol.toLowerCase());
+ /**
+  * Returns the Language registered for {@code symbol}, creating and
+  * registering one if none exists yet. Synchronized on the class so that
+  * two concurrent lookups cannot both create a Language for the same symbol.
+  *
+  * @param symbol language code in any letter case; must not be null
+  * @return the registered or newly created Language, never null
+  */
+ public static synchronized Language lookup(final String symbol) {
+ // Lower-case with a fixed locale: under a Turkish default locale "I" becomes
+ // dotless "ı", which never matches the key stored by the constructor, so
+ // every call would create a fresh Language.
+ Language lang = symbolToLangauge.get(symbol.toLowerCase(Locale.US));
+ if (lang == null) {
+ // The constructor registers the new instance in symbolToLangauge.
+ lang = new Language(new Locale(symbol));
+ }
+ return lang;
}
\r
}\r