gitweb.fperrin.net Git - DictionaryPC.git/blobdiff - src/com/hughes/android/dictionary/parser/wiktionary/WiktionaryLangs.java
Added simple parsing logic for DE and IT wiktionaries.
[DictionaryPC.git] / src / com / hughes / android / dictionary / parser / wiktionary / WiktionaryLangs.java
index 0fbe21e14ffe1482887e1ee1415c2e667ab659f5..b7642915041a7ac6422c618faf4742815fcbbb80 100644 (file)
 
 package com.hughes.android.dictionary.parser.wiktionary;
 
+import com.hughes.android.dictionary.engine.Language;
+
 import java.util.LinkedHashMap;
 import java.util.Map;
 import java.util.regex.Pattern;
 
 public class WiktionaryLangs {
   
-  public static final Map<String,String> isoCodeToWikiName = new LinkedHashMap<String,String>();
+  public static final Map<String,String> isoCodeToEnWikiName = new LinkedHashMap<String,String>();  // language code -> English language name(s); registered for wiki "en" below.
   static {
-    isoCodeToWikiName.put("AF", "Afrikaans");
-    isoCodeToWikiName.put("SQ", "Albanian");
-    isoCodeToWikiName.put("AR", "Arabic");
-    isoCodeToWikiName.put("HY", "Armenian");
-    isoCodeToWikiName.put("BE", "Belarusian");
-    isoCodeToWikiName.put("BN", "Bengali");
-    isoCodeToWikiName.put("BS", "Bosnian");
-    isoCodeToWikiName.put("BG", "Bulgarian");
-    isoCodeToWikiName.put("CA", "Catalan");
-    isoCodeToWikiName.put("HR", "Croatian");
-    isoCodeToWikiName.put("CS", "Czech");
-    isoCodeToWikiName.put("ZH", "Chinese|Mandarin|Cantonese");
-    isoCodeToWikiName.put("DA", "Danish");
-    isoCodeToWikiName.put("NL", "Dutch");
-    isoCodeToWikiName.put("EN", "English");
-    isoCodeToWikiName.put("EO", "Esperanto");
-    isoCodeToWikiName.put("ET", "Estonian");
-    isoCodeToWikiName.put("FI", "Finnish");
-    isoCodeToWikiName.put("FR", "French");
-    isoCodeToWikiName.put("DE", "German");
-    isoCodeToWikiName.put("EL", "Greek");
-    isoCodeToWikiName.put("haw", "Hawaiian");
-    isoCodeToWikiName.put("HE", "Hebrew");
-    isoCodeToWikiName.put("HI", "Hindi");
-    isoCodeToWikiName.put("HU", "Hungarian");
-    isoCodeToWikiName.put("IS", "Icelandic");
-    isoCodeToWikiName.put("ID", "Indonesian");
-    isoCodeToWikiName.put("GA", "Irish");
-    isoCodeToWikiName.put("GD", "Gaelic");
-    isoCodeToWikiName.put("IT", "Italian");
-    isoCodeToWikiName.put("LA", "Latin");
-    isoCodeToWikiName.put("LV", "Latvian");
-    isoCodeToWikiName.put("LT", "Lithuanian");
-    isoCodeToWikiName.put("JA", "Japanese");
-    isoCodeToWikiName.put("KO", "Korean");
-    isoCodeToWikiName.put("KU", "Kurdish");
-    isoCodeToWikiName.put("MS", "Malay");
-    isoCodeToWikiName.put("MI", "Maori");
-    isoCodeToWikiName.put("MN", "Mongolian");
-    isoCodeToWikiName.put("NE", "Nepali");
-    isoCodeToWikiName.put("NO", "Norwegian");
-    isoCodeToWikiName.put("FA", "Persian");
-    isoCodeToWikiName.put("PL", "Polish");
-    isoCodeToWikiName.put("PT", "Portuguese");
-    isoCodeToWikiName.put("PA", "Punjabi");
-    isoCodeToWikiName.put("RO", "Romanian");
-    isoCodeToWikiName.put("RU", "Russian");
-    isoCodeToWikiName.put("SA", "Sanskrit");
-    isoCodeToWikiName.put("SR", "Serbian");
-    isoCodeToWikiName.put("SK", "Slovak");
-    isoCodeToWikiName.put("SO", "Somali");
-    isoCodeToWikiName.put("ES", "Spanish");
-    isoCodeToWikiName.put("SW", "Swahili");
-    isoCodeToWikiName.put("SV", "Swedish");
-    isoCodeToWikiName.put("TL", "Tagalog");
-    isoCodeToWikiName.put("TG", "Tajik");
-    isoCodeToWikiName.put("TH", "Thai");
-    isoCodeToWikiName.put("BO", "Tibetan");
-    isoCodeToWikiName.put("TR", "Turkish");
-    isoCodeToWikiName.put("UK", "Ukrainian");
-    isoCodeToWikiName.put("UR", "Urdu");
-    isoCodeToWikiName.put("VI", "Vietnamese");
-    isoCodeToWikiName.put("CI", "Welsh");
-    isoCodeToWikiName.put("YI", "Yiddish");
-    isoCodeToWikiName.put("ZU", "Zulu");
+    isoCodeToEnWikiName.put("AF", "Afrikaans");
+    isoCodeToEnWikiName.put("SQ", "Albanian");
+    isoCodeToEnWikiName.put("AR", "Arabic");
+    isoCodeToEnWikiName.put("HY", "Armenian");
+    isoCodeToEnWikiName.put("BE", "Belarusian");
+    isoCodeToEnWikiName.put("BN", "Bengali");
+    isoCodeToEnWikiName.put("BS", "Bosnian");
+    isoCodeToEnWikiName.put("BG", "Bulgarian");
+    isoCodeToEnWikiName.put("MY", "Burmese");
+    isoCodeToEnWikiName.put("yue", "Cantonese");
+    isoCodeToEnWikiName.put("CA", "Catalan");
+    isoCodeToEnWikiName.put("HR", "Croatian");
+    isoCodeToEnWikiName.put("CS", "Czech");
+    isoCodeToEnWikiName.put("ZH", "Chinese|Mandarin");  // '|' separates alternative names (presumably regex alternation; confirm against the parser).
+    isoCodeToEnWikiName.put("DA", "Danish");
+    isoCodeToEnWikiName.put("NL", "Dutch");
+    isoCodeToEnWikiName.put("EN", "English");
+    isoCodeToEnWikiName.put("EO", "Esperanto");
+    isoCodeToEnWikiName.put("ET", "Estonian");
+    isoCodeToEnWikiName.put("FI", "Finnish");
+    isoCodeToEnWikiName.put("FR", "French");
+    isoCodeToEnWikiName.put("DE", "German");
+    isoCodeToEnWikiName.put("EL", "Greek");
+    isoCodeToEnWikiName.put("grc", "Ancient Greek");
+    isoCodeToEnWikiName.put("haw", "Hawaiian");
+    isoCodeToEnWikiName.put("HE", "Hebrew");
+    isoCodeToEnWikiName.put("HI", "Hindi");
+    isoCodeToEnWikiName.put("HU", "Hungarian");
+    isoCodeToEnWikiName.put("IS", "Icelandic");
+    isoCodeToEnWikiName.put("ID", "Indonesian");
+    isoCodeToEnWikiName.put("GA", "Irish");
+    isoCodeToEnWikiName.put("GD", "Gaelic");
+    isoCodeToEnWikiName.put("IT", "Italian");
+    isoCodeToEnWikiName.put("LA", "Latin");
+    isoCodeToEnWikiName.put("LV", "Latvian");
+    isoCodeToEnWikiName.put("LT", "Lithuanian");
+    isoCodeToEnWikiName.put("JA", "Japanese");
+    isoCodeToEnWikiName.put("KO", "Korean");
+    isoCodeToEnWikiName.put("KU", "Kurdish");
+    isoCodeToEnWikiName.put("LO", "Lao");
+    isoCodeToEnWikiName.put("MS", "Malay");
+    isoCodeToEnWikiName.put("ML", "Malayalam");
+    isoCodeToEnWikiName.put("MI", "Maori");
+    isoCodeToEnWikiName.put("MN", "Mongolian");
+    isoCodeToEnWikiName.put("NE", "Nepali");
+    isoCodeToEnWikiName.put("NO", "Norwegian");
+    isoCodeToEnWikiName.put("FA", "Persian");
+    isoCodeToEnWikiName.put("PL", "Polish");
+    isoCodeToEnWikiName.put("PT", "Portuguese");
+    isoCodeToEnWikiName.put("PA", "Punjabi");
+    isoCodeToEnWikiName.put("RO", "Romanian");
+    isoCodeToEnWikiName.put("RU", "Russian");
+    isoCodeToEnWikiName.put("SA", "Sanskrit");
+    isoCodeToEnWikiName.put("SR", "Serbian");
+    isoCodeToEnWikiName.put("SK", "Slovak");
+    isoCodeToEnWikiName.put("SL", "Slovene|Slovenian");
+    isoCodeToEnWikiName.put("SO", "Somali");
+    isoCodeToEnWikiName.put("ES", "Spanish");
+    isoCodeToEnWikiName.put("SW", "Swahili");
+    isoCodeToEnWikiName.put("SV", "Swedish");
+    isoCodeToEnWikiName.put("TL", "Tagalog");
+    isoCodeToEnWikiName.put("TG", "Tajik");
+    isoCodeToEnWikiName.put("TA", "Tamil");
+    isoCodeToEnWikiName.put("TH", "Thai");
+    isoCodeToEnWikiName.put("BO", "Tibetan");
+    isoCodeToEnWikiName.put("TR", "Turkish");
+    isoCodeToEnWikiName.put("UK", "Ukrainian");
+    isoCodeToEnWikiName.put("UR", "Urdu");
+    isoCodeToEnWikiName.put("VI", "Vietnamese");
+    isoCodeToEnWikiName.put("CI", "Welsh");  // NOTE(review): ISO 639-1 for Welsh is "cy", not "ci"; confirm against Language.isoCodeToResources.
+    isoCodeToEnWikiName.put("YI", "Yiddish");
+    isoCodeToEnWikiName.put("ZU", "Zulu");
     
-    isoCodeToWikiName.put("AZ", "Azeri");
-    isoCodeToWikiName.put("EU", "Basque");
-    isoCodeToWikiName.put("BR", "Breton");
-    isoCodeToWikiName.put("MR", "Burmese");
-    isoCodeToWikiName.put("FO", "Faroese");
-    isoCodeToWikiName.put("GL", "Galician");
-    isoCodeToWikiName.put("KA", "Georgian");
-    isoCodeToWikiName.put("HT", "Haitian Creole");
-    isoCodeToWikiName.put("LB", "Luxembourgish");
-    isoCodeToWikiName.put("MK", "Macedonian");
+    isoCodeToEnWikiName.put("AZ", "Azeri");
+    isoCodeToEnWikiName.put("EU", "Basque");
+    isoCodeToEnWikiName.put("BR", "Breton");
+    isoCodeToEnWikiName.put("MR", "Burmese");  // NOTE(review): "MY" is already mapped to Burmese above; ISO 639-1 "mr" is Marathi. Confirm intent.
+    isoCodeToEnWikiName.put("FO", "Faroese");
+    isoCodeToEnWikiName.put("GL", "Galician");
+    isoCodeToEnWikiName.put("KA", "Georgian");
+    isoCodeToEnWikiName.put("HT", "Haitian Creole");
+    isoCodeToEnWikiName.put("LB", "Luxembourgish");
+    isoCodeToEnWikiName.put("MK", "Macedonian");
     
+    assert Language.isoCodeToResources.keySet().containsAll(isoCodeToEnWikiName.keySet());  // Sanity check (only evaluated with -ea): every code here must also be a key of Language.isoCodeToResources.
   }
 
   public static final Map<String,Map<String,String>> wikiCodeToIsoCodeToWikiName = new LinkedHashMap<String, Map<String,String>>();
   static {
     // en
-    wikiCodeToIsoCodeToWikiName.put("en", isoCodeToWikiName);
+    wikiCodeToIsoCodeToWikiName.put("en", isoCodeToEnWikiName);
     
     Map<String,String> isoCodeToWikiName;
     
@@ -158,12 +168,24 @@ public class WiktionaryLangs {
     isoCodeToWikiName.put("PL", Pattern.quote("{{-pl-}}"));
     isoCodeToWikiName.put("NL", Pattern.quote("{{-nl-}}"));
     isoCodeToWikiName.put("LV", Pattern.quote("{{-lv-}}"));
-    isoCodeToWikiName.put("LV", Pattern.quote("{{-la-}}"));
+    isoCodeToWikiName.put("LA", Pattern.quote("{{-la-}}"));
     isoCodeToWikiName.put("HU", Pattern.quote("{{-hu-}}"));
-    isoCodeToWikiName.put("PL", Pattern.quote("{{-pl-}}"));
     isoCodeToWikiName.put("EL", Pattern.quote("{{-grc-}}"));
     isoCodeToWikiName.put("SV", Pattern.quote("{{-sv-}}"));
 
   }
+  public static String getEnglishName(String langCode) {
+      String name = isoCodeToEnWikiName.get(langCode);
+      if (name == null) {
+          name = isoCodeToEnWikiName.get(langCode.toUpperCase());
+      }
+      if (name == null) {
+          return null;
+      }
+      if (name.indexOf('|') != -1) {
+          return name.substring(name.indexOf('|'));
+      }
+      return name;  // can be null.
+  }
   
 }