gitweb.fperrin.net Git - DictionaryPC.git/blobdiff - src/com/hughes/android/dictionary/parser/wiktionary/WiktionaryLangs.java
Add support for generating IT-RU dictionary.
[DictionaryPC.git] / src / com / hughes / android / dictionary / parser / wiktionary / WiktionaryLangs.java
index 3af9a139a63f9e6e21e43af3b6ed1eda7e6e5bfa..7f52642821bc4b8b2bbafac65be248a0642fa504 100644 (file)
@@ -35,7 +35,6 @@ public class WiktionaryLangs {
     isoCodeToEnWikiName.put("BG", "Bulgarian");
     isoCodeToEnWikiName.put("CA", "Catalan");
     isoCodeToEnWikiName.put("SH", "Serbo-Croatian");
-    isoCodeToEnWikiName.put("HR", "Croatian");
     isoCodeToEnWikiName.put("CS", "Czech");
     isoCodeToEnWikiName.put("ZH", "Chinese");
     isoCodeToEnWikiName.put("cmn", "Mandarin");
@@ -48,8 +47,8 @@ public class WiktionaryLangs {
     isoCodeToEnWikiName.put("FI", "Finnish");
     isoCodeToEnWikiName.put("FR", "French");
     isoCodeToEnWikiName.put("DE", "German");
-    isoCodeToEnWikiName.put("EL", "Greek");
     isoCodeToEnWikiName.put("grc", "Ancient Greek");
+    isoCodeToEnWikiName.put("EL", "Greek");
     isoCodeToEnWikiName.put("haw", "Hawaiian");
     isoCodeToEnWikiName.put("HE", "Hebrew");
     isoCodeToEnWikiName.put("HI", "Hindi");
@@ -67,8 +66,8 @@ public class WiktionaryLangs {
     isoCodeToEnWikiName.put("KO", "Korean");
     isoCodeToEnWikiName.put("KU", "Kurdish");
     isoCodeToEnWikiName.put("LO", "Lao");
-    isoCodeToEnWikiName.put("MS", "Malay$");
     isoCodeToEnWikiName.put("ML", "Malayalam");
+    isoCodeToEnWikiName.put("MS", "Malay");
     isoCodeToEnWikiName.put("MI", "Maori");
     isoCodeToEnWikiName.put("MN", "Mongolian");
     isoCodeToEnWikiName.put("NE", "Nepali");
@@ -113,17 +112,18 @@ public class WiktionaryLangs {
     // No longer exists in EN:
     // isoCodeToEnWikiName.put("BS", "Bosnian");
     // isoCodeToEnWikiName.put("SR", "Serbian");
+    // isoCodeToEnWikiName.put("HR", "Croatian");
     
     // Font doesn't work:
     //isoCodeToEnWikiName.put("MY", "Burmese");
 
 
     {
-        Set<String> missing = new LinkedHashSet<String>(isoCodeToEnWikiName.keySet());
-        missing.removeAll(Language.isoCodeToResources.keySet());
+        //Set<String> missing = new LinkedHashSet<String>(isoCodeToEnWikiName.keySet());
+        //missing.removeAll(Language.isoCodeToResources.keySet());
         //System.out.println(missing);
     }
-    assert Language.isoCodeToResources.keySet().containsAll(isoCodeToEnWikiName.keySet());
+    //assert Language.isoCodeToResources.keySet().containsAll(isoCodeToEnWikiName.keySet());
   }
 
   public static final Map<String,Map<String,String>> wikiCodeToIsoCodeToWikiName = new LinkedHashMap<String, Map<String,String>>();
@@ -143,17 +143,18 @@ public class WiktionaryLangs {
     isoCodeToWikiName.put("FR", "Französisch");
     isoCodeToWikiName.put("EO", "Esperanto");
     isoCodeToWikiName.put("CA", "Katalanisch");
-    isoCodeToWikiName.put("LA", "Lateinisch");
+    isoCodeToWikiName.put("LA", "Latein");
     isoCodeToWikiName.put("CS", "Tschechisch");
     isoCodeToWikiName.put("HU", "Ungarisch");
     isoCodeToWikiName.put("SV", "Schwedisch");
     isoCodeToWikiName.put("ES", "Spanisch");
 
-    // egrep -o '\{\{=[a-zA-Z]+=\}\}' frwiktionary-pages-articles.xml | sort | uniq -c | sort -nr
+    // egrep -o '== *\{\{langue\|[a-zA-Z]+\}\} *==' frwiktionary-pages-articles.xml | sort | uniq -c | sort -nr
     isoCodeToWikiName = new LinkedHashMap<String, String>();
     wikiCodeToIsoCodeToWikiName.put("fr", isoCodeToWikiName);
     isoCodeToWikiName.put("FR", Pattern.quote("{{langue|fr}}"));
     isoCodeToWikiName.put("RU", Pattern.quote("{{langue|ru}}"));
+    isoCodeToWikiName.put("AR", Pattern.quote("{{langue|ar}}"));  // Arabic
     isoCodeToWikiName.put("BG", Pattern.quote("{{langue|bg}}"));  // Bulgarian
     isoCodeToWikiName.put("EN", Pattern.quote("{{langue|en}}"));
     //isoCodeToWikiName.put("", Pattern.quote("{{langue|sl}}"));
@@ -189,7 +190,12 @@ public class WiktionaryLangs {
     isoCodeToWikiName.put("HU", Pattern.quote("{{-hu-}}"));
     isoCodeToWikiName.put("EL", Pattern.quote("{{-grc-}}"));
     isoCodeToWikiName.put("SV", Pattern.quote("{{-sv-}}"));
+    isoCodeToWikiName.put("RU", Pattern.quote("{{-ru-}}"));
 
+    // There seems to be no consistent pattern and few foreign language entries anyway
+    isoCodeToWikiName = new LinkedHashMap<String, String>();
+    wikiCodeToIsoCodeToWikiName.put("es", isoCodeToWikiName);
+    isoCodeToWikiName.put("ES", Pattern.quote("{{ES"));
   }
   public static String getEnglishName(String langCode) {
       String name = isoCodeToEnWikiName.get(langCode);