gitweb.fperrin.net Git - DictionaryPC.git/blobdiff - src/com/hughes/android/dictionary/parser/wiktionary/WiktionaryLangs.java
Add script to help with dictionary generation.
[DictionaryPC.git] / src / com / hughes / android / dictionary / parser / wiktionary / WiktionaryLangs.java
index f87afa107ec3f9e5dbb30b26e62eb0787a67da60..93d1f1ff32b696e6eac7d3d422cd0797057295a8 100644 (file)
@@ -33,7 +33,6 @@ public class WiktionaryLangs {
     isoCodeToEnWikiName.put("BE", "Belarusian");
     isoCodeToEnWikiName.put("BN", "Bengali");
     isoCodeToEnWikiName.put("BG", "Bulgarian");
-    isoCodeToEnWikiName.put("MY", "Burmese");
     isoCodeToEnWikiName.put("CA", "Catalan");
     isoCodeToEnWikiName.put("SH", "Serbo-Croatian");
     isoCodeToEnWikiName.put("HR", "Croatian");
@@ -59,6 +58,7 @@ public class WiktionaryLangs {
     isoCodeToEnWikiName.put("ID", "Indonesian");
     isoCodeToEnWikiName.put("GA", "Irish");
     isoCodeToEnWikiName.put("GD", "Gaelic");
+    isoCodeToEnWikiName.put("GV", "Manx");
     isoCodeToEnWikiName.put("IT", "Italian");
     isoCodeToEnWikiName.put("LA", "Latin");
     isoCodeToEnWikiName.put("LV", "Latvian");
@@ -67,7 +67,7 @@ public class WiktionaryLangs {
     isoCodeToEnWikiName.put("KO", "Korean");
     isoCodeToEnWikiName.put("KU", "Kurdish");
     isoCodeToEnWikiName.put("LO", "Lao");
-    isoCodeToEnWikiName.put("MS", "Malay");
+    isoCodeToEnWikiName.put("MS", "Malay$");
     isoCodeToEnWikiName.put("ML", "Malayalam");
     isoCodeToEnWikiName.put("MI", "Maori");
     isoCodeToEnWikiName.put("MN", "Mongolian");
@@ -101,24 +101,29 @@ public class WiktionaryLangs {
     isoCodeToEnWikiName.put("AZ", "Azeri");
     isoCodeToEnWikiName.put("EU", "Basque");
     isoCodeToEnWikiName.put("BR", "Breton");
-    isoCodeToEnWikiName.put("MR", "Burmese");
+    isoCodeToEnWikiName.put("MR", "Marathi");
     isoCodeToEnWikiName.put("FO", "Faroese");
     isoCodeToEnWikiName.put("GL", "Galician");
     isoCodeToEnWikiName.put("KA", "Georgian");
     isoCodeToEnWikiName.put("HT", "Haitian Creole");
     isoCodeToEnWikiName.put("LB", "Luxembourgish");
     isoCodeToEnWikiName.put("MK", "Macedonian");
+    isoCodeToEnWikiName.put("GV", "Manx");
     
     // No longer exists in EN:
     // isoCodeToEnWikiName.put("BS", "Bosnian");
     // isoCodeToEnWikiName.put("SR", "Serbian");
+    
+    // Font doesn't work:
+    //isoCodeToEnWikiName.put("MY", "Burmese");
+
 
     {
-        Set<String> missing = new LinkedHashSet<String>(isoCodeToEnWikiName.keySet());
-        missing.removeAll(Language.isoCodeToResources.keySet());
+        //Set<String> missing = new LinkedHashSet<String>(isoCodeToEnWikiName.keySet());
+        //missing.removeAll(Language.isoCodeToResources.keySet());
         //System.out.println(missing);
     }
-    assert Language.isoCodeToResources.keySet().containsAll(isoCodeToEnWikiName.keySet());
+    //assert Language.isoCodeToResources.keySet().containsAll(isoCodeToEnWikiName.keySet());
   }
 
   public static final Map<String,Map<String,String>> wikiCodeToIsoCodeToWikiName = new LinkedHashMap<String, Map<String,String>>();
@@ -144,7 +149,7 @@ public class WiktionaryLangs {
     isoCodeToWikiName.put("SV", "Schwedisch");
     isoCodeToWikiName.put("ES", "Spanisch");
 
-    // egrep -o '\{\{=[a-zA-Z]+=\}\}' frwiktionary-pages-articles.xml | sort | uniq -c | sort -nr
+    // egrep -o '== *\{\{langue\|[a-zA-Z]+\}\} *==' frwiktionary-pages-articles.xml | sort | uniq -c | sort -nr
     isoCodeToWikiName = new LinkedHashMap<String, String>();
     wikiCodeToIsoCodeToWikiName.put("fr", isoCodeToWikiName);
     isoCodeToWikiName.put("FR", Pattern.quote("{{langue|fr}}"));
@@ -185,6 +190,10 @@ public class WiktionaryLangs {
     isoCodeToWikiName.put("EL", Pattern.quote("{{-grc-}}"));
     isoCodeToWikiName.put("SV", Pattern.quote("{{-sv-}}"));
 
+    // There seems to be no consistent pattern, and there are few foreign-language entries anyway.
+    isoCodeToWikiName = new LinkedHashMap<String, String>();
+    wikiCodeToIsoCodeToWikiName.put("es", isoCodeToWikiName);
+    isoCodeToWikiName.put("ES", Pattern.quote("{{ES}}"));
   }
   public static String getEnglishName(String langCode) {
       String name = isoCodeToEnWikiName.get(langCode);
@@ -195,7 +204,10 @@ public class WiktionaryLangs {
           return null;
       }
       if (name.indexOf('|') != -1) {
-          return name.substring(name.indexOf('|'));
+          return name.substring(0, name.indexOf('|'));
+      }
+      if (name.indexOf('$') != -1) {
+          return name.substring(0, name.indexOf('$'));
       }
       return name;  // can be null.
   }