gitweb.fperrin.net Git - DictionaryPC.git/blobdiff - src/com/hughes/android/dictionary/parser/wiktionary/WiktionaryLangs.java
Add support for generating Romani dictionary.
[DictionaryPC.git] / src / com / hughes / android / dictionary / parser / wiktionary / WiktionaryLangs.java
index 9dfa00a3a0932caf2b843bcccfa597017c1def55..86dc0526c8532c50321f57368272fa1bcf9722ea 100644 (file)
@@ -46,6 +46,10 @@ public class WiktionaryLangs {
         isoCodeToEnWikiName.put("ET", "Estonian");
         isoCodeToEnWikiName.put("FI", "Finnish");
         isoCodeToEnWikiName.put("FR", "French");
+        // Note: these entries must come before "German", since the matcher
+        // simply takes the first match instead of the best one.
+        isoCodeToEnWikiName.put("nds", "Low German");
+        isoCodeToEnWikiName.put("pdc", "Pennsylvania German");
         isoCodeToEnWikiName.put("DE", "German");
         isoCodeToEnWikiName.put("grc", "Ancient Greek");
         isoCodeToEnWikiName.put("EL", "Greek");
@@ -108,6 +112,9 @@ public class WiktionaryLangs {
         isoCodeToEnWikiName.put("LB", "Luxembourgish");
         isoCodeToEnWikiName.put("MK", "Macedonian");
         isoCodeToEnWikiName.put("GV", "Manx");
+        isoCodeToEnWikiName.put("scn", "Sicilian");
+        isoCodeToEnWikiName.put("cu", "Old Church Slavonic");
+        isoCodeToEnWikiName.put("rom", "Romani");
 
         // No longer exists in EN:
         // isoCodeToEnWikiName.put("BS", "Bosnian");
@@ -136,6 +143,7 @@ public class WiktionaryLangs {
         // egrep -o '\{\{Wortart[^}]+\}\}' dewiktionary-pages-articles.xml | cut -d \| -f3 | sort | uniq -c | sort -nr
         isoCodeToWikiName = new LinkedHashMap<String, String>();
         wikiCodeToIsoCodeToWikiName.put("de", isoCodeToWikiName);
+        isoCodeToWikiName.put("nds", "Niederdeutsch");
         isoCodeToWikiName.put("DE", "Deutsch");
         isoCodeToWikiName.put("EN", "Englisch");
         isoCodeToWikiName.put("IT", "Italienisch");
@@ -148,6 +156,7 @@ public class WiktionaryLangs {
         isoCodeToWikiName.put("HU", "Ungarisch");
         isoCodeToWikiName.put("SV", "Schwedisch");
         isoCodeToWikiName.put("ES", "Spanisch");
+        isoCodeToWikiName.put("RO", "Rumänisch");
 
         // egrep -o '== *\{\{langue\|[a-zA-Z]+\}\} *==' frwiktionary-pages-articles.xml | sort | uniq -c | sort -nr
         isoCodeToWikiName = new LinkedHashMap<String, String>();
@@ -173,6 +182,7 @@ public class WiktionaryLangs {
         isoCodeToWikiName.put("IS", Pattern.quote("{{langue|is}}"));  // Icelandic
         isoCodeToWikiName.put("ES", Pattern.quote("{{langue|es}}"));
         isoCodeToWikiName.put("UK", Pattern.quote("{{langue|uk}}"));
+        isoCodeToWikiName.put("PT", Pattern.quote("{{langue|pt}}"));
 
         // egrep -o '= *\{\{-[a-z]+-\}\} *=' itwiktionary-pages-articles.xml | sort | uniq -c | sort -n
         isoCodeToWikiName = new LinkedHashMap<String, String>();
@@ -192,10 +202,21 @@ public class WiktionaryLangs {
         isoCodeToWikiName.put("SV", Pattern.quote("{{-sv-}}"));
         isoCodeToWikiName.put("RU", Pattern.quote("{{-ru-}}"));
 
-        // There seems to be no consistent pattern and few foreign language entries anyway
+        // egrep -o '== *\{\{lengua\|[a-zA-Z]+\}\} *==' eswiktionary-pages-articles.xml | sort | uniq -c | sort -nr
         isoCodeToWikiName = new LinkedHashMap<String, String>();
         wikiCodeToIsoCodeToWikiName.put("es", isoCodeToWikiName);
-        isoCodeToWikiName.put("ES", Pattern.quote("{{ES"));
+        isoCodeToWikiName.put("AR", Pattern.quote("{{lengua|ar}}"));
+        isoCodeToWikiName.put("ES", Pattern.quote("{{lengua|es}}"));
+        isoCodeToWikiName.put("EN", Pattern.quote("{{lengua|en}}"));
+        isoCodeToWikiName.put("FR", Pattern.quote("{{lengua|fr}}"));
+        isoCodeToWikiName.put("IT", Pattern.quote("{{lengua|it}}"));
+
+        // The language-header pattern appears to match the Italian one ({{-xx-}}).
+        isoCodeToWikiName = new LinkedHashMap<String, String>();
+        wikiCodeToIsoCodeToWikiName.put("pt", isoCodeToWikiName);
+        isoCodeToWikiName.put("PT", Pattern.quote("{{-pt-}}"));
+        isoCodeToWikiName.put("EN", Pattern.quote("{{-en-}}"));
+        isoCodeToWikiName.put("ES", Pattern.quote("{{-es-}}"));
     }
     public static String getEnglishName(String langCode) {
         String name = isoCodeToEnWikiName.get(langCode);