X-Git-Url: http://gitweb.fperrin.net/?a=blobdiff_plain;f=src%2Fcom%2Fhughes%2Fandroid%2Fdictionary%2Fparser%2Fwiktionary%2FWiktionaryLangs.java;h=2d2fc087e5dd7ec8ff7be22f8991b71cea4a508d;hb=aad37bfcc3c1a1772d5fba894d7471c2b09b64ee;hp=9dfa00a3a0932caf2b843bcccfa597017c1def55;hpb=e479ba38bbcb261951399326623c20ffacc147d4;p=DictionaryPC.git diff --git a/src/com/hughes/android/dictionary/parser/wiktionary/WiktionaryLangs.java b/src/com/hughes/android/dictionary/parser/wiktionary/WiktionaryLangs.java index 9dfa00a..2d2fc08 100644 --- a/src/com/hughes/android/dictionary/parser/wiktionary/WiktionaryLangs.java +++ b/src/com/hughes/android/dictionary/parser/wiktionary/WiktionaryLangs.java @@ -46,6 +46,10 @@ public class WiktionaryLangs { isoCodeToEnWikiName.put("ET", "Estonian"); isoCodeToEnWikiName.put("FI", "Finnish"); isoCodeToEnWikiName.put("FR", "French"); + // Note: must be before German since matcher + // simply takes first match instead of best. + isoCodeToEnWikiName.put("nds", "Low German"); + isoCodeToEnWikiName.put("pdc", "Pennsylvania German"); isoCodeToEnWikiName.put("DE", "German"); isoCodeToEnWikiName.put("grc", "Ancient Greek"); isoCodeToEnWikiName.put("EL", "Greek"); @@ -108,6 +112,8 @@ public class WiktionaryLangs { isoCodeToEnWikiName.put("LB", "Luxembourgish"); isoCodeToEnWikiName.put("MK", "Macedonian"); isoCodeToEnWikiName.put("GV", "Manx"); + isoCodeToEnWikiName.put("scn", "Sicilian"); + isoCodeToEnWikiName.put("cu", "Old Church Slavonic"); // No longer exists in EN: // isoCodeToEnWikiName.put("BS", "Bosnian"); @@ -136,6 +142,7 @@ public class WiktionaryLangs { // egrep -o '\{\{Wortart[^}]+\}\}' dewiktionary-pages-articles.xml | cut -d \| -f3 | sort | uniq -c | sort -nr isoCodeToWikiName = new LinkedHashMap(); wikiCodeToIsoCodeToWikiName.put("de", isoCodeToWikiName); + isoCodeToWikiName.put("nds", "Niederdeutsch"); isoCodeToWikiName.put("DE", "Deutsch"); isoCodeToWikiName.put("EN", "Englisch"); isoCodeToWikiName.put("IT", "Italienisch"); @@ -148,6 
+155,7 @@ public class WiktionaryLangs { isoCodeToWikiName.put("HU", "Ungarisch"); isoCodeToWikiName.put("SV", "Schwedisch"); isoCodeToWikiName.put("ES", "Spanisch"); + isoCodeToWikiName.put("RO", "Rumänisch"); // egrep -o '== *\{\{langue\|[a-zA-Z]+\}\} *==' frwiktionary-pages-articles.xml | sort | uniq -c | sort -nr isoCodeToWikiName = new LinkedHashMap(); @@ -173,6 +181,7 @@ public class WiktionaryLangs { isoCodeToWikiName.put("IS", Pattern.quote("{{langue|is}}")); // Icelandic isoCodeToWikiName.put("ES", Pattern.quote("{{langue|es}}")); isoCodeToWikiName.put("UK", Pattern.quote("{{langue|uk}}")); + isoCodeToWikiName.put("PT", Pattern.quote("{{langue|pt}}")); // egrep -o '= *\{\{-[a-z]+-\}\} *=' itwiktionary-pages-articles.xml | sort | uniq -c | sort -n isoCodeToWikiName = new LinkedHashMap(); @@ -192,10 +201,20 @@ public class WiktionaryLangs { isoCodeToWikiName.put("SV", Pattern.quote("{{-sv-}}")); isoCodeToWikiName.put("RU", Pattern.quote("{{-ru-}}")); - // There seems to be no consistent pattern and few foreign language entries anyway + // egrep -o '== *\{\{lengua\|[a-zA-Z]+\}\} *==' eswiktionary-pages-articles.xml | sort | uniq -c | sort -nr isoCodeToWikiName = new LinkedHashMap(); wikiCodeToIsoCodeToWikiName.put("es", isoCodeToWikiName); - isoCodeToWikiName.put("ES", Pattern.quote("{{ES")); + isoCodeToWikiName.put("ES", Pattern.quote("{{lengua|es}}")); + isoCodeToWikiName.put("EN", Pattern.quote("{{lengua|en}}")); + isoCodeToWikiName.put("FR", Pattern.quote("{{lengua|fr}}")); + isoCodeToWikiName.put("IT", Pattern.quote("{{lengua|it}}")); + + // Pattern seems to match Italian one + isoCodeToWikiName = new LinkedHashMap(); + wikiCodeToIsoCodeToWikiName.put("pt", isoCodeToWikiName); + isoCodeToWikiName.put("PT", Pattern.quote("{{-pt-}}")); + isoCodeToWikiName.put("EN", Pattern.quote("{{-en-}}")); + isoCodeToWikiName.put("ES", Pattern.quote("{{-es-}}")); } public static String getEnglishName(String langCode) { String name = isoCodeToEnWikiName.get(langCode);