X-Git-Url: http://gitweb.fperrin.net/?a=blobdiff_plain;f=src%2Fcom%2Fhughes%2Fandroid%2Fdictionary%2Fparser%2Fwiktionary%2FWiktionaryLangs.java;h=01b31a23861e7f4c093fa4d101424503a22c8736;hb=2fc669d88306d563fc9c899d8d91b25d591692ea;hp=9dfa00a3a0932caf2b843bcccfa597017c1def55;hpb=e479ba38bbcb261951399326623c20ffacc147d4;p=DictionaryPC.git diff --git a/src/com/hughes/android/dictionary/parser/wiktionary/WiktionaryLangs.java b/src/com/hughes/android/dictionary/parser/wiktionary/WiktionaryLangs.java index 9dfa00a..01b31a2 100644 --- a/src/com/hughes/android/dictionary/parser/wiktionary/WiktionaryLangs.java +++ b/src/com/hughes/android/dictionary/parser/wiktionary/WiktionaryLangs.java @@ -14,17 +14,13 @@ package com.hughes.android.dictionary.parser.wiktionary; -import com.hughes.android.dictionary.engine.Language; - import java.util.LinkedHashMap; -import java.util.LinkedHashSet; import java.util.Map; -import java.util.Set; import java.util.regex.Pattern; public class WiktionaryLangs { - public static final Map isoCodeToEnWikiName = new LinkedHashMap(); + public static final Map isoCodeToEnWikiName = new LinkedHashMap<>(); static { isoCodeToEnWikiName.put("AF", "Afrikaans"); isoCodeToEnWikiName.put("SQ", "Albanian"); @@ -46,6 +42,10 @@ public class WiktionaryLangs { isoCodeToEnWikiName.put("ET", "Estonian"); isoCodeToEnWikiName.put("FI", "Finnish"); isoCodeToEnWikiName.put("FR", "French"); + // Note: must be before German since matcher + // simply takes first match instead of best. 
+ isoCodeToEnWikiName.put("nds", "Low German"); + isoCodeToEnWikiName.put("pdc", "Pennsylvania German"); isoCodeToEnWikiName.put("DE", "German"); isoCodeToEnWikiName.put("grc", "Ancient Greek"); isoCodeToEnWikiName.put("EL", "Greek"); @@ -107,7 +107,9 @@ public class WiktionaryLangs { isoCodeToEnWikiName.put("HT", "Haitian Creole"); isoCodeToEnWikiName.put("LB", "Luxembourgish"); isoCodeToEnWikiName.put("MK", "Macedonian"); - isoCodeToEnWikiName.put("GV", "Manx"); + isoCodeToEnWikiName.put("scn", "Sicilian"); + isoCodeToEnWikiName.put("cu", "Old Church Slavonic"); + isoCodeToEnWikiName.put("rom", "Romani"); // No longer exists in EN: // isoCodeToEnWikiName.put("BS", "Bosnian"); @@ -126,16 +128,17 @@ public class WiktionaryLangs { //assert Language.isoCodeToResources.keySet().containsAll(isoCodeToEnWikiName.keySet()); } - public static final Map> wikiCodeToIsoCodeToWikiName = new LinkedHashMap>(); + public static final Map> wikiCodeToIsoCodeToWikiName = new LinkedHashMap<>(); static { + Map isoCodeToWikiName; + // en wikiCodeToIsoCodeToWikiName.put("en", isoCodeToEnWikiName); - Map isoCodeToWikiName; - // egrep -o '\{\{Wortart[^}]+\}\}' dewiktionary-pages-articles.xml | cut -d \| -f3 | sort | uniq -c | sort -nr - isoCodeToWikiName = new LinkedHashMap(); + isoCodeToWikiName = new LinkedHashMap<>(); wikiCodeToIsoCodeToWikiName.put("de", isoCodeToWikiName); + isoCodeToWikiName.put("nds", "Niederdeutsch"); isoCodeToWikiName.put("DE", "Deutsch"); isoCodeToWikiName.put("EN", "Englisch"); isoCodeToWikiName.put("IT", "Italienisch"); @@ -148,14 +151,16 @@ public class WiktionaryLangs { isoCodeToWikiName.put("HU", "Ungarisch"); isoCodeToWikiName.put("SV", "Schwedisch"); isoCodeToWikiName.put("ES", "Spanisch"); + isoCodeToWikiName.put("RO", "Rumänisch"); // egrep -o '== *\{\{langue\|[a-zA-Z]+\}\} *==' frwiktionary-pages-articles.xml | sort | uniq -c | sort -nr - isoCodeToWikiName = new LinkedHashMap(); + isoCodeToWikiName = new LinkedHashMap<>(); 
wikiCodeToIsoCodeToWikiName.put("fr", isoCodeToWikiName); isoCodeToWikiName.put("FR", Pattern.quote("{{langue|fr}}")); isoCodeToWikiName.put("RU", Pattern.quote("{{langue|ru}}")); isoCodeToWikiName.put("AR", Pattern.quote("{{langue|ar}}")); // Arabic isoCodeToWikiName.put("BG", Pattern.quote("{{langue|bg}}")); // Bulgarian + isoCodeToWikiName.put("EL", Pattern.quote("{{langue|el}}")); isoCodeToWikiName.put("EN", Pattern.quote("{{langue|en}}")); //isoCodeToWikiName.put("", Pattern.quote("{{langue|sl}}")); isoCodeToWikiName.put("LA", Pattern.quote("{{langue|la}}")); @@ -173,9 +178,10 @@ public class WiktionaryLangs { isoCodeToWikiName.put("IS", Pattern.quote("{{langue|is}}")); // Icelandic isoCodeToWikiName.put("ES", Pattern.quote("{{langue|es}}")); isoCodeToWikiName.put("UK", Pattern.quote("{{langue|uk}}")); + isoCodeToWikiName.put("PT", Pattern.quote("{{langue|pt}}")); // egrep -o '= *\{\{-[a-z]+-\}\} *=' itwiktionary-pages-articles.xml | sort | uniq -c | sort -n - isoCodeToWikiName = new LinkedHashMap(); + isoCodeToWikiName = new LinkedHashMap<>(); wikiCodeToIsoCodeToWikiName.put("it", isoCodeToWikiName); isoCodeToWikiName.put("IT", "\\{\\{-(it|scn|nap|cal|lmo)-\\}\\}"); // scn, nap, cal, lmo isoCodeToWikiName.put("EN", Pattern.quote("{{-en-}}")); @@ -192,10 +198,21 @@ public class WiktionaryLangs { isoCodeToWikiName.put("SV", Pattern.quote("{{-sv-}}")); isoCodeToWikiName.put("RU", Pattern.quote("{{-ru-}}")); - // There seems to be no consistent pattern and few foreign language entries anyway - isoCodeToWikiName = new LinkedHashMap(); + // egrep -o '== *\{\{lengua\|[a-zA-Z]+\}\} *==' eswiktionary-pages-articles.xml | sort | uniq -c | sort -nr + isoCodeToWikiName = new LinkedHashMap<>(); wikiCodeToIsoCodeToWikiName.put("es", isoCodeToWikiName); - isoCodeToWikiName.put("ES", Pattern.quote("{{ES")); + isoCodeToWikiName.put("AR", Pattern.quote("{{lengua|ar}}")); + isoCodeToWikiName.put("ES", Pattern.quote("{{lengua|es}}")); + isoCodeToWikiName.put("EN", 
Pattern.quote("{{lengua|en}}")); + isoCodeToWikiName.put("FR", Pattern.quote("{{lengua|fr}}")); + isoCodeToWikiName.put("IT", Pattern.quote("{{lengua|it}}")); + + // Pattern seems to match Italian one + isoCodeToWikiName = new LinkedHashMap<>(); + wikiCodeToIsoCodeToWikiName.put("pt", isoCodeToWikiName); + isoCodeToWikiName.put("PT", Pattern.quote("{{-pt-}}")); + isoCodeToWikiName.put("EN", Pattern.quote("{{-en-}}")); + isoCodeToWikiName.put("ES", Pattern.quote("{{-es-}}")); } public static String getEnglishName(String langCode) { String name = isoCodeToEnWikiName.get(langCode);