package com.hughes.android.dictionary.parser.wiktionary;
-import com.hughes.android.dictionary.engine.Language;
-
import java.util.LinkedHashMap;
-import java.util.LinkedHashSet;
import java.util.Map;
-import java.util.Set;
import java.util.regex.Pattern;
public class WiktionaryLangs {
- public static final Map<String,String> isoCodeToEnWikiName = new LinkedHashMap<String,String>();
+ public static final Map<String,String> isoCodeToEnWikiName = new LinkedHashMap<>();
// Fills isoCodeToEnWikiName with language-code -> language-name pairs
// (e.g. "DE" -> "German"). Keys are a mix of upper-case two-letter codes
// and lower-case codes such as "nds" or "grc".
// NOTE(review): the names are presumably the language headings used on the
// English Wiktionary - confirm against the parser that consumes this map.
static {
isoCodeToEnWikiName.put("AF", "Afrikaans");
isoCodeToEnWikiName.put("SQ", "Albanian");
isoCodeToEnWikiName.put("ET", "Estonian");
isoCodeToEnWikiName.put("FI", "Finnish");
isoCodeToEnWikiName.put("FR", "French");
+ // Note: must be before German since matcher
+ // simply takes first match instead of best.
+ isoCodeToEnWikiName.put("nds", "Low German");
+ isoCodeToEnWikiName.put("pdc", "Pennsylvania German");
isoCodeToEnWikiName.put("DE", "German");
isoCodeToEnWikiName.put("grc", "Ancient Greek");
isoCodeToEnWikiName.put("EL", "Greek");
isoCodeToEnWikiName.put("HT", "Haitian Creole");
isoCodeToEnWikiName.put("LB", "Luxembourgish");
isoCodeToEnWikiName.put("MK", "Macedonian");
- isoCodeToEnWikiName.put("GV", "Manx");
isoCodeToEnWikiName.put("scn", "Sicilian");
isoCodeToEnWikiName.put("cu", "Old Church Slavonic");
- isoCodeToEnWikiName.put("pdc", "Pennsylvania German");
+ isoCodeToEnWikiName.put("rom", "Romani");
// No longer exists in EN:
// isoCodeToEnWikiName.put("BS", "Bosnian");
//assert Language.isoCodeToResources.keySet().containsAll(isoCodeToEnWikiName.keySet());
}
- public static final Map<String,Map<String,String>> wikiCodeToIsoCodeToWikiName = new LinkedHashMap<String, Map<String,String>>();
+ public static final Map<String,Map<String,String>> wikiCodeToIsoCodeToWikiName = new LinkedHashMap<>();
static {
+ Map<String,String> isoCodeToWikiName;
+
// en
wikiCodeToIsoCodeToWikiName.put("en", isoCodeToEnWikiName);
- Map<String,String> isoCodeToWikiName;
-
// egrep -o '\{\{Wortart[^}]+\}\}' dewiktionary-pages-articles.xml | cut -d \| -f3 | sort | uniq -c | sort -nr
- isoCodeToWikiName = new LinkedHashMap<String, String>();
+ isoCodeToWikiName = new LinkedHashMap<>();
wikiCodeToIsoCodeToWikiName.put("de", isoCodeToWikiName);
+ isoCodeToWikiName.put("nds", "Niederdeutsch");
isoCodeToWikiName.put("DE", "Deutsch");
isoCodeToWikiName.put("EN", "Englisch");
isoCodeToWikiName.put("IT", "Italienisch");
isoCodeToWikiName.put("HU", "Ungarisch");
isoCodeToWikiName.put("SV", "Schwedisch");
isoCodeToWikiName.put("ES", "Spanisch");
+ isoCodeToWikiName.put("RO", "Rumänisch");
// egrep -o '== *\{\{langue\|[a-zA-Z]+\}\} *==' frwiktionary-pages-articles.xml | sort | uniq -c | sort -nr
- isoCodeToWikiName = new LinkedHashMap<String, String>();
+ isoCodeToWikiName = new LinkedHashMap<>();
wikiCodeToIsoCodeToWikiName.put("fr", isoCodeToWikiName);
isoCodeToWikiName.put("FR", Pattern.quote("{{langue|fr}}"));
isoCodeToWikiName.put("RU", Pattern.quote("{{langue|ru}}"));
isoCodeToWikiName.put("AR", Pattern.quote("{{langue|ar}}")); // Arabic
isoCodeToWikiName.put("BG", Pattern.quote("{{langue|bg}}")); // Bulgarian
+ isoCodeToWikiName.put("EL", Pattern.quote("{{langue|el}}"));
isoCodeToWikiName.put("EN", Pattern.quote("{{langue|en}}"));
//isoCodeToWikiName.put("", Pattern.quote("{{langue|sl}}"));
isoCodeToWikiName.put("LA", Pattern.quote("{{langue|la}}"));
isoCodeToWikiName.put("PT", Pattern.quote("{{langue|pt}}"));
// egrep -o '= *\{\{-[a-z]+-\}\} *=' itwiktionary-pages-articles.xml | sort | uniq -c | sort -n
- isoCodeToWikiName = new LinkedHashMap<String, String>();
+ isoCodeToWikiName = new LinkedHashMap<>();
wikiCodeToIsoCodeToWikiName.put("it", isoCodeToWikiName);
isoCodeToWikiName.put("IT", "\\{\\{-(it|scn|nap|cal|lmo)-\\}\\}"); // scn, nap, cal, lmo
isoCodeToWikiName.put("EN", Pattern.quote("{{-en-}}"));
isoCodeToWikiName.put("RU", Pattern.quote("{{-ru-}}"));
// egrep -o '== *\{\{lengua\|[a-zA-Z]+\}\} *==' eswiktionary-pages-articles.xml | sort | uniq -c | sort -nr
- isoCodeToWikiName = new LinkedHashMap<String, String>();
+ isoCodeToWikiName = new LinkedHashMap<>();
wikiCodeToIsoCodeToWikiName.put("es", isoCodeToWikiName);
+ isoCodeToWikiName.put("AR", Pattern.quote("{{lengua|ar}}"));
isoCodeToWikiName.put("ES", Pattern.quote("{{lengua|es}}"));
isoCodeToWikiName.put("EN", Pattern.quote("{{lengua|en}}"));
isoCodeToWikiName.put("FR", Pattern.quote("{{lengua|fr}}"));
isoCodeToWikiName.put("IT", Pattern.quote("{{lengua|it}}"));
// The Portuguese Wiktionary appears to use the same {{-xx-}} heading pattern as the Italian one.
- isoCodeToWikiName = new LinkedHashMap<String, String>();
+ isoCodeToWikiName = new LinkedHashMap<>();
wikiCodeToIsoCodeToWikiName.put("pt", isoCodeToWikiName);
isoCodeToWikiName.put("PT", Pattern.quote("{{-pt-}}"));
isoCodeToWikiName.put("EN", Pattern.quote("{{-en-}}"));