// Reconstructed from the post-image of a gitweb blobdiff of
// src/com/hughes/android/dictionary/parser/wiktionary/WiktionaryLangs.java
// (blob 3af9a13 -> 86dc052, DictionaryPC.git).
// NOTE(review): the diff only shows the file from line 23 onwards; the package
// declaration and any earlier header lines were not visible and must be
// restored from the repository.

import java.util.LinkedHashMap;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.regex.Pattern;

/**
 * Static tables that map language codes to the names (or heading templates)
 * under which each Wiktionary edition files its entries.
 */
public class WiktionaryLangs {

    /**
     * Language code to English-Wiktionary language name, in insertion order.
     *
     * <p>Order matters: per the note next to the German entries, the matcher
     * takes the first match rather than the best one, so a more specific name
     * ("Low German", "Ancient Greek") must be inserted before the name it
     * contains ("German", "Greek"). A value may list alternatives separated by
     * {@code '|'} (e.g. {@code "Slovene|Slovenian"}).
     */
    public static final Map<String, String> isoCodeToEnWikiName = new LinkedHashMap<String, String>();
    static {
        isoCodeToEnWikiName.put("AF", "Afrikaans");
        isoCodeToEnWikiName.put("SQ", "Albanian");
        isoCodeToEnWikiName.put("AR", "Arabic");
        isoCodeToEnWikiName.put("HY", "Armenian");
        isoCodeToEnWikiName.put("BE", "Belarusian");
        isoCodeToEnWikiName.put("BN", "Bengali");
        isoCodeToEnWikiName.put("BG", "Bulgarian");
        isoCodeToEnWikiName.put("CA", "Catalan");
        isoCodeToEnWikiName.put("SH", "Serbo-Croatian");
        isoCodeToEnWikiName.put("CS", "Czech");
        isoCodeToEnWikiName.put("ZH", "Chinese");
        isoCodeToEnWikiName.put("cmn", "Mandarin");
        isoCodeToEnWikiName.put("yue", "Cantonese");
        isoCodeToEnWikiName.put("DA", "Danish");
        isoCodeToEnWikiName.put("NL", "Dutch");
        isoCodeToEnWikiName.put("EN", "English");
        isoCodeToEnWikiName.put("EO", "Esperanto");
        isoCodeToEnWikiName.put("ET", "Estonian");
        isoCodeToEnWikiName.put("FI", "Finnish");
        isoCodeToEnWikiName.put("FR", "French");
        // Note: must be before German since matcher
        // simply takes first match instead of best.
        isoCodeToEnWikiName.put("nds", "Low German");
        isoCodeToEnWikiName.put("pdc", "Pennsylvania German");
        isoCodeToEnWikiName.put("DE", "German");
        // Same ordering constraint: "Ancient Greek" before "Greek".
        isoCodeToEnWikiName.put("grc", "Ancient Greek");
        isoCodeToEnWikiName.put("EL", "Greek");
        isoCodeToEnWikiName.put("haw", "Hawaiian");
        isoCodeToEnWikiName.put("HE", "Hebrew");
        isoCodeToEnWikiName.put("HI", "Hindi");
        isoCodeToEnWikiName.put("HU", "Hungarian");
        isoCodeToEnWikiName.put("IS", "Icelandic");
        isoCodeToEnWikiName.put("ID", "Indonesian");
        isoCodeToEnWikiName.put("GA", "Irish");
        isoCodeToEnWikiName.put("GD", "Gaelic");
        isoCodeToEnWikiName.put("GV", "Manx");
        isoCodeToEnWikiName.put("IT", "Italian");
        isoCodeToEnWikiName.put("LA", "Latin");
        isoCodeToEnWikiName.put("LV", "Latvian");
        isoCodeToEnWikiName.put("LT", "Lithuanian");
        isoCodeToEnWikiName.put("JA", "Japanese");
        isoCodeToEnWikiName.put("KO", "Korean");
        isoCodeToEnWikiName.put("KU", "Kurdish");
        isoCodeToEnWikiName.put("LO", "Lao");
        // "Malayalam" before "Malay" for the same first-match reason.
        isoCodeToEnWikiName.put("ML", "Malayalam");
        isoCodeToEnWikiName.put("MS", "Malay");
        isoCodeToEnWikiName.put("MI", "Maori");
        isoCodeToEnWikiName.put("MN", "Mongolian");
        isoCodeToEnWikiName.put("NE", "Nepali");
        isoCodeToEnWikiName.put("NO", "Norwegian");
        isoCodeToEnWikiName.put("FA", "Persian");
        isoCodeToEnWikiName.put("PL", "Polish");
        isoCodeToEnWikiName.put("PT", "Portuguese");
        isoCodeToEnWikiName.put("PA", "Punjabi");
        isoCodeToEnWikiName.put("RO", "Romanian");
        isoCodeToEnWikiName.put("RU", "Russian");
        isoCodeToEnWikiName.put("SA", "Sanskrit");
        isoCodeToEnWikiName.put("SK", "Slovak");
        isoCodeToEnWikiName.put("SL", "Slovene|Slovenian");
        isoCodeToEnWikiName.put("SO", "Somali");
        isoCodeToEnWikiName.put("ES", "Spanish");
        isoCodeToEnWikiName.put("SW", "Swahili");
        isoCodeToEnWikiName.put("SV", "Swedish");
        isoCodeToEnWikiName.put("TL", "Tagalog");
        isoCodeToEnWikiName.put("TG", "Tajik");
        isoCodeToEnWikiName.put("TA", "Tamil");
        isoCodeToEnWikiName.put("TH", "Thai");
        isoCodeToEnWikiName.put("BO", "Tibetan");
        isoCodeToEnWikiName.put("TR", "Turkish");
        isoCodeToEnWikiName.put("UK", "Ukrainian");
        isoCodeToEnWikiName.put("UR", "Urdu");
        isoCodeToEnWikiName.put("VI", "Vietnamese");
        // NOTE(review): the ISO 639-1 code for Welsh is "CY"; "CI" looks like a
        // typo. Left unchanged because the key is visible to callers - confirm
        // against Language.isoCodeToResources before renaming.
        isoCodeToEnWikiName.put("CI", "Welsh");
        isoCodeToEnWikiName.put("YI", "Yiddish");
        isoCodeToEnWikiName.put("ZU", "Zulu");
        isoCodeToEnWikiName.put("AZ", "Azeri");
        isoCodeToEnWikiName.put("EU", "Basque");
        isoCodeToEnWikiName.put("BR", "Breton");
        isoCodeToEnWikiName.put("MR", "Marathi");
        isoCodeToEnWikiName.put("FO", "Faroese");
        isoCodeToEnWikiName.put("GL", "Galician");
        isoCodeToEnWikiName.put("KA", "Georgian");
        isoCodeToEnWikiName.put("HT", "Haitian Creole");
        isoCodeToEnWikiName.put("LB", "Luxembourgish");
        isoCodeToEnWikiName.put("MK", "Macedonian");
        isoCodeToEnWikiName.put("scn", "Sicilian");
        isoCodeToEnWikiName.put("cu", "Old Church Slavonic");
        isoCodeToEnWikiName.put("rom", "Romani");

        // No longer exists in EN:
        // isoCodeToEnWikiName.put("BS", "Bosnian");
        // isoCodeToEnWikiName.put("SR", "Serbian");
        // isoCodeToEnWikiName.put("HR", "Croatian");

        // Font doesn't work:
        //isoCodeToEnWikiName.put("MY", "Burmese");

        // Consistency check against Language.isoCodeToResources is disabled in
        // this revision:
        //Set missing = new LinkedHashSet(isoCodeToEnWikiName.keySet());
        //missing.removeAll(Language.isoCodeToResources.keySet());
        //System.out.println(missing);
        //assert Language.isoCodeToResources.keySet().containsAll(isoCodeToEnWikiName.keySet());
    }

    /**
     * Wiktionary edition code ("en", "de", "fr", "it", "es", "pt") to that
     * edition's own language-code table.
     *
     * <p>The "en" entry is the very same map instance as
     * {@link #isoCodeToEnWikiName}. For the other editions the values are
     * either plain language names (de) or heading templates; the template
     * values are passed through {@link Pattern#quote(String)} or hand-escaped,
     * i.e. they are written as regular expressions.
     */
    public static final Map<String, Map<String, String>> wikiCodeToIsoCodeToWikiName =
            new LinkedHashMap<String, Map<String, String>>();
    static {
        // en
        wikiCodeToIsoCodeToWikiName.put("en", isoCodeToEnWikiName);

        Map<String, String> isoCodeToWikiName;

        // egrep -o '\{\{Wortart[^}]+\}\}' dewiktionary-pages-articles.xml | cut -d \| -f3 | sort | uniq -c | sort -nr
        isoCodeToWikiName = new LinkedHashMap<String, String>();
        wikiCodeToIsoCodeToWikiName.put("de", isoCodeToWikiName);
        // "Niederdeutsch" before "Deutsch": first match wins.
        isoCodeToWikiName.put("nds", "Niederdeutsch");
        isoCodeToWikiName.put("DE", "Deutsch");
        isoCodeToWikiName.put("EN", "Englisch");
        isoCodeToWikiName.put("IT", "Italienisch");
        isoCodeToWikiName.put("PL", "Polnisch");
        // Umlauts are written as Unicode escapes so the file compiles the same
        // under any source encoding; the runtime strings are unchanged.
        isoCodeToWikiName.put("FR", "Franz\u00f6sisch");
        isoCodeToWikiName.put("EO", "Esperanto");
        isoCodeToWikiName.put("CA", "Katalanisch");
        isoCodeToWikiName.put("LA", "Latein");
        isoCodeToWikiName.put("CS", "Tschechisch");
        isoCodeToWikiName.put("HU", "Ungarisch");
        isoCodeToWikiName.put("SV", "Schwedisch");
        isoCodeToWikiName.put("ES", "Spanisch");
        isoCodeToWikiName.put("RO", "Rum\u00e4nisch");

        // egrep -o '== *\{\{langue\|[a-zA-Z]+\}\} *==' frwiktionary-pages-articles.xml | sort | uniq -c | sort -nr
        isoCodeToWikiName = new LinkedHashMap<String, String>();
        wikiCodeToIsoCodeToWikiName.put("fr", isoCodeToWikiName);
        isoCodeToWikiName.put("FR", Pattern.quote("{{langue|fr}}"));
        isoCodeToWikiName.put("RU", Pattern.quote("{{langue|ru}}"));
        isoCodeToWikiName.put("AR", Pattern.quote("{{langue|ar}}"));  // Arabic
        isoCodeToWikiName.put("BG", Pattern.quote("{{langue|bg}}"));  // Bulgarian
        isoCodeToWikiName.put("EN", Pattern.quote("{{langue|en}}"));
        //isoCodeToWikiName.put("", Pattern.quote("{{langue|sl}}"));
        isoCodeToWikiName.put("LA", Pattern.quote("{{langue|la}}"));
        isoCodeToWikiName.put("IT", Pattern.quote("{{langue|it}}"));
        isoCodeToWikiName.put("EO", Pattern.quote("{{langue|eo}}"));
        isoCodeToWikiName.put("CS", Pattern.quote("{{langue|cs}}"));  // Czech
        isoCodeToWikiName.put("NL", Pattern.quote("{{langue|nl}}"));  // Dutch
        //isoCodeToWikiName.put("", Pattern.quote("{{langue|mg}}"));
        //isoCodeToWikiName.put("", Pattern.quote("{{langue|hsb}}"));
        isoCodeToWikiName.put("ZH", Pattern.quote("{{langue|zh}}"));
        isoCodeToWikiName.put("cmn", Pattern.quote("{{langue|cmn}}"));
        isoCodeToWikiName.put("yue", Pattern.quote("{{langue|yue}}"));
        isoCodeToWikiName.put("JA", Pattern.quote("{{langue|ja}}"));
        isoCodeToWikiName.put("DE", Pattern.quote("{{langue|de}}"));
        isoCodeToWikiName.put("IS", Pattern.quote("{{langue|is}}"));  // Icelandic
        isoCodeToWikiName.put("ES", Pattern.quote("{{langue|es}}"));
        isoCodeToWikiName.put("UK", Pattern.quote("{{langue|uk}}"));
        isoCodeToWikiName.put("PT", Pattern.quote("{{langue|pt}}"));

        // egrep -o '= *\{\{-[a-z]+-\}\} *=' itwiktionary-pages-articles.xml | sort | uniq -c | sort -n
        isoCodeToWikiName = new LinkedHashMap<String, String>();
        wikiCodeToIsoCodeToWikiName.put("it", isoCodeToWikiName);
        // Hand-written regex: several regional codes are folded into Italian.
        isoCodeToWikiName.put("IT", "\\{\\{-(it|scn|nap|cal|lmo)-\\}\\}");  // scn, nap, cal, lmo
        isoCodeToWikiName.put("EN", Pattern.quote("{{-en-}}"));
        isoCodeToWikiName.put("FR", Pattern.quote("{{-fr-}}"));
        isoCodeToWikiName.put("DE", Pattern.quote("{{-de-}}"));
        isoCodeToWikiName.put("ES", Pattern.quote("{{-es-}}"));
        isoCodeToWikiName.put("JA", Pattern.quote("{{-ja-}}"));
        isoCodeToWikiName.put("PL", Pattern.quote("{{-pl-}}"));
        isoCodeToWikiName.put("NL", Pattern.quote("{{-nl-}}"));
        isoCodeToWikiName.put("LV", Pattern.quote("{{-lv-}}"));
        isoCodeToWikiName.put("LA", Pattern.quote("{{-la-}}"));
        isoCodeToWikiName.put("HU", Pattern.quote("{{-hu-}}"));
        // NOTE(review): "grc" is the Ancient Greek code, yet it is filed under
        // "EL" (Greek) here, while the English table keeps "grc" and "EL"
        // apart. Possibly intentional - confirm before changing.
        isoCodeToWikiName.put("EL", Pattern.quote("{{-grc-}}"));
        isoCodeToWikiName.put("SV", Pattern.quote("{{-sv-}}"));
        isoCodeToWikiName.put("RU", Pattern.quote("{{-ru-}}"));

        // egrep -o '== *\{\{lengua\|[a-zA-Z]+\}\} *==' eswiktionary-pages-articles.xml | sort | uniq -c | sort -nr
        isoCodeToWikiName = new LinkedHashMap<String, String>();
        wikiCodeToIsoCodeToWikiName.put("es", isoCodeToWikiName);
        isoCodeToWikiName.put("AR", Pattern.quote("{{lengua|ar}}"));
        isoCodeToWikiName.put("ES", Pattern.quote("{{lengua|es}}"));
        isoCodeToWikiName.put("EN", Pattern.quote("{{lengua|en}}"));
        isoCodeToWikiName.put("FR", Pattern.quote("{{lengua|fr}}"));
        isoCodeToWikiName.put("IT", Pattern.quote("{{lengua|it}}"));

        // Pattern seems to match Italian one
        isoCodeToWikiName = new LinkedHashMap<String, String>();
        wikiCodeToIsoCodeToWikiName.put("pt", isoCodeToWikiName);
        isoCodeToWikiName.put("PT", Pattern.quote("{{-pt-}}"));
        isoCodeToWikiName.put("EN", Pattern.quote("{{-en-}}"));
        isoCodeToWikiName.put("ES", Pattern.quote("{{-es-}}"));
    }

    /**
     * Returns the plain English display name for a language code.
     *
     * <p>The code is looked up in {@link #isoCodeToEnWikiName} exactly as
     * given, and if that fails, again in upper case (so {@code "sl"} finds
     * {@code "SL"}). Table values may carry matcher syntax; only the text
     * before the first {@code '|'} (or, failing that, the first {@code '$'})
     * is returned.
     *
     * @param langCode language code, e.g. {@code "DE"}, {@code "de"} or
     *        {@code "cmn"}; may be {@code null}
     * @return the English name, or {@code null} if {@code langCode} is
     *         {@code null} or unknown
     */
    public static String getEnglishName(String langCode) {
        if (langCode == null) {
            return null;
        }
        String name = isoCodeToEnWikiName.get(langCode);
        if (name == null) {
            // Locale.ROOT: with a Turkish default locale "it" would otherwise
            // upper-case to a dotted capital I and miss the "IT" key.
            name = isoCodeToEnWikiName.get(langCode.toUpperCase(Locale.ROOT));
        }
        if (name == null) {
            return null;
        }
        final int pipe = name.indexOf('|');
        if (pipe != -1) {
            return name.substring(0, pipe);
        }
        final int dollar = name.indexOf('$');
        if (dollar != -1) {
            return name.substring(0, dollar);
        }
        return name;
    }

}