import com.hughes.android.dictionary.engine.Language;
import java.util.LinkedHashMap;
+import java.util.LinkedHashSet;
import java.util.Map;
+import java.util.Set;
import java.util.regex.Pattern;
public class WiktionaryLangs {
isoCodeToEnWikiName.put("HY", "Armenian");
isoCodeToEnWikiName.put("BE", "Belarusian");
isoCodeToEnWikiName.put("BN", "Bengali");
- isoCodeToEnWikiName.put("BS", "Bosnian");
isoCodeToEnWikiName.put("BG", "Bulgarian");
- isoCodeToEnWikiName.put("MY", "Burmese");
- isoCodeToEnWikiName.put("yue", "Cantonese");
isoCodeToEnWikiName.put("CA", "Catalan");
+ isoCodeToEnWikiName.put("SH", "Serbo-Croatian");
isoCodeToEnWikiName.put("HR", "Croatian");
isoCodeToEnWikiName.put("CS", "Czech");
- isoCodeToEnWikiName.put("ZH", "Chinese|Mandarin");
+ isoCodeToEnWikiName.put("ZH", "Chinese");
+ isoCodeToEnWikiName.put("cmn", "Mandarin");
+ isoCodeToEnWikiName.put("yue", "Cantonese");
isoCodeToEnWikiName.put("DA", "Danish");
isoCodeToEnWikiName.put("NL", "Dutch");
isoCodeToEnWikiName.put("EN", "English");
isoCodeToEnWikiName.put("ID", "Indonesian");
isoCodeToEnWikiName.put("GA", "Irish");
isoCodeToEnWikiName.put("GD", "Gaelic");
+ isoCodeToEnWikiName.put("GV", "Manx");
isoCodeToEnWikiName.put("IT", "Italian");
isoCodeToEnWikiName.put("LA", "Latin");
isoCodeToEnWikiName.put("LV", "Latvian");
isoCodeToEnWikiName.put("KO", "Korean");
isoCodeToEnWikiName.put("KU", "Kurdish");
isoCodeToEnWikiName.put("LO", "Lao");
- isoCodeToEnWikiName.put("MS", "Malay");
+ isoCodeToEnWikiName.put("MS", "Malay$");
isoCodeToEnWikiName.put("ML", "Malayalam");
isoCodeToEnWikiName.put("MI", "Maori");
isoCodeToEnWikiName.put("MN", "Mongolian");
isoCodeToEnWikiName.put("RO", "Romanian");
isoCodeToEnWikiName.put("RU", "Russian");
isoCodeToEnWikiName.put("SA", "Sanskrit");
- isoCodeToEnWikiName.put("SR", "Serbian");
isoCodeToEnWikiName.put("SK", "Slovak");
isoCodeToEnWikiName.put("SL", "Slovene|Slovenian");
isoCodeToEnWikiName.put("SO", "Somali");
isoCodeToEnWikiName.put("CI", "Welsh");
isoCodeToEnWikiName.put("YI", "Yiddish");
isoCodeToEnWikiName.put("ZU", "Zulu");
-
isoCodeToEnWikiName.put("AZ", "Azeri");
isoCodeToEnWikiName.put("EU", "Basque");
isoCodeToEnWikiName.put("BR", "Breton");
- isoCodeToEnWikiName.put("MR", "Burmese");
+ isoCodeToEnWikiName.put("MR", "Marathi");
isoCodeToEnWikiName.put("FO", "Faroese");
isoCodeToEnWikiName.put("GL", "Galician");
isoCodeToEnWikiName.put("KA", "Georgian");
isoCodeToEnWikiName.put("HT", "Haitian Creole");
isoCodeToEnWikiName.put("LB", "Luxembourgish");
isoCodeToEnWikiName.put("MK", "Macedonian");
+ isoCodeToEnWikiName.put("GV", "Manx");
+
+ // No longer exist as separate languages in the EN Wiktionary:
+ // isoCodeToEnWikiName.put("BS", "Bosnian");
+ // isoCodeToEnWikiName.put("SR", "Serbian");
+ // Font doesn't work:
+ //isoCodeToEnWikiName.put("MY", "Burmese");
+
+
+ {
+ Set<String> missing = new LinkedHashSet<String>(isoCodeToEnWikiName.keySet());
+ missing.removeAll(Language.isoCodeToResources.keySet());
+ //System.out.println(missing);
+ }
assert Language.isoCodeToResources.keySet().containsAll(isoCodeToEnWikiName.keySet());
}
// egrep -o '\{\{langue\|[a-zA-Z]+\}\}' frwiktionary-pages-articles.xml | sort | uniq -c | sort -nr
isoCodeToWikiName = new LinkedHashMap<String, String>();
wikiCodeToIsoCodeToWikiName.put("fr", isoCodeToWikiName);
- isoCodeToWikiName.put("FR", Pattern.quote("{{=fr=}}"));
- isoCodeToWikiName.put("RU", Pattern.quote("{{=ru=}}"));
- isoCodeToWikiName.put("BG", Pattern.quote("{{=bg=}}")); // Bulgarian
- isoCodeToWikiName.put("EN", Pattern.quote("{{=en=}}"));
- //isoCodeToWikiName.put("", Pattern.quote("{{=sl=}}"));
- isoCodeToWikiName.put("LA", Pattern.quote("{{=la=}}"));
- isoCodeToWikiName.put("IT", Pattern.quote("{{=it=}}"));
- isoCodeToWikiName.put("EO", Pattern.quote("{{=eo=}}"));
- isoCodeToWikiName.put("CS", Pattern.quote("{{=cs=}}")); // Czech
- isoCodeToWikiName.put("NL", Pattern.quote("{{=nl=}}")); // Dutch
- //isoCodeToWikiName.put("", Pattern.quote("{{=mg=}}"));
- //isoCodeToWikiName.put("", Pattern.quote("{{=hsb=}}"));
- isoCodeToWikiName.put("ZH", Pattern.quote("{{=zh=}}"));
- isoCodeToWikiName.put("JA", Pattern.quote("{{=ja=}}"));
- isoCodeToWikiName.put("DE", Pattern.quote("{{=de=}}"));
- isoCodeToWikiName.put("IS", Pattern.quote("{{=is=}}")); // Icelandic
- isoCodeToWikiName.put("ES", Pattern.quote("{{=es=}}"));
- isoCodeToWikiName.put("UK", Pattern.quote("{{=uk=}}"));
+ isoCodeToWikiName.put("FR", Pattern.quote("{{langue|fr}}"));
+ isoCodeToWikiName.put("RU", Pattern.quote("{{langue|ru}}"));
+ isoCodeToWikiName.put("BG", Pattern.quote("{{langue|bg}}")); // Bulgarian
+ isoCodeToWikiName.put("EN", Pattern.quote("{{langue|en}}"));
+ //isoCodeToWikiName.put("", Pattern.quote("{{langue|sl}}"));
+ isoCodeToWikiName.put("LA", Pattern.quote("{{langue|la}}"));
+ isoCodeToWikiName.put("IT", Pattern.quote("{{langue|it}}"));
+ isoCodeToWikiName.put("EO", Pattern.quote("{{langue|eo}}"));
+ isoCodeToWikiName.put("CS", Pattern.quote("{{langue|cs}}")); // Czech
+ isoCodeToWikiName.put("NL", Pattern.quote("{{langue|nl}}")); // Dutch
+ //isoCodeToWikiName.put("", Pattern.quote("{{langue|mg}}"));
+ //isoCodeToWikiName.put("", Pattern.quote("{{langue|hsb}}"));
+ isoCodeToWikiName.put("ZH", Pattern.quote("{{langue|zh}}"));
+ isoCodeToWikiName.put("cmn", Pattern.quote("{{langue|cmn}}"));
+ isoCodeToWikiName.put("yue", Pattern.quote("{{langue|yue}}"));
+ isoCodeToWikiName.put("JA", Pattern.quote("{{langue|ja}}"));
+ isoCodeToWikiName.put("DE", Pattern.quote("{{langue|de}}"));
+ isoCodeToWikiName.put("IS", Pattern.quote("{{langue|is}}")); // Icelandic
+ isoCodeToWikiName.put("ES", Pattern.quote("{{langue|es}}"));
+ isoCodeToWikiName.put("UK", Pattern.quote("{{langue|uk}}"));
// egrep -o '= *\{\{-[a-z]+-\}\} *=' itwiktionary-pages-articles.xml | sort | uniq -c | sort -n
isoCodeToWikiName = new LinkedHashMap<String, String>();
return null;
}
if (name.indexOf('|') != -1) {
- return name.substring(name.indexOf('|'));
+ return name.substring(0, name.indexOf('|'));
+ }
+ if (name.indexOf('$') != -1) {
+ return name.substring(0, name.indexOf('$'));
}
return name; // can be null.
}