X-Git-Url: http://gitweb.fperrin.net/?p=DictionaryPC.git;a=blobdiff_plain;f=src%2Fcom%2Fhughes%2Fandroid%2Fdictionary%2Fparser%2Fwiktionary%2FWiktionaryLangs.java;h=7f52642821bc4b8b2bbafac65be248a0642fa504;hp=4acdef73d0dc4e23eeda7fbf048839c64da8aa1c;hb=16c4f59950e8e6beabe312dd03e7678d084fe25b;hpb=0cde0a508bc65074c94b408e9f74b01aca9b8b29 diff --git a/src/com/hughes/android/dictionary/parser/wiktionary/WiktionaryLangs.java b/src/com/hughes/android/dictionary/parser/wiktionary/WiktionaryLangs.java index 4acdef7..7f52642 100644 --- a/src/com/hughes/android/dictionary/parser/wiktionary/WiktionaryLangs.java +++ b/src/com/hughes/android/dictionary/parser/wiktionary/WiktionaryLangs.java @@ -14,12 +14,12 @@ package com.hughes.android.dictionary.parser.wiktionary; -import com.hughes.android.dictionary.R; import com.hughes.android.dictionary.engine.Language; -import com.hughes.android.dictionary.engine.Language.LanguageResources; import java.util.LinkedHashMap; +import java.util.LinkedHashSet; import java.util.Map; +import java.util.Set; import java.util.regex.Pattern; public class WiktionaryLangs { @@ -32,14 +32,13 @@ public class WiktionaryLangs { isoCodeToEnWikiName.put("HY", "Armenian"); isoCodeToEnWikiName.put("BE", "Belarusian"); isoCodeToEnWikiName.put("BN", "Bengali"); - isoCodeToEnWikiName.put("BS", "Bosnian"); isoCodeToEnWikiName.put("BG", "Bulgarian"); - isoCodeToEnWikiName.put("MY", "Burmese"); - isoCodeToEnWikiName.put("yue", "Cantonese"); isoCodeToEnWikiName.put("CA", "Catalan"); - isoCodeToEnWikiName.put("HR", "Croatian"); + isoCodeToEnWikiName.put("SH", "Serbo-Croatian"); isoCodeToEnWikiName.put("CS", "Czech"); - isoCodeToEnWikiName.put("ZH", "Chinese|Mandarin"); + isoCodeToEnWikiName.put("ZH", "Chinese"); + isoCodeToEnWikiName.put("cmn", "Mandarin"); + isoCodeToEnWikiName.put("yue", "Cantonese"); isoCodeToEnWikiName.put("DA", "Danish"); isoCodeToEnWikiName.put("NL", "Dutch"); isoCodeToEnWikiName.put("EN", "English"); @@ -48,8 +47,8 @@ public class WiktionaryLangs { isoCodeToEnWikiName.put("FI", "Finnish"); isoCodeToEnWikiName.put("FR", "French"); isoCodeToEnWikiName.put("DE", "German"); - isoCodeToEnWikiName.put("EL", "Greek"); isoCodeToEnWikiName.put("grc", "Ancient Greek"); + isoCodeToEnWikiName.put("EL", "Greek"); isoCodeToEnWikiName.put("haw", "Hawaiian"); isoCodeToEnWikiName.put("HE", "Hebrew"); isoCodeToEnWikiName.put("HI", "Hindi"); @@ -58,6 +57,7 @@ public class WiktionaryLangs { isoCodeToEnWikiName.put("ID", "Indonesian"); isoCodeToEnWikiName.put("GA", "Irish"); isoCodeToEnWikiName.put("GD", "Gaelic"); + isoCodeToEnWikiName.put("GV", "Manx"); isoCodeToEnWikiName.put("IT", "Italian"); isoCodeToEnWikiName.put("LA", "Latin"); isoCodeToEnWikiName.put("LV", "Latvian"); @@ -66,8 +66,8 @@ public class WiktionaryLangs { isoCodeToEnWikiName.put("KO", "Korean"); isoCodeToEnWikiName.put("KU", "Kurdish"); isoCodeToEnWikiName.put("LO", "Lao"); - isoCodeToEnWikiName.put("MS", "Malay"); isoCodeToEnWikiName.put("ML", "Malayalam"); + isoCodeToEnWikiName.put("MS", "Malay"); isoCodeToEnWikiName.put("MI", "Maori"); isoCodeToEnWikiName.put("MN", "Mongolian"); isoCodeToEnWikiName.put("NE", "Nepali"); @@ -79,7 +79,6 @@ public class WiktionaryLangs { isoCodeToEnWikiName.put("RO", "Romanian"); isoCodeToEnWikiName.put("RU", "Russian"); isoCodeToEnWikiName.put("SA", "Sanskrit"); - isoCodeToEnWikiName.put("SR", "Serbian"); isoCodeToEnWikiName.put("SK", "Slovak"); isoCodeToEnWikiName.put("SL", "Slovene|Slovenian"); isoCodeToEnWikiName.put("SO", "Somali"); @@ -98,20 +97,33 @@ public class WiktionaryLangs { isoCodeToEnWikiName.put("CI", "Welsh"); isoCodeToEnWikiName.put("YI", "Yiddish"); isoCodeToEnWikiName.put("ZU", "Zulu"); - isoCodeToEnWikiName.put("AZ", "Azeri"); isoCodeToEnWikiName.put("EU", "Basque"); isoCodeToEnWikiName.put("BR", "Breton"); - isoCodeToEnWikiName.put("MR", "Burmese"); + isoCodeToEnWikiName.put("MR", "Marathi"); isoCodeToEnWikiName.put("FO", "Faroese"); isoCodeToEnWikiName.put("GL", "Galician"); isoCodeToEnWikiName.put("KA", "Georgian"); isoCodeToEnWikiName.put("HT", "Haitian Creole"); isoCodeToEnWikiName.put("LB", "Luxembourgish"); isoCodeToEnWikiName.put("MK", "Macedonian"); + isoCodeToEnWikiName.put("GV", "Manx"); + + // No longer exists in EN: + // isoCodeToEnWikiName.put("BS", "Bosnian"); + // isoCodeToEnWikiName.put("SR", "Serbian"); + // isoCodeToEnWikiName.put("HR", "Croatian"); + + // Font doesn't work: + //isoCodeToEnWikiName.put("MY", "Burmese"); - assert Language.isoCodeToResources.keySet().containsAll(isoCodeToEnWikiName.keySet()); + { + //Set missing = new LinkedHashSet(isoCodeToEnWikiName.keySet()); + //missing.removeAll(Language.isoCodeToResources.keySet()); + //System.out.println(missing); + } + //assert Language.isoCodeToResources.keySet().containsAll(isoCodeToEnWikiName.keySet()); } public static final Map> wikiCodeToIsoCodeToWikiName = new LinkedHashMap>(); @@ -131,33 +143,36 @@ public class WiktionaryLangs { isoCodeToWikiName.put("FR", "Französisch"); isoCodeToWikiName.put("EO", "Esperanto"); isoCodeToWikiName.put("CA", "Katalanisch"); - isoCodeToWikiName.put("LA", "Lateinisch"); + isoCodeToWikiName.put("LA", "Latein"); isoCodeToWikiName.put("CS", "Tschechisch"); isoCodeToWikiName.put("HU", "Ungarisch"); isoCodeToWikiName.put("SV", "Schwedisch"); isoCodeToWikiName.put("ES", "Spanisch"); - // egrep -o '\{\{=[a-zA-Z]+=\}\}' frwiktionary-pages-articles.xml | sort | uniq -c | sort -nr + // egrep -o '== *\{\{langue\|[a-zA-Z]+\}\} *==' frwiktionary-pages-articles.xml | sort | uniq -c | sort -nr isoCodeToWikiName = new LinkedHashMap(); wikiCodeToIsoCodeToWikiName.put("fr", isoCodeToWikiName); - isoCodeToWikiName.put("FR", Pattern.quote("{{=fr=}}")); - isoCodeToWikiName.put("RU", Pattern.quote("{{=ru=}}")); - isoCodeToWikiName.put("BG", Pattern.quote("{{=bg=}}")); // Bulgarian - isoCodeToWikiName.put("EN", Pattern.quote("{{=en=}}")); - //isoCodeToWikiName.put("", Pattern.quote("{{=sl=}}")); - isoCodeToWikiName.put("LA", Pattern.quote("{{=la=}}")); - isoCodeToWikiName.put("IT", Pattern.quote("{{=it=}}")); - isoCodeToWikiName.put("EO", Pattern.quote("{{=eo=}}")); - isoCodeToWikiName.put("CS", Pattern.quote("{{=cs=}}")); // Czech - isoCodeToWikiName.put("NL", Pattern.quote("{{=nl=}}")); // Dutch - //isoCodeToWikiName.put("", Pattern.quote("{{=mg=}}")); - //isoCodeToWikiName.put("", Pattern.quote("{{=hsb=}}")); - isoCodeToWikiName.put("ZH", Pattern.quote("{{=zh=}}")); - isoCodeToWikiName.put("JA", Pattern.quote("{{=ja=}}")); - isoCodeToWikiName.put("DE", Pattern.quote("{{=de=}}")); - isoCodeToWikiName.put("IS", Pattern.quote("{{=is=}}")); // Icelandic - isoCodeToWikiName.put("ES", Pattern.quote("{{=es=}}")); - isoCodeToWikiName.put("UK", Pattern.quote("{{=uk=}}")); + isoCodeToWikiName.put("FR", Pattern.quote("{{langue|fr}}")); + isoCodeToWikiName.put("RU", Pattern.quote("{{langue|ru}}")); + isoCodeToWikiName.put("AR", Pattern.quote("{{langue|ar}}")); // Arabic + isoCodeToWikiName.put("BG", Pattern.quote("{{langue|bg}}")); // Bulgarian + isoCodeToWikiName.put("EN", Pattern.quote("{{langue|en}}")); + //isoCodeToWikiName.put("", Pattern.quote("{{langue|sl}}")); + isoCodeToWikiName.put("LA", Pattern.quote("{{langue|la}}")); + isoCodeToWikiName.put("IT", Pattern.quote("{{langue|it}}")); + isoCodeToWikiName.put("EO", Pattern.quote("{{langue|eo}}")); + isoCodeToWikiName.put("CS", Pattern.quote("{{langue|cs}}")); // Czech + isoCodeToWikiName.put("NL", Pattern.quote("{{langue|nl}}")); // Dutch + //isoCodeToWikiName.put("", Pattern.quote("{{langue|mg}}")); + //isoCodeToWikiName.put("", Pattern.quote("{{langue|hsb}}")); + isoCodeToWikiName.put("ZH", Pattern.quote("{{langue|zh}}")); + isoCodeToWikiName.put("cmn", Pattern.quote("{{langue|cmn}}")); + isoCodeToWikiName.put("yue", Pattern.quote("{{langue|yue}}")); + isoCodeToWikiName.put("JA", Pattern.quote("{{langue|ja}}")); + isoCodeToWikiName.put("DE", Pattern.quote("{{langue|de}}")); + isoCodeToWikiName.put("IS", Pattern.quote("{{langue|is}}")); // Icelandic + isoCodeToWikiName.put("ES", Pattern.quote("{{langue|es}}")); + isoCodeToWikiName.put("UK", Pattern.quote("{{langue|uk}}")); // egrep -o '= *\{\{-[a-z]+-\}\} *=' itwiktionary-pages-articles.xml | sort | uniq -c | sort -n isoCodeToWikiName = new LinkedHashMap(); @@ -175,7 +190,12 @@ public class WiktionaryLangs { isoCodeToWikiName.put("HU", Pattern.quote("{{-hu-}}")); isoCodeToWikiName.put("EL", Pattern.quote("{{-grc-}}")); isoCodeToWikiName.put("SV", Pattern.quote("{{-sv-}}")); + isoCodeToWikiName.put("RU", Pattern.quote("{{-ru-}}")); + // There seems to be no consistent pattern and few foreign language entries anyway + isoCodeToWikiName = new LinkedHashMap(); + wikiCodeToIsoCodeToWikiName.put("es", isoCodeToWikiName); + isoCodeToWikiName.put("ES", Pattern.quote("{{ES")); } public static String getEnglishName(String langCode) { String name = isoCodeToEnWikiName.get(langCode); @@ -186,7 +206,10 @@ public class WiktionaryLangs { return null; } if (name.indexOf('|') != -1) { - return name.substring(name.indexOf('|')); + return name.substring(0, name.indexOf('|')); + } + if (name.indexOf('$') != -1) { + return name.substring(0, name.indexOf('$')); } return name; // can be null. }