From 761f748ec034b093581535f7a4befa8e6e3c6873 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Reimar=20D=C3=B6ffinger?= Date: Sat, 11 Feb 2017 17:32:56 +0100 Subject: [PATCH] Support pt and es wiktionary in splitter. The ES format seems to have changed so we can now actually use it. --- .../parser/wiktionary/WiktionaryLangs.java | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/src/com/hughes/android/dictionary/parser/wiktionary/WiktionaryLangs.java b/src/com/hughes/android/dictionary/parser/wiktionary/WiktionaryLangs.java index 3efc676..8049257 100644 --- a/src/com/hughes/android/dictionary/parser/wiktionary/WiktionaryLangs.java +++ b/src/com/hughes/android/dictionary/parser/wiktionary/WiktionaryLangs.java @@ -193,10 +193,20 @@ public class WiktionaryLangs { isoCodeToWikiName.put("SV", Pattern.quote("{{-sv-}}")); isoCodeToWikiName.put("RU", Pattern.quote("{{-ru-}}")); - // There seems to be no consistent pattern and few foreign language entries anyway + // egrep -o '== *\{\{lengua\|[a-zA-Z]+\}\} *==' eswiktionary-pages-articles.xml | sort | uniq -c | sort -nr isoCodeToWikiName = new LinkedHashMap(); wikiCodeToIsoCodeToWikiName.put("es", isoCodeToWikiName); - isoCodeToWikiName.put("ES", Pattern.quote("{{ES")); + isoCodeToWikiName.put("ES", Pattern.quote("{{lengua|es}}")); + isoCodeToWikiName.put("EN", Pattern.quote("{{lengua|en}}")); + isoCodeToWikiName.put("FR", Pattern.quote("{{lengua|fr}}")); + isoCodeToWikiName.put("IT", Pattern.quote("{{lengua|it}}")); + + // Pattern seems to match Italian one + isoCodeToWikiName = new LinkedHashMap(); + wikiCodeToIsoCodeToWikiName.put("pt", isoCodeToWikiName); + isoCodeToWikiName.put("PT", Pattern.quote("{{-pt-}}")); + isoCodeToWikiName.put("EN", Pattern.quote("{{-en-}}")); + isoCodeToWikiName.put("ES", Pattern.quote("{{-es-}}")); } public static String getEnglishName(String langCode) { String name = isoCodeToEnWikiName.get(langCode); -- 2.43.0