From 2fdf5cd298a8767a8bf8e875fd90c92a05257462 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Reimar=20D=C3=B6ffinger?= Date: Wed, 16 Dec 2015 23:35:44 +0100 Subject: [PATCH] Fix for splitting Mandarin/Cantonese/... --- .../dictionary/engine/WiktionarySplitter.java | 15 +++++++++++++++ .../parser/wiktionary/WiktionaryLangs.java | 2 +- 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/src/com/hughes/android/dictionary/engine/WiktionarySplitter.java b/src/com/hughes/android/dictionary/engine/WiktionarySplitter.java index a5dfebc..12b0c52 100644 --- a/src/com/hughes/android/dictionary/engine/WiktionarySplitter.java +++ b/src/com/hughes/android/dictionary/engine/WiktionarySplitter.java @@ -170,6 +170,7 @@ public class WiktionarySplitter extends org.xml.sax.helpers.DefaultHandler { } String text = textBuilder.toString(); + String translingual = ""; while (text.length() > 0) { // Find start. @@ -181,6 +182,19 @@ public class WiktionarySplitter extends org.xml.sax.helpers.DefaultHandler { final String heading = startMatcher.group(); for (final Selector selector : currentSelectors) { + if (heading.indexOf("Translingual") != -1) { + // Find end. + final int depth = startMatcher.group(1).length(); + final Pattern endPattern = Pattern.compile(String.format("^={1,%d}[^=].*$", depth), Pattern.MULTILINE); + + final Matcher endMatcher = endPattern.matcher(text); + if (endMatcher.find()) { + int end = endMatcher.start(); + translingual = text.substring(0, endMatcher.start()); + text = text.substring(end); + break; + } + } if (selector.pattern.matcher(heading).find()) { // Find end. @@ -205,6 +219,7 @@ public class WiktionarySplitter extends org.xml.sax.helpers.DefaultHandler { sectionText.charAt(dummy_end + 1) == '\n') ++dummy_end; sectionText = sectionText.substring(dummy_end); } + if (heading.indexOf("Japanese") == -1) sectionText += translingual; final Section section = new Section(title, heading, sectionText); try { diff --git a/src/com/hughes/android/dictionary/parser/wiktionary/WiktionaryLangs.java b/src/com/hughes/android/dictionary/parser/wiktionary/WiktionaryLangs.java index 5c039e8..813c69b 100644 --- a/src/com/hughes/android/dictionary/parser/wiktionary/WiktionaryLangs.java +++ b/src/com/hughes/android/dictionary/parser/wiktionary/WiktionaryLangs.java @@ -35,7 +35,6 @@ public class WiktionaryLangs { isoCodeToEnWikiName.put("BG", "Bulgarian"); isoCodeToEnWikiName.put("CA", "Catalan"); isoCodeToEnWikiName.put("SH", "Serbo-Croatian"); - isoCodeToEnWikiName.put("HR", "Croatian"); isoCodeToEnWikiName.put("CS", "Czech"); isoCodeToEnWikiName.put("ZH", "Chinese"); isoCodeToEnWikiName.put("cmn", "Mandarin"); @@ -113,6 +112,7 @@ public class WiktionaryLangs { // No longer exists in EN: // isoCodeToEnWikiName.put("BS", "Bosnian"); // isoCodeToEnWikiName.put("SR", "Serbian"); + // isoCodeToEnWikiName.put("HR", "Croatian"); // Font doesn't work: //isoCodeToEnWikiName.put("MY", "Burmese"); -- 2.43.0