gitweb.fperrin.net Git - DictionaryPC.git/commitdiff
Fix for splitting Mandarin/Cantonese/...
author Reimar Döffinger <Reimar.Doeffinger@gmx.de>
Wed, 16 Dec 2015 22:35:44 +0000 (23:35 +0100)
committer Reimar Döffinger <Reimar.Doeffinger@gmx.de>
Wed, 16 Dec 2015 22:35:44 +0000 (23:35 +0100)
src/com/hughes/android/dictionary/engine/WiktionarySplitter.java
src/com/hughes/android/dictionary/parser/wiktionary/WiktionaryLangs.java

index a5dfebc88203a51321928272635f4cfd9a0571c5..12b0c5215e772f201f8d18b15f0adaff61f7b7ad 100644 (file)
@@ -170,6 +170,7 @@ public class WiktionarySplitter extends org.xml.sax.helpers.DefaultHandler {
     }
     
     String text = textBuilder.toString();
+    String translingual = "";
     
     while (text.length() > 0) {
       // Find start.
@@ -181,6 +182,19 @@ public class WiktionarySplitter extends org.xml.sax.helpers.DefaultHandler {
       
       final String heading = startMatcher.group();
       for (final Selector selector : currentSelectors) {
+        if (heading.indexOf("Translingual") != -1) {
+          // Find end.
+          final int depth = startMatcher.group(1).length();
+          final Pattern endPattern = Pattern.compile(String.format("^={1,%d}[^=].*$", depth), Pattern.MULTILINE);
+
+          final Matcher endMatcher = endPattern.matcher(text);
+          if (endMatcher.find()) {
+            int end = endMatcher.start();
+            translingual = text.substring(0, endMatcher.start());
+            text = text.substring(end);
+            break;
+          }
+        }
         if (selector.pattern.matcher(heading).find()) {
           
           // Find end.
@@ -205,6 +219,7 @@ public class WiktionarySplitter extends org.xml.sax.helpers.DefaultHandler {
                      sectionText.charAt(dummy_end + 1) == '\n') ++dummy_end;
               sectionText = sectionText.substring(dummy_end);
           }
+          if (heading.indexOf("Japanese") == -1) sectionText += translingual;
           final Section section = new Section(title, heading, sectionText);
           
           try {
index 5c039e861ea6373c8bd95bddea12c38165a17e4a..813c69b834a66786d0ce0a8bdaa2d6b60c0077af 100644 (file)
@@ -35,7 +35,6 @@ public class WiktionaryLangs {
     isoCodeToEnWikiName.put("BG", "Bulgarian");
     isoCodeToEnWikiName.put("CA", "Catalan");
     isoCodeToEnWikiName.put("SH", "Serbo-Croatian");
-    isoCodeToEnWikiName.put("HR", "Croatian");
     isoCodeToEnWikiName.put("CS", "Czech");
     isoCodeToEnWikiName.put("ZH", "Chinese");
     isoCodeToEnWikiName.put("cmn", "Mandarin");
@@ -113,6 +112,7 @@ public class WiktionaryLangs {
     // No longer exists in EN:
     // isoCodeToEnWikiName.put("BS", "Bosnian");
     // isoCodeToEnWikiName.put("SR", "Serbian");
+    // isoCodeToEnWikiName.put("HR", "Croatian");
     
     // Font doesn't work:
     //isoCodeToEnWikiName.put("MY", "Burmese");