]> gitweb.fperrin.net Git - DictionaryPC.git/blobdiff - src/com/hughes/android/dictionary/engine/WiktionarySplitter.java
Improve wiktionary splitter for Spanish and Portuguese
[DictionaryPC.git] / src / com / hughes / android / dictionary / engine / WiktionarySplitter.java
index 37344cae59362fa1e5da2342bb36e2e98f708724..435c3f212cd623cf9456054768d6feab355abfe4 100644 (file)
@@ -105,7 +105,7 @@ public class WiktionarySplitter extends org.xml.sax.helpers.DefaultHandler {
                     parser.parse(new BufferedInputStream(in), this);
                 }
             } catch (Exception e) {
-                System.err.println("Exception during parse, lastPageTitle=" + lastPageTitle + ", titleBuilder=" + titleBuilder.toString());
+                System.err.println("Exception during parse, lastPageTitle=" + lastPageTitle + ", titleBuilder=" + titleBuilder.toString() + " of file " + pathToSelectorsEntry.getKey());
                 throw e;
             }
 
@@ -187,6 +187,15 @@ public class WiktionarySplitter extends org.xml.sax.helpers.DefaultHandler {
                 title.startsWith("Plantilla:") ||
                 title.startsWith("Wikcionario:") ||
 
+                // PT
+                title.startsWith("Ajuda:") ||
+                title.startsWith("Apêndice:") ||
+                title.startsWith("Citações:") ||
+                title.startsWith("Portal:") ||
+                title.startsWith("Predefinição:") ||
+                title.startsWith("Vocabulário:") ||
+                title.startsWith("Wikcionário:") ||
+
                 // sentinel
                 false
                ) return;
@@ -196,6 +205,8 @@ public class WiktionarySplitter extends org.xml.sax.helpers.DefaultHandler {
         }
 
         String text = textBuilder.toString();
+        // Workaround for Spanish wiktionary {{ES}} pattern
+        text = text.replace("{{ES}}", "== {{lengua|es}} ==");
         String translingual = "";
         int start = 0;
         final Matcher startMatcher = headingStart.matcher(text);