]> gitweb.fperrin.net Git - DictionaryPC.git/blobdiff - src/com/hughes/android/dictionary/engine/WiktionarySplitter.java
Fix WiktionarySplitter breakage.
[DictionaryPC.git] / src / com / hughes / android / dictionary / engine / WiktionarySplitter.java
index 361473e80ac3e0043b9191fb1098ab3a1298c42f..97c64a74a3766f2a315d9d95686d4c44039bed9b 100644 (file)
@@ -39,6 +39,8 @@ public class WiktionarySplitter extends org.xml.sax.helpers.DefaultHandler {
 
   // This matches the whole line, otherwise regexes don't work well on French:
   // {{=uk=}}
+  // Spanish has no initial headings, tried to also detect {{ES as such
+  // with "^(\\{\\{ES|(=+)[^=]).*$" but that broke English.
   static final Pattern headingStart = Pattern.compile("^(=+)[^=].*$", Pattern.MULTILINE);
   
   final Map<String,List<Selector>> pathToSelectors = new LinkedHashMap<String, List<Selector>>();
@@ -56,7 +58,7 @@ public class WiktionarySplitter extends org.xml.sax.helpers.DefaultHandler {
   private WiktionarySplitter() {
     List<Selector> selectors;
     for (final String code : WiktionaryLangs.wikiCodeToIsoCodeToWikiName.keySet()) {
-      //if (code.equals("en") || code.equals("de") || code.equals("fr")) {continue;}
+      //if (!code.equals("fr")) {continue;}
       selectors = new ArrayList<WiktionarySplitter.Selector>();
       pathToSelectors.put(String.format("data/inputs/%swiktionary-pages-articles.xml", code), selectors);
       for (final Map.Entry<String, String> entry : WiktionaryLangs.wikiCodeToIsoCodeToWikiName.get(code).entrySet()) {
@@ -146,6 +148,13 @@ public class WiktionarySplitter extends org.xml.sax.helpers.DefaultHandler {
             title.startsWith("Categoria:") ||
             title.startsWith("Aiuto:") ||
             title.startsWith("Portail:") ||
+            // ES
+            title.startsWith("Apéndice:") ||
+            title.startsWith("Archivo:") ||
+            title.startsWith("Ayuda:") ||
+            title.startsWith("Categoría:") ||
+            title.startsWith("Plantilla:") ||
+            title.startsWith("Wikcionario:") ||
 
             // sentinel
             false