gitweb.fperrin.net Git - DictionaryPC.git/commitdiff
Improvements to wikisplit code.
author Reimar Döffinger <Reimar.Doeffinger@gmx.de>
Tue, 8 Dec 2015 18:56:51 +0000 (19:56 +0100)
committer Reimar Döffinger <Reimar.Doeffinger@gmx.de>
Tue, 8 Dec 2015 18:56:51 +0000 (19:56 +0100)
src/com/hughes/android/dictionary/engine/WiktionarySplitter.java
src/com/hughes/android/dictionary/parser/wiktionary/WholeSectionToHtmlParser.java

index 97c64a74a3766f2a315d9d95686d4c44039bed9b..42f7a724d478eab03bf6fd3546b1b00f8642e347 100644 (file)
@@ -122,6 +122,7 @@ public class WiktionarySplitter extends org.xml.sax.helpers.DefaultHandler {
             title.startsWith("Thread:") || 
             title.startsWith("Template:") ||
             title.startsWith("Summary:") ||
+            title.startsWith("Module:") ||
             // DE
             title.startsWith("Datei:") ||
             title.startsWith("Verzeichnis:") ||
@@ -129,6 +130,7 @@ public class WiktionarySplitter extends org.xml.sax.helpers.DefaultHandler {
             title.startsWith("Thesaurus:") ||
             title.startsWith("Kategorie:") ||
             title.startsWith("Hilfe:") ||
+            title.startsWith("Reim:") ||
             // FR:
             title.startsWith("Annexe:") ||
             title.startsWith("Catégori:") ||
@@ -193,7 +195,16 @@ public class WiktionarySplitter extends org.xml.sax.helpers.DefaultHandler {
             end = text.length();
           }
           
-          final String sectionText = text.substring(0, end);
+          String sectionText = text.substring(0, end);
+          // Hack to remove empty dummy section from French
+          if (sectionText.startsWith("\n=== {{S|étymologie}} ===\n: {{ébauche-étym"))
+          {
+              int dummy_end = sectionText.indexOf("}}", 41) + 2;
+              while (dummy_end + 1 < sectionText.length() &&
+                     sectionText.charAt(dummy_end) == '\n' &&
+                     sectionText.charAt(dummy_end + 1) == '\n') ++dummy_end;
+              sectionText = sectionText.substring(dummy_end);
+          }
           final Section section = new Section(title, heading, sectionText);
           
           try {
index 6b7da0b8df9bb32d76fe363dea7afa15a0700841..e8a7e9b0a53528ea849bf45634313733e718ab9d 100644 (file)
@@ -156,7 +156,7 @@ public class WholeSectionToHtmlParser extends AbstractWiktionaryParser {
             @Override
             public boolean skipWikiLink(WikiTokenizer wikiTokenizer) {
                 final String wikiText = wikiTokenizer.wikiLinkText();
-                if (wikiText.startsWith("???Category:")) {
+                if (wikiText.startsWith("Kategorie:")) {
                     return true;
                 }
                 return false;
@@ -204,7 +204,7 @@ public class WholeSectionToHtmlParser extends AbstractWiktionaryParser {
             @Override
             public boolean skipWikiLink(WikiTokenizer wikiTokenizer) {
                 final String wikiText = wikiTokenizer.wikiLinkText();
-                if (wikiText.startsWith("???Category:")) {
+                if (wikiText.startsWith("Categoria:")) {
                     return true;
                 }
                 return false;
@@ -244,11 +244,18 @@ public class WholeSectionToHtmlParser extends AbstractWiktionaryParser {
                 if (sectionName.equalsIgnoreCase("Synonymes")) {
                     return EntryTypeName.SYNONYM_MULTI;
                 }
+                if (sectionName.equalsIgnoreCase("Antonymes")) {
+                    return EntryTypeName.ANTONYM_MULTI;
+                }
                 return null;
             }
             
             @Override
             public boolean skipWikiLink(WikiTokenizer wikiTokenizer) {
+                final String wikiText = wikiTokenizer.wikiLinkText();
+                if (wikiText.startsWith("Catégorie:")) {
+                    return true;
+                }
                 return false;
             }
             @Override