From: Reimar Döffinger Date: Tue, 8 Dec 2015 18:56:51 +0000 (+0100) Subject: Improvements to wikisplit code. X-Git-Url: http://gitweb.fperrin.net/?p=DictionaryPC.git;a=commitdiff_plain;h=8b88cd6646f8abeed149db17958c3a1fc30eec99 Improvements to wikisplit code. --- diff --git a/src/com/hughes/android/dictionary/engine/WiktionarySplitter.java b/src/com/hughes/android/dictionary/engine/WiktionarySplitter.java index 97c64a7..42f7a72 100644 --- a/src/com/hughes/android/dictionary/engine/WiktionarySplitter.java +++ b/src/com/hughes/android/dictionary/engine/WiktionarySplitter.java @@ -122,6 +122,7 @@ public class WiktionarySplitter extends org.xml.sax.helpers.DefaultHandler { title.startsWith("Thread:") || title.startsWith("Template:") || title.startsWith("Summary:") || + title.startsWith("Module:") || // DE title.startsWith("Datei:") || title.startsWith("Verzeichnis:") || @@ -129,6 +130,7 @@ public class WiktionarySplitter extends org.xml.sax.helpers.DefaultHandler { title.startsWith("Thesaurus:") || title.startsWith("Kategorie:") || title.startsWith("Hilfe:") || + title.startsWith("Reim:") || // FR: title.startsWith("Annexe:") || title.startsWith("Catégori:") || @@ -193,7 +195,16 @@ public class WiktionarySplitter extends org.xml.sax.helpers.DefaultHandler { end = text.length(); } - final String sectionText = text.substring(0, end); + String sectionText = text.substring(0, end); + // Hack to remove empty dummy section from French + if (sectionText.startsWith("\n=== {{S|étymologie}} ===\n: {{ébauche-étym")) + { + int dummy_end = sectionText.indexOf("}}", 41) + 2; + while (dummy_end + 1 < sectionText.length() && + sectionText.charAt(dummy_end) == '\n' && + sectionText.charAt(dummy_end + 1) == '\n') ++dummy_end; + sectionText = sectionText.substring(dummy_end); + } final Section section = new Section(title, heading, sectionText); try { diff --git a/src/com/hughes/android/dictionary/parser/wiktionary/WholeSectionToHtmlParser.java b/src/com/hughes/android/dictionary/parser/wiktionary/WholeSectionToHtmlParser.java index 6b7da0b..e8a7e9b 100644 --- a/src/com/hughes/android/dictionary/parser/wiktionary/WholeSectionToHtmlParser.java +++ b/src/com/hughes/android/dictionary/parser/wiktionary/WholeSectionToHtmlParser.java @@ -156,7 +156,7 @@ public class WholeSectionToHtmlParser extends AbstractWiktionaryParser { @Override public boolean skipWikiLink(WikiTokenizer wikiTokenizer) { final String wikiText = wikiTokenizer.wikiLinkText(); - if (wikiText.startsWith("???Category:")) { + if (wikiText.startsWith("Kategorie:")) { return true; } return false; @@ -204,7 +204,7 @@ public class WholeSectionToHtmlParser extends AbstractWiktionaryParser { @Override public boolean skipWikiLink(WikiTokenizer wikiTokenizer) { final String wikiText = wikiTokenizer.wikiLinkText(); - if (wikiText.startsWith("???Category:")) { + if (wikiText.startsWith("Categoria:")) { return true; } return false; @@ -244,11 +244,18 @@ public class WholeSectionToHtmlParser extends AbstractWiktionaryParser { if (sectionName.equalsIgnoreCase("Synonymes")) { return EntryTypeName.SYNONYM_MULTI; } + if (sectionName.equalsIgnoreCase("Antonymes")) { + return EntryTypeName.ANTONYM_MULTI; + } return null; } @Override public boolean skipWikiLink(WikiTokenizer wikiTokenizer) { + final String wikiText = wikiTokenizer.wikiLinkText(); + if (wikiText.startsWith("Catégorie:")) { + return true; + } return false; } @Override