X-Git-Url: http://gitweb.fperrin.net/?a=blobdiff_plain;f=src%2Fcom%2Fhughes%2Fandroid%2Fdictionary%2Fparser%2Fwiktionary%2FWholeSectionToHtmlParser.java;h=ceeb4c2395532f5e6630968976eec01fc088683f;hb=22f584bdc1bd3cf68d3c375888a13676aa3ced2f;hp=53104fc166e862c39cb89b9ec4a814d0fdc50253;hpb=4df160f54810f0c7d279552f8b0531fc99a83a79;p=DictionaryPC.git diff --git a/src/com/hughes/android/dictionary/parser/wiktionary/WholeSectionToHtmlParser.java b/src/com/hughes/android/dictionary/parser/wiktionary/WholeSectionToHtmlParser.java index 53104fc..ceeb4c2 100644 --- a/src/com/hughes/android/dictionary/parser/wiktionary/WholeSectionToHtmlParser.java +++ b/src/com/hughes/android/dictionary/parser/wiktionary/WholeSectionToHtmlParser.java @@ -10,20 +10,45 @@ import com.hughes.android.dictionary.parser.WikiTokenizer; import org.apache.commons.lang3.StringEscapeUtils; import java.util.ArrayList; +import java.util.LinkedHashMap; import java.util.List; import java.util.Map; import java.util.regex.Pattern; public class WholeSectionToHtmlParser extends AbstractWiktionaryParser { + + interface LangConfig { + boolean skipSection(final String name); + boolean skipWikiLink(final WikiTokenizer wikiTokenizer); + } + static final Map isoToLangConfig = new LinkedHashMap(); + static { + final Pattern enSkipSections = Pattern.compile(".*Translations.*"); + isoToLangConfig.put("EN", new LangConfig() { + @Override + public boolean skipSection(String headingText) { + return enSkipSections.matcher(headingText).matches(); + } + + @Override + public boolean skipWikiLink(WikiTokenizer wikiTokenizer) { + final String wikiText = wikiTokenizer.wikiLinkText(); + if (wikiText.startsWith("Category:")) { + return true; + } + return false; + }}); + } public static final String NAME = "WholeSectionToHtmlParser"; - public static final Pattern skipSections = Pattern.compile(".*Translations.*"); final IndexBuilder titleIndexBuilder; + final LangConfig langConfig; - public WholeSectionToHtmlParser(final IndexBuilder titleIndexBuilder) { + public WholeSectionToHtmlParser(final IndexBuilder titleIndexBuilder, final String wiktionaryIso) { this.titleIndexBuilder = titleIndexBuilder; - + assert isoToLangConfig.containsKey(wiktionaryIso): wiktionaryIso; + this.langConfig = isoToLangConfig.get(wiktionaryIso); } @Override @@ -69,6 +94,9 @@ public class WholeSectionToHtmlParser extends AbstractWiktionaryParser { // Skips wikilinks like: [[en::dick]] return; } + if (langConfig.skipWikiLink(wikiTokenizer)) { + return; + } super.onWikiLink(wikiTokenizer); } @@ -91,7 +119,7 @@ public class WholeSectionToHtmlParser extends AbstractWiktionaryParser { public void onHeading(WikiTokenizer wikiTokenizer) { final String headingText = wikiTokenizer.headingWikiText(); final int depth = wikiTokenizer.headingDepth(); - if (skipSections.matcher(headingText).matches()) { + if (langConfig.skipSection(headingText)) { while ((wikiTokenizer = wikiTokenizer.nextToken()) != null) { if (wikiTokenizer.isHeading() && wikiTokenizer.headingDepth() <= depth) { wikiTokenizer.returnToLineStart();