X-Git-Url: http://gitweb.fperrin.net/?a=blobdiff_plain;f=src%2Fcom%2Fhughes%2Fandroid%2Fdictionary%2Fparser%2Fwiktionary%2FWholeSectionToHtmlParser.java;h=0f7ae2d1cef8d94876a6b97ad0627a7633ee4562;hb=1b515f031d39e758e8e6339c03e124f1548579cc;hp=70a02c37f826433b9cda1a6626339ebf0dffce61;hpb=cd86f5f7abf5e545daf89c03d9dbbfd4e5bc65a4;p=DictionaryPC.git diff --git a/src/com/hughes/android/dictionary/parser/wiktionary/WholeSectionToHtmlParser.java b/src/com/hughes/android/dictionary/parser/wiktionary/WholeSectionToHtmlParser.java index 70a02c3..0f7ae2d 100644 --- a/src/com/hughes/android/dictionary/parser/wiktionary/WholeSectionToHtmlParser.java +++ b/src/com/hughes/android/dictionary/parser/wiktionary/WholeSectionToHtmlParser.java @@ -1,6 +1,7 @@ package com.hughes.android.dictionary.parser.wiktionary; +import com.hughes.android.dictionary.HtmlDisplayActivity; import com.hughes.android.dictionary.engine.EntryTypeName; import com.hughes.android.dictionary.engine.HtmlEntry; import com.hughes.android.dictionary.engine.IndexBuilder; @@ -23,8 +24,9 @@ public class WholeSectionToHtmlParser extends AbstractWiktionaryParser { interface LangConfig { boolean skipSection(final String name); + EntryTypeName sectionNameToEntryType(String sectionName); boolean skipWikiLink(final WikiTokenizer wikiTokenizer); - String adjustWikiLink(String wikiLinkDest); + String adjustWikiLink(String wikiLinkDest, final String wikiLinkText); void addFunctionCallbacks( Map> functionCallbacks); } @@ -36,7 +38,25 @@ public class WholeSectionToHtmlParser extends AbstractWiktionaryParser { public boolean skipSection(String headingText) { return enSkipSections.matcher(headingText).matches(); } - + + @Override + public EntryTypeName sectionNameToEntryType(String sectionName) { + if (sectionName.equalsIgnoreCase("Synonyms")) { + return EntryTypeName.SYNONYM_MULTI; + } + if (sectionName.equalsIgnoreCase("Antonyms")) { + return EntryTypeName.ANTONYM_MULTI; + } + if (EnParser.partOfSpeechHeader.matcher(sectionName).matches()) { + // We need to put it in the other index, too. + return null; + } + if (sectionName.equalsIgnoreCase("Derived Terms")) { + return null; + } + return null; + } + @Override public boolean skipWikiLink(WikiTokenizer wikiTokenizer) { final String wikiText = wikiTokenizer.wikiLinkText(); @@ -46,10 +66,17 @@ public class WholeSectionToHtmlParser extends AbstractWiktionaryParser { return false; } @Override - public String adjustWikiLink(String wikiLinkDest) { + public String adjustWikiLink(String wikiLinkDest, String wikiLinkText) { if (wikiLinkDest.startsWith("w:") || wikiLinkDest.startsWith("Image:")) { return null; } + final int hashPos = wikiLinkDest.indexOf("#"); + if (hashPos != -1) { + wikiLinkDest = wikiLinkDest.substring(0, hashPos); + if (wikiLinkDest.isEmpty()) { + wikiLinkDest = wikiLinkText; + } + } return wikiLinkDest; } @@ -57,14 +84,18 @@ public class WholeSectionToHtmlParser extends AbstractWiktionaryParser { public void addFunctionCallbacks( Map> functionCallbacks) { EnFunctionCallbacks.addGenericCallbacks(functionCallbacks); - }}); + } + }); final LangConfig basicLangConfig = new LangConfig() { @Override public boolean skipSection(String headingText) { return false; } - + @Override + public EntryTypeName sectionNameToEntryType(String sectionName) { + return EntryTypeName.WIKTIONARY_MENTIONED; + } @Override public boolean skipWikiLink(WikiTokenizer wikiTokenizer) { final String wikiText = wikiTokenizer.wikiLinkText(); @@ -74,7 +105,7 @@ public class WholeSectionToHtmlParser extends AbstractWiktionaryParser { return false; } @Override - public String adjustWikiLink(String wikiLinkDest) { + public String adjustWikiLink(String wikiLinkDest, final String wikiLinkText) { return wikiLinkDest; } @@ -89,11 +120,14 @@ public class WholeSectionToHtmlParser extends AbstractWiktionaryParser { } final IndexBuilder titleIndexBuilder; + final IndexBuilder defIndexBuilder; final String skipLangIso; final LangConfig langConfig; + - public WholeSectionToHtmlParser(final IndexBuilder titleIndexBuilder, final String wiktionaryIso, final String skipLangIso) { + public WholeSectionToHtmlParser(final IndexBuilder titleIndexBuilder, final IndexBuilder defIndexBuilder, final String wiktionaryIso, final String skipLangIso) { this.titleIndexBuilder = titleIndexBuilder; + this.defIndexBuilder = defIndexBuilder; assert isoToLangConfig.containsKey(wiktionaryIso): wiktionaryIso; this.langConfig = isoToLangConfig.get(wiktionaryIso); this.skipLangIso = skipLangIso; @@ -104,7 +138,7 @@ public class WholeSectionToHtmlParser extends AbstractWiktionaryParser { @Override public void parseSection(String heading, String text) { assert entrySource != null; - final HtmlEntry htmlEntry = new HtmlEntry(entrySource, StringEscapeUtils.escapeHtml3(title)); + final HtmlEntry htmlEntry = new HtmlEntry(entrySource, title); indexedEntry = new IndexedEntry(htmlEntry); final AppendAndIndexWikiCallback callback = new AppendCallback( @@ -139,8 +173,6 @@ public class WholeSectionToHtmlParser extends AbstractWiktionaryParser { - static final Pattern ALL_ASCII = Pattern.compile("[\\p{ASCII}]*"); - class AppendCallback extends AppendAndIndexWikiCallback { public AppendCallback(WholeSectionToHtmlParser parser) { super(parser); @@ -149,7 +181,7 @@ public class WholeSectionToHtmlParser extends AbstractWiktionaryParser { @Override public void onPlainText(String plainText) { final String htmlEscaped = StringEscapeUtils.escapeHtml3(plainText); - if (ALL_ASCII.matcher(htmlEscaped).matches()) { + if (StringUtil.isAscii(htmlEscaped)) { super.onPlainText(htmlEscaped); } else { super.onPlainText(StringUtil.escapeToPureHtmlUnicode(plainText)); @@ -167,12 +199,16 @@ public class WholeSectionToHtmlParser extends AbstractWiktionaryParser { } String linkDest; if (wikiTokenizer.wikiLinkDest() != null) { - linkDest = langConfig.adjustWikiLink(wikiTokenizer.wikiLinkDest()); + linkDest = langConfig.adjustWikiLink(wikiTokenizer.wikiLinkDest(), wikiTokenizer.wikiLinkText()); } else { linkDest = wikiTokenizer.wikiLinkText(); } + if (sectionEntryTypeName != null) { + // TODO: inside a definition, this could be the wrong language. + titleIndexBuilder.addEntryWithString(indexedEntry, wikiTokenizer.wikiLinkText(), sectionEntryTypeName); + } if (linkDest != null) { - builder.append(String.format("", linkDest)); + builder.append(String.format("", HtmlEntry.formatQuickdicUrl("", linkDest))); super.onWikiLink(wikiTokenizer); builder.append(String.format("")); } else { @@ -197,10 +233,14 @@ public class WholeSectionToHtmlParser extends AbstractWiktionaryParser { @Override public void onNewline(WikiTokenizer wikiTokenizer) { } + + EntryTypeName sectionEntryTypeName; + IndexBuilder currentIndexBuilder; @Override public void onHeading(WikiTokenizer wikiTokenizer) { final String headingText = wikiTokenizer.headingWikiText(); + sectionEntryTypeName = langConfig.sectionNameToEntryType(headingText); final int depth = wikiTokenizer.headingDepth(); if (langConfig.skipSection(headingText)) { while ((wikiTokenizer = wikiTokenizer.nextToken()) != null) {