X-Git-Url: http://gitweb.fperrin.net/?a=blobdiff_plain;f=src%2Fcom%2Fhughes%2Fandroid%2Fdictionary%2Fparser%2Fwiktionary%2FWholeSectionToHtmlParser.java;h=ceeb4c2395532f5e6630968976eec01fc088683f;hb=22f584bdc1bd3cf68d3c375888a13676aa3ced2f;hp=dcf6f494132e7b98ef929f6e3bc223923fe482a8;hpb=0eaf1a63bc6d1145490b64d8c68e5a545401ec16;p=DictionaryPC.git diff --git a/src/com/hughes/android/dictionary/parser/wiktionary/WholeSectionToHtmlParser.java b/src/com/hughes/android/dictionary/parser/wiktionary/WholeSectionToHtmlParser.java index dcf6f49..ceeb4c2 100644 --- a/src/com/hughes/android/dictionary/parser/wiktionary/WholeSectionToHtmlParser.java +++ b/src/com/hughes/android/dictionary/parser/wiktionary/WholeSectionToHtmlParser.java @@ -1,25 +1,200 @@ + package com.hughes.android.dictionary.parser.wiktionary; +import com.hughes.android.dictionary.engine.HtmlEntry; +import com.hughes.android.dictionary.engine.IndexBuilder; +import com.hughes.android.dictionary.engine.IndexBuilder.TokenData; +import com.hughes.android.dictionary.engine.IndexedEntry; +import com.hughes.android.dictionary.parser.WikiTokenizer; + +import org.apache.commons.lang3.StringEscapeUtils; + +import java.util.ArrayList; +import java.util.LinkedHashMap; +import java.util.List; import java.util.Map; import java.util.regex.Pattern; -import com.hughes.android.dictionary.engine.IndexBuilder; - public class WholeSectionToHtmlParser extends AbstractWiktionaryParser { - - final IndexBuilder thisIndexBuilder; - final IndexBuilder foreignIndexBuilder; - final Pattern langPattern; - final Pattern langCodePattern; + + interface LangConfig { + boolean skipSection(final String name); + boolean skipWikiLink(final WikiTokenizer wikiTokenizer); + } + static final Map isoToLangConfig = new LinkedHashMap(); + static { + final Pattern enSkipSections = Pattern.compile(".*Translations.*"); + isoToLangConfig.put("EN", new LangConfig() { + @Override + public boolean skipSection(String headingText) { + return enSkipSections.matcher(headingText).matches(); + } + @Override + public boolean skipWikiLink(WikiTokenizer wikiTokenizer) { + final String wikiText = wikiTokenizer.wikiLinkText(); + if (wikiText.startsWith("Category:")) { + return true; + } + return false; + }}); + } - @Override - void parseSection(String heading, String text) { - - } + public static final String NAME = "WholeSectionToHtmlParser"; + + final IndexBuilder titleIndexBuilder; + final LangConfig langConfig; + + public WholeSectionToHtmlParser(final IndexBuilder titleIndexBuilder, final String wiktionaryIso) { + this.titleIndexBuilder = titleIndexBuilder; + assert isoToLangConfig.containsKey(wiktionaryIso): wiktionaryIso; + this.langConfig = isoToLangConfig.get(wiktionaryIso); + } + + @Override + void parseSection(String heading, String text) { + HtmlEntry htmlEntry = new HtmlEntry(entrySource, StringEscapeUtils.escapeHtml3(title)); + IndexedEntry indexedEntry = new IndexedEntry(htmlEntry); + + final AppendAndIndexWikiCallback callback = new AppendCallback( + this); + + callback.builder = new StringBuilder(); + callback.indexedEntry = indexedEntry; + callback.dispatch(text, null); + + htmlEntry.html = callback.builder.toString(); + indexedEntry.isValid = true; + + final TokenData tokenData = titleIndexBuilder.getOrCreateTokenData(title); + + htmlEntry.addToDictionary(titleIndexBuilder.index.dict); + tokenData.htmlEntries.add(htmlEntry); + // titleIndexBuilder.addEntryWithString(indexedEntry, title, + // EntryTypeName.WIKTIONARY_TITLE_MULTI_DETAIL); + } + + @Override + void removeUselessArgs(Map namedArgs) { + } + + class AppendCallback extends AppendAndIndexWikiCallback { + public AppendCallback(WholeSectionToHtmlParser parser) { + super(parser); + } + + @Override + public void onPlainText(String plainText) { + super.onPlainText(StringEscapeUtils.escapeHtml3(plainText)); + } + + @Override + public void onWikiLink(WikiTokenizer wikiTokenizer) { + if (wikiTokenizer.wikiLinkText().endsWith(":" + title)) { + // Skips wikilinks like: [[en::dick]] + return; + } + if (langConfig.skipWikiLink(wikiTokenizer)) { + return; + } + super.onWikiLink(wikiTokenizer); + } + + @Override + public void onFunction(WikiTokenizer wikiTokenizer, String name, + List args, Map namedArgs) { + super.onFunction(wikiTokenizer, name, args, namedArgs); + } + + @Override + public void onHtml(WikiTokenizer wikiTokenizer) { + super.onHtml(wikiTokenizer); + } + + @Override + public void onNewline(WikiTokenizer wikiTokenizer) { + } + + @Override + public void onHeading(WikiTokenizer wikiTokenizer) { + final String headingText = wikiTokenizer.headingWikiText(); + final int depth = wikiTokenizer.headingDepth(); + if (langConfig.skipSection(headingText)) { + while ((wikiTokenizer = wikiTokenizer.nextToken()) != null) { + if (wikiTokenizer.isHeading() && wikiTokenizer.headingDepth() <= depth) { + wikiTokenizer.returnToLineStart(); + return; + } + } + return; + } + builder.append(String.format("\n", depth)); + dispatch(headingText, null); + builder.append(String.format("\n", depth)); + } + + final List listPrefixStack = new ArrayList(); + + @Override + public void onListItem(WikiTokenizer wikiTokenizer) { + if (builder.length() != 0 && builder.charAt(builder.length() - 1) != '\n') { + builder.append("\n"); + } + final String prefix = wikiTokenizer.listItemPrefix(); + while (listPrefixStack.size() < prefix.length()) { + builder.append(String.format("<%s>", + WikiTokenizer.getListTag(prefix.charAt(listPrefixStack.size())))); + listPrefixStack.add(prefix.charAt(listPrefixStack.size())); + } + builder.append("
  • "); + dispatch(wikiTokenizer.listItemWikiText(), null); + builder.append("
  • \n"); + + WikiTokenizer nextToken = wikiTokenizer.nextToken(); + boolean returnToLineStart = false; + if (nextToken != null && nextToken.isNewline()) { + nextToken = nextToken.nextToken(); + returnToLineStart = true; + } + final String nextListHeader; + if (nextToken == null || !nextToken.isListItem()) { + nextListHeader = ""; + } else { + nextListHeader = nextToken.listItemPrefix(); + } + if (returnToLineStart) { + wikiTokenizer.returnToLineStart(); + } + while (listPrefixStack.size() > nextListHeader.length()) { + final char prefixChar = listPrefixStack.remove(listPrefixStack.size() - 1); + builder.append(String.format("\n", WikiTokenizer.getListTag(prefixChar))); + } + } + + boolean boldOn = false; + boolean italicOn = false; + + @Override + public void onMarkup(WikiTokenizer wikiTokenizer) { + if ("'''".equals(wikiTokenizer.token())) { + if (!boldOn) { + builder.append(""); + } else { + builder.append(""); + } + boldOn = !boldOn; + } else if ("''".equals(wikiTokenizer.token())) { + if (!italicOn) { + builder.append(""); + } else { + builder.append(""); + } + italicOn = !italicOn; + } else { + assert false; + } + } - @Override - void removeUselessArgs(Map namedArgs) { - } + } }