X-Git-Url: http://gitweb.fperrin.net/?a=blobdiff_plain;f=src%2Fcom%2Fhughes%2Fandroid%2Fdictionary%2Fparser%2Fwiktionary%2FWholeSectionToHtmlParser.java;h=d0a4908f10ccad4962dc5ce47ebcb2af5b10daa3;hb=b8bbc4b434cd5e1500b3b534e25136a687e78265;hp=b51f24ed497ac7abd832fb7ece1ae126c7eade3d;hpb=4d8a2efb83009dbcca069fe9cac7b8e7d2f72161;p=DictionaryPC.git diff --git a/src/com/hughes/android/dictionary/parser/wiktionary/WholeSectionToHtmlParser.java b/src/com/hughes/android/dictionary/parser/wiktionary/WholeSectionToHtmlParser.java index b51f24e..d0a4908 100644 --- a/src/com/hughes/android/dictionary/parser/wiktionary/WholeSectionToHtmlParser.java +++ b/src/com/hughes/android/dictionary/parser/wiktionary/WholeSectionToHtmlParser.java @@ -1,18 +1,22 @@ package com.hughes.android.dictionary.parser.wiktionary; +import java.util.ArrayList; +import java.util.List; import java.util.Map; +import java.util.regex.Pattern; import com.hughes.android.dictionary.engine.EntryTypeName; import com.hughes.android.dictionary.engine.HtmlEntry; import com.hughes.android.dictionary.engine.IndexBuilder; import com.hughes.android.dictionary.engine.IndexedEntry; +import com.hughes.android.dictionary.parser.WikiTokenizer; public class WholeSectionToHtmlParser extends AbstractWiktionaryParser { public static final String NAME = "WholeSectionToHtmlParser"; + public static final Pattern skipSections = Pattern.compile(".*Translations.*"); final IndexBuilder titleIndexBuilder; - public WholeSectionToHtmlParser(final IndexBuilder titleIndexBuilder) { this.titleIndexBuilder = titleIndexBuilder; @@ -20,8 +24,15 @@ public class WholeSectionToHtmlParser extends AbstractWiktionaryParser { @Override void parseSection(String heading, String text) { - HtmlEntry htmlEntry = new HtmlEntry(entrySource, title, text); + HtmlEntry htmlEntry = new HtmlEntry(entrySource, title); IndexedEntry indexedEntry = new IndexedEntry(htmlEntry); + + final AppendAndIndexWikiCallback callback = new AppendCallback(this); + callback.builder = new StringBuilder(); + callback.indexedEntry = indexedEntry; + callback.dispatch(text, null); + + htmlEntry.html = callback.builder.toString(); indexedEntry.isValid = true; titleIndexBuilder.addEntryWithString(indexedEntry, title, EntryTypeName.WIKTIONARY_TITLE_MULTI_DETAIL); } @@ -29,5 +40,114 @@ public class WholeSectionToHtmlParser extends AbstractWiktionaryParser { @Override void removeUselessArgs(Map namedArgs) { } + + class AppendCallback extends AppendAndIndexWikiCallback { + public AppendCallback(WholeSectionToHtmlParser parser) { + super(parser); + } + + @Override + public void onPlainText(String plainText) { + super.onPlainText(plainText); + } + + @Override + public void onWikiLink(WikiTokenizer wikiTokenizer) { + super.onWikiLink(wikiTokenizer); + } + + @Override + public void onFunction(WikiTokenizer wikiTokenizer, String name, + List args, Map namedArgs) { + super.onFunction(wikiTokenizer, name, args, namedArgs); + } + + @Override + public void onHtml(WikiTokenizer wikiTokenizer) { + super.onHtml(wikiTokenizer); + } + + @Override + public void onNewline(WikiTokenizer wikiTokenizer) { + } + + @Override + public void onHeading(WikiTokenizer wikiTokenizer) { + final String headingText = wikiTokenizer.headingWikiText(); + final int depth = wikiTokenizer.headingDepth(); + if (skipSections.matcher(headingText).matches()) { + while ((wikiTokenizer = wikiTokenizer.nextToken()) != null) { + if (wikiTokenizer.isHeading() && wikiTokenizer.headingDepth() <= depth) { + wikiTokenizer.returnToLineStart(); + return; + } + } + return; + } + onPlainText(String.format("\n", depth)); + dispatch(headingText, null); + onPlainText(String.format("\n", depth)); + } + + final List listPrefixStack = new ArrayList(); + @Override + public void onListItem(WikiTokenizer wikiTokenizer) { + if (builder.length() != 0 && builder.charAt(builder.length() - 1) != '\n') { + builder.append("\n"); + } + final String prefix = wikiTokenizer.listItemPrefix(); + while (listPrefixStack.size() < prefix.length()) { + onPlainText(String.format("<%s>", WikiTokenizer.getListTag(prefix.charAt(listPrefixStack.size())))); + listPrefixStack.add(prefix.charAt(listPrefixStack.size())); + } + onPlainText("
  • "); + dispatch(wikiTokenizer.listItemWikiText(), null); + onPlainText("
  • \n"); + + WikiTokenizer nextToken = wikiTokenizer.nextToken(); + boolean returnToLineStart = false; + if (nextToken != null && nextToken.isNewline()) { + nextToken = nextToken.nextToken(); + returnToLineStart = true; + } + final String nextListHeader; + if (nextToken == null || !nextToken.isListItem()) { + nextListHeader = ""; + } else { + nextListHeader = nextToken.listItemPrefix(); + } + if (returnToLineStart) { + wikiTokenizer.returnToLineStart(); + } + while (listPrefixStack.size() > nextListHeader.length()) { + final char prefixChar = listPrefixStack.remove(listPrefixStack.size() - 1); + onPlainText(String.format("\n", WikiTokenizer.getListTag(prefixChar))); + } + } + + boolean boldOn = false; + boolean italicOn = false; + @Override + public void onMarkup(WikiTokenizer wikiTokenizer) { + if ("'''".equals(wikiTokenizer.token())) { + if (!boldOn) { + onPlainText(""); + } else { + onPlainText(""); + } + boldOn = !boldOn; + } else if ("''".equals(wikiTokenizer.token())) { + if (!italicOn) { + onPlainText(""); + } else { + onPlainText(""); + } + italicOn = !italicOn; + } else { + assert false; + } + } + + } }