1 package com.hughes.android.dictionary.parser.wiktionary;
3 import com.hughes.android.dictionary.engine.HtmlEntry;
4 import com.hughes.android.dictionary.engine.IndexBuilder;
5 import com.hughes.android.dictionary.engine.IndexBuilder.TokenData;
6 import com.hughes.android.dictionary.engine.IndexedEntry;
7 import com.hughes.android.dictionary.parser.WikiTokenizer;
9 import org.apache.commons.lang3.StringEscapeUtils;
11 import java.util.ArrayList;
12 import java.util.List;
14 import java.util.regex.Pattern;
/**
 * Wiktionary parser that renders a section's wiki text to HTML (see parseSection) and
 * attaches the resulting HtmlEntry to the supplied title index builder.
 */
16 public class WholeSectionToHtmlParser extends AbstractWiktionaryParser {
// Name constant for this parser; the code that reads it is outside this view.
18 public static final String NAME = "WholeSectionToHtmlParser";
// Tested with matches() against heading text in AppendCallback.onHeading, so it accepts
// any heading whose text contains "Translations".
19 public static final Pattern skipSections = Pattern.compile(".*Translations.*");
// Index builder that parseSection adds each generated HtmlEntry to.
21 final IndexBuilder titleIndexBuilder;
/**
 * Creates a parser bound to the given index builder.
 *
 * @param titleIndexBuilder stored as-is in the field of the same name (no null check here)
 */
23 public WholeSectionToHtmlParser(final IndexBuilder titleIndexBuilder) {
24 this.titleIndexBuilder = titleIndexBuilder;
/**
 * Renders one section's wiki text to HTML and registers it under the current title.
 *
 * Creates an HtmlEntry/IndexedEntry pair, runs the text through an AppendCallback that
 * appends into a fresh StringBuilder, stores the built string as the entry's html, and adds
 * the entry to the token data obtained for the title.
 *
 * @param heading not read by any of the statements visible in this method
 * @param text    wiki text that is dispatched through the callback
 */
29 void parseSection(String heading, String text) {
// title and entrySource are not declared in this class as shown; presumably inherited
// from AbstractWiktionaryParser (confirm). The title is HTML-escaped for the entry.
30 HtmlEntry htmlEntry = new HtmlEntry(entrySource, StringEscapeUtils.escapeHtml3(title));
31 IndexedEntry indexedEntry = new IndexedEntry(htmlEntry);
33 final AppendAndIndexWikiCallback<WholeSectionToHtmlParser> callback = new AppendCallback(this);
// The callback appends its output to this builder while dispatching.
35 callback.builder = new StringBuilder();
36 callback.indexedEntry = indexedEntry;
// NOTE(review): the meaning of the null second argument to dispatch is not visible here.
37 callback.dispatch(text, null);
39 htmlEntry.html = callback.builder.toString();
40 indexedEntry.isValid = true;
// Token data is looked up with the raw (unescaped) title.
42 final TokenData tokenData = titleIndexBuilder.getOrCreateTokenData(title);
44 htmlEntry.addToDictionary(titleIndexBuilder.index.dict);
45 tokenData.htmlEntries.add(htmlEntry);
46 //titleIndexBuilder.addEntryWithString(indexedEntry, title, EntryTypeName.WIKTIONARY_TITLE_MULTI_DETAIL);
// Receives a map of named arguments. No body statements are visible in this view.
// NOTE(review): looks like an intentionally empty hook; confirm against the full file.
50 void removeUselessArgs(Map<String, String> namedArgs) {
/**
 * Wiki callback used by parseSection; its handlers append HTML for headings, list items
 * and bold/italic markup to the inherited builder.
 */
53 class AppendCallback extends AppendAndIndexWikiCallback<WholeSectionToHtmlParser> {
// Constructor taking the owning parser. Its body is not visible in this view;
// presumably it forwards the parser to the superclass (confirm).
54 public AppendCallback(WholeSectionToHtmlParser parser) {
/** HTML-escapes the plain text (escapeHtml3) before passing it to the superclass handler. */
59 public void onPlainText(String plainText) {
60 super.onPlainText(StringEscapeUtils.escapeHtml3(plainText));
/** Passes wiki links straight through to the superclass handler. */
64 public void onWikiLink(WikiTokenizer wikiTokenizer) {
65 super.onWikiLink(wikiTokenizer);
/** Passes function tokens and their arguments to the superclass handler unchanged. */
69 public void onFunction(WikiTokenizer wikiTokenizer, String name,
70 List<String> args, Map<String, String> namedArgs) {
71 super.onFunction(wikiTokenizer, name, args, namedArgs);
/** Passes HTML tokens to the superclass handler; no escaping is applied in this override. */
75 public void onHtml(WikiTokenizer wikiTokenizer) {
76 super.onHtml(wikiTokenizer);
// Newline handler. No body statements are visible in this view.
// NOTE(review): possibly a deliberate no-op; confirm against the full file.
80 public void onNewline(WikiTokenizer wikiTokenizer) {
/**
 * Handles a heading token. Headings whose text matches skipSections trigger a forward scan
 * to the next heading of the same or shallower depth; the visible remainder of the method
 * writes the heading as an HTML h-tag whose level is the wiki heading depth.
 */
84 public void onHeading(WikiTokenizer wikiTokenizer) {
85 final String headingText = wikiTokenizer.headingWikiText();
86 final int depth = wikiTokenizer.headingDepth();
87 if (skipSections.matcher(headingText).matches()) {
// Advance token by token (reassigning the parameter) until the input is exhausted.
88 while ((wikiTokenizer = wikiTokenizer.nextToken()) != null) {
// A heading at the same or a shallower depth marks the end of the matched section.
89 if (wikiTokenizer.isHeading() && wikiTokenizer.headingDepth() <= depth) {
// Rewind on that heading token.
// NOTE(review): presumably so it is tokenized again by the caller; confirm.
90 wikiTokenizer.returnToLineStart();
// NOTE(review): the statements that leave the loop and end the skip branch are not visible
// in this view; presumably the method returns without emitting anything for a skipped section.
// Emit "\n<hN>", then the heading's own wiki text via dispatch, then "</hN>\n".
96 builder.append(String.format("\n<h%d>", depth));
97 dispatch(headingText, null);
98 builder.append(String.format("</h%d>\n", depth));
// List-prefix characters of the list tags currently open; onListItem appends when it opens
// a tag and removes from the end when it closes one.
101 final List<Character> listPrefixStack = new ArrayList<Character>();
/**
 * Emits one list item as HTML, opening list tags when the item's prefix is longer than the
 * current stack and closing tags afterwards when the following list item's prefix is shorter.
 */
103 public void onListItem(WikiTokenizer wikiTokenizer) {
// Start on a fresh line unless the builder is empty or already ends with a newline.
104 if (builder.length() != 0 && builder.charAt(builder.length() - 1) != '\n') {
105 builder.append("\n");
107 final String prefix = wikiTokenizer.listItemPrefix();
// Open one list tag per prefix character beyond the current nesting depth.
// NOTE(review): only lengths are compared, so a change of list character at the same depth
// does not close and reopen a tag; confirm this is intended.
108 while (listPrefixStack.size() < prefix.length()) {
109 builder.append(String.format("<%s>", WikiTokenizer.getListTag(prefix.charAt(listPrefixStack.size()))));
110 listPrefixStack.add(prefix.charAt(listPrefixStack.size()));
// The item body is dispatched as wiki text between <li> and </li>.
112 builder.append("<li>");
113 dispatch(wikiTokenizer.listItemWikiText(), null);
114 builder.append("</li>\n");
// Look ahead at what follows this item, stepping over a single newline token if present.
116 WikiTokenizer nextToken = wikiTokenizer.nextToken();
117 boolean returnToLineStart = false;
118 if (nextToken != null && nextToken.isNewline()) {
119 nextToken = nextToken.nextToken();
120 returnToLineStart = true;
122 final String nextListHeader;
123 if (nextToken == null || !nextToken.isListItem()) {
// NOTE(review): the assignment for this branch is not visible in this view; presumably an
// empty prefix so that every open list tag gets closed below; confirm.
// Otherwise the following list item's prefix is used.
126 nextListHeader = nextToken.listItemPrefix();
// When a newline was stepped over, rewind is requested on the current tokenizer (not nextToken).
128 if (returnToLineStart) {
129 wikiTokenizer.returnToLineStart();
// Close list tags, innermost first, until the depth matches the following prefix length.
131 while (listPrefixStack.size() > nextListHeader.length()) {
132 final char prefixChar = listPrefixStack.remove(listPrefixStack.size() - 1);
133 builder.append(String.format("</%s>\n", WikiTokenizer.getListTag(prefixChar)));
// Markup state flags for onMarkup. italicOn is flipped there on each '' token.
// NOTE(review): the corresponding update of boldOn is not visible in this view.
137 boolean boldOn = false;
138 boolean italicOn = false;
/**
 * Translates wiki quote markup into HTML tags: ''' maps to b tags and '' to em tags.
 * NOTE(review): the conditions choosing between the opening and closing tag are not visible
 * in this view; presumably they test boldOn / italicOn. Confirm against the full file.
 */
140 public void onMarkup(WikiTokenizer wikiTokenizer) {
// Triple apostrophe: bold.
141 if ("'''".equals(wikiTokenizer.token())) {
143 builder.append("<b>");
145 builder.append("</b>");
// Double apostrophe: italic.
148 } else if ("''".equals(wikiTokenizer.token())) {
150 builder.append("<em>");
152 builder.append("</em>");
// Flip the italic state after handling a '' token.
154 italicOn = !italicOn;