2 package com.hughes.android.dictionary.parser.wiktionary;
import com.hughes.android.dictionary.engine.HtmlEntry;
import com.hughes.android.dictionary.engine.IndexBuilder;
import com.hughes.android.dictionary.engine.IndexBuilder.TokenData;
import com.hughes.android.dictionary.engine.IndexedEntry;
import com.hughes.android.dictionary.parser.WikiTokenizer;

import org.apache.commons.lang3.StringEscapeUtils;

import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.regex.Pattern;
17 public class WholeSectionToHtmlParser extends AbstractWiktionaryParser {
19 public static final String NAME = "WholeSectionToHtmlParser";
20 public static final Pattern skipSections = Pattern.compile(".*Translations.*");
22 final IndexBuilder titleIndexBuilder;
/**
 * Creates a parser that adds its output to the given index builder.
 *
 * @param titleIndexBuilder index builder that parsed sections are added to
 */
public WholeSectionToHtmlParser(final IndexBuilder titleIndexBuilder) {
    this.titleIndexBuilder = titleIndexBuilder;
}
30 void parseSection(String heading, String text) {
31 HtmlEntry htmlEntry = new HtmlEntry(entrySource, StringEscapeUtils.escapeHtml3(title));
32 IndexedEntry indexedEntry = new IndexedEntry(htmlEntry);
34 final AppendAndIndexWikiCallback<WholeSectionToHtmlParser> callback = new AppendCallback(
37 callback.builder = new StringBuilder();
38 callback.indexedEntry = indexedEntry;
39 callback.dispatch(text, null);
41 htmlEntry.html = callback.builder.toString();
42 indexedEntry.isValid = true;
44 final TokenData tokenData = titleIndexBuilder.getOrCreateTokenData(title);
46 htmlEntry.addToDictionary(titleIndexBuilder.index.dict);
47 tokenData.htmlEntries.add(htmlEntry);
48 // titleIndexBuilder.addEntryWithString(indexedEntry, title,
49 // EntryTypeName.WIKTIONARY_TITLE_MULTI_DETAIL);
53 void removeUselessArgs(Map<String, String> namedArgs) {
56 class AppendCallback extends AppendAndIndexWikiCallback<WholeSectionToHtmlParser> {
/**
 * Creates a callback bound to the given parser.
 *
 * @param parser parser this callback works for
 */
public AppendCallback(WholeSectionToHtmlParser parser) {
    // NOTE(review): the constructor body is not visible in the source; forwarding
    // the parser to the superclass is assumed — confirm against the original file.
    super(parser);
}
62 public void onPlainText(String plainText) {
63 super.onPlainText(StringEscapeUtils.escapeHtml3(plainText));
67 public void onWikiLink(WikiTokenizer wikiTokenizer) {
68 if (wikiTokenizer.wikiLinkText().endsWith(":" + title)) {
69 // Skips wikilinks like: [[en::dick]]
72 super.onWikiLink(wikiTokenizer);
76 public void onFunction(WikiTokenizer wikiTokenizer, String name,
77 List<String> args, Map<String, String> namedArgs) {
78 super.onFunction(wikiTokenizer, name, args, namedArgs);
82 public void onHtml(WikiTokenizer wikiTokenizer) {
83 super.onHtml(wikiTokenizer);
87 public void onNewline(WikiTokenizer wikiTokenizer) {
91 public void onHeading(WikiTokenizer wikiTokenizer) {
92 final String headingText = wikiTokenizer.headingWikiText();
93 final int depth = wikiTokenizer.headingDepth();
94 if (skipSections.matcher(headingText).matches()) {
95 while ((wikiTokenizer = wikiTokenizer.nextToken()) != null) {
96 if (wikiTokenizer.isHeading() && wikiTokenizer.headingDepth() <= depth) {
97 wikiTokenizer.returnToLineStart();
103 builder.append(String.format("\n<h%d>", depth));
104 dispatch(headingText, null);
105 builder.append(String.format("</h%d>\n", depth));
108 final List<Character> listPrefixStack = new ArrayList<Character>();
111 public void onListItem(WikiTokenizer wikiTokenizer) {
112 if (builder.length() != 0 && builder.charAt(builder.length() - 1) != '\n') {
113 builder.append("\n");
115 final String prefix = wikiTokenizer.listItemPrefix();
116 while (listPrefixStack.size() < prefix.length()) {
117 builder.append(String.format("<%s>",
118 WikiTokenizer.getListTag(prefix.charAt(listPrefixStack.size()))));
119 listPrefixStack.add(prefix.charAt(listPrefixStack.size()));
121 builder.append("<li>");
122 dispatch(wikiTokenizer.listItemWikiText(), null);
123 builder.append("</li>\n");
125 WikiTokenizer nextToken = wikiTokenizer.nextToken();
126 boolean returnToLineStart = false;
127 if (nextToken != null && nextToken.isNewline()) {
128 nextToken = nextToken.nextToken();
129 returnToLineStart = true;
131 final String nextListHeader;
132 if (nextToken == null || !nextToken.isListItem()) {
135 nextListHeader = nextToken.listItemPrefix();
137 if (returnToLineStart) {
138 wikiTokenizer.returnToLineStart();
140 while (listPrefixStack.size() > nextListHeader.length()) {
141 final char prefixChar = listPrefixStack.remove(listPrefixStack.size() - 1);
142 builder.append(String.format("</%s>\n", WikiTokenizer.getListTag(prefixChar)));
146 boolean boldOn = false;
147 boolean italicOn = false;
150 public void onMarkup(WikiTokenizer wikiTokenizer) {
151 if ("'''".equals(wikiTokenizer.token())) {
153 builder.append("<b>");
155 builder.append("</b>");
158 } else if ("''".equals(wikiTokenizer.token())) {
160 builder.append("<em>");
162 builder.append("</em>");
164 italicOn = !italicOn;