1 package com.hughes.android.dictionary.parser.wiktionary;
3 import java.util.ArrayList;
6 import java.util.regex.Pattern;
8 import org.apache.commons.lang3.StringEscapeUtils;
10 import com.hughes.android.dictionary.engine.EntryTypeName;
11 import com.hughes.android.dictionary.engine.HtmlEntry;
12 import com.hughes.android.dictionary.engine.IndexBuilder;
13 import com.hughes.android.dictionary.engine.IndexedEntry;
14 import com.hughes.android.dictionary.parser.WikiTokenizer;
/**
 * Wiktionary parser that renders the wiki text of a section as HTML and hands
 * the resulting entry to a title index builder (see parseSection).
 */
16 public class WholeSectionToHtmlParser extends AbstractWiktionaryParser {
/** Name string identifying this parser. */
18 public static final String NAME = "WholeSectionToHtmlParser";
/**
 * Heading filter consulted by AppendCallback.onHeading: a heading whose wiki
 * text fully matches this pattern (i.e. contains "Translations") takes the
 * skip branch there instead of being rendered.
 */
19 public static final Pattern skipSections = Pattern.compile(".*Translations.*");
/** Index builder that parseSection passes each finished entry to. */
21 final IndexBuilder titleIndexBuilder;
/**
 * Creates a parser that stores the given index builder for later use.
 *
 * @param titleIndexBuilder index builder that receives the entries built by parseSection
 */
23 public WholeSectionToHtmlParser(final IndexBuilder titleIndexBuilder) {
24 this.titleIndexBuilder = titleIndexBuilder;
/**
 * Renders the wiki text of one section to HTML and registers the result with
 * the title index builder.
 *
 * @param heading section heading; not referenced by any line visible in this listing
 * @param text    wiki text of the section, dispatched through an AppendCallback
 */
28 void parseSection(String heading, String text) {
// entrySource and title are not declared in this listing -- presumably inherited
// from AbstractWiktionaryParser; confirm.
29 HtmlEntry htmlEntry = new HtmlEntry(entrySource, title);
30 IndexedEntry indexedEntry = new IndexedEntry(htmlEntry);
// The callback accumulates its output in a fresh StringBuilder and is given the
// entry being built before the text is dispatched through it.
32 final AppendAndIndexWikiCallback<WholeSectionToHtmlParser> callback = new AppendCallback(this);
33 callback.builder = new StringBuilder();
34 callback.indexedEntry = indexedEntry;
35 callback.dispatch(text, null);
// Everything the callback appended becomes the entry's HTML.
37 htmlEntry.html = callback.builder.toString();
38 indexedEntry.isValid = true;
// The entry is added under the page title with the WIKTIONARY_TITLE_MULTI_DETAIL type.
39 titleIndexBuilder.addEntryWithString(indexedEntry, title, EntryTypeName.WIKTIONARY_TITLE_MULTI_DETAIL);
/**
 * Hook that receives the named arguments of a template call.
 * No body statements are visible in this listing -- presumably the method is
 * empty (a no-op); confirm against the full file.
 *
 * @param namedArgs named template arguments
 */
43 void removeUselessArgs(Map<String, String> namedArgs) {
/**
 * Wiki callback whose handlers append HTML markup to {@code builder} as tokens
 * are reported (see onHeading, onListItem and onMarkup).
 */
46 class AppendCallback extends AppendAndIndexWikiCallback<WholeSectionToHtmlParser> {
/**
 * The constructor body is not visible in this listing -- presumably it forwards
 * {@code parser} to the superclass constructor; confirm.
 *
 * @param parser the enclosing parser instance
 */
47 public AppendCallback(WholeSectionToHtmlParser parser) {
/**
 * Escapes plain text with StringEscapeUtils.escapeHtml3 and passes the escaped
 * text to the superclass handler.
 *
 * @param plainText raw text taken from the wiki source
 */
52 public void onPlainText(String plainText) {
53 super.onPlainText(StringEscapeUtils.escapeHtml3(plainText));
/**
 * Wiki link handler; delegates to the superclass implementation unchanged.
 *
 * @param wikiTokenizer tokenizer positioned on the link token
 */
57 public void onWikiLink(WikiTokenizer wikiTokenizer) {
58 super.onWikiLink(wikiTokenizer);
/**
 * Template/function handler; forwards all arguments to the superclass
 * implementation unchanged.
 *
 * @param wikiTokenizer tokenizer positioned on the function token
 * @param name          function name
 * @param args          positional arguments
 * @param namedArgs     named arguments
 */
62 public void onFunction(WikiTokenizer wikiTokenizer, String name,
63 List<String> args, Map<String, String> namedArgs) {
64 super.onFunction(wikiTokenizer, name, args, namedArgs);
/**
 * Inline HTML handler; delegates to the superclass implementation unchanged.
 *
 * @param wikiTokenizer tokenizer positioned on the HTML token
 */
68 public void onHtml(WikiTokenizer wikiTokenizer) {
69 super.onHtml(wikiTokenizer);
/**
 * Newline token handler. No body statements are visible in this listing --
 * presumably newline tokens produce no output here; confirm against the full file.
 *
 * @param wikiTokenizer tokenizer positioned on the newline token
 */
73 public void onNewline(WikiTokenizer wikiTokenizer) {
/**
 * Handles a heading token. A heading whose text matches skipSections enters a
 * loop that advances through the following tokens until a heading of the same
 * or a shallower depth is found; any other heading is written to
 * {@code builder} as an {@code <hN>} element, N being the heading depth.
 *
 * @param wikiTokenizer tokenizer positioned on the heading token
 */
77 public void onHeading(WikiTokenizer wikiTokenizer) {
78 final String headingText = wikiTokenizer.headingWikiText();
79 final int depth = wikiTokenizer.headingDepth();
80 if (skipSections.matcher(headingText).matches()) {
// Advances token by token, reassigning the parameter; a null result ends the loop.
81 while ((wikiTokenizer = wikiTokenizer.nextToken()) != null) {
// A heading at depth <= the skipped heading's depth marks the end of the skipped
// section; returnToLineStart presumably rewinds so that heading is read again by
// the caller -- confirm against WikiTokenizer.
82 if (wikiTokenizer.isHeading() && wikiTokenizer.headingDepth() <= depth) {
83 wikiTokenizer.returnToLineStart();
// NOTE(review): the statements that leave the skip branch are on lines not shown
// in this listing; presumably the method returns before reaching the code below.
// NOTE(review): String.format without an explicit Locale formats %d using the
// default locale; consider Locale.ROOT or string concatenation so the tag name
// always uses ASCII digits. The depth is also not bounded to 1..6 in the visible code.
89 builder.append(String.format("\n<h%d>", depth));
// The heading text is dispatched as wiki text, so markup inside it goes through
// the same handlers as body text.
90 dispatch(headingText, null);
91 builder.append(String.format("</h%d>\n", depth));
/**
 * Prefix characters of the lists currently open in the output, outermost
 * first. onListItem pushes one character for each list tag it opens and pops
 * one for each list tag it closes.
 */
94 final List<Character> listPrefixStack = new ArrayList<Character>();
/**
 * Emits one wiki list item as an {@code <li>} element. List tags are opened
 * while the item's prefix is longer than listPrefixStack, and closed afterwards
 * while the stack is longer than the prefix of the next list item.
 *
 * @param wikiTokenizer tokenizer positioned on the list item token
 */
96 public void onListItem(WikiTokenizer wikiTokenizer) {
// Guard: the builder is non-empty and does not end in '\n'. The guarded statement
// is on a line not shown in this listing (presumably it appends a newline -- confirm).
97 if (builder.length() != 0 && builder.charAt(builder.length() - 1) != '\n') {
100 final String prefix = wikiTokenizer.listItemPrefix();
// Open one list tag per prefix character beyond the current nesting depth; the
// tag name comes from WikiTokenizer.getListTag for that character.
// NOTE(review): only the lengths are compared, so a change of prefix character
// at an already-open depth does not open a different list tag here.
101 while (listPrefixStack.size() < prefix.length()) {
102 builder.append(String.format("<%s>", WikiTokenizer.getListTag(prefix.charAt(listPrefixStack.size()))));
103 listPrefixStack.add(prefix.charAt(listPrefixStack.size()));
105 builder.append("<li>");
// The item's content is dispatched as wiki text so nested markup is rendered too.
106 dispatch(wikiTokenizer.listItemWikiText(), null);
107 builder.append("</li>\n");
// Look ahead: if the next token is a newline, inspect the token after it and
// remember that the newline was stepped over.
109 WikiTokenizer nextToken = wikiTokenizer.nextToken();
110 boolean returnToLineStart = false;
111 if (nextToken != null && nextToken.isNewline()) {
112 nextToken = nextToken.nextToken();
113 returnToLineStart = true;
115 final String nextListHeader;
// The assignment for the "no following list item" case is on a line not shown in
// this listing (presumably an empty prefix, which would close every open list -- confirm).
116 if (nextToken == null || !nextToken.isListItem()) {
119 nextListHeader = nextToken.listItemPrefix();
// NOTE(review): returnToLineStart() is invoked on wikiTokenizer, not on nextToken;
// its exact effect depends on WikiTokenizer, which is not visible here -- confirm.
121 if (returnToLineStart) {
122 wikiTokenizer.returnToLineStart();
// Close list tags, innermost first, until the nesting depth equals the length of
// the next item's prefix.
124 while (listPrefixStack.size() > nextListHeader.length()) {
125 final char prefixChar = listPrefixStack.remove(listPrefixStack.size() - 1);
126 builder.append(String.format("</%s>\n", WikiTokenizer.getListTag(prefixChar)));
// State flag for ''' (bold) markup; presumably true while a <b> tag is open.
// Its update is on a line not shown in this listing -- confirm.
130 boolean boldOn = false;
// State flag for '' (italic) markup; toggled in onMarkup.
131 boolean italicOn = false;
/**
 * Translates wiki emphasis markup into HTML tags appended to {@code builder}:
 * the token ''' produces {@code <b>} or {@code </b>}, and the token ''
 * produces {@code <em>} or {@code </em>}.
 * The conditions that choose between the opening and the closing tag are on
 * lines not shown in this listing -- presumably they test boldOn / italicOn;
 * confirm against the full file.
 *
 * @param wikiTokenizer tokenizer positioned on the markup token
 */
133 public void onMarkup(WikiTokenizer wikiTokenizer) {
134 if ("'''".equals(wikiTokenizer.token())) {
136 builder.append("<b>");
138 builder.append("</b>");
141 } else if ("''".equals(wikiTokenizer.token())) {
143 builder.append("<em>");
145 builder.append("</em>");
// Flip the italic state after handling a '' token.
147 italicOn = !italicOn;