gitweb.fperrin.net Git - DictionaryPC.git/blob - src/com/hughes/android/dictionary/parser/wiktionary/WholeSectionToHtmlParser.java
53104fc166e862c39cb89b9ec4a814d0fdc50253
[DictionaryPC.git] / src / com / hughes / android / dictionary / parser / wiktionary / WholeSectionToHtmlParser.java
1
2 package com.hughes.android.dictionary.parser.wiktionary;
3
4 import com.hughes.android.dictionary.engine.HtmlEntry;
5 import com.hughes.android.dictionary.engine.IndexBuilder;
6 import com.hughes.android.dictionary.engine.IndexBuilder.TokenData;
7 import com.hughes.android.dictionary.engine.IndexedEntry;
8 import com.hughes.android.dictionary.parser.WikiTokenizer;
9
10 import org.apache.commons.lang3.StringEscapeUtils;
11
12 import java.util.ArrayList;
13 import java.util.List;
14 import java.util.Map;
15 import java.util.regex.Pattern;
16
17 public class WholeSectionToHtmlParser extends AbstractWiktionaryParser {
18
19     public static final String NAME = "WholeSectionToHtmlParser";
20     public static final Pattern skipSections = Pattern.compile(".*Translations.*");
21
22     final IndexBuilder titleIndexBuilder;
23
24     public WholeSectionToHtmlParser(final IndexBuilder titleIndexBuilder) {
25         this.titleIndexBuilder = titleIndexBuilder;
26
27     }
28
29     @Override
30     void parseSection(String heading, String text) {
31         HtmlEntry htmlEntry = new HtmlEntry(entrySource, StringEscapeUtils.escapeHtml3(title));
32         IndexedEntry indexedEntry = new IndexedEntry(htmlEntry);
33
34         final AppendAndIndexWikiCallback<WholeSectionToHtmlParser> callback = new AppendCallback(
35                 this);
36
37         callback.builder = new StringBuilder();
38         callback.indexedEntry = indexedEntry;
39         callback.dispatch(text, null);
40
41         htmlEntry.html = callback.builder.toString();
42         indexedEntry.isValid = true;
43
44         final TokenData tokenData = titleIndexBuilder.getOrCreateTokenData(title);
45
46         htmlEntry.addToDictionary(titleIndexBuilder.index.dict);
47         tokenData.htmlEntries.add(htmlEntry);
48         // titleIndexBuilder.addEntryWithString(indexedEntry, title,
49         // EntryTypeName.WIKTIONARY_TITLE_MULTI_DETAIL);
50     }
51
52     @Override
53     void removeUselessArgs(Map<String, String> namedArgs) {
54     }
55
56     class AppendCallback extends AppendAndIndexWikiCallback<WholeSectionToHtmlParser> {
57         public AppendCallback(WholeSectionToHtmlParser parser) {
58             super(parser);
59         }
60
61         @Override
62         public void onPlainText(String plainText) {
63             super.onPlainText(StringEscapeUtils.escapeHtml3(plainText));
64         }
65
66         @Override
67         public void onWikiLink(WikiTokenizer wikiTokenizer) {
68             if (wikiTokenizer.wikiLinkText().endsWith(":" + title)) {
69                 // Skips wikilinks like: [[en::dick]]
70                 return;
71             }
72             super.onWikiLink(wikiTokenizer);
73         }
74
75         @Override
76         public void onFunction(WikiTokenizer wikiTokenizer, String name,
77                 List<String> args, Map<String, String> namedArgs) {
78             super.onFunction(wikiTokenizer, name, args, namedArgs);
79         }
80
81         @Override
82         public void onHtml(WikiTokenizer wikiTokenizer) {
83             super.onHtml(wikiTokenizer);
84         }
85
86         @Override
87         public void onNewline(WikiTokenizer wikiTokenizer) {
88         }
89
90         @Override
91         public void onHeading(WikiTokenizer wikiTokenizer) {
92             final String headingText = wikiTokenizer.headingWikiText();
93             final int depth = wikiTokenizer.headingDepth();
94             if (skipSections.matcher(headingText).matches()) {
95                 while ((wikiTokenizer = wikiTokenizer.nextToken()) != null) {
96                     if (wikiTokenizer.isHeading() && wikiTokenizer.headingDepth() <= depth) {
97                         wikiTokenizer.returnToLineStart();
98                         return;
99                     }
100                 }
101                 return;
102             }
103             builder.append(String.format("\n<h%d>", depth));
104             dispatch(headingText, null);
105             builder.append(String.format("</h%d>\n", depth));
106         }
107
108         final List<Character> listPrefixStack = new ArrayList<Character>();
109
110         @Override
111         public void onListItem(WikiTokenizer wikiTokenizer) {
112             if (builder.length() != 0 && builder.charAt(builder.length() - 1) != '\n') {
113                 builder.append("\n");
114             }
115             final String prefix = wikiTokenizer.listItemPrefix();
116             while (listPrefixStack.size() < prefix.length()) {
117                 builder.append(String.format("<%s>",
118                         WikiTokenizer.getListTag(prefix.charAt(listPrefixStack.size()))));
119                 listPrefixStack.add(prefix.charAt(listPrefixStack.size()));
120             }
121             builder.append("<li>");
122             dispatch(wikiTokenizer.listItemWikiText(), null);
123             builder.append("</li>\n");
124
125             WikiTokenizer nextToken = wikiTokenizer.nextToken();
126             boolean returnToLineStart = false;
127             if (nextToken != null && nextToken.isNewline()) {
128                 nextToken = nextToken.nextToken();
129                 returnToLineStart = true;
130             }
131             final String nextListHeader;
132             if (nextToken == null || !nextToken.isListItem()) {
133                 nextListHeader = "";
134             } else {
135                 nextListHeader = nextToken.listItemPrefix();
136             }
137             if (returnToLineStart) {
138                 wikiTokenizer.returnToLineStart();
139             }
140             while (listPrefixStack.size() > nextListHeader.length()) {
141                 final char prefixChar = listPrefixStack.remove(listPrefixStack.size() - 1);
142                 builder.append(String.format("</%s>\n", WikiTokenizer.getListTag(prefixChar)));
143             }
144         }
145
146         boolean boldOn = false;
147         boolean italicOn = false;
148
149         @Override
150         public void onMarkup(WikiTokenizer wikiTokenizer) {
151             if ("'''".equals(wikiTokenizer.token())) {
152                 if (!boldOn) {
153                     builder.append("<b>");
154                 } else {
155                     builder.append("</b>");
156                 }
157                 boldOn = !boldOn;
158             } else if ("''".equals(wikiTokenizer.token())) {
159                 if (!italicOn) {
160                     builder.append("<em>");
161                 } else {
162                     builder.append("</em>");
163                 }
164                 italicOn = !italicOn;
165             } else {
166                 assert false;
167             }
168         }
169
170     }
171
172 }