1 package com.hughes.android.dictionary.parser;
3 import java.util.ArrayList;
4 import java.util.LinkedHashMap;
7 import java.util.regex.Matcher;
8 import java.util.regex.Pattern;
10 public class WikiParser {
12 private static final Pattern markup = Pattern.compile("$|''|\\{\\{|\\[\\[|(==+)\\s*$|<!--|<pre>", Pattern.MULTILINE);
13 private static final Pattern listStart = Pattern.compile("^[*#;:]+");
14 private static final Pattern pipeSplit = Pattern.compile("\\s*\\|\\s*");
15 private static final Pattern whitespace = Pattern.compile("\\s+");
16 private static final Pattern headerStart = Pattern.compile("^==+");
19 static void parse(final String wikiText, final WikiCallback callback) {
21 boolean boldOn = false;
22 boolean italicOn = false;
23 int insideHeaderDepth = -1;
24 String lastListItem = null;
26 final List<String> positionalArgs = new ArrayList<String>();
27 final Map<String, String> namedArgs = new LinkedHashMap<String, String>();
29 String rest = wikiText;
30 while (rest.length() > 0) {
31 final Matcher matcher = markup.matcher(rest);
33 final int nextMarkupPos = matcher.start();
34 if (nextMarkupPos != 0) {
35 String text = rest.substring(0, nextMarkupPos);
36 whitespace.matcher(text).replaceAll(" ");
37 callback.onText(text);
38 rest = rest.substring(nextMarkupPos);
41 if (rest.equals("")) {
43 } else if (rest.startsWith("\n")) {
44 rest = rest.substring(1);
46 if (insideHeaderDepth != -1) {
47 throw new RuntimeException("barf");
49 if (lastListItem != null) {
50 callback.onListItemEnd(lastListItem, null);
53 final Matcher headerMatcher = headerStart.matcher(rest);
54 if (headerMatcher.find()) {
56 insideHeaderDepth = headerMatcher.group().length();
57 callback.onHeadingStart(insideHeaderDepth);
58 rest = rest.substring(headerMatcher.group().length());
62 final Matcher listStartMatcher = listStart.matcher(rest);
63 if (listStartMatcher.find()) {
64 lastListItem = listStartMatcher.group();
65 callback.onListItemStart(lastListItem, null);
66 rest = rest.substring(lastListItem.length());
68 } else if (lastListItem != null) {
69 callback.onNewParagraph();
73 if (rest.startsWith("\n")) {
74 callback.onNewParagraph();
78 } else if (rest.startsWith("'''")) {
80 callback.onFormatBold(boldOn);
81 rest = rest.substring(3);
82 } else if (rest.startsWith("''")) {
84 callback.onFormatItalic(italicOn);
85 rest = rest.substring(2);
86 } else if (rest.startsWith("{{")) {
87 int end = rest.indexOf("}}");
89 callback.onUnterminated("{{", rest);
92 final String template = rest.substring(2, end).trim();
93 //todo: this doesn't work. can't split pipes inside [[asdf|asdf]]
94 final List<String> templateArray = new ArrayList<String>();
95 contextSensitivePipeSplit(template, templateArray);
96 positionalArgs.clear();
98 for (int i = 0; i < templateArray.size(); ++i) {
102 equalPos = templateArray.get(i).indexOf('=', equalPos + 1);
103 } while (equalPos > 1 && templateArray.get(i).charAt(equalPos - 1) == ' ');
105 if (equalPos == -1) {
106 positionalArgs.add(templateArray.get(i));
108 namedArgs.put(templateArray.get(i).substring(0, equalPos), templateArray.get(i).substring(equalPos + 1));
111 callback.onTemplate(positionalArgs, namedArgs);
112 rest = rest.substring(end + 2);
113 } else if (rest.startsWith("[[")) {
114 int end = rest.indexOf("]]");
116 callback.onUnterminated("[[", rest);
119 final String wikiLink = rest.substring(2, end);
120 final String[] args = pipeSplit.split(wikiLink);
121 callback.onWikiLink(args);
122 rest = rest.substring(end + 2);
123 } else if (rest.startsWith("=")) {
124 final String match = matcher.group(1) != null ? matcher.group(1) : matcher.group(2);
125 if (insideHeaderDepth == -1) {
127 if (match.length() != insideHeaderDepth) {
128 callback.onInvalidHeaderEnd(rest);
131 callback.onHeadingEnd(insideHeaderDepth);
132 insideHeaderDepth = -1;
134 rest = rest.substring(match.length());
135 } else if (rest.startsWith("<!--")) {
136 int end = rest.indexOf("-->");
138 callback.onUnterminated("<!--", rest);
141 callback.onComment(rest.substring(4, end));
142 rest = rest.substring(end + 3);
143 } else if (rest.startsWith("<pre>")) {
144 int end = rest.indexOf("</pre>");
146 callback.onUnterminated("<pre>", rest);
149 callback.onText(rest.substring(5, end));
150 rest = rest.substring(end + 6);
152 throw new RuntimeException("barf: " + rest);
158 private static final Pattern openBracketOrPipe = Pattern.compile("($)|(\\[\\[)|(\\s*\\|\\s*)");
159 private static void contextSensitivePipeSplit(String template, final List<String> result) {
160 StringBuilder builder = new StringBuilder();
161 while (template.length() > 0) {
162 final Matcher matcher = openBracketOrPipe.matcher(template);
163 if (matcher.find()) {
164 // append to the match.
165 builder.append(template.substring(0, matcher.start()));
166 if (matcher.group(2) != null) { // [[
167 // append to the close ]].
168 final int closeIndex = template.indexOf("]]", matcher.end());
169 builder.append(template.substring(matcher.start(), closeIndex + 2));
170 template = template.substring(closeIndex + 2);
171 } else if (matcher.group(3) != null) { // |
172 result.add(builder.toString());
173 builder = new StringBuilder();
174 template = template.substring(matcher.end());
176 template = template.substring(matcher.start());
177 assert template.length() == 0 : template;
183 result.add(builder.toString());
186 // ------------------------------------------------------------------------
188 public static String simpleParse(final String wikiText) {
189 final StringBuilderCallback callback = new StringBuilderCallback();
190 parse(wikiText, callback);
191 return callback.builder.toString();
194 static final class StringBuilderCallback implements WikiCallback {
196 final StringBuilder builder = new StringBuilder();
199 public void onComment(String text) {
203 public void onFormatBold(boolean boldOn) {
207 public void onFormatItalic(boolean italicOn) {
211 public void onWikiLink(String[] args) {
212 builder.append(args[args.length - 1]);
216 public void onTemplate(List<String> positionalArgs,
217 Map<String, String> namedArgs) {
218 builder.append("{{").append(positionalArgs).append(namedArgs).append("}}");
222 public void onText(String text) {
223 builder.append(text);
227 public void onHeadingStart(int depth) {
231 public void onHeadingEnd(int depth) {
235 public void onNewLine() {
239 public void onNewParagraph() {
243 public void onListItemStart(String header, int[] section) {
247 public void onListItemEnd(String header, int[] section) {
251 public void onUnterminated(String start, String rest) {
252 throw new RuntimeException(start + rest);
256 public void onInvalidHeaderEnd(String rest) {
257 throw new RuntimeException(rest);