1 package com.hughes.android.dictionary.parser;
3 import java.util.ArrayList;
4 import java.util.LinkedHashMap;
7 import java.util.regex.Matcher;
8 import java.util.regex.Pattern;
10 import com.hughes.util.StringUtil;
12 public class WikiParser {
// Finds the next point of interest in wiki text. Alternatives, in order: an end of line
// (MULTILINE `$`), `''` (which is also the prefix of `'''`), `{{`, `[[`, a run of two or more `=`
// followed only by whitespace up to an end of line (the run itself is capture group 1, the only
// capture group in this pattern), `<!--`, and `<pre>`.
14 private static final Pattern markup = Pattern.compile("$|''|\\{\\{|\\[\\[|(==+)\\s*$|<!--|<pre>", Pattern.MULTILINE);
// One or more list-prefix characters (`*`, `#`, `;`, `:`). Not MULTILINE, so `^` anchors to the
// very start of the string it is applied to.
15 private static final Pattern listStart = Pattern.compile("^[*#;:]+");
// A `|` together with any whitespace on either side of it; parse() uses it to split link contents.
16 private static final Pattern pipeSplit = Pattern.compile("\\s*\\|\\s*");
// One or more consecutive whitespace characters.
17 private static final Pattern whitespace = Pattern.compile("\\s+");
// A run of two or more `=` at the very start of the string it is applied to.
18 private static final Pattern headerStart = Pattern.compile("^==+");
21 static void parse(final String wikiText, final WikiCallback callback) {
23 boolean boldOn = false;
24 boolean italicOn = false;
25 int insideHeaderDepth = -1;
26 String lastListItem = null;
28 final List<String> positionalArgs = new ArrayList<String>();
29 final Map<String, String> namedArgs = new LinkedHashMap<String, String>();
31 String rest = wikiText;
32 while (rest.length() > 0) {
33 final Matcher matcher = markup.matcher(rest);
35 final int nextMarkupPos = matcher.start();
36 if (nextMarkupPos != 0) {
37 String text = rest.substring(0, nextMarkupPos);
38 whitespace.matcher(text).replaceAll(" ");
39 callback.onText(text);
40 rest = rest.substring(nextMarkupPos);
43 if (rest.equals("")) {
45 } else if (rest.startsWith("\n")) {
46 rest = rest.substring(1);
48 if (insideHeaderDepth != -1) {
49 throw new RuntimeException("barf");
51 if (lastListItem != null) {
52 callback.onListItemEnd(lastListItem, null);
55 final Matcher headerMatcher = headerStart.matcher(rest);
56 if (headerMatcher.find()) {
58 insideHeaderDepth = headerMatcher.group().length();
59 callback.onHeadingStart(insideHeaderDepth);
60 rest = rest.substring(headerMatcher.group().length());
64 final Matcher listStartMatcher = listStart.matcher(rest);
65 if (listStartMatcher.find()) {
66 lastListItem = listStartMatcher.group();
67 callback.onListItemStart(lastListItem, null);
68 rest = rest.substring(lastListItem.length());
70 } else if (lastListItem != null) {
71 callback.onNewParagraph();
75 if (rest.startsWith("\n")) {
76 callback.onNewParagraph();
80 } else if (rest.startsWith("'''")) {
82 callback.onFormatBold(boldOn);
83 rest = rest.substring(3);
84 } else if (rest.startsWith("''")) {
86 callback.onFormatItalic(italicOn);
87 rest = rest.substring(2);
88 } else if (rest.startsWith("{{")) {
89 int end = StringUtil.nestedIndexOf(rest, 2, "{{", "}}");
91 callback.onUnterminated("{{", rest);
92 end = StringUtil.safeIndexOf(rest, "\n") - 2;
94 final String template = rest.substring(2, end).trim();
95 final List<String> templateArray = new ArrayList<String>();
96 contextSensitivePipeSplit(template, templateArray);
97 positionalArgs.clear();
99 for (int i = 0; i < templateArray.size(); ++i) {
103 equalPos = templateArray.get(i).indexOf('=', equalPos + 1);
104 } while (equalPos > 1 && templateArray.get(i).charAt(equalPos - 1) == ' ');
106 if (equalPos == -1) {
107 positionalArgs.add(templateArray.get(i));
109 namedArgs.put(templateArray.get(i).substring(0, equalPos), templateArray.get(i).substring(equalPos + 1));
112 callback.onTemplate(positionalArgs, namedArgs);
113 rest = rest.substring(end + 2);
114 } else if (rest.startsWith("[[")) {
115 int end = rest.indexOf("]]");
117 callback.onUnterminated("[[", rest);
118 end = StringUtil.safeIndexOf(rest, "\n") - 2;
120 final String wikiLink = rest.substring(2, end);
121 final String[] args = pipeSplit.split(wikiLink);
122 callback.onWikiLink(args);
123 rest = rest.substring(end + 2);
124 } else if (rest.startsWith("=")) {
125 final String match = matcher.group(1) != null ? matcher.group(1) : matcher.group(2);
126 if (insideHeaderDepth == -1) {
128 if (match.length() != insideHeaderDepth) {
129 callback.onInvalidHeaderEnd(rest);
132 callback.onHeadingEnd(insideHeaderDepth);
133 insideHeaderDepth = -1;
135 rest = rest.substring(match.length());
136 } else if (rest.startsWith("<!--")) {
137 int end = rest.indexOf("-->");
139 callback.onUnterminated("<!--", rest);
140 end = StringUtil.safeIndexOf(rest, "\n") - 3;
142 callback.onComment(rest.substring(4, end));
143 rest = rest.substring(end + 3);
144 } else if (rest.startsWith("<pre>")) {
145 int end = rest.indexOf("</pre>");
147 callback.onUnterminated("<pre>", rest);
148 end = StringUtil.safeIndexOf(rest, "\n") - 6;
150 callback.onText(rest.substring(5, end));
151 rest = rest.substring(end + 6);
153 throw new RuntimeException("barf: " + rest);
/**
 * Splits {@code template} on {@code |} characters that are seen while {@code depth} is 0 and
 * appends each piece, trimmed, to {@code result}. The scan has separate branches for positions
 * where {@code [[} / <code>{{</code> or {@code ]]} / <code>}}</code> begin.
 *
 * @param template template text without its outer braces
 * @param result list that receives the trimmed pieces, in order
 * @throws RuntimeException with message {@code "too many closings: " + template}, raised from the
 *     closing-delimiter branch
 */
159 private static void contextSensitivePipeSplit(String template, final List<String> result) {
// NOTE(review): the scan starts at index 1, so the character at index 0 is never examined by any
// branch below. A `[[` or `{{` that begins the template is therefore never seen by the opener
// branch, and a leading `|` is never treated as a separator. Confirm this is intended.
162 for (int i = 1; i < template.length(); ) {
163 if (template.charAt(i) == '|' && depth == 0) {
// Emit the piece between the previous split point and this pipe.
164 final String s = template.substring(lastStart, i);
165 result.add(s.trim());
// An opening delimiter begins at i.
168 } else if (template.startsWith("[[", i) || template.startsWith("{{", i)) {
// A closing delimiter begins at i.
171 } else if (template.startsWith("]]", i) || template.startsWith("}}", i)) {
174 throw new RuntimeException("too many closings: " + template);
// Append whatever follows the last split point.
181 result.add(template.substring(lastStart).trim());
184 // ------------------------------------------------------------------------
/**
 * Runs {@code parse} over {@code wikiText} with a fresh {@code StringBuilderCallback} and returns
 * the text that callback accumulated in its {@code builder}.
 *
 * @param wikiText the wiki markup to convert
 * @return the contents of the callback's builder after parsing
 */
186 public static String simpleParse(final String wikiText) {
187 final StringBuilderCallback callback = new StringBuilderCallback();
188 parse(wikiText, callback);
189 return callback.builder.toString();
/**
 * {@code WikiCallback} implementation that collects output in a {@link StringBuilder}; it is the
 * callback that {@code simpleParse} hands to {@code parse}.
 */
192 static final class StringBuilderCallback implements WikiCallback {
// Receives everything the handlers append; simpleParse reads it directly.
194 final StringBuilder builder = new StringBuilder();
197 public void onComment(String text) {
201 public void onFormatBold(boolean boldOn) {
205 public void onFormatItalic(boolean italicOn) {
// Appends only the last pipe-separated segment of the link.
209 public void onWikiLink(String[] args) {
// Pattern.split drops trailing empty strings, so link text made up only of a pipe arrives here
// as a zero-length array; append nothing in that case instead of indexing at -1.
210 if (args.length > 0) { builder.append(args[args.length - 1]); }
// Appends the template as "{{" + positionalArgs + namedArgs + "}}", using the collections' own
// toString output.
214 public void onTemplate(List<String> positionalArgs,
215 Map<String, String> namedArgs) {
216 builder.append("{{").append(positionalArgs).append(namedArgs).append("}}");
// Appends the text unchanged.
220 public void onText(String text) {
221 builder.append(text);
225 public void onHeadingStart(int depth) {
229 public void onHeadingEnd(int depth) {
233 public void onNewLine() {
237 public void onNewParagraph() {
241 public void onListItemStart(String header, int[] section) {
245 public void onListItemEnd(String header, int[] section) {
// Reports the unterminated construct and the remaining input on standard error.
249 public void onUnterminated(String start, String rest) {
250 System.err.printf("onUnterminated: %s, %s\n", start, rest);
// Throws a RuntimeException whose message is the remaining input.
254 public void onInvalidHeaderEnd(String rest) {
255 throw new RuntimeException(rest);