import java.util.regex.Pattern;
public final class WikiTokenizer {
+
+ public static interface Callback {
+ void onPlainText(final String text);
+ void onMarkup(WikiTokenizer wikiTokenizer);
+ void onWikiLink(WikiTokenizer wikiTokenizer);
+ void onNewline(WikiTokenizer wikiTokenizer);
+ void onFunction(final WikiTokenizer tokenizer, String functionName, List<String> functionPositionArgs,
+ Map<String, String> functionNamedArgs);
+ void onHeading(WikiTokenizer wikiTokenizer);
+ void onListItem(WikiTokenizer wikiTokenizer);
+ void onComment(WikiTokenizer wikiTokenizer);
+ void onHtml(WikiTokenizer wikiTokenizer);
+ }
+
+ public static class DoNothingCallback implements Callback {
+
+ @Override
+ public void onPlainText(String text) {
+ }
+
+ @Override
+ public void onMarkup(WikiTokenizer wikiTokenizer) {
+ }
+
+ @Override
+ public void onWikiLink(WikiTokenizer wikiTokenizer) {
+ }
+ @Override
+ public void onNewline(WikiTokenizer wikiTokenizer) {
+ }
+
+ @Override
+ public void onFunction(WikiTokenizer tokenizer, String functionName,
+ List<String> functionPositionArgs, Map<String, String> functionNamedArgs) {
+ }
+
+ @Override
+ public void onHeading(WikiTokenizer wikiTokenizer) {
+ }
+
+ @Override
+ public void onListItem(WikiTokenizer wikiTokenizer) {
+ }
+
+ @Override
+ public void onComment(WikiTokenizer wikiTokenizer) {
+ }
+
+ @Override
+ public void onHtml(WikiTokenizer wikiTokenizer) {
+ }
+ }
+
//private static final Pattern wikiTokenEvent = Pattern.compile("($)", Pattern.MULTILINE);
private static final Pattern wikiTokenEvent = Pattern.compile("(" +
"\\{\\{|\\}\\}|" +
"=|" + // Need the = because we might have to find unescaped =
"<!--|" +
"''|" +
+ "<pre>|" +
+ "<math>|" +
+ "<ref>|" +
"$)", Pattern.MULTILINE);
private static final String listChars = "*#:;";
private boolean isComment;
private boolean isFunction;
private boolean isWikiLink;
+ private boolean isHtml;
private int firstUnescapedPipePos;
private int lastUnescapedPipePos;
this(wikiText, true);
}
- public WikiTokenizer(final String wikiText, final boolean isNewline) {
+ public WikiTokenizer(String wikiText, final boolean isNewline) {
+ wikiText = wikiText.replaceAll("\u2028", "\n");
+ wikiText = wikiText.replaceAll("\u0085", "\n");
this.wikiText = wikiText;
this.matcher = wikiTokenEvent.matcher(wikiText);
justReturnedNewline = isNewline;
isComment = false;
isFunction = false;
isWikiLink = false;
+ isHtml = false;
firstUnescapedPipePos = -1;
lastUnescapedPipePos = -1;
positionArgs.clear();
namedArgs.clear();
}
+
+ private static final Pattern POSSIBLE_WIKI_TEXT = Pattern.compile(
+ "\\{\\{|" +
+ "\\[\\[|" +
+ "<!--|" +
+ "''|" +
+ "<pre>|" +
+ "<math>|" +
+ "<ref>|" +
+ "[\n]"
+ );
+
+ public static void dispatch(final String wikiText, final boolean isNewline, final Callback callback) {
+ // Optimization...
+ if (!POSSIBLE_WIKI_TEXT.matcher(wikiText).find()) {
+ callback.onPlainText(wikiText);
+ } else {
+ final WikiTokenizer tokenizer = new WikiTokenizer(wikiText, isNewline);
+ while (tokenizer.nextToken() != null) {
+ if (tokenizer.isPlainText()) {
+ callback.onPlainText(tokenizer.token());
+ } else if (tokenizer.isMarkup()) {
+ callback.onMarkup(tokenizer);
+ } else if (tokenizer.isWikiLink()) {
+ callback.onWikiLink(tokenizer);
+ } else if (tokenizer.isNewline()) {
+ callback.onNewline(tokenizer);
+ } else if (tokenizer.isFunction()) {
+ callback.onFunction(tokenizer, tokenizer.functionName(), tokenizer.functionPositionArgs(), tokenizer.functionNamedArgs());
+ } else if (tokenizer.isHeading()) {
+ callback.onHeading(tokenizer);
+ } else if (tokenizer.isListItem()) {
+ callback.onListItem(tokenizer);
+ } else if (tokenizer.isComment()) {
+ callback.onComment(tokenizer);
+ } else if (tokenizer.isHtml()) {
+ callback.onHtml(tokenizer);
+ } else if (!tokenizer.errors.isEmpty()) {
+ // Log was already printed....
+ } else {
+ throw new IllegalStateException("Unknown wiki state: " + tokenizer.token());
+ }
+ }
+ }
+ }
+
+ public List<String> errors() {
+ return errors;
+ }
public boolean isNewline() {
return justReturnedNewline;
assert isListItem();
return wikiText.substring(start, listPrefixEnd);
}
+
+ public static String getListTag(char c) {
+ if (c == '#') {
+ return "ol";
+ }
+ return "ul";
+ }
public String listItemWikiText() {
assert isListItem();
assert isFunction();
// "{{.."
if (firstUnescapedPipePos != -1) {
- return wikiText.substring(start + 2, firstUnescapedPipePos);
+ return trimNewlines(wikiText.substring(start + 2, firstUnescapedPipePos).trim());
}
- return wikiText.substring(start + 2, end - 2);
+ final int safeEnd = Math.max(start + 2, end - 2);
+ return trimNewlines(wikiText.substring(start + 2, safeEnd).trim());
}
public List<String> functionPositionArgs() {
assert isWikiLink();
// "[[.."
if (lastUnescapedPipePos != -1) {
- return wikiText.substring(lastUnescapedPipePos + 1, end - 2);
+ return trimNewlines(wikiText.substring(lastUnescapedPipePos + 1, end - 2));
}
assert start + 2 < wikiText.length() && end >= 2: wikiText;
- return wikiText.substring(start + 2, end - 2);
+ return trimNewlines(wikiText.substring(start + 2, end - 2));
}
public String wikiLinkDest() {
assert isWikiLink();
// "[[.."
if (firstUnescapedPipePos != -1) {
- return wikiText.substring(start + 2, firstUnescapedPipePos);
+ return trimNewlines(wikiText.substring(start + 2, firstUnescapedPipePos));
}
return null;
}
+ public boolean isHtml() {
+ return isHtml;
+ }
+
/**
 * Tests whether the wiki text contains {@code prefix} at offset
 * {@code start}.
 *
 * @param prefix the text to look for
 * @return {@code true} if the wiki text matches {@code prefix} beginning at
 *         {@code start}
 */
public boolean remainderStartsWith(final String prefix) {
    // Same contract as String.startsWith(prefix, start): false when the
    // region would run past either end of the text.
    return wikiText.regionMatches(start, prefix, 0, prefix.length());
}
return this;
}
- if (justReturnedNewline) {
+ if (justReturnedNewline) {
justReturnedNewline = false;
final char firstChar = wikiText.charAt(end);
if (wikiText.startsWith("<pre>", start)) {
end = safeIndexOf(wikiText, start, "</pre>", "\n");
+ isHtml = true;
return this;
}
+ if (wikiText.startsWith("<ref>", start)) {
+ end = safeIndexOf(wikiText, start, "</ref>", "\n");
+ isHtml = true;
+ return this;
+ }
+
if (wikiText.startsWith("<math>", start)) {
end = safeIndexOf(wikiText, start, "</math>", "\n");
+ isHtml = true;
return this;
}
assert matcher.end() > end || matchText.length() == 0: "Group=" + matcher.group();
if (matchText.length() == 0) {
- assert matchStart == wikiText.length() || wikiText.charAt(matchStart) == '\n';
+ assert matchStart == wikiText.length() || wikiText.charAt(matchStart) == '\n' : wikiText + ", " + matchStart;
if (firstNewline == -1) {
firstNewline = matcher.end();
}
errors.add("Unmatched <!-- error: " + wikiText.substring(start));
return safeIndexOf(wikiText, start, "\n", "\n");
}
- } else if (matchText.equals("''")) {
+ } else if (matchText.equals("''") || (matchText.startsWith("<") && matchText.endsWith(">"))) {
// Don't care.
} else {
assert false : "Match text='" + matchText + "'";
// We were looking for the end, we got it.
return end;
}
+ errors.add("Couldn't find: " + toFind + ", "+ wikiText.substring(start));
if (firstNewline != -1) {
- errors.add("Couldn't find: " + toFind + ", "+ wikiText.substring(start));
return firstNewline;
}
return end;
if (lastUnescapedEqualsPos > lastUnescapedPipePos) {
final String key = wikiText.substring(lastUnescapedPipePos + 1, lastUnescapedEqualsPos);
final String value = wikiText.substring(lastUnescapedEqualsPos + 1, matchStart);
- namedArgs.put(key, value);
+ namedArgs.put(trimNewlines(key), trimNewlines(value));
} else {
final String value = wikiText.substring(lastUnescapedPipePos + 1, matchStart);
- positionArgs.add(value);
+ positionArgs.add(trimNewlines(value));
}
}
lastUnescapedPipePos = matchStart;
}
+
+ static final String trimNewlines(String s) {
+ while (s.startsWith("\n")) {
+ s = s.substring(1);
+ }
+ while (s.endsWith("\n")) {
+ s = s.substring(0, s.length() - 1);
+ }
+ return s.replaceAll("\n", " ");
+ }
static int safeIndexOf(final String s, final int start, final String target, final String backup) {
int close = s.indexOf(target, start);
return builder.toString();
}
+ public static StringBuilder appendFunction(final StringBuilder builder, final String name, List<String> args,
+ final Map<String, String> namedArgs) {
+ builder.append(name);
+ for (final String arg : args) {
+ builder.append("|").append(arg);
+ }
+ for (final Map.Entry<String, String> entry : namedArgs.entrySet()) {
+ builder.append("|").append(entry.getKey()).append("=").append(entry.getValue());
+ }
+ return builder;
+ }
+
}