public final class WikiTokenizer {
- static interface Callback {
- void onPlainText(WikiTokenizer wikiTokenizer);
+ /**
+  * Receiver for the token events produced by {@code WikiTokenizer.dispatch}.
+  * Each method corresponds to one branch of the dispatch loop.
+  */
+ public static interface Callback {
+ /** Receives plain text: either one plain-text token or, when dispatch finds no markup trigger, the whole input. */
+ void onPlainText(final String text);
void onMarkup(WikiTokenizer wikiTokenizer);
void onWikiLink(WikiTokenizer wikiTokenizer);
void onNewline(WikiTokenizer wikiTokenizer);
- void onFunction(String functionName, List<String> functionPositionArgs,
+ /** Receives a function token together with the tokenizer that produced it, its name, and its positional and named arguments. */
+ void onFunction(final WikiTokenizer tokenizer, String functionName, List<String> functionPositionArgs,
Map<String, String> functionNamedArgs);
void onHeading(WikiTokenizer wikiTokenizer);
void onListItem(WikiTokenizer wikiTokenizer);
void onComment(WikiTokenizer wikiTokenizer);
+ /** Called when the tokenizer reports {@code isHtml()} for the current token. */
+ void onHtml(WikiTokenizer wikiTokenizer);
+ }
+
+ /**
+  * A {@link Callback} in which every handler is a no-op. The class is not
+  * final, so a subclass can override just the events it needs.
+  */
+ public static class DoNothingCallback implements Callback {
+
+     /** Ignores plain text. */
+     @Override
+     public void onPlainText(String text) {}
+
+     /** Ignores markup tokens. */
+     @Override
+     public void onMarkup(WikiTokenizer wikiTokenizer) {}
+
+     /** Ignores wiki links. */
+     @Override
+     public void onWikiLink(WikiTokenizer wikiTokenizer) {}
+
+     /** Ignores newlines. */
+     @Override
+     public void onNewline(WikiTokenizer wikiTokenizer) {}
+
+     /** Ignores function tokens and their arguments. */
+     @Override
+     public void onFunction(
+             WikiTokenizer tokenizer,
+             String functionName,
+             List<String> functionPositionArgs,
+             Map<String, String> functionNamedArgs) {}
+
+     /** Ignores headings. */
+     @Override
+     public void onHeading(WikiTokenizer wikiTokenizer) {}
+
+     /** Ignores list items. */
+     @Override
+     public void onListItem(WikiTokenizer wikiTokenizer) {}
+
+     /** Ignores comments. */
+     @Override
+     public void onComment(WikiTokenizer wikiTokenizer) {}
+
+     /** Ignores HTML tokens. */
+     @Override
+     public void onHtml(WikiTokenizer wikiTokenizer) {}
}
//private static final Pattern wikiTokenEvent = Pattern.compile("($)", Pattern.MULTILINE);
private boolean isComment;
private boolean isFunction;
private boolean isWikiLink;
+ private boolean isHtml;
private int firstUnescapedPipePos;
private int lastUnescapedPipePos;
}
+ /**
+  * Creates a tokenizer over {@code wikiText}.
+  *
+  * @param wikiText  the raw wiki source; every U+2028 (LINE SEPARATOR) is
+  *                  stored as {@code '\n'}
+  * @param isNewline initial value of the "just returned a newline" flag
+  */
public WikiTokenizer(final String wikiText, final boolean isNewline) {
- this.wikiText = wikiText;
+ // Literal char-for-char replacement: same result as the regex form, no pattern compile.
+ this.wikiText = wikiText.replace('\u2028', '\n');
- this.matcher = wikiTokenEvent.matcher(wikiText);
+ // Match against the normalized field, not the raw parameter, so the
+ // matcher's offsets and the text they index are the same string.
+ this.matcher = wikiTokenEvent.matcher(this.wikiText);
justReturnedNewline = isNewline;
}
isComment = false;
isFunction = false;
isWikiLink = false;
+ isHtml = false;
firstUnescapedPipePos = -1;
lastUnescapedPipePos = -1;
positionArgs.clear();
namedArgs.clear();
}
-
- public void dispatch(final Callback callback) {
- while (nextToken() != null) {
- if (isPlainText()) {
- callback.onPlainText(this);
- } else if (isMarkup()) {
- callback.onMarkup(this);
- } else if (isWikiLink) {
- callback.onWikiLink(this);
- } else if (isNewline()) {
- callback.onNewline(this);
- } else if (isFunction()) {
- callback.onFunction(functionName(), functionPositionArgs(), functionNamedArgs());
- } else if (isHeading()) {
- callback.onHeading(this);
- } else if (isListItem()) {
- callback.onListItem(this);
- } else if (isComment()) {
- callback.onComment(this);
- } else {
- throw new IllegalStateException("Unknown wiki state.");
+
+ /**
+  * Cheap pre-check used by {@code dispatch}: if none of these triggers occurs,
+  * the input is handed to {@code onPlainText} without tokenizing.
+  * It must therefore list every sequence the tokenizer treats specially.
+  * That includes {@code <pre>} and {@code <math>}, which the tokenizer flags
+  * as HTML, and U+2028, which the constructor converts to a newline.
+  */
+ private static final Pattern POSSIBLE_WIKI_TEXT = Pattern.compile(
+     "\\{\\{|" +
+     "\\[\\[|" +
+     "<!--|" +
+     "''|" +
+     "<pre>|" +
+     "<math>|" +
+     "[\n\u2028]"
+ );
+
+ /**
+  * Tokenizes {@code wikiText} and reports each token to {@code callback}.
+  *
+  * @param wikiText  the wiki source to scan
+  * @param isNewline passed to the {@code WikiTokenizer} constructor as its
+  *                  initial newline flag
+  * @param callback  receiver of the token events
+  * @throws IllegalStateException if a token matches none of the known
+  *         categories and the tokenizer has recorded no errors
+  */
+ public static void dispatch(final String wikiText, final boolean isNewline, final Callback callback) {
+ // Fast path: with no markup trigger present, skip building a tokenizer
+ // and deliver the whole input as a single plain-text event.
+ if (!POSSIBLE_WIKI_TEXT.matcher(wikiText).find()) {
+ callback.onPlainText(wikiText);
+ } else {
+ final WikiTokenizer tokenizer = new WikiTokenizer(wikiText, isNewline);
+ // The first matching category wins; the order of the checks is significant.
+ while (tokenizer.nextToken() != null) {
+ if (tokenizer.isPlainText()) {
+ callback.onPlainText(tokenizer.token());
+ } else if (tokenizer.isMarkup()) {
+ callback.onMarkup(tokenizer);
+ } else if (tokenizer.isWikiLink) {
+ callback.onWikiLink(tokenizer);
+ } else if (tokenizer.isNewline()) {
+ callback.onNewline(tokenizer);
+ } else if (tokenizer.isFunction()) {
+ callback.onFunction(tokenizer, tokenizer.functionName(), tokenizer.functionPositionArgs(), tokenizer.functionNamedArgs());
+ } else if (tokenizer.isHeading()) {
+ callback.onHeading(tokenizer);
+ } else if (tokenizer.isListItem()) {
+ callback.onListItem(tokenizer);
+ } else if (tokenizer.isComment()) {
+ callback.onComment(tokenizer);
+ } else if (tokenizer.isHtml()) {
+ callback.onHtml(tokenizer);
+ } else if (!tokenizer.errors.isEmpty()) {
+ // Uncategorized token, but the tokenizer recorded errors: deliberately
+ // dispatch nothing. (Original note: the log was already printed.)
+ } else {
+ throw new IllegalStateException("Unknown wiki state: " + tokenizer.token());
+ }
}
}
}
+ /**
+  * Returns the tokenizer's {@code errors} list. This is the field itself,
+  * not a copy, so changes made by the caller are visible to the tokenizer.
+  */
+ public List<String> errors() {
+ return errors;
+ }
+
+ /**
+  * Returns the {@code justReturnedNewline} flag, whose initial value is the
+  * constructor's {@code isNewline} argument.
+  */
public boolean isNewline() {
return justReturnedNewline;
}
assert isListItem();
return wikiText.substring(start, listPrefixEnd);
}
+
+ /**
+  * Maps a list-prefix character to a tag name.
+  *
+  * @param c the list-prefix character
+  * @return {@code "ol"} for {@code '#'}, {@code "ul"} for any other character
+  */
+ public static String getListTag(char c) {
+     return c == '#' ? "ol" : "ul";
+ }
public String listItemWikiText() {
assert isListItem();
assert isFunction();
// "{{.."
if (firstUnescapedPipePos != -1) {
- return wikiText.substring(start + 2, firstUnescapedPipePos);
+ return trimNewlines(wikiText.substring(start + 2, firstUnescapedPipePos).trim());
}
- return wikiText.substring(start + 2, end - 2);
+ return trimNewlines(wikiText.substring(start + 2, end - 2).trim());
}
public List<String> functionPositionArgs() {
assert isWikiLink();
// "[[.."
if (lastUnescapedPipePos != -1) {
- return wikiText.substring(lastUnescapedPipePos + 1, end - 2);
+ return trimNewlines(wikiText.substring(lastUnescapedPipePos + 1, end - 2));
}
assert start + 2 < wikiText.length() && end >= 2: wikiText;
- return wikiText.substring(start + 2, end - 2);
+ return trimNewlines(wikiText.substring(start + 2, end - 2));
}
+ /**
+  * Returns the part of the current wiki link between the opening
+  * {@code "[["} and the first unescaped pipe, passed through
+  * {@code trimNewlines}; returns {@code null} when the link has no
+  * unescaped pipe.
+  */
public String wikiLinkDest() {
assert isWikiLink();
// "[[.."
if (firstUnescapedPipePos != -1) {
- return wikiText.substring(start + 2, firstUnescapedPipePos);
+ return trimNewlines(wikiText.substring(start + 2, firstUnescapedPipePos));
}
return null;
}
+ /**
+  * Returns the {@code isHtml} flag. The tokenizer sets it when the current
+  * token starts with {@code <pre>} or {@code <math>}.
+  */
+ public boolean isHtml() {
+ return isHtml;
+ }
+
+ /**
+  * Returns whether {@code prefix} occurs in {@code wikiText} beginning
+  * exactly at index {@code start}.
+  */
public boolean remainderStartsWith(final String prefix) {
return wikiText.startsWith(prefix, start);
}
if (wikiText.startsWith("<pre>", start)) {
end = safeIndexOf(wikiText, start, "</pre>", "\n");
+ isHtml = true;
return this;
}
if (wikiText.startsWith("<math>", start)) {
end = safeIndexOf(wikiText, start, "</math>", "\n");
+ isHtml = true;
return this;
}
assert matcher.end() > end || matchText.length() == 0: "Group=" + matcher.group();
if (matchText.length() == 0) {
- assert matchStart == wikiText.length() || wikiText.charAt(matchStart) == '\n';
+ assert matchStart == wikiText.length() || wikiText.charAt(matchStart) == '\n' : wikiText + ", " + matchStart;
if (firstNewline == -1) {
firstNewline = matcher.end();
}
if (lastUnescapedEqualsPos > lastUnescapedPipePos) {
final String key = wikiText.substring(lastUnescapedPipePos + 1, lastUnescapedEqualsPos);
final String value = wikiText.substring(lastUnescapedEqualsPos + 1, matchStart);
- namedArgs.put(key, value);
+ namedArgs.put(trimNewlines(key), trimNewlines(value));
} else {
final String value = wikiText.substring(lastUnescapedPipePos + 1, matchStart);
- positionArgs.add(value);
+ positionArgs.add(trimNewlines(value));
}
}
lastUnescapedPipePos = matchStart;
}
+
+ /**
+  * Removes every leading and trailing {@code '\n'} from {@code s}, then
+  * replaces each remaining {@code '\n'} with a single space. Other
+  * whitespace is left untouched.
+  *
+  * @param s the text to clean; must not be {@code null}
+  * @return the cleaned text, which is empty if {@code s} holds only newlines
+  */
+ static final String trimNewlines(String s) {
+     // Find the trimmed bounds by index so only one substring is created,
+     // rather than one fresh copy per stripped newline.
+     int from = 0;
+     int to = s.length();
+     while (from < to && s.charAt(from) == '\n') {
+         ++from;
+     }
+     while (to > from && s.charAt(to - 1) == '\n') {
+         --to;
+     }
+     // Literal char replacement; no regex is needed for a single character.
+     return s.substring(from, to).replace('\n', ' ');
+ }
static int safeIndexOf(final String s, final int start, final String target, final String backup) {
int close = s.indexOf(target, start);