import java.util.regex.Pattern;
public final class WikiTokenizer {
-
+
+    /**
+     * Receiver for the token events emitted by {@code dispatch}; exactly one method is
+     * invoked per token.
+     */
+    public interface Callback {
+
+        /** A plain-text token, or the complete input when it contains nothing that looks like wiki syntax. */
+        void onPlainText(String text);
+
+        /** The tokenizer's current token is markup. */
+        void onMarkup(WikiTokenizer wikiTokenizer);
+
+        /** The tokenizer's current token is a wiki link. */
+        void onWikiLink(WikiTokenizer wikiTokenizer);
+
+        /** The tokenizer's current token is a newline. */
+        void onNewline(WikiTokenizer wikiTokenizer);
+
+        /**
+         * The tokenizer's current token is a function; its name and arguments are passed
+         * along exactly as the tokenizer reports them.
+         */
+        void onFunction(
+                WikiTokenizer tokenizer,
+                String functionName,
+                List<String> functionPositionArgs,
+                Map<String, String> functionNamedArgs);
+
+        /** The tokenizer's current token is a heading. */
+        void onHeading(WikiTokenizer wikiTokenizer);
+
+        /** The tokenizer's current token is a list item. */
+        void onListItem(WikiTokenizer wikiTokenizer);
+
+        /** The tokenizer's current token is a comment. */
+        void onComment(WikiTokenizer wikiTokenizer);
+
+        /** The tokenizer's current token is flagged as HTML. */
+        void onHtml(WikiTokenizer wikiTokenizer);
+    }
+
//private static final Pattern wikiTokenEvent = Pattern.compile("($)", Pattern.MULTILINE);
private static final Pattern wikiTokenEvent = Pattern.compile("(" +
"\\{\\{|\\}\\}|" +
private boolean isComment;
private boolean isFunction;
private boolean isWikiLink;
+ private boolean isHtml;
private int firstUnescapedPipePos;
private int lastUnescapedPipePos;
isComment = false;
isFunction = false;
isWikiLink = false;
+ isHtml = false;
firstUnescapedPipePos = -1;
lastUnescapedPipePos = -1;
positionArgs.clear();
namedArgs.clear();
}
+
+    /**
+     * Cheap pre-filter for {@code dispatch}: text in which this pattern finds nothing is
+     * handed to the callback as one plain-text token without running the tokenizer.
+     * <p>
+     * Covers function and link openers, comment openers, quote markup, newlines and the
+     * {@code <pre>} / {@code <math>} tags that the tokenizer flags as HTML, so that such
+     * spans are not short-circuited into plain text.
+     * <p>
+     * NOTE(review): headings and list items on a single line are not matched here;
+     * presumably callers never pass those without a newline -- confirm.
+     */
+    private static final Pattern POSSIBLE_WIKI_TEXT = Pattern.compile(
+        "\\{\\{|" +
+        "\\[\\[|" +
+        "<!--|" +
+        "''|" +
+        "<pre>|" +
+        "<math>|" +
+        "[\n]"
+    );
+
+    /**
+     * Tokenizes {@code wikiText} and reports each token to {@code callback}.
+     * The token kinds are probed in a fixed order and the first one that applies wins.
+     *
+     * @param wikiText  the text to tokenize
+     * @param isNewline passed through unchanged to the {@code WikiTokenizer} constructor
+     * @param callback  receiver of the token events
+     * @throws IllegalStateException if a token matches no known kind and the tokenizer
+     *         recorded no errors
+     */
+    public static void dispatch(final String wikiText, final boolean isNewline, final Callback callback) {
+        // Optimization: nothing resembling wiki syntax, so the tokenizer is not needed at all.
+        final boolean mayContainWikiSyntax = POSSIBLE_WIKI_TEXT.matcher(wikiText).find();
+        if (!mayContainWikiSyntax) {
+            callback.onPlainText(wikiText);
+            return;
+        }
+
+        final WikiTokenizer t = new WikiTokenizer(wikiText, isNewline);
+        while (t.nextToken() != null) {
+            if (t.isPlainText()) {
+                callback.onPlainText(t.token());
+                continue;
+            }
+            if (t.isMarkup()) {
+                callback.onMarkup(t);
+                continue;
+            }
+            if (t.isWikiLink) {
+                callback.onWikiLink(t);
+                continue;
+            }
+            if (t.isNewline()) {
+                callback.onNewline(t);
+                continue;
+            }
+            if (t.isFunction()) {
+                callback.onFunction(t, t.functionName(), t.functionPositionArgs(), t.functionNamedArgs());
+                continue;
+            }
+            if (t.isHeading()) {
+                callback.onHeading(t);
+                continue;
+            }
+            if (t.isListItem()) {
+                callback.onListItem(t);
+                continue;
+            }
+            if (t.isComment()) {
+                callback.onComment(t);
+                continue;
+            }
+            if (t.isHtml()) {
+                callback.onHtml(t);
+                continue;
+            }
+            // Unclassified token: tolerated only when the tokenizer already recorded an
+            // error for it (the log was already printed in that case).
+            if (t.errors.isEmpty()) {
+                throw new IllegalStateException("Unknown wiki state: " + t.token());
+            }
+        }
+    }
public boolean isNewline() {
return justReturnedNewline;
assert isFunction();
// "{{.."
if (firstUnescapedPipePos != -1) {
- return wikiText.substring(start + 2, firstUnescapedPipePos);
+ return trimNewlines(wikiText.substring(start + 2, firstUnescapedPipePos).trim());
}
- return wikiText.substring(start + 2, end - 2);
+ return trimNewlines(wikiText.substring(start + 2, end - 2).trim());
}
public List<String> functionPositionArgs() {
if (lastUnescapedPipePos != -1) {
return wikiText.substring(lastUnescapedPipePos + 1, end - 2);
}
+ assert start + 2 < wikiText.length() && end >= 2: wikiText;
return wikiText.substring(start + 2, end - 2);
}
return null;
}
+    /**
+     * Reports whether the current token was flagged as HTML. The flag is raised for tokens
+     * that begin with {@code <pre>} or {@code <math>}.
+     *
+     * @return the current value of the HTML flag
+     */
+    public boolean isHtml() {
+        return this.isHtml;
+    }
+
/**
 * Tests whether {@code wikiText} contains {@code prefix} at offset {@code start}.
 *
 * @param prefix the text to look for
 * @return the result of {@code wikiText.startsWith(prefix, start)}
 */
public boolean remainderStartsWith(final String prefix) {
return wikiText.startsWith(prefix, start);
}
if (wikiText.startsWith("<pre>", start)) {
end = safeIndexOf(wikiText, start, "</pre>", "\n");
+ isHtml = true;
return this;
}
if (wikiText.startsWith("<math>", start)) {
end = safeIndexOf(wikiText, start, "</math>", "\n");
+ isHtml = true;
return this;
}
/**
 * Returns the text of the current token, i.e. {@code wikiText.substring(start, end)}.
 * With assertions enabled, a token other than a lone newline must not end in a newline.
 *
 * @return the current token
 */
public String token() {
final String token = wikiText.substring(start, end);
- assert token.equals("\n") || !token.endsWith("\n") : token;
+ assert token.equals("\n") || !token.endsWith("\n") : "token='" + token + "'";
return token;
}
final boolean insideFunction = toFind.equals("}}");
int end = start;
+ int firstNewline = -1;
while (end < wikiText.length()) {
if (matcher.find(end)) {
final String matchText = matcher.group();
assert matcher.end() > end || matchText.length() == 0: "Group=" + matcher.group();
if (matchText.length() == 0) {
assert matchStart == wikiText.length() || wikiText.charAt(matchStart) == '\n';
+ if (firstNewline == -1) {
+ firstNewline = matcher.end();
+ }
if (tokenStack.isEmpty() && toFind.equals("\n")) {
return matchStart;
}
// Inside the while loop. Just go forward.
end = Math.max(end, matcher.end());
}
+ if (toFind.equals("\n") && tokenStack.isEmpty()) {
+ // We were looking for the end, we got it.
+ return end;
+ }
+ if (firstNewline != -1) {
+ errors.add("Couldn't find: " + toFind + ", "+ wikiText.substring(start));
+ return firstNewline;
+ }
return end;
}
if (lastUnescapedEqualsPos > lastUnescapedPipePos) {
final String key = wikiText.substring(lastUnescapedPipePos + 1, lastUnescapedEqualsPos);
final String value = wikiText.substring(lastUnescapedEqualsPos + 1, matchStart);
- namedArgs.put(key, value);
+ namedArgs.put(trimNewlines(key), trimNewlines(value));
} else {
final String value = wikiText.substring(lastUnescapedPipePos + 1, matchStart);
- positionArgs.add(value);
+ positionArgs.add(trimNewlines(value));
}
}
lastUnescapedPipePos = matchStart;
}
+
+    /**
+     * Removes every leading and trailing newline from {@code s} and replaces each
+     * remaining newline with a single space. Other whitespace is left untouched.
+     *
+     * @param s the text to clean up; must not be {@code null}
+     * @return {@code s} without leading/trailing {@code '\n'} and with inner ones turned into {@code ' '}
+     */
+    static final String trimNewlines(String s) {
+        int from = 0;
+        int to = s.length();
+        // Walk indices inward so trimming costs one substring in total rather than one
+        // freshly copied string per stripped newline.
+        while (from < to && s.charAt(from) == '\n') {
+            ++from;
+        }
+        while (to > from && s.charAt(to - 1) == '\n') {
+            --to;
+        }
+        // A literal character swap; no regular expression is needed for '\n'.
+        return s.substring(from, to).replace('\n', ' ');
+    }
static int safeIndexOf(final String s, final int start, final String target, final String backup) {
int close = s.indexOf(target, start);
return s.length();
}
- public static String toPlainText(String sense) {
- final WikiTokenizer wikiTokenizer = new WikiTokenizer(sense);
+ public static String toPlainText(final String wikiText) {
+ final WikiTokenizer wikiTokenizer = new WikiTokenizer(wikiText);
final StringBuilder builder = new StringBuilder();
while (wikiTokenizer.nextToken() != null) {
if (wikiTokenizer.isPlainText()) {
return builder.toString();
}
+    /**
+     * Appends a function in {@code name|arg|...|key=value|...} form to {@code builder}.
+     * Positional arguments come first, in list order, followed by the named arguments in
+     * the map's iteration order. No surrounding braces are written.
+     *
+     * @param builder   destination of the output
+     * @param name      function name, written first
+     * @param args      positional arguments, each preceded by {@code |}
+     * @param namedArgs named arguments, each written as {@code |key=value}
+     * @return {@code builder}, to allow call chaining
+     */
+    public static StringBuilder appendFunction(final StringBuilder builder, final String name, List<String> args,
+            final Map<String, String> namedArgs) {
+        builder.append(name);
+        for (final String positional : args) {
+            builder.append("|");
+            builder.append(positional);
+        }
+        for (final Map.Entry<String, String> named : namedArgs.entrySet()) {
+            builder.append("|");
+            builder.append(named.getKey());
+            builder.append("=");
+            builder.append(named.getValue());
+        }
+        return builder;
+    }
+
}