"=|" + // Need the = because we might have to find unescaped =
"<!--|" +
"''|" +
+ "<pre>|" +
+ "<math>|" +
+ "<ref>|" +
"$)", Pattern.MULTILINE);
private static final String listChars = "*#:;";
this(wikiText, true);
}
- public WikiTokenizer(final String wikiText, final boolean isNewline) {
- this.wikiText = wikiText.replaceAll("\u2028", "\n");
+ /**
+  * Creates a tokenizer over {@code wikiText}.
+  *
+  * U+2028 (LINE SEPARATOR) and U+0085 (NEXT LINE) are rewritten to a plain
+  * newline first; both the stored text and the matcher use the rewritten string.
+  *
+  * @param wikiText the wiki markup to tokenize
+  * @param isNewline initial value of {@code justReturnedNewline}
+  */
+ public WikiTokenizer(String wikiText, final boolean isNewline) {
+ // Char-based replace does a literal substitution; no regex is involved.
+ wikiText = wikiText.replace('\u2028', '\n');
+ wikiText = wikiText.replace('\u0085', '\n');
+ this.wikiText = wikiText;
+ // The parameter was reassigned above, so the matcher below scans the normalized text.
this.matcher = wikiTokenEvent.matcher(wikiText);
justReturnedNewline = isNewline;
}
"\\[\\[|" +
"<!--|" +
"''|" +
+ "<pre>|" +
+ "<math>|" +
+ "<ref>|" +
"[\n]"
);
callback.onPlainText(tokenizer.token());
} else if (tokenizer.isMarkup()) {
callback.onMarkup(tokenizer);
- } else if (tokenizer.isWikiLink) {
+ } else if (tokenizer.isWikiLink()) {
callback.onWikiLink(tokenizer);
} else if (tokenizer.isNewline()) {
callback.onNewline(tokenizer);
assert isListItem();
return wikiText.substring(start, listPrefixEnd);
}
+
+ /**
+  * Returns the list tag name that corresponds to a wiki list marker character.
+  *
+  * @param c the list marker character
+  * @return {@code "ol"} when {@code c} is {@code '#'}; {@code "ul"} for any other character
+  */
+ public static String getListTag(char c) {
+     return c == '#' ? "ol" : "ul";
+ }
public String listItemWikiText() {
assert isListItem();
if (firstUnescapedPipePos != -1) {
return trimNewlines(wikiText.substring(start + 2, firstUnescapedPipePos).trim());
}
- return trimNewlines(wikiText.substring(start + 2, end - 2).trim());
+ final int safeEnd = Math.max(start + 2, end - 2);
+ return trimNewlines(wikiText.substring(start + 2, safeEnd).trim());
}
public List<String> functionPositionArgs() {
return this;
}
- if (justReturnedNewline) {
+ if (justReturnedNewline) {
justReturnedNewline = false;
final char firstChar = wikiText.charAt(end);
return this;
}
+ if (wikiText.startsWith("<ref>", start)) {
+ end = safeIndexOf(wikiText, start, "</ref>", "\n");
+ isHtml = true;
+ return this;
+ }
+
if (wikiText.startsWith("<math>", start)) {
end = safeIndexOf(wikiText, start, "</math>", "\n");
isHtml = true;
return token;
}
+ // Literal tokens that escapedFindEnd scans for with indexOf.
+ // Index 0 ("\n") is special-cased there: it is reported as an empty match,
+ // and when no newline is left its position falls back to the end of the text.
+ static final String[] patterns = {
+     "\n", "{{", "}}", "[[", "]]", "|", "=", "<!--"
+ };
private int escapedFindEnd(final int start, final String toFind) {
assert tokenStack.isEmpty();
int end = start;
int firstNewline = -1;
+ int[] nextMatch = new int[8];
+ for (int i = 0; i < 8; ++i) {
+ nextMatch[i] = wikiText.indexOf(patterns[i], start);
+ if (nextMatch[i] == -1) nextMatch[i] = i > 0 ? 0x7fffffff : wikiText.length();
+ }
while (end < wikiText.length()) {
- if (matcher.find(end)) {
- final String matchText = matcher.group();
- final int matchStart = matcher.start();
-
- assert matcher.end() > end || matchText.length() == 0: "Group=" + matcher.group();
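+ // Manual replacement for matcher.find(end), because the
+ // java.util.regex implementation is far too slow for this scan.
+ // Start with candidate 0 (the next newline, or the end of the text when no
+ // newline is left) so there is always a match; any pattern that occurs
+ // earlier overrides it in the loop below.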
+ int matchIdx = 0;
+ for (int i = 1; i < 8; ++i) {
+ if (nextMatch[i] < nextMatch[matchIdx]) {
+ matchIdx = i;
+ }
+ }
+
+ int matchStart = nextMatch[matchIdx];
+ String matchText = patterns[matchIdx];
+ int matchEnd = matchStart + matchText.length();
+ nextMatch[matchIdx] = wikiText.indexOf(patterns[matchIdx], matchEnd);
+ if (nextMatch[matchIdx] == -1) nextMatch[matchIdx] = matchIdx > 0 ? 0x7fffffff : wikiText.length();
+ if (matchIdx == 0) {
+ matchText = "";
+ matchEnd = matchStart;
+ }
+
+ assert matchEnd > end || matchText.length() == 0: "Group=" + matchText;
if (matchText.length() == 0) {
assert matchStart == wikiText.length() || wikiText.charAt(matchStart) == '\n' : wikiText + ", " + matchStart;
if (firstNewline == -1) {
- firstNewline = matcher.end();
+ firstNewline = matchEnd;
}
if (tokenStack.isEmpty() && toFind.equals("\n")) {
return matchStart;
if (insideFunction) {
addFunctionArg(insideFunction, matchStart);
}
- return matcher.end();
+ return matchEnd;
} else if (matchText.equals("[[") || matchText.equals("{{")) {
tokenStack.add(matchText);
} else if (matchText.equals("]]") || matchText.equals("}}")) {
if (tokenStack.size() > 0) {
final String removed = tokenStack.remove(tokenStack.size() - 1);
- if (removed.equals("{{") && !matcher.group().equals("}}")) {
+ if (removed.equals("{{") && !matchText.equals("}}")) {
errors.add("Unmatched {{ error: " + wikiText.substring(start));
return safeIndexOf(wikiText, start, "\n", "\n");
- } else if (removed.equals("[[") && !matcher.group().equals("]]")) {
+ } else if (removed.equals("[[") && !matchText.equals("]]")) {
errors.add("Unmatched [[ error: " + wikiText.substring(start));
return safeIndexOf(wikiText, start, "\n", "\n");
}
} else {
- errors.add("Pop too many error: " + wikiText.substring(start).replaceAll("\n", "\\\\n"));
+ errors.add("Pop too many error: " + wikiText.substring(start).replace("\n", "\\\\n"));
// If we were looking for a newline
return safeIndexOf(wikiText, start, "\n", "\n");
}
errors.add("Unmatched <!-- error: " + wikiText.substring(start));
return safeIndexOf(wikiText, start, "\n", "\n");
}
- } else if (matchText.equals("''")) {
+ } else if (matchText.equals("''") || (matchText.startsWith("<") && matchText.endsWith(">"))) {
// Don't care.
} else {
assert false : "Match text='" + matchText + "'";
throw new IllegalStateException();
}
- } else {
- // Hmmm, we didn't find the closing symbol we were looking for...
- errors.add("Couldn't find: " + toFind + ", "+ wikiText.substring(start));
- return safeIndexOf(wikiText, start, "\n", "\n");
- }
-
+
// Inside the while loop. Just go forward.
- end = Math.max(end, matcher.end());
+ end = Math.max(end, matchEnd);
}
if (toFind.equals("\n") && tokenStack.isEmpty()) {
// We were looking for the end, we got it.
return end;
}
+ errors.add("Couldn't find: " + toFind + ", "+ wikiText.substring(start));
if (firstNewline != -1) {
- errors.add("Couldn't find: " + toFind + ", "+ wikiText.substring(start));
return firstNewline;
}
return end;
while (s.endsWith("\n")) {
s = s.substring(0, s.length() - 1);
}
- return s.replaceAll("\n", " ");
+ return s.replace('\n', ' ');
}
static int safeIndexOf(final String s, final int start, final String target, final String backup) {