X-Git-Url: http://gitweb.fperrin.net/?a=blobdiff_plain;f=src%2Fcom%2Fhughes%2Fandroid%2Fdictionary%2Fparser%2FWikiTokenizer.java;h=a7863c7f42ee3dc24882019e4bbdbf88658e8409;hb=e87d071962ee37719c9bea6740d93913ed4d8c7b;hp=d6c8901aa6a6b6541c1d5b2ccd0e4dc4af56f507;hpb=21e752e044b6c0dd7d24e6da143068326beab2e3;p=DictionaryPC.git diff --git a/src/com/hughes/android/dictionary/parser/WikiTokenizer.java b/src/com/hughes/android/dictionary/parser/WikiTokenizer.java index d6c8901..a7863c7 100644 --- a/src/com/hughes/android/dictionary/parser/WikiTokenizer.java +++ b/src/com/hughes/android/dictionary/parser/WikiTokenizer.java @@ -22,625 +22,636 @@ import java.util.regex.Matcher; import java.util.regex.Pattern; public final class WikiTokenizer { - - public static interface Callback { - void onPlainText(final String text); - void onMarkup(WikiTokenizer wikiTokenizer); - void onWikiLink(WikiTokenizer wikiTokenizer); - void onNewline(WikiTokenizer wikiTokenizer); - void onFunction(final WikiTokenizer tokenizer, String functionName, List functionPositionArgs, - Map functionNamedArgs); - void onHeading(WikiTokenizer wikiTokenizer); - void onListItem(WikiTokenizer wikiTokenizer); - void onComment(WikiTokenizer wikiTokenizer); - void onHtml(WikiTokenizer wikiTokenizer); - } - - public static class DoNothingCallback implements Callback { - - @Override - public void onPlainText(String text) { - } - - @Override - public void onMarkup(WikiTokenizer wikiTokenizer) { - } - - @Override - public void onWikiLink(WikiTokenizer wikiTokenizer) { - } - - @Override - public void onNewline(WikiTokenizer wikiTokenizer) { - } - - @Override - public void onFunction(WikiTokenizer tokenizer, String functionName, - List functionPositionArgs, Map functionNamedArgs) { - } - - @Override - public void onHeading(WikiTokenizer wikiTokenizer) { - } - - @Override - public void onListItem(WikiTokenizer wikiTokenizer) { - } - - @Override - public void onComment(WikiTokenizer wikiTokenizer) { - } - - @Override - public void onHtml(WikiTokenizer wikiTokenizer) { - } - } - - //private static final Pattern wikiTokenEvent = Pattern.compile("($)", Pattern.MULTILINE); - private static final Pattern wikiTokenEvent = Pattern.compile("(" + - "\\{\\{|\\}\\}|" + - "\\[\\[|\\]\\]|" + - "\\||" + // Need the | because we might have to find unescaped pipes - "=|" + // Need the = because we might have to find unescaped = - "", "\n"); + return this; + } + + if (wikiText.startsWith("}}", start) || wikiText.startsWith("]]", start)) { + errors.add("Close without open!"); + end += 2; + return this; + } + + if (wikiText.charAt(start) == '|' || wikiText.charAt(start) == '=') { + isPlainText = true; + ++end; + return this; + } + + + if (this.matcher.find(start)) { + end = this.matcher.start(1); + isPlainText = true; + if (end == start) { + errors.add("Empty group: " + this.matcher.group()); + assert false; + } + return this; + } + + end = wikiText.length(); + return this; + + } finally { + if (!errors.isEmpty()) { + System.err.println("Errors: " + errors + ", token=" + token()); + } } - } - } - } - - public List errors() { - return errors; - } - - public boolean isNewline() { - return justReturnedNewline; - } - - public void returnToLineStart() { - end = start = lastLineStart; - justReturnedNewline = true; - } - - public boolean isHeading() { - return headingWikiText != null; - } - - public String headingWikiText() { - assert isHeading(); - return headingWikiText; - } - - public int headingDepth() { - assert isHeading(); - return headingDepth; - } - - public boolean isMarkup() { - return isMarkup; - } - - public boolean isComment() { - return isComment; - } - - public boolean isListItem() { - return listPrefixEnd != -1; - } - - public String listItemPrefix() { - assert isListItem(); - return wikiText.substring(start, listPrefixEnd); - } - - public static String getListTag(char c) { - if (c == '#') { - return "ol"; - } - return "ul"; - } - - public String listItemWikiText() { - assert isListItem(); - return wikiText.substring(listPrefixEnd, end); - } - - public boolean isFunction() { - return isFunction; - } - - public String functionName() { - assert isFunction(); - // "{{.." - if (firstUnescapedPipePos != -1) { - return trimNewlines(wikiText.substring(start + 2, firstUnescapedPipePos).trim()); - } - final int safeEnd = Math.max(start + 2, end - 2); - return trimNewlines(wikiText.substring(start + 2, safeEnd).trim()); - } - - public List functionPositionArgs() { - return positionArgs; - } - - public Map functionNamedArgs() { - return namedArgs; - } - - public boolean isPlainText() { - return isPlainText; - } - - public boolean isWikiLink() { - return isWikiLink; - } - - public String wikiLinkText() { - assert isWikiLink(); - // "[[.." - if (lastUnescapedPipePos != -1) { - return trimNewlines(wikiText.substring(lastUnescapedPipePos + 1, end - 2)); - } - assert start + 2 < wikiText.length() && end >= 2: wikiText; - return trimNewlines(wikiText.substring(start + 2, end - 2)); - } - - public String wikiLinkDest() { - assert isWikiLink(); - // "[[.." - if (firstUnescapedPipePos != -1) { - return trimNewlines(wikiText.substring(start + 2, firstUnescapedPipePos)); - } - return null; - } - - public boolean isHtml() { - return isHtml; - } - - public boolean remainderStartsWith(final String prefix) { - return wikiText.startsWith(prefix, start); - } - - public void nextLine() { - final int oldStart = start; - while(nextToken() != null && !isNewline()) {} - if (isNewline()) { - --end; - } - start = oldStart; - } - - - public WikiTokenizer nextToken() { - this.clear(); - - start = end; - - if (justReturnedNewline) { - lastLineStart = start; - } - - try { - - final int len = wikiText.length(); - if (start >= len) { - return null; - } - - // Eat a newline if we're looking at one: - final boolean atNewline = wikiText.charAt(end) == '\n' || wikiText.charAt(end) == '\u2028'; - if (atNewline) { - justReturnedNewline = true; - ++end; - return this; - } - - if (justReturnedNewline) { - justReturnedNewline = false; - - final char firstChar = wikiText.charAt(end); - if (firstChar == '=') { - final int headerStart = end; - // Skip ===... - while (++end < len && wikiText.charAt(end) == '=') {} - final int headerTitleStart = end; - headingDepth = headerTitleStart - headerStart; - // Skip non-=... - if (end < len) { - final int nextNewline = safeIndexOf(wikiText, end, "\n", "\n"); - final int closingEquals = escapedFindEnd(end, "="); - if (wikiText.charAt(closingEquals - 1) == '=') { - end = closingEquals - 1; - } else { - end = nextNewline; - } + + } + + public String token() { + final String token = wikiText.substring(start, end); + assert token.equals("\n") || !token.endsWith("\n") : "token='" + token + "'"; + return token; + } + + final static String[] patterns = { "\n", "{{", "}}", "[[", "]]", "[", "]", "|", "=", "", matchStart); + if (end == -1) { + errors.add("Unmatched ", "\n"); - return this; - } - - if (wikiText.startsWith("}}", start) || wikiText.startsWith("]]", start)) { - errors.add("Close without open!"); - end += 2; - return this; - } - - if (wikiText.charAt(start) == '|' || wikiText.charAt(start) == '=') { - isPlainText = true; - ++end; - return this; - } - - - if (this.matcher.find(start)) { - end = this.matcher.start(1); - isPlainText = true; - if (end == start) { - errors.add("Empty group: " + this.matcher.group()); - assert false; - } - return this; - } - - end = wikiText.length(); - return this; - - } finally { - if (!errors.isEmpty()) { - System.err.println("Errors: " + errors + ", token=" + token()); - } - } - - } - - public String token() { - final String token = wikiText.substring(start, end); - assert token.equals("\n") || !token.endsWith("\n") : "token='" + token + "'"; - return token; - } - - final static String[] patterns = { "\n", "{{", "}}", "[[", "]]", "|", "=", ""); - if (end == -1) { - errors.add("Unmatched