X-Git-Url: http://gitweb.fperrin.net/?a=blobdiff_plain;f=src%2Fcom%2Fhughes%2Fandroid%2Fdictionary%2Fparser%2FWikiTokenizer.java;h=56bda7fcb62d731e538800204392110d0b6c68de;hb=2fc669d88306d563fc9c899d8d91b25d591692ea;hp=d6c8901aa6a6b6541c1d5b2ccd0e4dc4af56f507;hpb=21e752e044b6c0dd7d24e6da143068326beab2e3;p=DictionaryPC.git diff --git a/src/com/hughes/android/dictionary/parser/WikiTokenizer.java b/src/com/hughes/android/dictionary/parser/WikiTokenizer.java index d6c8901..56bda7f 100644 --- a/src/com/hughes/android/dictionary/parser/WikiTokenizer.java +++ b/src/com/hughes/android/dictionary/parser/WikiTokenizer.java @@ -14,633 +14,642 @@ package com.hughes.android.dictionary.parser; -import java.util.ArrayList; -import java.util.LinkedHashMap; -import java.util.List; -import java.util.Map; +import java.util.*; import java.util.regex.Matcher; import java.util.regex.Pattern; public final class WikiTokenizer { - - public static interface Callback { - void onPlainText(final String text); - void onMarkup(WikiTokenizer wikiTokenizer); - void onWikiLink(WikiTokenizer wikiTokenizer); - void onNewline(WikiTokenizer wikiTokenizer); - void onFunction(final WikiTokenizer tokenizer, String functionName, List functionPositionArgs, - Map functionNamedArgs); - void onHeading(WikiTokenizer wikiTokenizer); - void onListItem(WikiTokenizer wikiTokenizer); - void onComment(WikiTokenizer wikiTokenizer); - void onHtml(WikiTokenizer wikiTokenizer); - } - - public static class DoNothingCallback implements Callback { - - @Override - public void onPlainText(String text) { - } - - @Override - public void onMarkup(WikiTokenizer wikiTokenizer) { - } - - @Override - public void onWikiLink(WikiTokenizer wikiTokenizer) { - } - - @Override - public void onNewline(WikiTokenizer wikiTokenizer) { - } - - @Override - public void onFunction(WikiTokenizer tokenizer, String functionName, - List functionPositionArgs, Map functionNamedArgs) { - } - - @Override - public void onHeading(WikiTokenizer wikiTokenizer) { - } - - @Override - public void onListItem(WikiTokenizer wikiTokenizer) { - } - - @Override - public void onComment(WikiTokenizer wikiTokenizer) { - } - - @Override - public void onHtml(WikiTokenizer wikiTokenizer) { - } - } - - //private static final Pattern wikiTokenEvent = Pattern.compile("($)", Pattern.MULTILINE); - private static final Pattern wikiTokenEvent = Pattern.compile("(" + - "\\{\\{|\\}\\}|" + - "\\[\\[|\\]\\]|" + - "\\||" + // Need the | because we might have to find unescaped pipes - "=|" + // Need the = because we might have to find unescaped = - "", "\n"); - return this; - } - - if (wikiText.startsWith("}}", start) || wikiText.startsWith("]]", start)) { - errors.add("Close without open!"); - end += 2; - return this; - } - - if (wikiText.charAt(start) == '|' || wikiText.charAt(start) == '=') { - isPlainText = true; - ++end; - return this; - } - - - if (this.matcher.find(start)) { - end = this.matcher.start(1); - isPlainText = true; - if (end == start) { - errors.add("Empty group: " + this.matcher.group()); - assert false; - } - return this; - } - - end = wikiText.length(); - return this; - - } finally { - if (!errors.isEmpty()) { - System.err.println("Errors: " + errors + ", token=" + token()); - } - } - - } - - public String token() { - final String token = wikiText.substring(start, end); - assert token.equals("\n") || !token.endsWith("\n") : "token='" + token + "'"; - return token; - } - - final static String[] patterns = { "\n", "{{", "}}", "[[", "]]", "|", "=", "", "\n"); + return this; + } + + if (wikiText.startsWith("}}", start) || wikiText.startsWith("]]", start)) { + errors.add("Close without open!"); + end += 2; + return this; + } + + if (wikiText.charAt(start) == '|' || wikiText.charAt(start) == '=') { + isPlainText = true; + ++end; + return this; + } + + + if (this.matcher.find(start)) { + end = this.matcher.start(1); + isPlainText = true; + if (end == start) { + // stumbled over a new type of newline? + // Or matcher is out of sync with checks above + errors.add("Empty group: " + this.matcher.group() + " char: " + (int)wikiText.charAt(end)); + assert false; + throw new RuntimeException("matcher not in sync with code, or new type of newline, errors :" + errors); + } + return this; + } + + end = wikiText.length(); + return this; + + } finally { + if (!errors.isEmpty()) { + System.err.println("Errors: " + errors + ", token=" + token()); } } - int matchStart = nextMatch[matchIdx]; - String matchText = patterns[matchIdx]; - int matchEnd = matchStart + matchText.length(); - nextMatch[matchIdx] = wikiText.indexOf(patterns[matchIdx], matchEnd); - if (nextMatch[matchIdx] == -1) nextMatch[matchIdx] = matchIdx > 0 ? 0x7fffffff : wikiText.length(); - if (matchIdx == 0) { - matchText = ""; - matchEnd = matchStart; + } + + public String token() { + final String token = wikiText.substring(start, end); + assert token.equals("\n") || !token.endsWith("\n") : "token='" + token + "'"; + return token; + } + + static final String[] patterns = { "\n", "{{", "}}", "[[", "]]", "[", "]", "|", "=", "", matchStart); + if (end == -1) { + errors.add("Unmatched "); - if (end == -1) { - errors.add("Unmatched