X-Git-Url: http://gitweb.fperrin.net/?a=blobdiff_plain;f=src%2Fcom%2Fhughes%2Fandroid%2Fdictionary%2Fparser%2FWikiTokenizer.java;h=111f131be2308cb40ab8e8d5739314fc0a6dde89;hb=90247c9eb280bd2b55f9b2b2816bad03a0821a7f;hp=47aac9488de54449f81399f33b29f3e264f55ac7;hpb=794c2989d4ff4c456c9aa1066150c6d51a5aae84;p=DictionaryPC.git diff --git a/src/com/hughes/android/dictionary/parser/WikiTokenizer.java b/src/com/hughes/android/dictionary/parser/WikiTokenizer.java index 47aac94..111f131 100644 --- a/src/com/hughes/android/dictionary/parser/WikiTokenizer.java +++ b/src/com/hughes/android/dictionary/parser/WikiTokenizer.java @@ -33,6 +33,47 @@ public final class WikiTokenizer { void onHeading(WikiTokenizer wikiTokenizer); void onListItem(WikiTokenizer wikiTokenizer); void onComment(WikiTokenizer wikiTokenizer); + void onHtml(WikiTokenizer wikiTokenizer); + } + + public static class DoNothingCallback implements Callback { + + @Override + public void onPlainText(String text) { + } + + @Override + public void onMarkup(WikiTokenizer wikiTokenizer) { + } + + @Override + public void onWikiLink(WikiTokenizer wikiTokenizer) { + } + + @Override + public void onNewline(WikiTokenizer wikiTokenizer) { + } + + @Override + public void onFunction(WikiTokenizer tokenizer, String functionName, + List functionPositionArgs, Map functionNamedArgs) { + } + + @Override + public void onHeading(WikiTokenizer wikiTokenizer) { + } + + @Override + public void onListItem(WikiTokenizer wikiTokenizer) { + } + + @Override + public void onComment(WikiTokenizer wikiTokenizer) { + } + + @Override + public void onHtml(WikiTokenizer wikiTokenizer) { + } } //private static final Pattern wikiTokenEvent = Pattern.compile("($)", Pattern.MULTILINE); @@ -67,6 +108,7 @@ public final class WikiTokenizer { private boolean isComment; private boolean isFunction; private boolean isWikiLink; + private boolean isHtml; private int firstUnescapedPipePos; private int lastUnescapedPipePos; @@ -80,7 +122,7 @@ public final class WikiTokenizer { } public WikiTokenizer(final String wikiText, final boolean isNewline) { - this.wikiText = wikiText; + this.wikiText = wikiText.replaceAll("\u2028", "\n"); this.matcher = wikiTokenEvent.matcher(wikiText); justReturnedNewline = isNewline; } @@ -97,6 +139,7 @@ public final class WikiTokenizer { isComment = false; isFunction = false; isWikiLink = false; + isHtml = false; firstUnescapedPipePos = -1; lastUnescapedPipePos = -1; @@ -136,13 +179,21 @@ public final class WikiTokenizer { callback.onListItem(tokenizer); } else if (tokenizer.isComment()) { callback.onComment(tokenizer); + } else if (tokenizer.isHtml()) { + callback.onHtml(tokenizer); + } else if (!tokenizer.errors.isEmpty()) { + // Log was already printed.... } else { - throw new IllegalStateException("Unknown wiki state."); + throw new IllegalStateException("Unknown wiki state: " + tokenizer.token()); } } } } + public List errors() { + return errors; + } + public boolean isNewline() { return justReturnedNewline; } @@ -196,9 +247,9 @@ public final class WikiTokenizer { assert isFunction(); // "{{.." if (firstUnescapedPipePos != -1) { - return wikiText.substring(start + 2, firstUnescapedPipePos); + return trimNewlines(wikiText.substring(start + 2, firstUnescapedPipePos).trim()); } - return wikiText.substring(start + 2, end - 2); + return trimNewlines(wikiText.substring(start + 2, end - 2).trim()); } public List functionPositionArgs() { @@ -221,21 +272,25 @@ public final class WikiTokenizer { assert isWikiLink(); // "[[.." if (lastUnescapedPipePos != -1) { - return wikiText.substring(lastUnescapedPipePos + 1, end - 2); + return trimNewlines(wikiText.substring(lastUnescapedPipePos + 1, end - 2)); } assert start + 2 < wikiText.length() && end >= 2: wikiText; - return wikiText.substring(start + 2, end - 2); + return trimNewlines(wikiText.substring(start + 2, end - 2)); } public String wikiLinkDest() { assert isWikiLink(); // "[[.." if (firstUnescapedPipePos != -1) { - return wikiText.substring(start + 2, firstUnescapedPipePos); + return trimNewlines(wikiText.substring(start + 2, firstUnescapedPipePos)); } return null; } + public boolean isHtml() { + return isHtml; + } + public boolean remainderStartsWith(final String prefix) { return wikiText.startsWith(prefix, start); } @@ -338,11 +393,13 @@ public final class WikiTokenizer { if (wikiText.startsWith("
", start)) {
       end = safeIndexOf(wikiText, start, "
", "\n"); + isHtml = true; return this; } if (wikiText.startsWith("", start)) { end = safeIndexOf(wikiText, start, "", "\n"); + isHtml = true; return this; } @@ -406,7 +463,7 @@ public final class WikiTokenizer { assert matcher.end() > end || matchText.length() == 0: "Group=" + matcher.group(); if (matchText.length() == 0) { - assert matchStart == wikiText.length() || wikiText.charAt(matchStart) == '\n'; + assert matchStart == wikiText.length() || wikiText.charAt(matchStart) == '\n' : wikiText + ", " + matchStart; if (firstNewline == -1) { firstNewline = matcher.end(); } @@ -486,14 +543,24 @@ public final class WikiTokenizer { if (lastUnescapedEqualsPos > lastUnescapedPipePos) { final String key = wikiText.substring(lastUnescapedPipePos + 1, lastUnescapedEqualsPos); final String value = wikiText.substring(lastUnescapedEqualsPos + 1, matchStart); - namedArgs.put(key, value); + namedArgs.put(trimNewlines(key), trimNewlines(value)); } else { final String value = wikiText.substring(lastUnescapedPipePos + 1, matchStart); - positionArgs.add(value); + positionArgs.add(trimNewlines(value)); } } lastUnescapedPipePos = matchStart; } + + static final String trimNewlines(String s) { + while (s.startsWith("\n")) { + s = s.substring(1); + } + while (s.endsWith("\n")) { + s = s.substring(0, s.length() - 1); + } + return s.replaceAll("\n", " "); + } static int safeIndexOf(final String s, final int start, final String target, final String backup) { int close = s.indexOf(target, start);