X-Git-Url: http://gitweb.fperrin.net/?a=blobdiff_plain;f=src%2Fcom%2Fhughes%2Fandroid%2Fdictionary%2Fparser%2FWikiTokenizer.java;h=da4e531475e9aec8bb68b5d4c9464b5cfef58a2e;hb=57d93b56ca2ffa3be718469a9f89f66b4716ad4e;hp=f80605d791b8dd63bd792f6dc9f6f2ac47ac9f15;hpb=2433d2f98245523c959985785d63525877ade0ea;p=DictionaryPC.git diff --git a/src/com/hughes/android/dictionary/parser/WikiTokenizer.java b/src/com/hughes/android/dictionary/parser/WikiTokenizer.java index f80605d..da4e531 100644 --- a/src/com/hughes/android/dictionary/parser/WikiTokenizer.java +++ b/src/com/hughes/android/dictionary/parser/WikiTokenizer.java @@ -173,6 +173,7 @@ public final class WikiTokenizer { if (lastUnescapedPipePos != -1) { return wikiText.substring(lastUnescapedPipePos + 1, end - 2); } + assert start + 2 < wikiText.length() && end >= 2: wikiText; return wikiText.substring(start + 2, end - 2); } @@ -337,7 +338,7 @@ public final class WikiTokenizer { public String token() { final String token = wikiText.substring(start, end); - assert token.equals("\n") || !token.endsWith("\n") : token; + assert token.equals("\n") || !token.endsWith("\n") : "token='" + token + "'"; return token; } @@ -347,6 +348,7 @@ public final class WikiTokenizer { final boolean insideFunction = toFind.equals("}}"); int end = start; + int firstNewline = -1; while (end < wikiText.length()) { if (matcher.find(end)) { final String matchText = matcher.group(); @@ -355,6 +357,9 @@ public final class WikiTokenizer { assert matcher.end() > end || matchText.length() == 0: "Group=" + matcher.group(); if (matchText.length() == 0) { assert matchStart == wikiText.length() || wikiText.charAt(matchStart) == '\n'; + if (firstNewline == -1) { + firstNewline = matcher.end(); + } if (tokenStack.isEmpty() && toFind.equals("\n")) { return matchStart; } @@ -413,6 +418,14 @@ public final class WikiTokenizer { // Inside the while loop. Just go forward. end = Math.max(end, matcher.end()); } + if (toFind.equals("\n") && tokenStack.isEmpty()) { + // We were looking for the end, we got it. + return end; + } + if (firstNewline != -1) { + errors.add("Couldn't find: " + toFind + ", "+ wikiText.substring(start)); + return firstNewline; + } return end; } @@ -445,8 +458,8 @@ public final class WikiTokenizer { return s.length(); } - public static String toPlainText(String sense) { - final WikiTokenizer wikiTokenizer = new WikiTokenizer(sense); + public static String toPlainText(final String wikiText) { + final WikiTokenizer wikiTokenizer = new WikiTokenizer(wikiText); final StringBuilder builder = new StringBuilder(); while (wikiTokenizer.nextToken() != null) { if (wikiTokenizer.isPlainText()) {