X-Git-Url: http://gitweb.fperrin.net/?a=blobdiff_plain;f=src%2Fcom%2Fhughes%2Fandroid%2Fdictionary%2Fparser%2FWikiTokenizer.java;h=e2d59c45e85f90706cb51a330992d7d4f4222cad;hb=7819736ae570bf597936f0dc640f60644da15fc8;hp=ad14bc0c1826a2ab27a139909e6700df418f7de1;hpb=7573784eea75700436bb900861b93a6d53210fc8;p=DictionaryPC.git diff --git a/src/com/hughes/android/dictionary/parser/WikiTokenizer.java b/src/com/hughes/android/dictionary/parser/WikiTokenizer.java index ad14bc0..e2d59c4 100644 --- a/src/com/hughes/android/dictionary/parser/WikiTokenizer.java +++ b/src/com/hughes/android/dictionary/parser/WikiTokenizer.java @@ -70,7 +70,7 @@ public final class WikiTokenizer { public WikiTokenizer(final String wikiText, final boolean isNewline) { this.wikiText = wikiText; this.matcher = wikiTokenEvent.matcher(wikiText); - justReturnedNewline = false; + justReturnedNewline = isNewline; } private void clear() { @@ -173,6 +173,7 @@ public final class WikiTokenizer { if (lastUnescapedPipePos != -1) { return wikiText.substring(lastUnescapedPipePos + 1, end - 2); } + assert start + 2 < wikiText.length() && end >= 2: wikiText; return wikiText.substring(start + 2, end - 2); } @@ -337,7 +338,7 @@ public final class WikiTokenizer { public String token() { final String token = wikiText.substring(start, end); - assert token.equals("\n") || !token.endsWith("\n") : token; + assert token.equals("\n") || !token.endsWith("\n") : "token='" + token + "'"; return token; } @@ -347,6 +348,7 @@ public final class WikiTokenizer { final boolean insideFunction = toFind.equals("}}"); int end = start; + int firstNewline = -1; while (end < wikiText.length()) { if (matcher.find(end)) { final String matchText = matcher.group(); @@ -355,6 +357,9 @@ public final class WikiTokenizer { assert matcher.end() > end || matchText.length() == 0: "Group=" + matcher.group(); if (matchText.length() == 0) { assert matchStart == wikiText.length() || wikiText.charAt(matchStart) == '\n'; + if (firstNewline == -1) { + firstNewline = matcher.end(); + } if (tokenStack.isEmpty() && toFind.equals("\n")) { return matchStart; } @@ -413,6 +418,14 @@ public final class WikiTokenizer { // Inside the while loop. Just go forward. end = Math.max(end, matcher.end()); } + if (toFind.equals("\n") && tokenStack.isEmpty()) { + // We were looking for the end, we got it. + return end; + } + if (firstNewline != -1) { + errors.add("Couldn't find: " + toFind + ", "+ wikiText.substring(start)); + return firstNewline; + } return end; } @@ -445,8 +458,8 @@ public final class WikiTokenizer { return s.length(); } - public static String toPlainText(String sense) { - final WikiTokenizer wikiTokenizer = new WikiTokenizer(sense); + public static String toPlainText(final String wikiText) { + final WikiTokenizer wikiTokenizer = new WikiTokenizer(wikiText); final StringBuilder builder = new StringBuilder(); while (wikiTokenizer.nextToken() != null) { if (wikiTokenizer.isPlainText()) { @@ -462,4 +475,16 @@ public final class WikiTokenizer { return builder.toString(); } + public static StringBuilder appendFunction(final StringBuilder builder, final String name, List args, + final Map namedArgs) { + builder.append(name); + for (final String arg : args) { + builder.append("|").append(arg); + } + for (final Map.Entry entry : namedArgs.entrySet()) { + builder.append("|").append(entry.getKey()).append("=").append(entry.getValue()); + } + return builder; + } + }