X-Git-Url: http://gitweb.fperrin.net/?a=blobdiff_plain;f=src%2Fcom%2Fhughes%2Fandroid%2Fdictionary%2Fparser%2FWikiTokenizer.java;h=83835d95fad90311872adb26d7927ed655b254de;hb=bcb697cbffe197457be9ffb2361569a57b5fdcde;hp=8cf882e7de6933ea77dc19c5f39396146bd3f815;hpb=e479ba38bbcb261951399326623c20ffacc147d4;p=DictionaryPC.git diff --git a/src/com/hughes/android/dictionary/parser/WikiTokenizer.java b/src/com/hughes/android/dictionary/parser/WikiTokenizer.java index 8cf882e..83835d9 100644 --- a/src/com/hughes/android/dictionary/parser/WikiTokenizer.java +++ b/src/com/hughes/android/dictionary/parser/WikiTokenizer.java @@ -481,15 +481,18 @@ public final class WikiTokenizer { int firstNewline = -1; int[] nextMatch = new int[8]; for (int i = 0; i < 8; ++i) { - nextMatch[i] = wikiText.indexOf(patterns[i], start); - if (nextMatch[i] == -1) nextMatch[i] = i > 0 ? 0x7fffffff : wikiText.length(); + nextMatch[i] = -2; } while (end < wikiText.length()) { // Manual replacement for matcher.find(end), // because Java regexp is a ridiculously slow implementation. // Initialize to always match the end. int matchIdx = 0; - for (int i = 1; i < 8; ++i) { + for (int i = 0; i < 8; ++i) { + if (nextMatch[i] <= end) { + nextMatch[i] = wikiText.indexOf(patterns[i], end); + if (nextMatch[i] == -1) nextMatch[i] = i > 0 ? 0x7fffffff : wikiText.length(); + } if (nextMatch[i] < nextMatch[matchIdx]) { matchIdx = i; } @@ -498,8 +501,6 @@ public final class WikiTokenizer { int matchStart = nextMatch[matchIdx]; String matchText = patterns[matchIdx]; int matchEnd = matchStart + matchText.length(); - nextMatch[matchIdx] = wikiText.indexOf(patterns[matchIdx], matchEnd); - if (nextMatch[matchIdx] == -1) nextMatch[matchIdx] = matchIdx > 0 ? 0x7fffffff : wikiText.length(); if (matchIdx == 0) { matchText = ""; matchEnd = matchStart; @@ -527,14 +528,14 @@ public final class WikiTokenizer { if (tokenStack.size() > 0) { final String removed = tokenStack.remove(tokenStack.size() - 1); if (removed.equals("{{") && !matchText.equals("}}")) { - errors.add("Unmatched {{ error: " + wikiText.substring(start)); + errors.add("Unmatched {{ error: " + wikiText.substring(start, matchEnd)); return safeIndexOf(wikiText, start, "\n", "\n"); } else if (removed.equals("[[") && !matchText.equals("]]")) { - errors.add("Unmatched [[ error: " + wikiText.substring(start)); + errors.add("Unmatched [[ error: " + wikiText.substring(start, matchEnd)); return safeIndexOf(wikiText, start, "\n", "\n"); } } else { - errors.add("Pop too many error: " + wikiText.substring(start).replace("\n", "\\\\n")); + errors.add("Pop too many " + matchText + " error: " + wikiText.substring(start, matchEnd).replace("\n", "\\\\n")); // If we were looking for a newline return safeIndexOf(wikiText, start, "\n", "\n"); } @@ -549,7 +550,7 @@ public final class WikiTokenizer { // Do nothing. These can match spuriously, and if it's not the thing // we're looking for, keep on going. } else if (matchText.equals(""); + end = wikiText.indexOf("-->", matchStart); if (end == -1) { errors.add("Unmatched