From bcb697cbffe197457be9ffb2361569a57b5fdcde Mon Sep 17 00:00:00 2001 From: =?utf8?q?Reimar=20D=C3=B6ffinger?= Date: Thu, 13 Apr 2017 20:37:37 +0200 Subject: [PATCH] Another fix to really skip comments. --- .../android/dictionary/parser/WikiTokenizer.java | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/src/com/hughes/android/dictionary/parser/WikiTokenizer.java b/src/com/hughes/android/dictionary/parser/WikiTokenizer.java index 9f3444f..83835d9 100644 --- a/src/com/hughes/android/dictionary/parser/WikiTokenizer.java +++ b/src/com/hughes/android/dictionary/parser/WikiTokenizer.java @@ -481,15 +481,18 @@ public final class WikiTokenizer { int firstNewline = -1; int[] nextMatch = new int[8]; for (int i = 0; i < 8; ++i) { - nextMatch[i] = wikiText.indexOf(patterns[i], start); - if (nextMatch[i] == -1) nextMatch[i] = i > 0 ? 0x7fffffff : wikiText.length(); + nextMatch[i] = -2; } while (end < wikiText.length()) { // Manual replacement for matcher.find(end), // because Java regexp is a ridiculously slow implementation. // Initialize to always match the end. int matchIdx = 0; - for (int i = 1; i < 8; ++i) { + for (int i = 0; i < 8; ++i) { + if (nextMatch[i] <= end) { + nextMatch[i] = wikiText.indexOf(patterns[i], end); + if (nextMatch[i] == -1) nextMatch[i] = i > 0 ? 0x7fffffff : wikiText.length(); + } if (nextMatch[i] < nextMatch[matchIdx]) { matchIdx = i; } @@ -498,8 +501,6 @@ public final class WikiTokenizer { int matchStart = nextMatch[matchIdx]; String matchText = patterns[matchIdx]; int matchEnd = matchStart + matchText.length(); - nextMatch[matchIdx] = wikiText.indexOf(patterns[matchIdx], matchEnd); - if (nextMatch[matchIdx] == -1) nextMatch[matchIdx] = matchIdx > 0 ? 0x7fffffff : wikiText.length(); if (matchIdx == 0) { matchText = ""; matchEnd = matchStart; -- 2.43.0