From 2f2eaf2360096aa926fba1b03916a5fe23fbb707 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Reimar=20D=C3=B6ffinger?= Date: Thu, 16 Apr 2020 22:10:03 +0200 Subject: [PATCH] Optimize finding start of next token. --- .../android/dictionary/parser/WikiTokenizer.java | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/src/com/hughes/android/dictionary/parser/WikiTokenizer.java b/src/com/hughes/android/dictionary/parser/WikiTokenizer.java index 0aaf9f6..886e4f9 100644 --- a/src/com/hughes/android/dictionary/parser/WikiTokenizer.java +++ b/src/com/hughes/android/dictionary/parser/WikiTokenizer.java @@ -463,8 +463,15 @@ public final class WikiTokenizer { } - if (this.matcher.find(start)) { - end = this.matcher.start(); + while (end < wikiText.length()) { + int c = wikiText.charAt(end); + if (c == '\n' || c == '\'' || ((c - 0x1b) & 0xff9f) < 3) { + matcher.region(end, wikiText.length()); + if (matcher.lookingAt()) break; + } + end++; + } + if (end != wikiText.length()) { isPlainText = true; if (end == start) { // stumbled over a new type of newline? @@ -477,7 +484,6 @@ public final class WikiTokenizer { return this; } - end = wikiText.length(); isPlainText = true; return this; -- 2.43.0