From b6cdf8f82c2e84dc0f72b0914439ad31d953b314 Mon Sep 17 00:00:00 2001
From: =?utf8?q?Reimar=20D=C3=B6ffinger?=
Date: Sat, 11 Apr 2020 21:28:32 +0200
Subject: [PATCH] Also handle "paragraph end" newline character.

---
 .../hughes/android/dictionary/parser/WikiTokenizer.java | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/src/com/hughes/android/dictionary/parser/WikiTokenizer.java b/src/com/hughes/android/dictionary/parser/WikiTokenizer.java
index 7212319..f8d212f 100644
--- a/src/com/hughes/android/dictionary/parser/WikiTokenizer.java
+++ b/src/com/hughes/android/dictionary/parser/WikiTokenizer.java
@@ -338,7 +338,7 @@ public final class WikiTokenizer {
             }
 
             // Eat a newline if we're looking at one:
-            final boolean atNewline = wikiText.charAt(end) == '\n' || wikiText.charAt(end) == '\u2028';
+            final boolean atNewline = wikiText.charAt(end) == '\n' || wikiText.charAt(end) == '\u2028' || wikiText.charAt(end) == '\u2029';
             if (atNewline) {
                 justReturnedNewline = true;
                 ++end;
@@ -448,8 +448,11 @@ public final class WikiTokenizer {
                 end = this.matcher.start(1);
                 isPlainText = true;
                 if (end == start) {
-                    errors.add("Empty group: " + this.matcher.group());
+                    // stumbled over a new type of newline?
+                    // Or matcher is out of sync with checks above
+                    errors.add("Empty group: " + this.matcher.group() + " char: " + (int)wikiText.charAt(end));
                     assert false;
+                    throw new RuntimeException("matcher not in sync with code, or new type of newline, errors :" + errors);
                 }
                 return this;
             }
-- 
2.43.0