X-Git-Url: http://gitweb.fperrin.net/?a=blobdiff_plain;f=src%2Fcom%2Fhughes%2Fandroid%2Fdictionary%2Fparser%2FWikiTokenizer.java;h=56bda7fcb62d731e538800204392110d0b6c68de;hb=2fc669d88306d563fc9c899d8d91b25d591692ea;hp=9f3444f8f06d5ee27cbb8fa5d1fd7741add3e653;hpb=054097689b08815b3587cee6955c143b1cc285e4;p=DictionaryPC.git diff --git a/src/com/hughes/android/dictionary/parser/WikiTokenizer.java b/src/com/hughes/android/dictionary/parser/WikiTokenizer.java index 9f3444f..56bda7f 100644 --- a/src/com/hughes/android/dictionary/parser/WikiTokenizer.java +++ b/src/com/hughes/android/dictionary/parser/WikiTokenizer.java @@ -14,16 +14,13 @@ package com.hughes.android.dictionary.parser; -import java.util.ArrayList; -import java.util.LinkedHashMap; -import java.util.List; -import java.util.Map; +import java.util.*; import java.util.regex.Matcher; import java.util.regex.Pattern; public final class WikiTokenizer { - public static interface Callback { + public interface Callback { void onPlainText(final String text); void onMarkup(WikiTokenizer wikiTokenizer); void onWikiLink(WikiTokenizer wikiTokenizer); @@ -99,8 +96,8 @@ public final class WikiTokenizer { int end = 0; int start = -1; - final List errors = new ArrayList(); - final List tokenStack = new ArrayList(); + final List errors = new ArrayList<>(); + final List tokenStack = new ArrayList<>(); private String headingWikiText; @@ -116,8 +113,8 @@ public final class WikiTokenizer { private int lastUnescapedPipePos; private int lastUnescapedEqualsPos; - private final List positionArgs = new ArrayList(); - private final Map namedArgs = new LinkedHashMap(); + private final List positionArgs = new ArrayList<>(); + private final Map namedArgs = new LinkedHashMap<>(); public WikiTokenizer(final String wikiText) { @@ -338,7 +335,7 @@ public final class WikiTokenizer { } // Eat a newline if we're looking at one: - final boolean atNewline = wikiText.charAt(end) == '\n' || wikiText.charAt(end) == '\u2028'; + final boolean atNewline = wikiText.charAt(end) == '\n' || wikiText.charAt(end) == '\u2028' || wikiText.charAt(end) == '\u2029'; if (atNewline) { justReturnedNewline = true; ++end; @@ -448,8 +445,11 @@ public final class WikiTokenizer { end = this.matcher.start(1); isPlainText = true; if (end == start) { - errors.add("Empty group: " + this.matcher.group()); + // stumbled over a new type of newline? + // Or matcher is out of sync with checks above + errors.add("Empty group: " + this.matcher.group() + " char: " + (int)wikiText.charAt(end)); assert false; + throw new RuntimeException("matcher not in sync with code, or new type of newline, errors :" + errors); } return this; } @@ -471,7 +471,7 @@ public final class WikiTokenizer { return token; } - final static String[] patterns = { "\n", "{{", "}}", "[[", "]]", "|", "=", "