// Copyright 2011 Google Inc. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package com.hughes.android.dictionary.parser; import java.util.*; import java.util.regex.Matcher; import java.util.regex.Pattern; public final class WikiTokenizer { public interface Callback { void onPlainText(final String text); void onMarkup(WikiTokenizer wikiTokenizer); void onWikiLink(WikiTokenizer wikiTokenizer); void onNewline(WikiTokenizer wikiTokenizer); void onFunction(final WikiTokenizer tokenizer, String functionName, List functionPositionArgs, Map functionNamedArgs); void onHeading(WikiTokenizer wikiTokenizer); void onListItem(WikiTokenizer wikiTokenizer); void onComment(WikiTokenizer wikiTokenizer); void onHtml(WikiTokenizer wikiTokenizer); } public static class DoNothingCallback implements Callback { @Override public void onPlainText(String text) { } @Override public void onMarkup(WikiTokenizer wikiTokenizer) { } @Override public void onWikiLink(WikiTokenizer wikiTokenizer) { } @Override public void onNewline(WikiTokenizer wikiTokenizer) { } @Override public void onFunction(WikiTokenizer tokenizer, String functionName, List functionPositionArgs, Map functionNamedArgs) { } @Override public void onHeading(WikiTokenizer wikiTokenizer) { } @Override public void onListItem(WikiTokenizer wikiTokenizer) { } @Override public void onComment(WikiTokenizer wikiTokenizer) { } @Override public void onHtml(WikiTokenizer wikiTokenizer) { } } //private static final Pattern 
// NOTE(review): the statement below looks like the tail of a commented-out
// declaration that was split across two lines — confirm and re-join or remove.
wikiTokenEvent = Pattern.compile("($)", Pattern.MULTILINE);

    // Regex used by the scanning loop further down (via matcher.lookingAt()) to
    // decide whether a token delimiter starts at a given position.
    private static final Pattern wikiTokenEvent = Pattern.compile(
        "\\{\\{|\\}\\}|" +
        "\\[\\[|\\]\\]|" +
        "\\||" +  // Need the | because we might have to find unescaped pipes
        "=|" +  // Need the = because we might have to find unescaped =
        // NOTE(review): text appears to be missing between the alternative above
        // and the tokens below (the rest of this pattern and the beginning of the
        // enclosing method, including its header and the opening of its try
        // block). Restore from the upstream file before relying on this region.
        "", "\n");
            return this;
        }

        // A closing "}}" or "]]" with no matching opener: record the error and
        // consume the two characters as a token.
        if (wikiText.startsWith("}}", start) || wikiText.startsWith("]]", start)) {
            errors.add("Close without open!");
            end += 2;
            return this;
        }

        // A lone '|' or '=' is returned as a one-character plain-text token.
        if (wikiText.charAt(start) == '|' || wikiText.charAt(start) == '=') {
            isPlainText = true;
            ++end;
            return this;
        }

        // Advance end until the delimiter regex matches at the current position
        // or the input is exhausted.
        while (end < wikiText.length()) {
            int c = wikiText.charAt(end);
            // Cheap pre-filter so the regex only runs on candidate characters.
            // ((c - 0x1b) & 0xff9f) < 3 holds exactly for code points
            // 0x1b-0x1d, 0x3b-0x3d (';' '<' '='), 0x5b-0x5d ('[' '\' ']')
            // and 0x7b-0x7d ('{' '|' '}').
            if (c == '\n' || c == '\'' || ((c - 0x1b) & 0xff9f) < 3) {
                matcher.region(end, wikiText.length());
                if (matcher.lookingAt()) break;
            }
            end++;
        }

        if (end != wikiText.length()) {
            // Stopped at a delimiter: everything in [start, end) is plain text.
            isPlainText = true;
            if (end == start) {
                // stumbled over a new type of newline?
                // Or matcher is out of sync with checks above
                errors.add("Empty group: " + this.matcher.group() + " char: " + (int)wikiText.charAt(end));
                assert false;
                // Note: all newlines should be normalized to \n before calling this function
                throw new RuntimeException("matcher not in sync with code, or new type of newline, errors :" + errors);
            }
            return this;
        }

        // Reached the end of the input: the remainder is plain text.
        isPlainText = true;
        return this;

        } finally {
            // Runs on every exit path of the method, including the throw above:
            // print any errors recorded so far together with the current token.
            if (!errors.isEmpty()) {
                System.err.println("Errors: " + errors + ", token=" + token());
            }
        }
    }

    /**
     * Returns the current token, i.e. the substring of {@code wikiText} from
     * {@code start} (inclusive) to {@code end} (exclusive).
     *
     * <p>With assertions enabled, fails unless the token is exactly
     * {@code "\n"} or does not end with a newline.
     *
     * @return the text of the current token
     */
    public String token() {
        final String token = wikiText.substring(start, end);
        assert token.equals("\n") || !token.endsWith("\n") : "token='" + token + "'";
        return token;
    }

    /**
     * Kinds of delimiter; {@link #tokenDelimLen(TokenDelim)} gives the length
     * of each.
     * NOTE(review): the constant order appears to parallel the {@code patterns}
     * array declared right after — confirm before reordering either.
     */
    enum TokenDelim { NEWLINE, BRACE_OPEN, BRACE_CLOSE, DBRACKET_OPEN, DBRACKET_CLOSE, BRACKET_OPEN, BRACKET_CLOSE, PIPE, EQUALS, COMMENT }

    /**
     * Returns the length in characters of the given delimiter kind.
     *
     * @param d the delimiter kind
     * @return 1 for NEWLINE, BRACKET_OPEN, BRACKET_CLOSE, PIPE and EQUALS;
     *         2 for BRACE_OPEN, BRACE_CLOSE, DBRACKET_OPEN and DBRACKET_CLOSE;
     *         4 for COMMENT
     * @throws RuntimeException if {@code d} is not one of the constants above
     */
    private int tokenDelimLen(TokenDelim d) {
        switch (d) {
            case NEWLINE:
            case BRACKET_OPEN:
            case BRACKET_CLOSE:
            case PIPE:
            case EQUALS:
                return 1;
            case BRACE_OPEN:
            case BRACE_CLOSE:
            case DBRACKET_OPEN:
            case DBRACKET_CLOSE:
                return 2;
            case COMMENT:
                return 4;
            default:
                throw new RuntimeException();
        }
    }

    static final String[]
patterns = { "\n", "{{", "}}", "[[", "]]", "[", "]", "|", "=", "", matchStart); if (end == -1) { errors.add("Unmatched