// Copyright 2011 Google Inc. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package com.hughes.android.dictionary.parser; import java.util.ArrayList; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; import java.util.regex.Matcher; import java.util.regex.Pattern; public final class WikiTokenizer { public static interface Callback { void onPlainText(final String text); void onMarkup(WikiTokenizer wikiTokenizer); void onWikiLink(WikiTokenizer wikiTokenizer); void onNewline(WikiTokenizer wikiTokenizer); void onFunction(final WikiTokenizer tokenizer, String functionName, List functionPositionArgs, Map functionNamedArgs); void onHeading(WikiTokenizer wikiTokenizer); void onListItem(WikiTokenizer wikiTokenizer); void onComment(WikiTokenizer wikiTokenizer); void onHtml(WikiTokenizer wikiTokenizer); } public static class DoNothingCallback implements Callback { @Override public void onPlainText(String text) { } @Override public void onMarkup(WikiTokenizer wikiTokenizer) { } @Override public void onWikiLink(WikiTokenizer wikiTokenizer) { } @Override public void onNewline(WikiTokenizer wikiTokenizer) { } @Override public void onFunction(WikiTokenizer tokenizer, String functionName, List functionPositionArgs, Map functionNamedArgs) { } @Override public void onHeading(WikiTokenizer wikiTokenizer) { } @Override public void onListItem(WikiTokenizer wikiTokenizer) { } @Override public void onComment(WikiTokenizer wikiTokenizer) { } @Override public void onHtml(WikiTokenizer wikiTokenizer) { } } //private static final Pattern wikiTokenEvent = Pattern.compile("($)", Pattern.MULTILINE); private static final Pattern wikiTokenEvent = Pattern.compile("(" + "\\{\\{|\\}\\}|" + "\\[\\[|\\]\\]|" + "\\||" + // Need the | because we might have to find unescaped pipes "=|" + // Need the = because we might have to find unescaped = "", "\n"); return this; } if (wikiText.startsWith("}}", start) || wikiText.startsWith("]]", start)) { errors.add("Close without open!"); end += 2; return this; } if (wikiText.charAt(start) == '|' || wikiText.charAt(start) == '=') { isPlainText = true; ++end; return this; } if (this.matcher.find(start)) { end = this.matcher.start(1); isPlainText = true; if (end == start) { errors.add("Empty group: " + this.matcher.group()); assert false; } return this; } end = wikiText.length(); return this; } finally { if (!errors.isEmpty()) { System.err.println("Errors: " + errors + ", token=" + token()); } } } public String token() { final String token = wikiText.substring(start, end); assert token.equals("\n") || !token.endsWith("\n") : "token='" + token + "'"; return token; } final static String[] patterns = { "\n", "{{", "}}", "[[", "]]", "[", "]", "|", "=", "", matchStart); if (end == -1) { errors.add("Unmatched