+// Copyright 2011 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
package com.hughes.android.dictionary.parser;
import java.util.ArrayList;
/**
 * Creates a tokenizer over {@code wikiText} by delegating to the two-argument
 * constructor with {@code isNewline} set to {@code true}.
 *
 * @param wikiText the text to tokenize
 */
public WikiTokenizer(final String wikiText) {
+ this(wikiText, true);
+ }
+
/**
 * Creates a tokenizer over {@code wikiText}, building the matcher from
 * {@code wikiTokenEvent} against that same text.
 *
 * @param wikiText the text to tokenize
 * @param isNewline initial value stored in {@code justReturnedNewline}
 *     (NOTE(review): presumably {@code true} means the text begins at the
 *     start of a line -- confirm against the callers)
 */
+ public WikiTokenizer(final String wikiText, final boolean isNewline) {
this.wikiText = wikiText;
this.matcher = wikiTokenEvent.matcher(wikiText);
+ justReturnedNewline = isNewline;
}
-
+
private void clear() {
errors.clear();
tokenStack.clear();
if (lastUnescapedPipePos != -1) {
return wikiText.substring(lastUnescapedPipePos + 1, end - 2);
}
+ assert start + 2 < wikiText.length() && end >= 2: wikiText;
return wikiText.substring(start + 2, end - 2);
}
}
// Eat a newline if we're looking at one:
- final boolean atNewline = wikiText.charAt(end) == '\n';
+ final boolean atNewline = wikiText.charAt(end) == '\n' || wikiText.charAt(end) == '\u2028';
if (atNewline) {
justReturnedNewline = true;
++end;
/**
 * Returns the current token, i.e. the substring of {@code wikiText} between
 * {@code start} (inclusive) and {@code end} (exclusive).
 *
 * When assertions are enabled, this also checks that the token is either exactly
 * a single line feed or does not end with a line feed; the assertion message
 * carries the offending token.
 *
 * @return the text of the current token
 */
public String token() {
final String token = wikiText.substring(start, end);
- assert token.equals("\n") || !token.endsWith("\n") : token;
+ assert token.equals("\n") || !token.endsWith("\n") : "token='" + token + "'";
return token;
}
final boolean insideFunction = toFind.equals("}}");
int end = start;
+ int firstNewline = -1;
while (end < wikiText.length()) {
if (matcher.find(end)) {
final String matchText = matcher.group();
assert matcher.end() > end || matchText.length() == 0: "Group=" + matcher.group();
if (matchText.length() == 0) {
assert matchStart == wikiText.length() || wikiText.charAt(matchStart) == '\n';
+ if (firstNewline == -1) {
+ firstNewline = matcher.end();
+ }
if (tokenStack.isEmpty() && toFind.equals("\n")) {
return matchStart;
}
// Inside the while loop. Just go forward.
end = Math.max(end, matcher.end());
}
+ if (toFind.equals("\n") && tokenStack.isEmpty()) {
+ // We were looking for the end, we got it.
+ return end;
+ }
+ if (firstNewline != -1) {
+ errors.add("Couldn't find: " + toFind + ", "+ wikiText.substring(start));
+ return firstNewline;
+ }
return end;
}
return s.length();
}
- public static String toPlainText(String sense) {
- final WikiTokenizer wikiTokenizer = new WikiTokenizer(sense);
+ public static String toPlainText(final String wikiText) {
+ final WikiTokenizer wikiTokenizer = new WikiTokenizer(wikiText);
final StringBuilder builder = new StringBuilder();
while (wikiTokenizer.nextToken() != null) {
if (wikiTokenizer.isPlainText()) {
return builder.toString();
}
/**
 * Appends a pipe-separated function call to {@code builder}: first {@code name},
 * then every positional argument preceded by "|", then every named argument in
 * the form "|key=value".
 *
 * Only these pieces are appended: no surrounding delimiters are added and the
 * argument text is written as-is, without escaping.
 *
 * @param builder destination buffer; it is appended to and returned
 * @param name text appended first
 * @param args positional arguments, appended in iteration order
 * @param namedArgs named arguments, appended in the iteration order of the
 *     map's {@code entrySet()}
 * @return the same {@code builder} instance that was passed in
 */
+ public static StringBuilder appendFunction(final StringBuilder builder, final String name, List<String> args,
+ final Map<String, String> namedArgs) {
+ builder.append(name);
+ for (final String arg : args) {
+ builder.append("|").append(arg);
+ }
+ for (final Map.Entry<String, String> entry : namedArgs.entrySet()) {
+ builder.append("|").append(entry.getKey()).append("=").append(entry.getValue());
+ }
+ return builder;
+ }
+
}