1 package com.hughes.android.dictionary.parser;
3 import java.util.ArrayList;
5 import java.util.regex.Matcher;
6 import java.util.regex.Pattern;
8 public class WikiLineReader {
// Stack of the "[[" / "{{" openers that readLine() has seen and not yet matched
// with a closer; the most recent opener is at the end of the list.
10 private final List<String> lineStack = new ArrayList<String>();
// The complete wiki text this reader scans.
12 private final String wikiText;
// Index into wikiText from which readLine() starts scanning for the next line.
13 private int lineStart = 0;
// Tokens that readLine() reacts to: end of a line ("$" in MULTILINE mode, which
// matches as an empty string), the brackets "{{" "[[" "}}" "]]", and the
// openers "<!--", "<pre>" and "<math>".
15 private static final Pattern wikiLineEvent = Pattern.compile("$|\\{\\{|\\[\\[|\\}\\}|\\]\\]|<!--|<pre>|<math>", Pattern.MULTILINE);
// One or more whitespace characters; cleanUpLine() replaces each run with a single space.
17 private static final Pattern whitespace = Pattern.compile("\\s+");
/**
 * Creates a reader over the given wiki text.
 *
 * @param wikiText the text to read lines from; the reference is stored as-is
 */
19 public WikiLineReader(final String wikiText) {
20 this.wikiText = wikiText;
/**
 * Reads the next logical line of the wiki text, starting at {@code lineStart}.
 *
 * <p>The scan walks over {@code wikiLineEvent} matches. Bracket openers are pushed
 * onto {@code lineStack} and closers pop them, so line breaks that fall inside an
 * open {@code [[ ]]} or {@code {{ }}} pair can be told apart from those outside.
 * Comment, {@code <pre>} and {@code <math>} openers jump the scan position
 * forward via {@code safeIndexOf}.
 *
 * @return the text between {@code lineStart} and the computed line end, passed
 *         through {@link #cleanUpLine(String)}
 */
23 public String readLine() {
// A line previously handed to stuffLine() is picked up before any scanning.
24 if (stuffedLine != null) {
25 final String line = stuffedLine;
// Loop condition: the character at lineStart is whitespace other than '\n'.
29 while (lineStart < wikiText.length() &&
30 Character.isWhitespace(wikiText.charAt(lineStart)) &&
31 wikiText.charAt(lineStart) != '\n') {
// Nothing left to read once lineStart has reached the end of the text.
34 if (lineStart >= wikiText.length()) {
38 int lineEnd = lineStart;
// Offset of the first line break seen during this scan, or -1 if none yet.
40 int firstNewline = -1;
41 final Matcher matcher = wikiLineEvent.matcher(wikiText);
42 while (lineEnd < wikiText.length()) {
// No further event anywhere in the text: the line extends to the end of the text.
43 if (!matcher.find(lineEnd)) {
44 lineEnd = wikiText.length();
47 lineEnd = matcher.end();
48 if (lineEnd == wikiText.length()) {
// An empty match can only come from the "$" alternative, i.e. an end of line.
51 if (matcher.group().equals("")) {
52 assert (wikiText.charAt(matcher.start()) == '\n'): "Invalid: " + wikiText.substring(matcher.start());
// An empty stack means this line break is not inside any open bracket pair.
54 if (lineStack.size() == 0) {
// Only the first line break is recorded; later ones leave firstNewline unchanged.
57 if (firstNewline == -1) {
58 firstNewline = matcher.end();
// Openers are pushed so that the matching closer can be checked later.
63 if (matcher.group().equals("[[") || matcher.group().equals("{{")) {
64 lineStack.add(matcher.group());
65 } else if (matcher.group().equals("}}") || matcher.group().equals("]]")) {
66 if (lineStack.size() > 0) {
// Pop the most recent opener and report to stderr if the closer is of the other kind.
67 final String removed = lineStack.remove(lineStack.size() - 1);
68 if (removed.equals("{{") && !matcher.group().equals("}}")) {
69 System.err.println("Unmatched {{ error: " + wikiText.substring(lineStart));
71 if (removed.equals("[[") && !matcher.group().equals("]]")) {
72 System.err.println("Unmatched [[ error: " + wikiText.substring(lineStart));
// NOTE(review): in a replaceAll() replacement string a backslash escapes the next
// character, so "\\n" produces the letter 'n' rather than a visible backslash-n.
// If an escaped newline is wanted in this message, use replace("\n", "\\n").
75 System.err.println("Pop too many error: " + wikiText.substring(lineStart).replaceAll("\n", "\\n"));
// For comment, <pre> and <math> openers, move the scan position to the offset
// returned by safeIndexOf for the closing tag, with "\n" as the backup target.
77 } else if (matcher.group().equals("<!--")) {
78 lineEnd = safeIndexOf(wikiText, lineEnd, "-->", "\n");
79 } else if (matcher.group().equals("<pre>")) {
80 lineEnd = safeIndexOf(wikiText, lineEnd, "</pre>", "\n");
81 } else if (matcher.group().equals("<math>")) {
82 lineEnd = safeIndexOf(wikiText, lineEnd, "</math>", "\n");
// Brackets still open after the scan and a line break was recorded:
// end the line just past that first line break instead.
85 if (lineStack.size() > 0 && firstNewline != -1) {
86 lineEnd = firstNewline + 1;
88 final String result = wikiText.substring(lineStart, lineEnd);
90 return cleanUpLine(result);
/**
 * Finds the offset just past {@code target} in {@code s}, searching from
 * {@code start}, with {@code backup} as a second string to search for.
 *
 * @param s      the string to search
 * @param start  the index at which both searches begin
 * @param target the preferred string to locate
 * @param backup the alternative string to locate
 * @return {@code index + length} of the string that was located, i.e. the offset
 *         of the first character after it
 */
94 static int safeIndexOf(final String s, final int start, final String target, final String backup) {
95 int close = s.indexOf(target, start);
// Offset immediately after the target occurrence.
97 return close + target.length();
// Second search, for the backup string, from the same start index.
99 close = s.indexOf(backup, start);
// Offset immediately after the backup occurrence.
101 return close + backup.length();
/**
 * Cleans a raw line: cuts out {@code <!-- ... -->} comment spans and replaces
 * every run of whitespace with a single space.
 *
 * @param line the raw line text
 */
106 public static String cleanUpLine(String line) {
// Repeat for as long as the line still contains a comment opener.
108 while ((pos = line.indexOf("<!--")) != -1) {
// NOTE(review): this search starts at index 0, not at pos. If a "-->" occurs
// before the "<!--", end is less than pos and the splice below makes the line
// longer instead of removing the comment; line.indexOf("-->", pos) looks intended.
109 int end = line.indexOf("-->");
// Keep the text before the opener and the text after the 3-character closer.
111 line = line.substring(0, pos) + line.substring(end + 3);
114 final Matcher matcher = whitespace.matcher(line);
115 line = matcher.replaceAll(" ");
// A pending line, or null when there is none. readLine() checks this field
// before scanning the wiki text.
120 String stuffedLine = null;
121 public void stuffLine(final String line) {
122 assert stuffedLine == null;