import com.hughes.android.dictionary.parser.WikiWord.Translation;
import com.hughes.util.ListUtil;
import com.hughes.util.StringUtil;
+import com.sun.tools.internal.ws.wsdl.document.jaxws.Exception;
public class EnWiktionaryXmlParser extends org.xml.sax.helpers.DefaultHandler implements WikiCallback {
StringBuilder titleBuilder;
StringBuilder textBuilder;
StringBuilder currentBuilder = null;
+
+ static void assertTrue(final boolean condition) {
+ assertTrue(condition, "");
+ }
+
+ static void assertTrue(final boolean condition, final String message) {
+ if (!condition) {
+ System.err.println("Assertion failed, message: " + message);
+ new RuntimeException().printStackTrace(System.err);
+ }
+ }
public EnWiktionaryXmlParser(final DictionaryBuilder dictBuilder, final Pattern[] langPatterns, final int enIndexBuilder) {
- assert langPatterns.length == 2;
+ assertTrue(langPatterns.length == 2);
this.dictBuilder = dictBuilder;
this.indexBuilders = dictBuilder.indexBuilders.toArray(new IndexBuilder[0]);
this.langPatterns = langPatterns;
static final Set<String> useRemainingArgTemplates = new LinkedHashSet<String>(Arrays.asList(
"Arab", "Cyrl", "fa-Arab", "italbrac", "Khmr", "ku-Arab", "IPAchar", "Laoo",
"sd-Arab", "Thai", "ttbc", "unicode", "ur-Arab", "yue-yue-j", "zh-ts",
- "zh-tsp", "zh-zh-p"));
- static final Set<String> ignoreTemplates = new LinkedHashSet<String>(Arrays.asList(""));
- static final Set<String> grammarTemplates = new LinkedHashSet<String>(Arrays.asList("impf", "pf"));
+ "zh-tsp", "zh-zh-p", "ug-Arab", "ko-inline", "Jpan", "Kore", "rfscript", "Latinx"));
+ static final Set<String> ignoreTemplates = new LinkedHashSet<String>(Arrays.asList("audio", "rhymes", "hyphenation", "homophones", "wikipedia", "rel-top", "rel-bottom", "sense", "wikisource1911Enc", "g")); // Template names the handler returns early on, without appending anything.
+ static final Set<String> grammarTemplates = new LinkedHashSet<String>(Arrays.asList("impf", "pf", "pf.", "indeclinable")); // Template names appended as name + "."; NOTE(review): "pf." already ends in a period, so it would be emitted as "pf.." - confirm this is intended.
static final Set<String> passThroughTemplates = new LinkedHashSet<String>(Arrays.asList("zzzzzzzzzzzzzzz")); // Template names appended to the output verbatim; the single entry looks like a placeholder - TODO confirm.
@Override
}
if (name.equals("qualifier")) {
- //assert positionalArgs.size() == 2 && namedArgs.isEmpty() : positionalArgs.toString() + namedArgs.toString();
+ //assertTrue(positionalArgs.size() == 2 && namedArgs.isEmpty(), positionalArgs.toString() + namedArgs.toString());
if (wikiBuilder == null) {
return;
}
}
if (passThroughTemplates.contains(name)) {
- assert positionalArgs.size() == 1 && namedArgs.isEmpty() : positionalArgs.toString() + namedArgs;
+ assertTrue(positionalArgs.size() == 1 && namedArgs.isEmpty(), positionalArgs.toString() + namedArgs);
wikiBuilder.append(name);
return;
}
- if (name.equals("audio") || name.equals("rhymes") || name.equals("hyphenation")) {
+ if (ignoreTemplates.contains(name)) {
return;
}
// Translations
if (name.equals("trans-top")) {
- assert positionalArgs.size() >= 1 && namedArgs.isEmpty() : positionalArgs.toString() + namedArgs + title;
+ assertTrue(positionalArgs.size() >= 1 && namedArgs.isEmpty(), positionalArgs.toString() + namedArgs + title);
if (currentPartOfSpeech == null) {
- assert currentWord != null && !currentWord.partsOfSpeech.isEmpty() : title;
+ assertTrue(currentWord != null && !currentWord.partsOfSpeech.isEmpty(), title);
System.err.println("Assuming last part of speech for non-nested translation section: " + title);
currentPartOfSpeech = ListUtil.getLast(currentWord.partsOfSpeech);
}
return;
}
if (name.equals("m") || name.equals("f") || name.equals("n") || name.equals("c")) {
- assert positionalArgs.size() >= 1 && namedArgs.isEmpty() : positionalArgs.toString() + namedArgs.toString();
+ assertTrue(positionalArgs.size() >= 1 && namedArgs.isEmpty(), positionalArgs.toString() + namedArgs.toString());
wikiBuilder.append("{");
for (int i = 1; i < positionalArgs.size(); ++i) {
wikiBuilder.append(i > 1 ? "," : "");
wikiBuilder.append(name).append("}");
} else if (name.equals("p")) {
- assert positionalArgs.size() == 1 && namedArgs.isEmpty();
+ assertTrue(positionalArgs.size() == 1 && namedArgs.isEmpty());
wikiBuilder.append("pl.");
} else if (name.equals("s")) {
- assert positionalArgs.size() == 1 && namedArgs.isEmpty() || title.equals("dobra");
+ assertTrue(positionalArgs.size() == 1 && namedArgs.isEmpty() || title.equals("dobra"), title);
wikiBuilder.append("sg.");
} else if (grammarTemplates.contains(name)) {
- assert positionalArgs.size() == 1 && namedArgs.isEmpty();
+ assert positionalArgs.size() == 1 && namedArgs.isEmpty() : positionalArgs.toString() + namedArgs;
wikiBuilder.append(name).append(".");
} else if (name.equals("l")) {
@Override
public void onUnterminated(String start, String rest) {
- throw new RuntimeException(start + rest);
+ System.err.printf("OnUnterminated: %s %s %s\n", title, start, rest); // Logs title, start and rest to stderr and returns; the removed line above threw a RuntimeException here instead.
}
@Override
public void onInvalidHeaderEnd(String rest) {