X-Git-Url: http://gitweb.fperrin.net/?a=blobdiff_plain;f=src%2Fcom%2Fhughes%2Fandroid%2Fdictionary%2Fparser%2Fenwiktionary%2FFunctionCallbacksDefault.java;h=fde94f709f9a692fc53f8bd01958ca180d085b4c;hb=d06b99b469b18cfa4a8a4bd45d51ee4ebd7efaca;hp=f9ad939d841240d252f45e7dfdfe899b5442e93f;hpb=794c2989d4ff4c456c9aa1066150c6d51a5aae84;p=DictionaryPC.git diff --git a/src/com/hughes/android/dictionary/parser/enwiktionary/FunctionCallbacksDefault.java b/src/com/hughes/android/dictionary/parser/enwiktionary/FunctionCallbacksDefault.java index f9ad939..fde94f7 100644 --- a/src/com/hughes/android/dictionary/parser/enwiktionary/FunctionCallbacksDefault.java +++ b/src/com/hughes/android/dictionary/parser/enwiktionary/FunctionCallbacksDefault.java @@ -55,10 +55,45 @@ public final class FunctionCallbacksDefault { callback = new Ignore(); DEFAULT.put("trreq", callback); DEFAULT.put("t-image", callback); + DEFAULT.put("defn", callback); + DEFAULT.put("rfdef", callback); + DEFAULT.put("attention", callback); + DEFAULT.put("zh-attention", callback); DEFAULT.put("not used", new not_used()); + DEFAULT.put("form of", new FormOf()); + DEFAULT.put("wikipedia", new wikipedia()); + + callback = new InflOrHead(); + DEFAULT.put("infl", callback); + DEFAULT.put("head", callback); } + + static final class NameAndArgs implements FunctionCallback { + @Override + public boolean onWikiFunction(final WikiTokenizer wikiTokenizer, final String name, final List args, + final Map namedArgs, final EnWiktionaryXmlParser parser, + final AppendAndIndexWikiCallback appendAndIndexWikiCallback) { + + appendAndIndexWikiCallback.builder.append(name); + for (int i = 0; i < args.size(); ++i) { + if (args.get(i).length() > 0) { + appendAndIndexWikiCallback.builder.append("|"); + appendAndIndexWikiCallback.dispatch(args.get(i), null, null); + } + } + for (final Map.Entry entry : namedArgs.entrySet()) { + appendAndIndexWikiCallback.builder.append("|"); + appendAndIndexWikiCallback.dispatch(entry.getKey(), null, null); + appendAndIndexWikiCallback.builder.append("="); + appendAndIndexWikiCallback.dispatch(entry.getValue(), null, null); + } + return true; + } + } + static NameAndArgs NAME_AND_ARGS = new NameAndArgs(); + // ------------------------------------------------------------------ static final class TranslationCallback implements FunctionCallback { @@ -162,9 +197,19 @@ public final class FunctionCallbacksDefault { // TODO: rewrite this! // encodes text in various langs. // lang is arg 0. - // TODO: set that we're inside L - // EntryTypeName.WIKTIONARY_TRANSLATION_OTHER_TEXT - WikiTokenizer.dispatch(args.get(1), false, appendAndIndexWikiCallback); + // + final EntryTypeName entryTypeName; + switch (parser.state) { + case TRANSLATION_LINE: entryTypeName = EntryTypeName.WIKTIONARY_TRANSLATION_OTHER_TEXT; break; + case ENGLISH_DEF_OF_FOREIGN: entryTypeName = EntryTypeName.WIKTIONARY_ENGLISH_DEF_WIKI_LINK; break; + default: throw new IllegalStateException("Invalid enum value: " + parser.state); + } + final String langCode = args.get(0); + if ("en".equals(langCode)) { + appendAndIndexWikiCallback.dispatch(args.get(1), parser.enIndexBuilder, entryTypeName); + } else { + appendAndIndexWikiCallback.dispatch(args.get(1), parser.foreignIndexBuilder, entryTypeName); + } // TODO: transliteration return true; } @@ -249,5 +294,186 @@ public final class FunctionCallbacksDefault { } + // -------------------------------------------------------------------- + // -------------------------------------------------------------------- + + + static final class FormOf implements FunctionCallback { + @Override + public boolean onWikiFunction(final WikiTokenizer wikiTokenizer, final String name, final List args, + final Map namedArgs, + final EnWiktionaryXmlParser parser, + final AppendAndIndexWikiCallback appendAndIndexWikiCallback) { + String formName = name; + if (name.equals("form of")) { + formName = ListUtil.remove(args, 0, null); + } + if (formName == null) { + LOG.warning("Missing form name: " + parser.title); + formName = "form of"; + } + String baseForm = ListUtil.get(args, 1, ""); + if ("".equals(baseForm)) { + baseForm = ListUtil.get(args, 0, null); + ListUtil.remove(args, 1, ""); + } else { + ListUtil.remove(args, 0, null); + } + namedArgs.keySet().removeAll(EnWiktionaryXmlParser.USELESS_WIKI_ARGS); + + appendAndIndexWikiCallback.builder.append("{"); + NAME_AND_ARGS.onWikiFunction(wikiTokenizer, formName, args, namedArgs, parser, appendAndIndexWikiCallback); + appendAndIndexWikiCallback.builder.append("}"); + if (baseForm != null && appendAndIndexWikiCallback.indexedEntry != null) { + parser.foreignIndexBuilder.addEntryWithString(appendAndIndexWikiCallback.indexedEntry, baseForm, EntryTypeName.WIKTIONARY_BASE_FORM_MULTI); + } else { + // null baseForm happens in Danish. + LOG.warning("Null baseform: " + parser.title); + } + return true; + } + } + + static final FormOf FORM_OF = new FormOf(); + + + // -------------------------------------------------------------------- + // -------------------------------------------------------------------- + + static final class wikipedia implements FunctionCallback { + @Override + public boolean onWikiFunction(final WikiTokenizer wikiTokenizer, final String name, final List args, + final Map namedArgs, + final EnWiktionaryXmlParser parser, + final AppendAndIndexWikiCallback appendAndIndexWikiCallback) { + namedArgs.remove("lang"); + if (args.size() > 1 || !namedArgs.isEmpty()) { + // Unindexed! + return false; + } else if (args.size() == 1) { + return false; + } else { + return true; + } + } + } + + static final class InflOrHead implements FunctionCallback { + @Override + public boolean onWikiFunction(final WikiTokenizer wikiTokenizer, final String name, final List args, + final Map namedArgs, + final EnWiktionaryXmlParser parser, + final AppendAndIndexWikiCallback appendAndIndexWikiCallback) { + // See: http://en.wiktionary.org/wiki/Template:infl + final String langCode = ListUtil.get(args, 0); + String head = namedArgs.remove("head"); + if (head == null) { + head = namedArgs.remove("title"); // Bug + } + if (head == null) { + head = parser.title; + } else { + head = WikiTokenizer.toPlainText(head); + } + parser.titleAppended = true; + + namedArgs.keySet().removeAll(EnWiktionaryXmlParser.USELESS_WIKI_ARGS); + + final String tr = namedArgs.remove("tr"); + String g = namedArgs.remove("g"); + if (g == null) { + g = namedArgs.remove("gender"); + } + final String g2 = namedArgs.remove("g2"); + final String g3 = namedArgs.remove("g3"); + + appendAndIndexWikiCallback.dispatch(head, EntryTypeName.WIKTIONARY_TITLE_MULTI); + + if (g != null) { + appendAndIndexWikiCallback.builder.append(" {").append(g); + if (g2 != null) { + appendAndIndexWikiCallback.builder.append("|").append(g2); + } + if (g3 != null) { + appendAndIndexWikiCallback.builder.append("|").append(g3); + } + appendAndIndexWikiCallback.builder.append("}"); + } + + if (tr != null) { + appendAndIndexWikiCallback.builder.append(" (tr. "); + appendAndIndexWikiCallback.dispatch(tr, EntryTypeName.WIKTIONARY_TITLE_MULTI); + appendAndIndexWikiCallback.builder.append(")"); + parser.wordForms.add(tr); + } + + final String pos = ListUtil.get(args, 1); + if (pos != null) { + appendAndIndexWikiCallback.builder.append(" (").append(pos).append(")"); + } + for (int i = 2; i < args.size(); i += 2) { + final String inflName = ListUtil.get(args, i); + final String inflValue = ListUtil.get(args, i + 1); + appendAndIndexWikiCallback.builder.append(", "); + appendAndIndexWikiCallback.dispatch(inflName, null, null); + if (inflValue != null && inflValue.length() > 0) { + appendAndIndexWikiCallback.builder.append(": "); + appendAndIndexWikiCallback.dispatch(inflValue, null, null); + parser.wordForms.add(inflValue); + } + } + for (final String key : namedArgs.keySet()) { + final String value = WikiTokenizer.toPlainText(namedArgs.get(key)); + appendAndIndexWikiCallback.builder.append(" "); + appendAndIndexWikiCallback.dispatch(key, null, null); + appendAndIndexWikiCallback.builder.append("="); + appendAndIndexWikiCallback.dispatch(value, null, null); + parser.wordForms.add(value); + } + return true; + } + } + + + static { + DEFAULT.put("it-noun", new it_noun()); + } + static final class it_noun implements FunctionCallback { + @Override + public boolean onWikiFunction(final WikiTokenizer wikiTokenizer, final String name, final List args, + final Map namedArgs, + final EnWiktionaryXmlParser parser, + final AppendAndIndexWikiCallback appendAndIndexWikiCallback) { + parser.titleAppended = true; + final String base = ListUtil.get(args, 0); + final String gender = ListUtil.get(args, 1); + final String singular = base + ListUtil.get(args, 2, null); + final String plural = base + ListUtil.get(args, 3, null); + appendAndIndexWikiCallback.builder.append(" "); + appendAndIndexWikiCallback.dispatch(singular, null, null); + appendAndIndexWikiCallback.builder.append(" {").append(gender).append("}, "); + appendAndIndexWikiCallback.dispatch(plural, null, null); + appendAndIndexWikiCallback.builder.append(" {pl}"); + parser.wordForms.add(singular); + parser.wordForms.add(plural); + if (!namedArgs.isEmpty() || args.size() > 4) { + LOG.warning("Invalid it-noun: " + wikiTokenizer.token()); + } + return true; + } + } + + static { + DEFAULT.put("it-proper noun", new it_proper_noun()); + } + static final class it_proper_noun implements FunctionCallback { + @Override + public boolean onWikiFunction(final WikiTokenizer wikiTokenizer, final String name, final List args, + final Map namedArgs, + final EnWiktionaryXmlParser parser, + final AppendAndIndexWikiCallback appendAndIndexWikiCallback) { + return false; + } + } }