1 package com.hughes.android.dictionary.parser.enwiktionary;
3 import java.util.Arrays;
4 import java.util.LinkedHashMap;
5 import java.util.LinkedHashSet;
9 import java.util.logging.Logger;
11 import com.hughes.android.dictionary.engine.EntryTypeName;
12 import com.hughes.android.dictionary.parser.WikiTokenizer;
13 import com.hughes.util.ListUtil;
15 public final class FunctionCallbacksDefault {
// Logger shared by the callbacks in this file. It is looked up under the
// class name of EnWiktionaryXmlParser, not FunctionCallbacksDefault.
17 static final Logger LOG = Logger.getLogger(EnWiktionaryXmlParser.class.getName());
// Registry from wiki function (template) name to the callback that handles it.
// Backed by a LinkedHashMap, so iteration follows insertion order.
// NOTE(review): java.util.Map (and List/Set used further down) have no visible
// import in this extract -- confirm they are imported in the full file.
19 static final Map<String,FunctionCallback> DEFAULT = new LinkedHashMap<String, FunctionCallback>();
// Populates DEFAULT. The enclosing "static {" line and its closing brace are
// not visible in this extract. The local "callback" is reassigned for each
// group so that all names of a group share one callback instance.

// Translation templates: all variants go to a single TranslationCallback.
21 FunctionCallback callback = new TranslationCallback();
22 DEFAULT.put("t", callback);
23 DEFAULT.put("t+", callback);
24 DEFAULT.put("t-", callback);
25 DEFAULT.put("tø", callback);
26 DEFAULT.put("apdx-t", callback);
// "qualifier" gets its own QualifierCallback instance.
28 DEFAULT.put("qualifier", new QualifierCallback());
// Every name in the "encodings" set below is registered with the same
// EncodingCallback instance. LinkedHashSet keeps the listed order and drops
// duplicates.
30 callback = new EncodingCallback();
31 Set<String> encodings = new LinkedHashSet<String>(Arrays.asList(
33 "sd-Arab", "ku-Arab", "Arab", "unicode", "Laoo", "ur-Arab", "Thai",
34 "fa-Arab", "Khmr", "Cyrl", "IPAchar", "ug-Arab", "ko-inline",
35 "Jpan", "Kore", "Hebr", "rfscript", "Beng", "Mong", "Knda", "Cyrs",
36 "yue-tsj", "Mlym", "Tfng", "Grek", "yue-yue-j"));
37 for (final String encoding : encodings) {
38 DEFAULT.put(encoding, callback);
// Single-letter names handled by the Gender callback.
41 callback = new Gender();
42 DEFAULT.put("m", callback);
43 DEFAULT.put("f", callback);
44 DEFAULT.put("n", callback);
45 DEFAULT.put("p", callback);
46 DEFAULT.put("g", callback);
// Names that each get a dedicated callback class.
48 DEFAULT.put("l", new l());
49 DEFAULT.put("italbrac", new italbrac());
50 DEFAULT.put("gloss", new gloss());
// "term" is handled by AppendArg0.
52 callback = new AppendArg0();
53 DEFAULT.put("term", callback);
// Names registered with the Ignore callback.
55 callback = new Ignore();
56 DEFAULT.put("trreq", callback);
57 DEFAULT.put("t-image", callback);
58 DEFAULT.put("defn", callback);
59 DEFAULT.put("rfdef", callback);
60 DEFAULT.put("attention", callback);
61 DEFAULT.put("zh-attention", callback);
// More names with dedicated callback classes.
63 DEFAULT.put("not used", new not_used());
64 DEFAULT.put("form of", new FormOf());
65 DEFAULT.put("wikipedia", new wikipedia());
// "infl" and "head" share one InflOrHead instance.
67 callback = new InflOrHead();
68 DEFAULT.put("infl", callback);
69 DEFAULT.put("head", callback);
// Callback that writes a function call back out as text: the function name,
// then its positional arguments, then its named arguments, separated by "|".
// The closing braces and the return statement are not visible in this extract.
73 static final class NameAndArgs implements FunctionCallback {
75 public boolean onWikiFunction(final WikiTokenizer wikiTokenizer, final String name, final List<String> args,
76 final Map<String, String> namedArgs, final EnWiktionaryXmlParser parser,
77 final AppendAndIndexWikiCallback appendAndIndexWikiCallback) {
// The name is appended verbatim to the output builder.
79 appendAndIndexWikiCallback.builder.append(name);
// Positional args: empty strings are skipped; the rest are passed through
// dispatch(...) with null for the two trailing arguments.
80 for (int i = 0; i < args.size(); ++i) {
81 if (args.get(i).length() > 0) {
82 appendAndIndexWikiCallback.builder.append("|");
83 appendAndIndexWikiCallback.dispatch(args.get(i), null, null);
// Named args are written as "|key=value", with both key and value dispatched.
86 for (final Map.Entry<String, String> entry : namedArgs.entrySet()) {
87 appendAndIndexWikiCallback.builder.append("|");
88 appendAndIndexWikiCallback.dispatch(entry.getKey(), null, null);
89 appendAndIndexWikiCallback.builder.append("=");
90 appendAndIndexWikiCallback.dispatch(entry.getValue(), null, null);
// Shared NameAndArgs instance; FormOf calls it to render its remaining arguments.
// NOTE(review): not declared final, unlike FORM_OF -- confirm nothing reassigns it.
95 static NameAndArgs NAME_AND_ARGS = new NameAndArgs();
97 // ------------------------------------------------------------------
// Callback registered for the translation names "t", "t+", "t-", "tø" and
// "apdx-t". Positional args are read as: 0 = language code, 1 = word,
// 2 = gender; the named arg "tr" is the transliteration.
// Closing braces and return statements are not visible in this extract.
99 static final class TranslationCallback implements FunctionCallback {
101 public boolean onWikiFunction(final WikiTokenizer wikiTokenizer, final String name, final List<String> args,
102 final Map<String, String> namedArgs, final EnWiktionaryXmlParser parser,
103 final AppendAndIndexWikiCallback appendAndIndexWikiCallback) {
// Both calls below mutate the namedArgs map that the caller passed in.
105 final String transliteration = namedArgs.remove("tr");
106 namedArgs.keySet().removeAll(EnWiktionaryXmlParser.USELESS_WIKI_ARGS);
// Fewer than two positional args is logged with the page title. What happens
// after the warning is not visible here.
107 if (args.size() < 2) {
108 LOG.warning("{{t...}} with wrong args: title=" + parser.title);
// NOTE(review): langCode is not used in any visible line of this method.
111 final String langCode = ListUtil.get(args, 0);
112 final String word = ListUtil.get(args, 1);
113 final String gender = ListUtil.get(args, 2);
114 // TODO: deal with second (and third...) gender, and alt.
// The word is dispatched with entry type WIKTIONARY_TITLE_MULTI.
116 appendAndIndexWikiCallback.dispatch(word, EntryTypeName.WIKTIONARY_TITLE_MULTI);
// Optional gender suffix, written as " {gender}".
118 if (gender != null) {
119 appendAndIndexWikiCallback.builder.append(String.format(" {%s}", gender));
// Optional transliteration, written as " (tr. ...)" and dispatched with
// entry type WIKTIONARY_TRANSLITERATION.
121 if (transliteration != null) {
122 appendAndIndexWikiCallback.builder.append(" (tr. ");
123 appendAndIndexWikiCallback.dispatch(transliteration, EntryTypeName.WIKTIONARY_TRANSLITERATION);
124 appendAndIndexWikiCallback.builder.append(")");
131 // ------------------------------------------------------------------
// Callback for "qualifier": writes the single positional argument wrapped in
// parentheses. Closing braces and return statements are not visible in this
// extract.
133 static final class QualifierCallback implements FunctionCallback {
135 public boolean onWikiFunction(final WikiTokenizer wikiTokenizer, final String name, final List<String> args,
136 final Map<String, String> namedArgs,
137 final EnWiktionaryXmlParser parser,
138 final AppendAndIndexWikiCallback appendAndIndexWikiCallback) {
// Anything other than exactly one positional arg and no named args is logged.
// NOTE(review): the message ends in ": " with nothing appended, whereas
// EncodingCallback appends wikiTokenizer.token() -- the detail may have been
// forgotten here.
139 if (args.size() != 1 || !namedArgs.isEmpty()) {
140 LOG.warning("weird qualifier: ");
// Output is "(" + dispatched qualifier + ")"; dispatch receives a null entry type.
143 String qualifier = args.get(0);
144 appendAndIndexWikiCallback.builder.append("(");
145 appendAndIndexWikiCallback.dispatch(qualifier, null);
146 appendAndIndexWikiCallback.builder.append(")");
151 // ------------------------------------------------------------------
// Callback shared by all names in the "encodings" set: it dispatches the
// single positional argument as wiki text and adds no decoration of its own.
// Closing braces and return statements are not visible in this extract.
153 static final class EncodingCallback implements FunctionCallback {
155 public boolean onWikiFunction(final WikiTokenizer wikiTokenizer, final String name, final List<String> args,
156 final Map<String, String> namedArgs,
157 final EnWiktionaryXmlParser parser,
158 final AppendAndIndexWikiCallback appendAndIndexWikiCallback) {
// Anything other than exactly one positional arg and no named args is logged
// together with the current token.
159 if (args.size() != 1 || !namedArgs.isEmpty()) {
160 LOG.warning("weird encoding: " + wikiTokenizer.token());
// The entry type currently set on the callback object is reused for the dispatch.
162 final String wikiText = args.get(0);
163 appendAndIndexWikiCallback.dispatch(wikiText, appendAndIndexWikiCallback.entryTypeName);
168 // ------------------------------------------------------------------
// Callback for the names "m", "f", "n", "p" and "g": writes the function name
// and its positional args between braces, i.e. "{name|arg0|arg1...}".
// Closing braces and return statements are not visible in this extract.
170 static final class Gender implements FunctionCallback {
172 public boolean onWikiFunction(final WikiTokenizer wikiTokenizer, final String name, final List<String> args,
173 final Map<String, String> namedArgs,
174 final EnWiktionaryXmlParser parser,
175 final AppendAndIndexWikiCallback appendAndIndexWikiCallback) {
// Named args get special treatment; the body of this branch is not visible
// in this extract.
176 if (!namedArgs.isEmpty()) {
179 appendAndIndexWikiCallback.builder.append("{");
180 appendAndIndexWikiCallback.builder.append(name);
// Positional args are appended raw (not dispatched), each preceded by "|".
181 for (int i = 0; i < args.size(); ++i) {
182 appendAndIndexWikiCallback.builder.append("|").append(args.get(i));
184 appendAndIndexWikiCallback.builder.append("}");
189 // ------------------------------------------------------------------
// Callback for "l". Positional arg 0 is read as a language code and arg 1 as
// the text to dispatch and index. Closing braces, the else line and return
// statements are not visible in this extract.
191 static final class l implements FunctionCallback {
193 public boolean onWikiFunction(final WikiTokenizer wikiTokenizer, final String name, final List<String> args,
194 final Map<String, String> namedArgs,
195 final EnWiktionaryXmlParser parser,
196 final AppendAndIndexWikiCallback appendAndIndexWikiCallback) {
197 // TODO: rewrite this!
198 // encodes text in various langs.
// The entry type depends on the parser state. Any state other than the two
// listed cases throws IllegalStateException.
201 final EntryTypeName entryTypeName;
202 switch (parser.state) {
203 case TRANSLATION_LINE: entryTypeName = EntryTypeName.WIKTIONARY_TRANSLATION_OTHER_TEXT; break;
204 case ENGLISH_DEF_OF_FOREIGN: entryTypeName = EntryTypeName.WIKTIONARY_ENGLISH_DEF_WIKI_LINK; break;
205 default: throw new IllegalStateException("Invalid enum value: " + parser.state);
// Language "en" is dispatched with the English index builder. The second
// dispatch uses the foreign index builder and is presumably the else branch
// (the "else" line is missing from this extract).
// args.get(0) and args.get(1) are read directly, with no visible size check.
207 final String langCode = args.get(0);
208 if ("en".equals(langCode)) {
209 appendAndIndexWikiCallback.dispatch(args.get(1), parser.enIndexBuilder, entryTypeName);
211 appendAndIndexWikiCallback.dispatch(args.get(1), parser.foreignIndexBuilder, entryTypeName);
213 // TODO: transliteration
218 // ------------------------------------------------------------------
// Callback registered for "term": dispatches positional arg 0 with entry type
// WIKTIONARY_TRANSLATION_OTHER_TEXT and adds no surrounding text.
// Closing braces and return statements are not visible in this extract.
220 static final class AppendArg0 implements FunctionCallback {
222 public boolean onWikiFunction(final WikiTokenizer wikiTokenizer, final String name, final List<String> args,
223 final Map<String, String> namedArgs,
224 final EnWiktionaryXmlParser parser,
225 final AppendAndIndexWikiCallback appendAndIndexWikiCallback) {
// Guard for calls that do not have exactly one positional arg and no named
// args; the body of the guard is not visible in this extract.
226 if (args.size() != 1 || !namedArgs.isEmpty()) {
229 appendAndIndexWikiCallback.dispatch(args.get(0), EntryTypeName.WIKTIONARY_TRANSLATION_OTHER_TEXT);
230 // TODO: transliteration
235 // ------------------------------------------------------------------
// Callback for "italbrac": writes positional arg 0 between square brackets,
// dispatching it with entry type WIKTIONARY_TRANSLATION_OTHER_TEXT.
// The visible statements are the same as those of the gloss callback.
// Closing braces and return statements are not visible in this extract.
237 static final class italbrac implements FunctionCallback {
239 public boolean onWikiFunction(final WikiTokenizer wikiTokenizer, final String name, final List<String> args,
240 final Map<String, String> namedArgs,
241 final EnWiktionaryXmlParser parser,
242 final AppendAndIndexWikiCallback appendAndIndexWikiCallback) {
// Guard for calls that do not have exactly one positional arg and no named
// args; the body of the guard is not visible in this extract.
243 if (args.size() != 1 || !namedArgs.isEmpty()) {
246 appendAndIndexWikiCallback.builder.append("[");
247 appendAndIndexWikiCallback.dispatch(args.get(0), EntryTypeName.WIKTIONARY_TRANSLATION_OTHER_TEXT);
248 appendAndIndexWikiCallback.builder.append("]");
253 // ------------------------------------------------------------------
// Callback for "gloss": writes positional arg 0 between square brackets,
// dispatching it with entry type WIKTIONARY_TRANSLATION_OTHER_TEXT.
// The visible statements are the same as those of the italbrac callback.
// Closing braces and return statements are not visible in this extract.
255 static final class gloss implements FunctionCallback {
257 public boolean onWikiFunction(final WikiTokenizer wikiTokenizer, final String name, final List<String> args,
258 final Map<String, String> namedArgs,
259 final EnWiktionaryXmlParser parser,
260 final AppendAndIndexWikiCallback appendAndIndexWikiCallback) {
// Guard for calls that do not have exactly one positional arg and no named
// args; the body of the guard is not visible in this extract.
261 if (args.size() != 1 || !namedArgs.isEmpty()) {
264 appendAndIndexWikiCallback.builder.append("[");
265 appendAndIndexWikiCallback.dispatch(args.get(0), EntryTypeName.WIKTIONARY_TRANSLATION_OTHER_TEXT);
266 appendAndIndexWikiCallback.builder.append("]");
271 // ------------------------------------------------------------------
// Callback registered for "trreq", "t-image", "defn", "rfdef", "attention"
// and "zh-attention". No statement of its method body is visible in this
// extract; presumably it produces no output -- confirm against the full file.
273 static final class Ignore implements FunctionCallback {
275 public boolean onWikiFunction(final WikiTokenizer wikiTokenizer, final String name, final List<String> args,
276 final Map<String, String> namedArgs,
277 final EnWiktionaryXmlParser parser,
278 final AppendAndIndexWikiCallback appendAndIndexWikiCallback) {
283 // ------------------------------------------------------------------
// Callback for "not used": appends the fixed text "(not used)" to the output
// builder. None of the arguments are read in the visible lines.
// Closing braces and the return statement are not visible in this extract.
285 static final class not_used implements FunctionCallback {
287 public boolean onWikiFunction(final WikiTokenizer wikiTokenizer, final String name, final List<String> args,
288 final Map<String, String> namedArgs,
289 final EnWiktionaryXmlParser parser,
290 final AppendAndIndexWikiCallback appendAndIndexWikiCallback) {
291 appendAndIndexWikiCallback.builder.append("(not used)");
297 // --------------------------------------------------------------------
298 // --------------------------------------------------------------------
// Callback for "form of" templates. It works out a form name and a base
// form, writes "{formName|remaining args}" through NAME_AND_ARGS, and indexes
// the current entry under the base form.
// It mutates both args (elements are removed) and namedArgs (keys are removed).
// Closing braces, else lines and return statements are not visible in this
// extract, so the exact branch structure should be checked in the full file.
301 static final class FormOf implements FunctionCallback {
303 public boolean onWikiFunction(final WikiTokenizer wikiTokenizer, final String name, final List<String> args,
304 final Map<String, String> namedArgs,
305 final EnWiktionaryXmlParser parser,
306 final AppendAndIndexWikiCallback appendAndIndexWikiCallback) {
// For the generic "form of" name, the form name is taken out of positional
// arg 0. For any other name, the function name itself is the form name.
// (ListUtil.remove presumably returns the removed element, or the given
// default when the index is absent -- confirm.)
307 String formName = name;
308 if (name.equals("form of")) {
309 formName = ListUtil.remove(args, 0, null);
// A missing form name is logged and replaced by the literal "form of".
311 if (formName == null) {
312 LOG.warning("Missing form name: " + parser.title);
313 formName = "form of";
// Base form: positional arg 1 if non-empty, otherwise positional arg 0.
// The removals below drop entries from args before the rest is rendered;
// which removal belongs to which branch depends on the missing else line.
315 String baseForm = ListUtil.get(args, 1, "");
316 if ("".equals(baseForm)) {
317 baseForm = ListUtil.get(args, 0, null);
318 ListUtil.remove(args, 1, "");
320 ListUtil.remove(args, 0, null);
322 namedArgs.keySet().removeAll(EnWiktionaryXmlParser.USELESS_WIKI_ARGS);
// The form name and whatever args remain are rendered between braces.
324 appendAndIndexWikiCallback.builder.append("{");
325 NAME_AND_ARGS.onWikiFunction(wikiTokenizer, formName, args, namedArgs, parser, appendAndIndexWikiCallback);
326 appendAndIndexWikiCallback.builder.append("}");
// The entry is added to the foreign index under the base form only when both
// the base form and the indexed entry are present. The warning below is
// presumably the else branch (the "else" line is missing from this extract).
327 if (baseForm != null && appendAndIndexWikiCallback.indexedEntry != null) {
328 parser.foreignIndexBuilder.addEntryWithString(appendAndIndexWikiCallback.indexedEntry, baseForm, EntryTypeName.WIKTIONARY_BASE_FORM_MULTI);
330 // null baseForm happens in Danish.
331 LOG.warning("Null baseform: " + parser.title);
// Shared FormOf instance. It is separate from the instance registered in
// DEFAULT under "form of", which is created with its own "new FormOf()".
337 static final FormOf FORM_OF = new FormOf();
340 // --------------------------------------------------------------------
341 // --------------------------------------------------------------------
// Callback for "wikipedia". Only the branch conditions are visible in this
// extract; the bodies of both branches are missing.
343 static final class wikipedia implements FunctionCallback {
345 public boolean onWikiFunction(final WikiTokenizer wikiTokenizer, final String name, final List<String> args,
346 final Map<String, String> namedArgs,
347 final EnWiktionaryXmlParser parser,
348 final AppendAndIndexWikiCallback appendAndIndexWikiCallback) {
// "lang" is removed before the named-args check, so it does not count as an
// unexpected named arg. This mutates the caller's map.
349 namedArgs.remove("lang");
// First branch: more than one positional arg, or any remaining named arg.
350 if (args.size() > 1 || !namedArgs.isEmpty()) {
// Second branch: exactly one positional arg and no remaining named args.
353 } else if (args.size() == 1) {
// Callback shared by "infl" and "head". In order, it writes: the head word,
// a gender group in braces, a transliteration, the part of speech, pairs of
// inflection name/value from the positional args, and any named args that
// are left. Transliteration, inflection values and named-arg values are also
// added to parser.wordForms.
// Several "if" guards, closing braces and the return statement are missing
// from this extract; comments below say where a guard is presumably absent.
361 static final class InflOrHead implements FunctionCallback {
363 public boolean onWikiFunction(final WikiTokenizer wikiTokenizer, final String name, final List<String> args,
364 final Map<String, String> namedArgs,
365 final EnWiktionaryXmlParser parser,
366 final AppendAndIndexWikiCallback appendAndIndexWikiCallback) {
367 // See: http://en.wiktionary.org/wiki/Template:infl
// NOTE(review): langCode is not used in any visible line of this method.
368 final String langCode = ListUtil.get(args, 0);
// The head word comes from named arg "head"; "title" is also consulted
// (presumably only when "head" is absent -- the guard line is missing).
369 String head = namedArgs.remove("head");
371 head = namedArgs.remove("title"); // Bug
// The head is converted from wiki markup to plain text (the condition around
// this line is not visible).
376 head = WikiTokenizer.toPlainText(head);
// Sets the parser's titleAppended flag.
378 parser.titleAppended = true;
380 namedArgs.keySet().removeAll(EnWiktionaryXmlParser.USELESS_WIKI_ARGS);
// Pull the specially-handled named args out of the map so that the final
// loop over namedArgs only sees the rest.
382 final String tr = namedArgs.remove("tr");
383 String g = namedArgs.remove("g");
// "gender" is another source for g (presumably a fallback when "g" is absent).
385 g = namedArgs.remove("gender");
387 final String g2 = namedArgs.remove("g2");
388 final String g3 = namedArgs.remove("g3");
// The head is dispatched with entry type WIKTIONARY_TITLE_MULTI.
390 appendAndIndexWikiCallback.dispatch(head, EntryTypeName.WIKTIONARY_TITLE_MULTI);
// Gender group, written as " {g|g2|g3}". The null checks around g, g2 and g3
// are not visible in this extract.
393 appendAndIndexWikiCallback.builder.append(" {").append(g);
395 appendAndIndexWikiCallback.builder.append("|").append(g2);
398 appendAndIndexWikiCallback.builder.append("|").append(g3);
400 appendAndIndexWikiCallback.builder.append("}");
// Transliteration, written as " (tr. ...)" and recorded as a word form.
// The guard on tr is not visible in this extract.
404 appendAndIndexWikiCallback.builder.append(" (tr. ");
405 appendAndIndexWikiCallback.dispatch(tr, EntryTypeName.WIKTIONARY_TITLE_MULTI);
406 appendAndIndexWikiCallback.builder.append(")");
407 parser.wordForms.add(tr);
// Positional arg 1 is the part of speech, written as " (pos)". The guard on
// pos is not visible in this extract.
410 final String pos = ListUtil.get(args, 1);
412 appendAndIndexWikiCallback.builder.append(" (").append(pos).append(")");
// From index 2 on, positional args are read in pairs (name, value). Each pair
// is written as ", name" and, when the value is non-empty, ": value".
// Non-empty values are recorded as word forms.
414 for (int i = 2; i < args.size(); i += 2) {
415 final String inflName = ListUtil.get(args, i);
416 final String inflValue = ListUtil.get(args, i + 1);
417 appendAndIndexWikiCallback.builder.append(", ");
418 appendAndIndexWikiCallback.dispatch(inflName, null, null);
419 if (inflValue != null && inflValue.length() > 0) {
420 appendAndIndexWikiCallback.builder.append(": ");
421 appendAndIndexWikiCallback.dispatch(inflValue, null, null);
422 parser.wordForms.add(inflValue);
// Remaining named args are written as " key=value". The value is converted
// to plain text first and recorded as a word form.
425 for (final String key : namedArgs.keySet()) {
426 final String value = WikiTokenizer.toPlainText(namedArgs.get(key));
427 appendAndIndexWikiCallback.builder.append(" ");
428 appendAndIndexWikiCallback.dispatch(key, null, null);
429 appendAndIndexWikiCallback.builder.append("=");
430 appendAndIndexWikiCallback.dispatch(value, null, null);
431 parser.wordForms.add(value);
// Registers the it_noun callback under "it-noun". The enclosing static block
// is not visible in this extract.
439 DEFAULT.put("it-noun", new it_noun());
// Callback for "it-noun". Positional args are read as: 0 = base, 1 = gender,
// 2 = singular suffix, 3 = plural suffix. It writes
// " <singular> {gender}, <plural> {pl}" and records both forms in
// parser.wordForms. Closing braces and the return statement are not visible
// in this extract.
441 static final class it_noun implements FunctionCallback {
443 public boolean onWikiFunction(final WikiTokenizer wikiTokenizer, final String name, final List<String> args,
444 final Map<String, String> namedArgs,
445 final EnWiktionaryXmlParser parser,
446 final AppendAndIndexWikiCallback appendAndIndexWikiCallback) {
// Sets the parser's titleAppended flag.
447 parser.titleAppended = true;
448 final String base = ListUtil.get(args, 0);
449 final String gender = ListUtil.get(args, 1);
// NOTE(review): if ListUtil.get returns the supplied default (null) for a
// missing index, string concatenation yields text ending in "null" (and
// "nullnull" when base is missing too). Confirm, and consider "" as default.
450 final String singular = base + ListUtil.get(args, 2, null);
451 final String plural = base + ListUtil.get(args, 3, null);
452 appendAndIndexWikiCallback.builder.append(" ");
453 appendAndIndexWikiCallback.dispatch(singular, null, null);
// gender is appended without a null check.
454 appendAndIndexWikiCallback.builder.append(" {").append(gender).append("}, ");
455 appendAndIndexWikiCallback.dispatch(plural, null, null);
456 appendAndIndexWikiCallback.builder.append(" {pl}");
457 parser.wordForms.add(singular);
458 parser.wordForms.add(plural);
// Validation happens after the output is written: named args or more than
// four positional args only produce a warning.
459 if (!namedArgs.isEmpty() || args.size() > 4) {
460 LOG.warning("Invalid it-noun: " + wikiTokenizer.token());
// Registers the it_proper_noun callback under "it-proper noun". The enclosing
// static block is not visible in this extract.
467 DEFAULT.put("it-proper noun", new it_proper_noun());
469 static final class it_proper_noun implements FunctionCallback {
471 public boolean onWikiFunction(final WikiTokenizer wikiTokenizer, final String name, final List<String> args,
472 final Map<String, String> namedArgs,
473 final EnWiktionaryXmlParser parser,
474 final AppendAndIndexWikiCallback appendAndIndexWikiCallback) {