1 // Copyright 2012 Google Inc. All Rights Reserved.
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
7 // http://www.apache.org/licenses/LICENSE-2.0
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
15 package com.hughes.android.dictionary.parser.enwiktionary;
17 import java.util.Arrays;
18 import java.util.LinkedHashMap;
19 import java.util.LinkedHashSet;
20 import java.util.List;
23 import java.util.concurrent.atomic.AtomicInteger;
24 import java.util.logging.Logger;
26 import com.hughes.android.dictionary.engine.EntryTypeName;
27 import com.hughes.android.dictionary.engine.IndexBuilder;
28 import com.hughes.android.dictionary.parser.WikiTokenizer;
29 import com.hughes.util.ListUtil;
31 public final class FunctionCallbacksDefault {
// Logger used by the callbacks below; note it is obtained under EnWiktionaryXmlParser's class name.
33 static final Logger LOG = Logger.getLogger(EnWiktionaryXmlParser.class.getName());
// Registry of callbacks keyed by wiki function name string; a LinkedHashMap, so insertion order is kept.
35 static final Map<String,FunctionCallback> DEFAULT = new LinkedHashMap<String, FunctionCallback>();
// Populates DEFAULT: each group below creates one callback and registers it under one or more names.
// All of the following names share a single TranslationCallback instance.
38 FunctionCallback callback = new TranslationCallback();
39 DEFAULT.put("t", callback);
40 DEFAULT.put("t+", callback);
41 DEFAULT.put("t-", callback);
42 DEFAULT.put("tø", callback);
43 DEFAULT.put("apdx-t", callback);
// Every name in the encodings set is registered with one shared EncodingCallback.
45 callback = new EncodingCallback();
46 Set<String> encodings = new LinkedHashSet<String>(Arrays.asList(
48 "sd-Arab", "ku-Arab", "Arab", "unicode", "Laoo", "ur-Arab", "Thai",
49 "fa-Arab", "Khmr", "Cyrl", "IPAchar", "ug-Arab", "ko-inline",
50 "Jpan", "Kore", "Hebr", "rfscript", "Beng", "Mong", "Knda", "Cyrs",
51 "yue-tsj", "Mlym", "Tfng", "Grek", "yue-yue-j"));
52 for (final String encoding : encodings) {
53 DEFAULT.put(encoding, callback);
// "l" and "term" share one l_term callback.
56 callback = new l_term();
57 DEFAULT.put("l", callback);
58 DEFAULT.put("term", callback);
// Single-letter names handled by the Gender callback.
60 callback = new Gender();
61 DEFAULT.put("m", callback);
62 DEFAULT.put("f", callback);
63 DEFAULT.put("n", callback);
64 DEFAULT.put("p", callback);
65 DEFAULT.put("g", callback);
// NOTE(review): this AppendArg0 instance is replaced by the next assignment without any
// visible registration under a name - confirm whether a DEFAULT.put is missing here.
67 callback = new AppendArg0();
// Names registered with the Ignore callback.
69 callback = new Ignore();
70 DEFAULT.put("trreq", callback);
71 DEFAULT.put("t-image", callback);
72 DEFAULT.put("defn", callback);
73 DEFAULT.put("rfdef", callback);
74 DEFAULT.put("rfdate", callback);
75 DEFAULT.put("rfex", callback);
76 DEFAULT.put("rfquote", callback);
77 DEFAULT.put("attention", callback);
78 DEFAULT.put("zh-attention", callback);
// All "... of" names share one FormOf callback; FormOf special-cases the generic "form of" name.
81 callback = new FormOf();
82 DEFAULT.put("form of", callback);
83 DEFAULT.put("conjugation of", callback);
84 DEFAULT.put("participle of", callback);
85 DEFAULT.put("present participle of", callback);
86 DEFAULT.put("past participle of", callback);
87 DEFAULT.put("feminine past participle of", callback);
88 DEFAULT.put("gerund of", callback);
89 DEFAULT.put("feminine of", callback);
90 DEFAULT.put("plural of", callback);
91 DEFAULT.put("feminine plural of", callback);
92 DEFAULT.put("inflected form of", callback);
93 DEFAULT.put("alternative form of", callback);
94 DEFAULT.put("dated form of", callback);
95 DEFAULT.put("apocopic form of", callback);
// "infl" and "head" share one InflOrHead callback.
97 callback = new InflOrHead();
98 DEFAULT.put("infl", callback);
99 DEFAULT.put("head", callback);
// "..." is handled by AppendName, which writes the function name itself to the builder.
101 callback = new AppendName();
102 DEFAULT.put("...", callback);
// Names that each get their own dedicated callback instance.
104 DEFAULT.put("qualifier", new QualifierCallback());
105 DEFAULT.put("italbrac", new italbrac());
106 DEFAULT.put("gloss", new gloss());
107 DEFAULT.put("not used", new not_used());
108 DEFAULT.put("wikipedia", new wikipedia());
/**
 * Callback that writes the function name followed by its arguments to the output builder.
 * Each non-empty positional argument is preceded by "|" and dispatched with a null index
 * builder and null entry type; the named arguments are then written by appendNamedArgs.
 */
112 static final class NameAndArgs implements FunctionCallback {
114 public boolean onWikiFunction(final WikiTokenizer wikiTokenizer, final String name, final List<String> args,
115 final Map<String, String> namedArgs, final EnWiktionaryXmlParser parser,
116 final AppendAndIndexWikiCallback appendAndIndexWikiCallback) {
118 appendAndIndexWikiCallback.builder.append(name);
119 for (int i = 0; i < args.size(); ++i) {
// Empty positional arguments are skipped entirely (no "|" is written for them).
120 if (args.get(i).length() > 0) {
121 appendAndIndexWikiCallback.builder.append("|");
122 appendAndIndexWikiCallback.dispatch(args.get(i), null, null);
125 appendNamedArgs(namedArgs, appendAndIndexWikiCallback);
129 static NameAndArgs NAME_AND_ARGS = new NameAndArgs();
/**
 * Appends every named argument to the builder in the form "|key=value".
 * Keys and values are both dispatched with a null index builder and a null entry type,
 * because the transliteration special case below is commented out.
 *
 * @param namedArgs the named arguments to write, in the map's iteration order
 * @param appendAndIndexWikiCallback the callback whose builder and dispatch are used for output
 */
131 private static void appendNamedArgs(final Map<String, String> namedArgs,
132 final AppendAndIndexWikiCallback appendAndIndexWikiCallback) {
133 for (final Map.Entry<String, String> entry : namedArgs.entrySet()) {
134 appendAndIndexWikiCallback.builder.append("|");
135 appendAndIndexWikiCallback.dispatch(entry.getKey(), null, null);
136 appendAndIndexWikiCallback.builder.append("=");
// Both stay null in the visible code; the only code that would set them is disabled below.
137 EntryTypeName entryTypeName = null;
138 IndexBuilder indexBuilder = null;
139 // This doesn't work: we'd need to add to word-forms.
140 // System.out.println(entry.getKey());
141 // if (entry.getKey().equals("tr")) {
142 // entryTypeName = EntryTypeName.WIKTIONARY_TRANSLITERATION;
143 // indexBuilder = appendAndIndexWikiCallback.parser.foreignIndexBuilder;
145 appendAndIndexWikiCallback.dispatch(entry.getValue(), indexBuilder, entryTypeName);
149 // ------------------------------------------------------------------
/**
 * Callback for translation functions (registered in DEFAULT under "t", "t+", "t-", ...).
 * Reads the language code, word and gender from positional arguments 0, 1 and 2, counts the
 * call per language code, dispatches the word as WIKTIONARY_TITLE_MULTI, then appends
 * " {gender}" and " (transliteration)" when those values are present.
 * Mutates namedArgs: "tr" and all USELESS_WIKI_ARGS keys are removed.
 */
151 static final class TranslationCallback implements FunctionCallback {
153 public boolean onWikiFunction(final WikiTokenizer wikiTokenizer, final String name, final List<String> args,
154 final Map<String, String> namedArgs, final EnWiktionaryXmlParser parser,
155 final AppendAndIndexWikiCallback appendAndIndexWikiCallback) {
// remove() both reads the transliteration and takes it out of namedArgs.
157 final String transliteration = namedArgs.remove("tr");
158 namedArgs.keySet().removeAll(EnWiktionaryXmlParser.USELESS_WIKI_ARGS);
// At least a language code and a word are expected.
159 if (args.size() < 2) {
160 LOG.warning("{{t...}} with wrong args: title=" + parser.title);
163 final String langCode = ListUtil.get(args, 0);
// Per-language counter of translation calls, created on first use.
164 if (!appendAndIndexWikiCallback.langCodeToTCount.containsKey(langCode)) {
165 appendAndIndexWikiCallback.langCodeToTCount.put(langCode, new AtomicInteger());
167 appendAndIndexWikiCallback.langCodeToTCount.get(langCode).incrementAndGet();
168 final String word = ListUtil.get(args, 1);
169 final String gender = ListUtil.get(args, 2);
170 // TODO: deal with second (and third...) gender, and alt.
172 appendAndIndexWikiCallback.dispatch(word, EntryTypeName.WIKTIONARY_TITLE_MULTI);
174 if (gender != null) {
175 appendAndIndexWikiCallback.builder.append(String.format(" {%s}", gender));
177 if (transliteration != null) {
178 appendAndIndexWikiCallback.builder.append(" (");
179 appendAndIndexWikiCallback.dispatch(transliteration, EntryTypeName.WIKTIONARY_TRANSLITERATION);
180 appendAndIndexWikiCallback.builder.append(")");
187 // ------------------------------------------------------------------
/**
 * Callback registered for "qualifier": writes its single positional argument wrapped in
 * parentheses, dispatching the text with a null entry type.
 */
189 static final class QualifierCallback implements FunctionCallback {
191 public boolean onWikiFunction(final WikiTokenizer wikiTokenizer, final String name, final List<String> args,
192 final Map<String, String> namedArgs,
193 final EnWiktionaryXmlParser parser,
194 final AppendAndIndexWikiCallback appendAndIndexWikiCallback) {
// Exactly one positional argument and no named arguments are expected.
195 if (args.size() != 1 || !namedArgs.isEmpty()) {
// NOTE(review): the message ends after the colon with no token or title appended - confirm intent.
196 LOG.warning("weird qualifier: ");
199 String qualifier = args.get(0);
200 appendAndIndexWikiCallback.builder.append("(");
201 appendAndIndexWikiCallback.dispatch(qualifier, null);
202 appendAndIndexWikiCallback.builder.append(")");
207 // ------------------------------------------------------------------
/**
 * Callback shared by all names in the encodings set (e.g. "Arab", "Cyrl", "Jpan").
 * Dispatches each positional argument using the entry type currently set on the
 * AppendAndIndexWikiCallback; named arguments are unexpected and only logged.
 */
209 static final class EncodingCallback implements FunctionCallback {
211 public boolean onWikiFunction(final WikiTokenizer wikiTokenizer, final String name, final List<String> args,
212 final Map<String, String> namedArgs,
213 final EnWiktionaryXmlParser parser,
214 final AppendAndIndexWikiCallback appendAndIndexWikiCallback) {
215 if (!namedArgs.isEmpty()) {
216 LOG.warning("weird encoding: " + wikiTokenizer.token());
218 if (args.size() == 0) {
219 // Things like "{{Jpan}}" exist.
223 for (int i = 0; i < args.size(); ++i) {
// ", " is written in this loop, presumably as a separator between arguments - confirm the guard.
225 appendAndIndexWikiCallback.builder.append(", ");
227 final String arg = args.get(i);
228 // if (arg.equals(parser.title)) {
229 // parser.titleAppended = true;
231 appendAndIndexWikiCallback.dispatch(arg, appendAndIndexWikiCallback.entryTypeName);
238 // ------------------------------------------------------------------
/**
 * Callback registered for "m", "f", "n", "p" and "g": writes the function name and its
 * positional arguments to the builder as "{name|arg0|arg1...}". Arguments are appended
 * verbatim (not dispatched), so nothing is indexed by the visible code.
 */
240 static final class Gender implements FunctionCallback {
242 public boolean onWikiFunction(final WikiTokenizer wikiTokenizer, final String name, final List<String> args,
243 final Map<String, String> namedArgs,
244 final EnWiktionaryXmlParser parser,
245 final AppendAndIndexWikiCallback appendAndIndexWikiCallback) {
// Named arguments get separate handling; NOTE(review): that branch body is not shown here - confirm.
246 if (!namedArgs.isEmpty()) {
249 appendAndIndexWikiCallback.builder.append("{");
250 appendAndIndexWikiCallback.builder.append(name);
251 for (int i = 0; i < args.size(); ++i) {
252 appendAndIndexWikiCallback.builder.append("|").append(args.get(i));
254 appendAndIndexWikiCallback.builder.append("}");
259 // ------------------------------------------------------------------
/**
 * Callback shared by "l" and "term". Picks an entry type from the parser state and an index
 * builder from the language code in args[0], dispatches the display text (args[2], falling
 * back to args[1]), then appends the transliteration ("tr"), the gloss (args[3]) and any
 * remaining named arguments. Mutates namedArgs: "tr" and USELESS_WIKI_ARGS keys are removed.
 */
261 static final class l_term implements FunctionCallback {
263 public boolean onWikiFunction(final WikiTokenizer wikiTokenizer, final String name, final List<String> args,
264 final Map<String, String> namedArgs,
265 final EnWiktionaryXmlParser parser,
266 final AppendAndIndexWikiCallback appendAndIndexWikiCallback) {
268 // for {{l}}, lang is arg 0, but not for {{term}}
// NOTE(review): the adjustment made for "term" is not visible here - confirm how args is aligned.
269 if (name.equals("term")) {
// Only the two parser states below are supported; any other state throws IllegalStateException.
273 final EntryTypeName entryTypeName;
274 switch (parser.state) {
275 case TRANSLATION_LINE: entryTypeName = EntryTypeName.WIKTIONARY_TRANSLATION_OTHER_TEXT; break;
276 case ENGLISH_DEF_OF_FOREIGN: entryTypeName = EntryTypeName.WIKTIONARY_ENGLISH_DEF_WIKI_LINK; break;
277 default: throw new IllegalStateException("Invalid enum value: " + parser.state);
// "en" selects the English index builder; an empty code and the remaining visible branch use the foreign one.
280 final String langCode = args.get(0);
281 final IndexBuilder indexBuilder;
282 if ("".equals(langCode)) {
283 indexBuilder = parser.foreignIndexBuilder;
284 } else if ("en".equals(langCode)) {
285 indexBuilder = parser.enIndexBuilder;
287 indexBuilder = parser.foreignIndexBuilder;
// Display text: args[2] when non-empty, otherwise args[1] (null when absent).
290 String displayText = ListUtil.get(args, 2, "");
291 if (displayText.equals("")) {
292 displayText = ListUtil.get(args, 1, null);
295 if (displayText != null) {
296 appendAndIndexWikiCallback.dispatch(displayText, indexBuilder, entryTypeName);
298 LOG.warning("no display text: " + wikiTokenizer.token());
// Transliteration is written in parentheses; presumably only when "tr" was supplied - confirm the guard.
301 final String tr = namedArgs.remove("tr");
303 appendAndIndexWikiCallback.builder.append(" (");
304 appendAndIndexWikiCallback.dispatch(tr, indexBuilder, EntryTypeName.WIKTIONARY_TRANSLITERATION);
305 appendAndIndexWikiCallback.builder.append(")");
// The gloss is always indexed against the English index builder.
308 final String gloss = ListUtil.get(args, 3, "");
309 if (!gloss.equals("")) {
310 appendAndIndexWikiCallback.builder.append(" (");
311 appendAndIndexWikiCallback.dispatch(gloss, parser.enIndexBuilder, EntryTypeName.WIKTIONARY_ENGLISH_DEF);
312 appendAndIndexWikiCallback.builder.append(")");
// Whatever named arguments remain are echoed as " {name|key=value...}".
315 namedArgs.keySet().removeAll(EnWiktionaryXmlParser.USELESS_WIKI_ARGS);
316 if (!namedArgs.isEmpty()) {
317 appendAndIndexWikiCallback.builder.append(" {").append(name);
318 appendNamedArgs(namedArgs, appendAndIndexWikiCallback);
319 appendAndIndexWikiCallback.builder.append("}");
326 // ------------------------------------------------------------------
/**
 * Callback that dispatches its first positional argument as WIKTIONARY_TRANSLATION_OTHER_TEXT
 * without adding any surrounding punctuation.
 */
328 static final class AppendArg0 implements FunctionCallback {
330 public boolean onWikiFunction(final WikiTokenizer wikiTokenizer, final String name, final List<String> args,
331 final Map<String, String> namedArgs,
332 final EnWiktionaryXmlParser parser,
333 final AppendAndIndexWikiCallback appendAndIndexWikiCallback) {
// Calls with other than one positional argument, or with named arguments, take a separate branch.
334 if (args.size() != 1 || !namedArgs.isEmpty()) {
337 appendAndIndexWikiCallback.dispatch(args.get(0), EntryTypeName.WIKTIONARY_TRANSLATION_OTHER_TEXT);
338 // TODO: transliteration
343 // ------------------------------------------------------------------
/**
 * Callback registered for "italbrac": writes its first positional argument in parentheses,
 * dispatching the text as WIKTIONARY_TRANSLATION_OTHER_TEXT.
 */
345 static final class italbrac implements FunctionCallback {
347 public boolean onWikiFunction(final WikiTokenizer wikiTokenizer, final String name, final List<String> args,
348 final Map<String, String> namedArgs,
349 final EnWiktionaryXmlParser parser,
350 final AppendAndIndexWikiCallback appendAndIndexWikiCallback) {
// Calls with other than one positional argument, or with named arguments, take a separate branch.
351 if (args.size() != 1 || !namedArgs.isEmpty()) {
354 appendAndIndexWikiCallback.builder.append("(");
355 appendAndIndexWikiCallback.dispatch(args.get(0), EntryTypeName.WIKTIONARY_TRANSLATION_OTHER_TEXT);
356 appendAndIndexWikiCallback.builder.append(")");
361 // ------------------------------------------------------------------
/**
 * Callback registered for "gloss": writes its first positional argument in parentheses,
 * dispatching the text as WIKTIONARY_TRANSLATION_OTHER_TEXT. The visible statements are the
 * same as those of the italbrac callback.
 */
363 static final class gloss implements FunctionCallback {
365 public boolean onWikiFunction(final WikiTokenizer wikiTokenizer, final String name, final List<String> args,
366 final Map<String, String> namedArgs,
367 final EnWiktionaryXmlParser parser,
368 final AppendAndIndexWikiCallback appendAndIndexWikiCallback) {
// Calls with other than one positional argument, or with named arguments, take a separate branch.
369 if (args.size() != 1 || !namedArgs.isEmpty()) {
372 appendAndIndexWikiCallback.builder.append("(");
373 appendAndIndexWikiCallback.dispatch(args.get(0), EntryTypeName.WIKTIONARY_TRANSLATION_OTHER_TEXT);
374 appendAndIndexWikiCallback.builder.append(")");
379 // ------------------------------------------------------------------
/**
 * Callback registered in DEFAULT for names such as "trreq", "rfdef" and "attention".
 * No statement that writes to the builder or the index is visible in its body; presumably
 * it just consumes the function call without producing output - confirm.
 */
381 static final class Ignore implements FunctionCallback {
383 public boolean onWikiFunction(final WikiTokenizer wikiTokenizer, final String name, final List<String> args,
384 final Map<String, String> namedArgs,
385 final EnWiktionaryXmlParser parser,
386 final AppendAndIndexWikiCallback appendAndIndexWikiCallback) {
391 // ------------------------------------------------------------------
/**
 * Callback registered for "not used": appends the fixed text "(not used)" to the builder,
 * regardless of the arguments passed.
 */
393 static final class not_used implements FunctionCallback {
395 public boolean onWikiFunction(final WikiTokenizer wikiTokenizer, final String name, final List<String> args,
396 final Map<String, String> namedArgs,
397 final EnWiktionaryXmlParser parser,
398 final AppendAndIndexWikiCallback appendAndIndexWikiCallback) {
399 appendAndIndexWikiCallback.builder.append("(not used)");
405 // ------------------------------------------------------------------
/**
 * Callback registered for "...": appends the function name itself to the builder.
 */
407 static final class AppendName implements FunctionCallback {
409 public boolean onWikiFunction(final WikiTokenizer wikiTokenizer, final String name, final List<String> args,
410 final Map<String, String> namedArgs,
411 final EnWiktionaryXmlParser parser,
412 final AppendAndIndexWikiCallback appendAndIndexWikiCallback) {
// Calls that carry any positional or named argument take a separate branch.
413 if (!args.isEmpty() || !namedArgs.isEmpty()) {
416 appendAndIndexWikiCallback.builder.append(name);
421 // --------------------------------------------------------------------
422 // --------------------------------------------------------------------
/**
 * Callback shared by the "... of" names ("plural of", "form of", ...). Marks the entry as a
 * form of another word, writes "{formName|args...}" through NAME_AND_ARGS, and adds the base
 * form to the foreign index as WIKTIONARY_BASE_FORM_MULTI when an indexed entry is available.
 * Mutates args (consumed positional arguments are removed) and namedArgs (USELESS_WIKI_ARGS
 * keys are removed).
 */
425 static final class FormOf implements FunctionCallback {
427 public boolean onWikiFunction(final WikiTokenizer wikiTokenizer, final String name, final List<String> args,
428 final Map<String, String> namedArgs,
429 final EnWiktionaryXmlParser parser,
430 final AppendAndIndexWikiCallback appendAndIndexWikiCallback) {
431 parser.entryIsFormOfSomething = true;
// For the generic "form of" name the actual form name is taken (and removed) from args[0].
432 String formName = name;
433 if (name.equals("form of")) {
434 formName = ListUtil.remove(args, 0, null);
436 if (formName == null) {
437 LOG.warning("Missing form name: " + parser.title);
438 formName = "form of";
// Base form: args[1] when non-empty, otherwise args[0].
// NOTE(review): confirm which of the two remove calls below belongs to which branch.
440 String baseForm = ListUtil.get(args, 1, "");
441 if ("".equals(baseForm)) {
442 baseForm = ListUtil.get(args, 0, null);
443 ListUtil.remove(args, 1, "");
445 ListUtil.remove(args, 0, null);
447 namedArgs.keySet().removeAll(EnWiktionaryXmlParser.USELESS_WIKI_ARGS);
// The remaining arguments are echoed inside braces via the shared NameAndArgs instance.
449 appendAndIndexWikiCallback.builder.append("{");
450 NAME_AND_ARGS.onWikiFunction(wikiTokenizer, formName, args, namedArgs, parser, appendAndIndexWikiCallback);
451 appendAndIndexWikiCallback.builder.append("}");
// The base form is indexed as plain text, only when both it and the current indexed entry are non-null.
452 if (baseForm != null && appendAndIndexWikiCallback.indexedEntry != null) {
453 parser.foreignIndexBuilder.addEntryWithString(appendAndIndexWikiCallback.indexedEntry, baseForm, EntryTypeName.WIKTIONARY_BASE_FORM_MULTI);
455 // null baseForm happens in Danish.
456 LOG.warning("Null baseform: " + parser.title);
// Shared FormOf instance; separate from the FormOf object that the DEFAULT registrations create.
462 static final FormOf FORM_OF = new FormOf();
465 // --------------------------------------------------------------------
466 // --------------------------------------------------------------------
/**
 * Callback registered for "wikipedia". Drops the "lang" named argument and then branches on
 * the number of positional arguments.
 * NOTE(review): the bodies of both branches are not shown here - confirm what each one emits.
 */
468 static final class wikipedia implements FunctionCallback {
470 public boolean onWikiFunction(final WikiTokenizer wikiTokenizer, final String name, final List<String> args,
471 final Map<String, String> namedArgs,
472 final EnWiktionaryXmlParser parser,
473 final AppendAndIndexWikiCallback appendAndIndexWikiCallback) {
// "lang" is discarded before checking for unexpected named arguments.
474 namedArgs.remove("lang");
// First branch: more than one positional argument, or leftover named arguments.
475 if (args.size() > 1 || !namedArgs.isEmpty()) {
// Second branch: exactly one positional argument.
478 } else if (args.size() == 1) {
/**
 * Callback shared by "infl" and "head" (headword lines). Writes the head word, gender(s),
 * transliteration, part of speech, positional inflection pairs and any remaining named
 * arguments to the builder, and records transliteration and inflection values in
 * parser.wordForms. Mutates namedArgs: every key it consumes is removed.
 */
486 static final class InflOrHead implements FunctionCallback {
488 public boolean onWikiFunction(final WikiTokenizer wikiTokenizer, final String name, final List<String> args,
489 final Map<String, String> namedArgs,
490 final EnWiktionaryXmlParser parser,
491 final AppendAndIndexWikiCallback appendAndIndexWikiCallback) {
492 // See: http://en.wiktionary.org/wiki/Template:infl
// langCode is read from args[0] but is not referenced again in the visible code.
493 final String langCode = ListUtil.get(args, 0);
// The head word comes from "head", with "title" also consulted (presumably as a fallback - confirm).
494 String head = namedArgs.remove("head");
496 head = namedArgs.remove("title"); // Bug
501 parser.titleAppended = true;
503 namedArgs.keySet().removeAll(EnWiktionaryXmlParser.USELESS_WIKI_ARGS);
// Gender may be given as "g" or "gender", plus optional "g2" and "g3".
505 final String tr = namedArgs.remove("tr");
506 String g = namedArgs.remove("g");
508 g = namedArgs.remove("gender");
510 final String g2 = namedArgs.remove("g2");
511 final String g3 = namedArgs.remove("g3");
513 appendAndIndexWikiCallback.dispatch(head, EntryTypeName.WIKTIONARY_TITLE_MULTI);
// Genders are written as " {g|g2|g3}"; NOTE(review): the null guards around these appends are not shown - confirm.
516 appendAndIndexWikiCallback.builder.append(" {").append(g);
518 appendAndIndexWikiCallback.builder.append("|").append(g2);
521 appendAndIndexWikiCallback.builder.append("|").append(g3);
523 appendAndIndexWikiCallback.builder.append("}");
// NOTE(review): tr is dispatched as WIKTIONARY_TITLE_MULTI here, whereas TranslationCallback
// uses WIKTIONARY_TRANSLITERATION - confirm this is intended.
527 appendAndIndexWikiCallback.builder.append(" (");
528 appendAndIndexWikiCallback.dispatch(tr, EntryTypeName.WIKTIONARY_TITLE_MULTI);
529 appendAndIndexWikiCallback.builder.append(")");
530 parser.wordForms.add(tr);
// args[1] is written in parentheses after the head word.
533 final String pos = ListUtil.get(args, 1);
535 appendAndIndexWikiCallback.builder.append(" (").append(pos).append(")");
// Positional arguments from index 2 onward are consumed in (name, value) pairs.
537 for (int i = 2; i < args.size(); i += 2) {
538 final String inflName = ListUtil.get(args, i);
539 final String inflValue = ListUtil.get(args, i + 1);
540 appendAndIndexWikiCallback.builder.append(", ");
541 appendAndIndexWikiCallback.dispatch(inflName, null, null);
542 if (inflValue != null && inflValue.length() > 0) {
543 appendAndIndexWikiCallback.builder.append(": ");
544 appendAndIndexWikiCallback.dispatch(inflValue, null, null);
545 parser.wordForms.add(inflValue);
// Remaining named arguments are written as " key=value", with the value reduced to plain text first.
548 for (final String key : namedArgs.keySet()) {
549 final String value = WikiTokenizer.toPlainText(namedArgs.get(key));
550 appendAndIndexWikiCallback.builder.append(" ");
551 appendAndIndexWikiCallback.dispatch(key, null, null);
552 appendAndIndexWikiCallback.builder.append("=");
553 appendAndIndexWikiCallback.dispatch(value, null, null);
554 parser.wordForms.add(value);
// Registers the it_noun callback under the name "it-noun".
562 DEFAULT.put("it-noun", new it_noun());
564 static final class it_noun implements FunctionCallback {
566 public boolean onWikiFunction(final WikiTokenizer wikiTokenizer, final String name, final List<String> args,
567 final Map<String, String> namedArgs,
568 final EnWiktionaryXmlParser parser,
569 final AppendAndIndexWikiCallback appendAndIndexWikiCallback) {
570 parser.titleAppended = true;
571 final String base = ListUtil.get(args, 0);
572 final String gender = ListUtil.get(args, 1);
573 final String singular = base + ListUtil.get(args, 2, null);
574 final String plural = base + ListUtil.get(args, 3, null);
575 appendAndIndexWikiCallback.builder.append(" ");
576 appendAndIndexWikiCallback.dispatch(singular, null, null);
577 appendAndIndexWikiCallback.builder.append(" {").append(gender).append("}, ");
578 appendAndIndexWikiCallback.dispatch(plural, null, null);
579 appendAndIndexWikiCallback.builder.append(" {pl}");
580 parser.wordForms.add(singular);
581 parser.wordForms.add(plural);
582 if (!namedArgs.isEmpty() || args.size() > 4) {
583 LOG.warning("Invalid it-noun: " + wikiTokenizer.token());
// Registers the it_proper_noun callback under the name "it-proper noun".
590 DEFAULT.put("it-proper noun", new it_proper_noun());
592 static final class it_proper_noun implements FunctionCallback {
594 public boolean onWikiFunction(final WikiTokenizer wikiTokenizer, final String name, final List<String> args,
595 final Map<String, String> namedArgs,
596 final EnWiktionaryXmlParser parser,
597 final AppendAndIndexWikiCallback appendAndIndexWikiCallback) {