1 // Copyright 2012 Google Inc. All Rights Reserved.
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
7 // http://www.apache.org/licenses/LICENSE-2.0
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
15 package com.hughes.android.dictionary.parser.enwiktionary;
17 import java.util.Arrays;
18 import java.util.LinkedHashMap;
19 import java.util.LinkedHashSet;
20 import java.util.List;
23 import java.util.logging.Logger;
25 import com.hughes.android.dictionary.engine.EntryTypeName;
26 import com.hughes.android.dictionary.engine.IndexBuilder;
27 import com.hughes.android.dictionary.parser.WikiTokenizer;
28 import com.hughes.util.ListUtil;
30 public final class FunctionCallbacksDefault {
// Shared logger. Note that it is obtained under EnWiktionaryXmlParser's class name,
// not under FunctionCallbacksDefault's.
32 static final Logger LOG = Logger.getLogger(EnWiktionaryXmlParser.class.getName());
// Registry mapping a wiki template (function) name to the callback that handles it.
// Backed by a LinkedHashMap, so iteration follows insertion order.
34 static final Map<String,FunctionCallback> DEFAULT = new LinkedHashMap<String, FunctionCallback>();
// Translation templates: every variant below shares one TranslationCallback instance.
37 FunctionCallback callback = new TranslationCallback();
38 DEFAULT.put("t", callback);
39 DEFAULT.put("t+", callback);
40 DEFAULT.put("t-", callback);
41 DEFAULT.put("tø", callback);
42 DEFAULT.put("apdx-t", callback);
// The template names listed below all share one EncodingCallback instance.
// The LinkedHashSet drops duplicate names while keeping the listed order.
44 callback = new EncodingCallback();
45 Set<String> encodings = new LinkedHashSet<String>(Arrays.asList(
47 "sd-Arab", "ku-Arab", "Arab", "unicode", "Laoo", "ur-Arab", "Thai",
48 "fa-Arab", "Khmr", "Cyrl", "IPAchar", "ug-Arab", "ko-inline",
49 "Jpan", "Kore", "Hebr", "rfscript", "Beng", "Mong", "Knda", "Cyrs",
50 "yue-tsj", "Mlym", "Tfng", "Grek", "yue-yue-j"));
51 for (final String encoding : encodings) {
52 DEFAULT.put(encoding, callback);
// {{l}} and {{term}} share one l_term instance.
55 callback = new l_term();
56 DEFAULT.put("l", callback);
57 DEFAULT.put("term", callback);
// Single-letter templates handled by the Gender callback.
59 callback = new Gender();
60 DEFAULT.put("m", callback);
61 DEFAULT.put("f", callback);
62 DEFAULT.put("n", callback);
63 DEFAULT.put("p", callback);
64 DEFAULT.put("g", callback);
// NOTE(review): no DEFAULT.put for this AppendArg0 instance is visible before
// 'callback' is reassigned on the next line, so it appears to be unused — confirm.
66 callback = new AppendArg0();
// Templates handled by the Ignore callback.
68 callback = new Ignore();
69 DEFAULT.put("trreq", callback);
70 DEFAULT.put("t-image", callback);
71 DEFAULT.put("defn", callback);
72 DEFAULT.put("rfdef", callback);
73 DEFAULT.put("rfdate", callback);
74 DEFAULT.put("rfex", callback);
75 DEFAULT.put("rfquote", callback);
76 DEFAULT.put("attention", callback);
77 DEFAULT.put("zh-attention", callback);
// "... of" templates: all share one FormOf instance.
80 callback = new FormOf();
81 DEFAULT.put("form of", callback);
82 DEFAULT.put("conjugation of", callback);
83 DEFAULT.put("participle of", callback);
84 DEFAULT.put("present participle of", callback);
85 DEFAULT.put("past participle of", callback);
86 DEFAULT.put("feminine past participle of", callback);
87 DEFAULT.put("gerund of", callback);
88 DEFAULT.put("feminine of", callback);
89 DEFAULT.put("plural of", callback);
90 DEFAULT.put("feminine plural of", callback);
91 DEFAULT.put("inflected form of", callback);
92 DEFAULT.put("alternative form of", callback);
93 DEFAULT.put("dated form of", callback);
94 DEFAULT.put("apocopic form of", callback);
// Headword-line templates {{infl}} and {{head}} share one InflOrHead instance.
96 callback = new InflOrHead();
97 DEFAULT.put("infl", callback);
98 DEFAULT.put("head", callback);
// The template literally named "..." is handled by AppendName.
100 callback = new AppendName();
101 DEFAULT.put("...", callback);
// Templates that each get their own dedicated callback instance.
103 DEFAULT.put("qualifier", new QualifierCallback());
104 DEFAULT.put("italbrac", new italbrac());
105 DEFAULT.put("gloss", new gloss());
106 DEFAULT.put("not used", new not_used());
107 DEFAULT.put("wikipedia", new wikipedia());
// Callback that writes a template call back out in "name|arg|...|key=value" form.
111 static final class NameAndArgs implements FunctionCallback {
// Appends the template name, then each non-empty positional arg prefixed by "|",
// then the named args via appendNamedArgs.
// NOTE(review): the method's return statement is not visible in this listing.
113 public boolean onWikiFunction(final WikiTokenizer wikiTokenizer, final String name, final List<String> args,
114 final Map<String, String> namedArgs, final EnWiktionaryXmlParser parser,
115 final AppendAndIndexWikiCallback appendAndIndexWikiCallback) {
117 appendAndIndexWikiCallback.builder.append(name);
118 for (int i = 0; i < args.size(); ++i) {
// Empty positional args are skipped entirely (no "|" is written for them).
119 if (args.get(i).length() > 0) {
120 appendAndIndexWikiCallback.builder.append("|");
// Dispatched with a null index builder and a null entry type.
121 appendAndIndexWikiCallback.dispatch(args.get(i), null, null);
124 appendNamedArgs(namedArgs, appendAndIndexWikiCallback);
// Shared instance (used by FormOf). Note that this field is not declared final.
128 static NameAndArgs NAME_AND_ARGS = new NameAndArgs();
// Appends every named argument as "|key=value", in the map's iteration order.
// Both key and value are passed through dispatch(...); the value is dispatched with
// a null index builder and a null entry type.
130 private static void appendNamedArgs(final Map<String, String> namedArgs,
131 final AppendAndIndexWikiCallback appendAndIndexWikiCallback) {
132 for (final Map.Entry<String, String> entry : namedArgs.entrySet()) {
133 appendAndIndexWikiCallback.builder.append("|");
134 appendAndIndexWikiCallback.dispatch(entry.getKey(), null, null);
135 appendAndIndexWikiCallback.builder.append("=");
// These two locals are never assigned anything but null in the active code;
// the only code that would set them is the commented-out experiment below.
136 EntryTypeName entryTypeName = null;
137 IndexBuilder indexBuilder = null;
138 // This doesn't work: we'd need to add to word-forms.
139 // System.out.println(entry.getKey());
140 // if (entry.getKey().equals("tr")) {
141 // entryTypeName = EntryTypeName.WIKTIONARY_TRANSLITERATION;
142 // indexBuilder = appendAndIndexWikiCallback.parser.foreignIndexBuilder;
144 appendAndIndexWikiCallback.dispatch(entry.getValue(), indexBuilder, entryTypeName);
148 // ------------------------------------------------------------------
// Handles the translation templates registered under "t", "t+", "t-", "tø" and "apdx-t".
// Output built from the visible statements: word, then " {gender}" when a gender arg is
// present, then " (transliteration)" when a "tr" named arg was given.
150 static final class TranslationCallback implements FunctionCallback {
152 public boolean onWikiFunction(final WikiTokenizer wikiTokenizer, final String name, final List<String> args,
153 final Map<String, String> namedArgs, final EnWiktionaryXmlParser parser,
154 final AppendAndIndexWikiCallback appendAndIndexWikiCallback) {
// namedArgs is modified in place: "tr" is pulled out and the useless keys are dropped.
156 final String transliteration = namedArgs.remove("tr");
157 namedArgs.keySet().removeAll(EnWiktionaryXmlParser.USELESS_WIKI_ARGS);
// Fewer than two positional args is logged with the page title.
// NOTE(review): what follows the warning in this branch is not visible here.
158 if (args.size() < 2) {
159 LOG.warning("{{t...}} with wrong args: title=" + parser.title);
// Positional args: 0 = language code, 1 = word, 2 = gender.
// NOTE(review): langCode is not read by any visible statement below.
162 final String langCode = ListUtil.get(args, 0);
163 final String word = ListUtil.get(args, 1);
// Presumably ListUtil.get yields null when index 2 is absent (the null check below
// relies on that) — confirm against ListUtil.
164 final String gender = ListUtil.get(args, 2);
165 // TODO: deal with second (and third...) gender, and alt.
167 appendAndIndexWikiCallback.dispatch(word, EntryTypeName.WIKTIONARY_TITLE_MULTI);
169 if (gender != null) {
170 appendAndIndexWikiCallback.builder.append(String.format(" {%s}", gender));
172 if (transliteration != null) {
173 appendAndIndexWikiCallback.builder.append(" (");
174 appendAndIndexWikiCallback.dispatch(transliteration, EntryTypeName.WIKTIONARY_TRANSLITERATION);
175 appendAndIndexWikiCallback.builder.append(")");
182 // ------------------------------------------------------------------
// Handles {{qualifier}}: appends the single positional arg wrapped in parentheses.
184 static final class QualifierCallback implements FunctionCallback {
186 public boolean onWikiFunction(final WikiTokenizer wikiTokenizer, final String name, final List<String> args,
187 final Map<String, String> namedArgs,
188 final EnWiktionaryXmlParser parser,
189 final AppendAndIndexWikiCallback appendAndIndexWikiCallback) {
// Anything other than exactly one positional arg and no named args is logged.
// NOTE(review): the warning text carries no detail about the offending call, and
// what follows the warning in this branch is not visible here.
190 if (args.size() != 1 || !namedArgs.isEmpty()) {
191 LOG.warning("weird qualifier: ");
194 String qualifier = args.get(0);
195 appendAndIndexWikiCallback.builder.append("(");
// Dispatched with a null entry type.
196 appendAndIndexWikiCallback.dispatch(qualifier, null);
197 appendAndIndexWikiCallback.builder.append(")");
202 // ------------------------------------------------------------------
// Handles the templates registered from the 'encodings' set (e.g. "Arab", "Jpan"):
// each positional arg is dispatched using the callback's current entry type.
204 static final class EncodingCallback implements FunctionCallback {
206 public boolean onWikiFunction(final WikiTokenizer wikiTokenizer, final String name, final List<String> args,
207 final Map<String, String> namedArgs,
208 final EnWiktionaryXmlParser parser,
209 final AppendAndIndexWikiCallback appendAndIndexWikiCallback) {
// Named args are unexpected here; the raw token is logged.
210 if (!namedArgs.isEmpty()) {
211 LOG.warning("weird encoding: " + wikiTokenizer.token());
// NOTE(review): the handling of the zero-arg case is not visible beyond this comment.
213 if (args.size() == 0) {
214 // Things like "{{Jpan}}" exist.
218 for (int i = 0; i < args.size(); ++i) {
// NOTE(review): presumably ", " is only meant as a separator between args; the
// guard that would skip it for the first arg is not visible here — confirm.
220 appendAndIndexWikiCallback.builder.append(", ");
222 final String arg = args.get(i);
223 // if (arg.equals(parser.title)) {
224 // parser.titleAppended = true;
226 appendAndIndexWikiCallback.dispatch(arg, appendAndIndexWikiCallback.entryTypeName);
233 // ------------------------------------------------------------------
// Handles the templates registered under "m", "f", "n", "p" and "g":
// writes "{name|arg|...}" straight to the builder. The args are appended raw
// (no dispatch), so they are not expanded or indexed here.
235 static final class Gender implements FunctionCallback {
237 public boolean onWikiFunction(final WikiTokenizer wikiTokenizer, final String name, final List<String> args,
238 final Map<String, String> namedArgs,
239 final EnWiktionaryXmlParser parser,
240 final AppendAndIndexWikiCallback appendAndIndexWikiCallback) {
// NOTE(review): the body of this named-args check is not visible in this listing.
241 if (!namedArgs.isEmpty()) {
244 appendAndIndexWikiCallback.builder.append("{");
245 appendAndIndexWikiCallback.builder.append(name);
246 for (int i = 0; i < args.size(); ++i) {
247 appendAndIndexWikiCallback.builder.append("|").append(args.get(i));
249 appendAndIndexWikiCallback.builder.append("}");
254 // ------------------------------------------------------------------
// Handles {{l}} and {{term}}: appends the display text, then optional
// " (transliteration)", " (gloss)" and " {name|key=value...}" for leftover named args.
256 static final class l_term implements FunctionCallback {
258 public boolean onWikiFunction(final WikiTokenizer wikiTokenizer, final String name, final List<String> args,
259 final Map<String, String> namedArgs,
260 final EnWiktionaryXmlParser parser,
261 final AppendAndIndexWikiCallback appendAndIndexWikiCallback) {
263 // for {{l}}, lang is arg 0, but not for {{term}}
// NOTE(review): the body of this {{term}} special case is not visible in this listing.
264 if (name.equals("term")) {
// The entry type depends on the parser state; only the two states listed are accepted,
// any other state throws IllegalStateException.
268 final EntryTypeName entryTypeName;
269 switch (parser.state) {
270 case TRANSLATION_LINE: entryTypeName = EntryTypeName.WIKTIONARY_TRANSLATION_OTHER_TEXT; break;
271 case ENGLISH_DEF_OF_FOREIGN: entryTypeName = EntryTypeName.WIKTIONARY_ENGLISH_DEF_WIKI_LINK; break;
272 default: throw new IllegalStateException("Invalid enum value: " + parser.state);
// args.get(0) requires at least one positional arg to be present.
275 final String langCode = args.get(0);
// Only "en" selects the English index; the empty code and the remaining
// assignment both select the foreign index.
276 final IndexBuilder indexBuilder;
277 if ("".equals(langCode)) {
278 indexBuilder = parser.foreignIndexBuilder;
279 } else if ("en".equals(langCode)) {
280 indexBuilder = parser.enIndexBuilder;
282 indexBuilder = parser.foreignIndexBuilder;
// Display text is arg 2 when non-empty, otherwise arg 1 (null when that is absent too).
285 String displayText = ListUtil.get(args, 2, "");
286 if (displayText.equals("")) {
287 displayText = ListUtil.get(args, 1, null);
290 if (displayText != null) {
291 appendAndIndexWikiCallback.dispatch(displayText, indexBuilder, entryTypeName);
// Logged when neither arg 1 nor arg 2 supplied any text.
293 LOG.warning("no display text: " + wikiTokenizer.token());
// Transliteration is taken out of namedArgs and appended in parentheses.
// NOTE(review): a null check on 'tr' is presumably present but not visible here — confirm.
296 final String tr = namedArgs.remove("tr");
298 appendAndIndexWikiCallback.builder.append(" (");
299 appendAndIndexWikiCallback.dispatch(tr, indexBuilder, EntryTypeName.WIKTIONARY_TRANSLITERATION);
300 appendAndIndexWikiCallback.builder.append(")");
// Arg 3 is the gloss; it always goes to the English index.
303 final String gloss = ListUtil.get(args, 3, "");
304 if (!gloss.equals("")) {
305 appendAndIndexWikiCallback.builder.append(" (");
306 appendAndIndexWikiCallback.dispatch(gloss, parser.enIndexBuilder, EntryTypeName.WIKTIONARY_ENGLISH_DEF);
307 appendAndIndexWikiCallback.builder.append(")");
// Whatever named args remain after dropping the useless ones are echoed verbatim.
310 namedArgs.keySet().removeAll(EnWiktionaryXmlParser.USELESS_WIKI_ARGS);
311 if (!namedArgs.isEmpty()) {
312 appendAndIndexWikiCallback.builder.append(" {").append(name);
313 appendNamedArgs(namedArgs, appendAndIndexWikiCallback);
314 appendAndIndexWikiCallback.builder.append("}");
321 // ------------------------------------------------------------------
// Callback that dispatches positional arg 0 with entry type
// WIKTIONARY_TRANSLATION_OTHER_TEXT, without adding any surrounding text.
323 static final class AppendArg0 implements FunctionCallback {
325 public boolean onWikiFunction(final WikiTokenizer wikiTokenizer, final String name, final List<String> args,
326 final Map<String, String> namedArgs,
327 final EnWiktionaryXmlParser parser,
328 final AppendAndIndexWikiCallback appendAndIndexWikiCallback) {
// Expected shape is exactly one positional arg and no named args.
// NOTE(review): the body of this check is not visible in this listing.
329 if (args.size() != 1 || !namedArgs.isEmpty()) {
332 appendAndIndexWikiCallback.dispatch(args.get(0), EntryTypeName.WIKTIONARY_TRANSLATION_OTHER_TEXT);
333 // TODO: transliteration
338 // ------------------------------------------------------------------
// Handles {{italbrac}}: appends positional arg 0 wrapped in parentheses, dispatched
// with entry type WIKTIONARY_TRANSLATION_OTHER_TEXT.
340 static final class italbrac implements FunctionCallback {
342 public boolean onWikiFunction(final WikiTokenizer wikiTokenizer, final String name, final List<String> args,
343 final Map<String, String> namedArgs,
344 final EnWiktionaryXmlParser parser,
345 final AppendAndIndexWikiCallback appendAndIndexWikiCallback) {
// Expected shape is exactly one positional arg and no named args.
// NOTE(review): the body of this check is not visible in this listing.
346 if (args.size() != 1 || !namedArgs.isEmpty()) {
349 appendAndIndexWikiCallback.builder.append("(");
350 appendAndIndexWikiCallback.dispatch(args.get(0), EntryTypeName.WIKTIONARY_TRANSLATION_OTHER_TEXT);
351 appendAndIndexWikiCallback.builder.append(")");
356 // ------------------------------------------------------------------
// Handles {{gloss}}: appends positional arg 0 wrapped in parentheses, dispatched
// with entry type WIKTIONARY_TRANSLATION_OTHER_TEXT. The visible statements are the
// same as those of the italbrac callback.
358 static final class gloss implements FunctionCallback {
360 public boolean onWikiFunction(final WikiTokenizer wikiTokenizer, final String name, final List<String> args,
361 final Map<String, String> namedArgs,
362 final EnWiktionaryXmlParser parser,
363 final AppendAndIndexWikiCallback appendAndIndexWikiCallback) {
// Expected shape is exactly one positional arg and no named args.
// NOTE(review): the body of this check is not visible in this listing.
364 if (args.size() != 1 || !namedArgs.isEmpty()) {
367 appendAndIndexWikiCallback.builder.append("(");
368 appendAndIndexWikiCallback.dispatch(args.get(0), EntryTypeName.WIKTIONARY_TRANSLATION_OTHER_TEXT);
369 appendAndIndexWikiCallback.builder.append(")");
374 // ------------------------------------------------------------------
// Callback registered in DEFAULT for templates such as "trreq", "rfdef" and "attention".
// NOTE(review): no statements of the method body are visible in this listing;
// presumably the call produces no output — confirm.
376 static final class Ignore implements FunctionCallback {
378 public boolean onWikiFunction(final WikiTokenizer wikiTokenizer, final String name, final List<String> args,
379 final Map<String, String> namedArgs,
380 final EnWiktionaryXmlParser parser,
381 final AppendAndIndexWikiCallback appendAndIndexWikiCallback) {
386 // ------------------------------------------------------------------
// Handles {{not used}}: appends the fixed text "(not used)".
// None of the arguments are read by the visible statement.
388 static final class not_used implements FunctionCallback {
390 public boolean onWikiFunction(final WikiTokenizer wikiTokenizer, final String name, final List<String> args,
391 final Map<String, String> namedArgs,
392 final EnWiktionaryXmlParser parser,
393 final AppendAndIndexWikiCallback appendAndIndexWikiCallback) {
394 appendAndIndexWikiCallback.builder.append("(not used)");
400 // ------------------------------------------------------------------
// Callback that appends the template's own name as literal text
// (registered in DEFAULT for the template named "...").
402 static final class AppendName implements FunctionCallback {
404 public boolean onWikiFunction(final WikiTokenizer wikiTokenizer, final String name, final List<String> args,
405 final Map<String, String> namedArgs,
406 final EnWiktionaryXmlParser parser,
407 final AppendAndIndexWikiCallback appendAndIndexWikiCallback) {
// Expected shape is no positional and no named args.
// NOTE(review): the body of this check is not visible in this listing.
408 if (!args.isEmpty() || !namedArgs.isEmpty()) {
411 appendAndIndexWikiCallback.builder.append(name);
416 // --------------------------------------------------------------------
417 // --------------------------------------------------------------------
// Handles the "... of" templates ("plural of", "form of", ...): marks the entry as a
// form of another word, appends "{formName|remaining args...}" and indexes the entry
// under the base form.
420 static final class FormOf implements FunctionCallback {
422 public boolean onWikiFunction(final WikiTokenizer wikiTokenizer, final String name, final List<String> args,
423 final Map<String, String> namedArgs,
424 final EnWiktionaryXmlParser parser,
425 final AppendAndIndexWikiCallback appendAndIndexWikiCallback) {
426 parser.entryIsFormOfSomething = true;
// For the generic {{form of}} the form name is taken (and removed) from arg 0;
// for every other template the template name itself is the form name.
427 String formName = name;
428 if (name.equals("form of")) {
429 formName = ListUtil.remove(args, 0, null);
431 if (formName == null) {
432 LOG.warning("Missing form name: " + parser.title);
433 formName = "form of";
// Base form: arg 1 when non-empty, otherwise arg 0. The args that supplied it are
// removed from the list so they are not echoed again below.
// NOTE(review): how the two remove calls below are split between the branches is
// not fully visible in this listing — confirm.
435 String baseForm = ListUtil.get(args, 1, "");
436 if ("".equals(baseForm)) {
437 baseForm = ListUtil.get(args, 0, null);
438 ListUtil.remove(args, 1, "");
440 ListUtil.remove(args, 0, null);
442 namedArgs.keySet().removeAll(EnWiktionaryXmlParser.USELESS_WIKI_ARGS);
// Echo the form name plus whatever args remain, wrapped in braces.
444 appendAndIndexWikiCallback.builder.append("{");
445 NAME_AND_ARGS.onWikiFunction(wikiTokenizer, formName, args, namedArgs, parser, appendAndIndexWikiCallback);
446 appendAndIndexWikiCallback.builder.append("}");
// Index the current entry under the base form in the foreign index; this needs both
// a base form and a current indexed entry.
447 if (baseForm != null && appendAndIndexWikiCallback.indexedEntry != null) {
448 parser.foreignIndexBuilder.addEntryWithString(appendAndIndexWikiCallback.indexedEntry, baseForm, EntryTypeName.WIKTIONARY_BASE_FORM_MULTI);
450 // null baseForm happens in Danish.
451 LOG.warning("Null baseform: " + parser.title);
// Shared FormOf instance.
457 static final FormOf FORM_OF = new FormOf();
460 // --------------------------------------------------------------------
461 // --------------------------------------------------------------------
// Handles {{wikipedia}}. The "lang" named arg is discarded first; the call is then
// classified by how many positional args remain.
// NOTE(review): the bodies of both branches below are not visible in this listing.
463 static final class wikipedia implements FunctionCallback {
465 public boolean onWikiFunction(final WikiTokenizer wikiTokenizer, final String name, final List<String> args,
466 final Map<String, String> namedArgs,
467 final EnWiktionaryXmlParser parser,
468 final AppendAndIndexWikiCallback appendAndIndexWikiCallback) {
469 namedArgs.remove("lang");
// First branch: more than one positional arg, or named args other than "lang".
470 if (args.size() > 1 || !namedArgs.isEmpty()) {
// Second branch: exactly one positional arg and no remaining named args.
473 } else if (args.size() == 1) {
// Handles the headword-line templates {{infl}} and {{head}}: appends the headword,
// gender(s), transliteration, part of speech, inflection name/value pairs and any
// leftover named args, and records word forms on the parser.
// NOTE(review): several guarding conditions are not visible in this listing; the
// comments below describe only the statements that are.
481 static final class InflOrHead implements FunctionCallback {
483 public boolean onWikiFunction(final WikiTokenizer wikiTokenizer, final String name, final List<String> args,
484 final Map<String, String> namedArgs,
485 final EnWiktionaryXmlParser parser,
486 final AppendAndIndexWikiCallback appendAndIndexWikiCallback) {
487 // See: http://en.wiktionary.org/wiki/Template:infl
// NOTE(review): langCode is not read by any visible statement below.
488 final String langCode = ListUtil.get(args, 0);
// The headword comes from the "head" named arg, with "title" as an alternative source.
489 String head = namedArgs.remove("head");
491 head = namedArgs.remove("title"); // Bug
496 parser.titleAppended = true;
498 namedArgs.keySet().removeAll(EnWiktionaryXmlParser.USELESS_WIKI_ARGS);
// Pull out the named args that get dedicated formatting; "gender" is an
// alternative spelling of "g".
500 final String tr = namedArgs.remove("tr");
501 String g = namedArgs.remove("g");
503 g = namedArgs.remove("gender");
505 final String g2 = namedArgs.remove("g2");
506 final String g3 = namedArgs.remove("g3");
508 appendAndIndexWikiCallback.dispatch(head, EntryTypeName.WIKTIONARY_TITLE_MULTI);
// Genders are written as " {g|g2|g3}".
511 appendAndIndexWikiCallback.builder.append(" {").append(g);
513 appendAndIndexWikiCallback.builder.append("|").append(g2);
516 appendAndIndexWikiCallback.builder.append("|").append(g3);
518 appendAndIndexWikiCallback.builder.append("}");
// Transliteration is written as " (tr)" and also recorded as a word form.
// NOTE(review): it is dispatched as WIKTIONARY_TITLE_MULTI here, whereas
// TranslationCallback and l_term use WIKTIONARY_TRANSLITERATION — confirm intended.
522 appendAndIndexWikiCallback.builder.append(" (");
523 appendAndIndexWikiCallback.dispatch(tr, EntryTypeName.WIKTIONARY_TITLE_MULTI);
524 appendAndIndexWikiCallback.builder.append(")");
525 parser.wordForms.add(tr);
// Positional arg 1 is the part of speech, written as " (pos)".
528 final String pos = ListUtil.get(args, 1);
530 appendAndIndexWikiCallback.builder.append(" (").append(pos).append(")");
// Positional args from index 2 onward are consumed as (name, value) pairs.
532 for (int i = 2; i < args.size(); i += 2) {
533 final String inflName = ListUtil.get(args, i);
534 final String inflValue = ListUtil.get(args, i + 1);
535 appendAndIndexWikiCallback.builder.append(", ");
536 appendAndIndexWikiCallback.dispatch(inflName, null, null);
// A pair whose value is missing or empty contributes only its name.
537 if (inflValue != null && inflValue.length() > 0) {
538 appendAndIndexWikiCallback.builder.append(": ");
539 appendAndIndexWikiCallback.dispatch(inflValue, null, null);
540 parser.wordForms.add(inflValue);
// Any named args still left are written as " key=value" with the value reduced to
// plain text; each value is also recorded as a word form.
543 for (final String key : namedArgs.keySet()) {
544 final String value = WikiTokenizer.toPlainText(namedArgs.get(key));
545 appendAndIndexWikiCallback.builder.append(" ");
546 appendAndIndexWikiCallback.dispatch(key, null, null);
547 appendAndIndexWikiCallback.builder.append("=");
548 appendAndIndexWikiCallback.dispatch(value, null, null);
549 parser.wordForms.add(value);
// Registers the it_noun callback for the "it-noun" template.
557 DEFAULT.put("it-noun", new it_noun());
// Handles {{it-noun}}: positional args are 0 = stem, 1 = gender, 2 = singular ending,
// 3 = plural ending. Appends " <singular> {gender}, <plural> {pl}" and records both
// forms as word forms on the parser.
559 static final class it_noun implements FunctionCallback {
561 public boolean onWikiFunction(final WikiTokenizer wikiTokenizer, final String name, final List<String> args,
562 final Map<String, String> namedArgs,
563 final EnWiktionaryXmlParser parser,
564 final AppendAndIndexWikiCallback appendAndIndexWikiCallback) {
565 parser.titleAppended = true;
566 final String base = ListUtil.get(args, 0);
567 final String gender = ListUtil.get(args, 1);
// NOTE(review): if ListUtil.get returns the null default when arg 2 or 3 is absent,
// these concatenations yield text ending in the literal "null" — confirm against
// ListUtil and consider an empty-string default.
568 final String singular = base + ListUtil.get(args, 2, null);
569 final String plural = base + ListUtil.get(args, 3, null);
570 appendAndIndexWikiCallback.builder.append(" ");
571 appendAndIndexWikiCallback.dispatch(singular, null, null);
572 appendAndIndexWikiCallback.builder.append(" {").append(gender).append("}, ");
573 appendAndIndexWikiCallback.dispatch(plural, null, null);
574 appendAndIndexWikiCallback.builder.append(" {pl}");
575 parser.wordForms.add(singular);
576 parser.wordForms.add(plural);
// Validation happens after the output has already been appended: named args or more
// than four positional args are only logged.
577 if (!namedArgs.isEmpty() || args.size() > 4) {
578 LOG.warning("Invalid it-noun: " + wikiTokenizer.token());
// Registers the it_proper_noun callback for the "it-proper noun" template.
585 DEFAULT.put("it-proper noun", new it_proper_noun());
587 static final class it_proper_noun implements FunctionCallback {
589 public boolean onWikiFunction(final WikiTokenizer wikiTokenizer, final String name, final List<String> args,
590 final Map<String, String> namedArgs,
591 final EnWiktionaryXmlParser parser,
592 final AppendAndIndexWikiCallback appendAndIndexWikiCallback) {