1 // Copyright 2012 Google Inc. All Rights Reserved.
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
7 // http://www.apache.org/licenses/LICENSE-2.0
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
15 package com.hughes.android.dictionary.parser.wiktionary;
17 import java.util.Arrays;
18 import java.util.LinkedHashMap;
19 import java.util.LinkedHashSet;
20 import java.util.List;
23 import java.util.concurrent.atomic.AtomicInteger;
25 import com.hughes.android.dictionary.engine.EntryTypeName;
26 import com.hughes.android.dictionary.engine.IndexBuilder;
27 import com.hughes.android.dictionary.parser.WikiTokenizer;
28 import com.hughes.android.dictionary.parser.wiktionary.AbstractWiktionaryParser.AppendAndIndexWikiCallback;
29 import com.hughes.android.dictionary.parser.wiktionary.AbstractWiktionaryParser.NameAndArgs;
30 import com.hughes.util.ListUtil;
/**
 * Holds the name -> callback registries for wiki functions (templates) and the
 * FunctionCallback implementations those registries point at.
 */
32 class EnFunctionCallbacks {
// Callbacks typed for EnParser, keyed by wiki function name. LinkedHashMap keeps insertion order.
34 static final Map<String,FunctionCallback<EnParser>> DEFAULT = new LinkedHashMap<String, FunctionCallback<EnParser>>();
// Callbacks typed for the AbstractWiktionaryParser base type, keyed by wiki function name.
36 static final Map<String,FunctionCallback<AbstractWiktionaryParser>> DEFAULT_GENERIC = new LinkedHashMap<String, FunctionCallback<AbstractWiktionaryParser>>();
// The generic registry maps the "t" function to a TranslationCallback.
38 FunctionCallback<AbstractWiktionaryParser> callback = new TranslationCallback<AbstractWiktionaryParser>();
39 DEFAULT_GENERIC.put("t", callback);
/**
 * Registers the callbacks that are generic in the parser type into {@code callbacks}.
 * Within each group below, a single callback instance is shared by every name in the group.
 *
 * @param callbacks registry (wiki function name -> callback) that receives the entries;
 *                  an existing entry under the same name is replaced by {@code put}
 */
42 static <T extends AbstractWiktionaryParser> void addGenericCallbacks(Map<String, FunctionCallback<T>> callbacks) {
// Names handled by the Gender callback.
43 FunctionCallback<T> callback = new Gender<T>();
44 callbacks.put("m", callback);
45 callbacks.put("f", callback);
46 callbacks.put("n", callback);
47 callbacks.put("p", callback);
48 callbacks.put("g", callback);
// Every name in the set below is mapped to the same EncodingCallback instance.
50 callback = new EncodingCallback<T>();
51 Set<String> encodings = new LinkedHashSet<String>(Arrays.asList(
53 "sd-Arab", "ku-Arab", "Arab", "unicode", "Laoo", "ur-Arab", "Thai",
54 "fa-Arab", "Khmr", "Cyrl", "IPAchar", "ug-Arab", "ko-inline",
55 "Jpan", "Kore", "Hebr", "rfscript", "Beng", "Mong", "Knda", "Cyrs",
56 "yue-tsj", "Mlym", "Tfng", "Grek", "yue-yue-j"));
57 for (final String encoding : encodings) {
58 callbacks.put(encoding, callback);
// Names handled by the Ignore callback.
61 callback = new Ignore<T>();
62 callbacks.put("trreq", callback);
63 callbacks.put("t-image", callback);
64 callbacks.put("defn", callback);
65 callbacks.put("rfdef", callback);
66 callbacks.put("rfdate", callback);
67 callbacks.put("rfex", callback);
68 callbacks.put("rfquote", callback);
69 callbacks.put("attention", callback);
70 callbacks.put("zh-attention", callback);
// "..." is handled by AppendName, which appends the function name itself.
72 callback = new AppendName<T>();
73 callbacks.put("...", callback);
// Functions that each get their own dedicated callback class.
75 callbacks.put("qualifier", new QualifierCallback<T>());
76 callbacks.put("italbrac", new italbrac<T>());
77 callbacks.put("gloss", new gloss<T>());
78 callbacks.put("not used", new not_used<T>());
79 callbacks.put("wikipedia", new wikipedia<T>());
// Populate DEFAULT: the generic callbacks go in first, then the EnParser-specific ones.
// Because later put() calls replace earlier entries, "t" ends up mapped to the
// TranslationCallback<EnParser> created below.
83 addGenericCallbacks(DEFAULT);
// Translation functions all share one TranslationCallback instance.
85 FunctionCallback<EnParser> callback = new TranslationCallback<EnParser>();
86 DEFAULT.put("t", callback);
87 DEFAULT.put("t+", callback);
88 DEFAULT.put("t-", callback);
89 DEFAULT.put("tø", callback);
90 DEFAULT.put("apdx-t", callback);
// "l" and "term" share one l_term instance.
92 callback = new l_term();
93 DEFAULT.put("l", callback);
94 DEFAULT.put("term", callback);
96 //callback = new AppendArg0();
// All "... of" functions below share one FormOf instance.
98 callback = new FormOf();
99 DEFAULT.put("form of", callback);
100 DEFAULT.put("conjugation of", callback);
101 DEFAULT.put("participle of", callback);
102 DEFAULT.put("present participle of", callback);
103 DEFAULT.put("past participle of", callback);
104 DEFAULT.put("feminine past participle of", callback);
105 DEFAULT.put("gerund of", callback);
106 DEFAULT.put("feminine of", callback);
107 DEFAULT.put("plural of", callback);
108 DEFAULT.put("feminine plural of", callback);
109 DEFAULT.put("inflected form of", callback);
110 DEFAULT.put("alternative form of", callback);
111 DEFAULT.put("dated form of", callback);
112 DEFAULT.put("apocopic form of", callback);
// "infl" and "head" share one InflOrHead instance.
114 callback = new InflOrHead();
115 DEFAULT.put("infl", callback);
116 DEFAULT.put("head", callback);
// Shared NameAndArgs callback instance; FormOf invokes it with the form name and the
// remaining args, between an opening "{" and a closing "}".
119 static final NameAndArgs<EnParser> NAME_AND_ARGS = new NameAndArgs<EnParser>();
121 // ------------------------------------------------------------------
/**
 * Callback for translation functions. Positional arg 0 is read as the language code and
 * positional arg 1 as the translated word; the named args "tr" (transliteration) and
 * "alt" (text shown in place of the word) are pulled out and handled specially.
 */
123 static final class TranslationCallback<T extends AbstractWiktionaryParser> implements FunctionCallback<T> {
125 public boolean onWikiFunction(final WikiTokenizer wikiTokenizer, final String name, final List<String> args,
126 final Map<String, String> namedArgs, final T parser,
127 final AppendAndIndexWikiCallback<T> appendAndIndexWikiCallback) {
// Pull out the named args handled explicitly, then drop the ones listed in USELESS_WIKI_ARGS.
129 final String transliteration = namedArgs.remove("tr");
130 final String alt = namedArgs.remove("alt");
131 namedArgs.keySet().removeAll(EnParser.USELESS_WIKI_ARGS);
// Fewer than two positional args is logged as malformed, except for the "ttbc" function.
132 if (args.size() < 2) {
133 if (!name.equals("ttbc")) {
134 EnParser.LOG.warning("{{t...}} with wrong args: title=" + parser.title + ", " + wikiTokenizer.token());
// Bump the per-language-code counter, creating it on first use.
138 final String langCode = ListUtil.get(args, 0);
139 if (!appendAndIndexWikiCallback.langCodeToTCount.containsKey(langCode)) {
140 appendAndIndexWikiCallback.langCodeToTCount.put(langCode, new AtomicInteger());
142 appendAndIndexWikiCallback.langCodeToTCount.get(langCode).incrementAndGet();
// Dispatch "alt" when it is present, otherwise the word itself.
143 final String word = ListUtil.get(args, 1);
144 appendAndIndexWikiCallback.dispatch(alt != null ? alt : word, EntryTypeName.WIKTIONARY_TITLE_MULTI);
// Positional args from index 2 onward are echoed inside " {...}", with "|" as the separator.
147 if (args.size() > 2) {
148 appendAndIndexWikiCallback.builder.append(" {");
149 for (int i = 2; i < args.size(); ++i) {
151 appendAndIndexWikiCallback.builder.append("|");
153 appendAndIndexWikiCallback.builder.append(args.get(i));
155 appendAndIndexWikiCallback.builder.append("}");
// The transliteration, when given, is dispatched inside parentheses.
158 if (transliteration != null) {
159 appendAndIndexWikiCallback.builder.append(" (");
160 appendAndIndexWikiCallback.dispatch(transliteration, EntryTypeName.WIKTIONARY_TRANSLITERATION);
161 appendAndIndexWikiCallback.builder.append(")");
165 // If alt wasn't null, we appended alt instead of the actual word
166 // we're filing under, so the word itself is dispatched here in parentheses.
167 appendAndIndexWikiCallback.builder.append(" (");
168 appendAndIndexWikiCallback.dispatch(word, EntryTypeName.WIKTIONARY_TITLE_MULTI);
169 appendAndIndexWikiCallback.builder.append(")");
172 // Catch-all for anything else: leftover named args are echoed inside " {...}".
173 if (!namedArgs.isEmpty()) {
174 appendAndIndexWikiCallback.builder.append(" {");
175 EnParser.appendNamedArgs(namedArgs, appendAndIndexWikiCallback);
176 appendAndIndexWikiCallback.builder.append("}");
183 // ------------------------------------------------------------------
/**
 * Callback for the "qualifier" function: dispatches its single positional argument
 * wrapped in parentheses.
 */
185 static final class QualifierCallback<T extends AbstractWiktionaryParser> implements FunctionCallback<T> {
187 public boolean onWikiFunction(final WikiTokenizer wikiTokenizer, final String name, final List<String> args,
188 final Map<String, String> namedArgs,
190 final AppendAndIndexWikiCallback<T> appendAndIndexWikiCallback) {
// Anything other than exactly one positional arg and no named args is logged.
// NOTE(review): this warning carries no context; the other callbacks append
// wikiTokenizer.token() to their messages - consider doing the same here.
191 if (args.size() != 1 || !namedArgs.isEmpty()) {
192 EnParser.LOG.warning("weird qualifier: ");
// The qualifier is dispatched with a null second argument, between "(" and ")".
195 String qualifier = args.get(0);
196 appendAndIndexWikiCallback.builder.append("(");
197 appendAndIndexWikiCallback.dispatch(qualifier, null);
198 appendAndIndexWikiCallback.builder.append(")");
203 // ------------------------------------------------------------------
/**
 * Callback shared by the script/encoding function names (e.g. "Arab", "Jpan"): each
 * positional argument is dispatched using the entry type currently set on the
 * AppendAndIndexWikiCallback.
 */
205 static final class EncodingCallback<T extends AbstractWiktionaryParser> implements FunctionCallback<T> {
207 public boolean onWikiFunction(final WikiTokenizer wikiTokenizer, final String name, final List<String> args,
208 final Map<String, String> namedArgs,
210 final AppendAndIndexWikiCallback<T> appendAndIndexWikiCallback) {
// Named args are not expected here; their presence is logged together with the raw token.
211 if (!namedArgs.isEmpty()) {
212 EnParser.LOG.warning("weird encoding: " + wikiTokenizer.token());
214 if (args.size() == 0) {
215 // Things like "{{Jpan}}" exist.
// Dispatch every positional argument, with ", " as the separator.
219 for (int i = 0; i < args.size(); ++i) {
221 appendAndIndexWikiCallback.builder.append(", ");
223 final String arg = args.get(i);
224 // if (arg.equals(parser.title)) {
225 // parser.titleAppended = true;
227 appendAndIndexWikiCallback.dispatch(arg, appendAndIndexWikiCallback.entryTypeName);
234 // ------------------------------------------------------------------
/**
 * Callback for the gender function names ("m", "f", "n", "p", "g"): appends the function
 * back to the output in the form "{name|arg|...}".
 */
236 static final class Gender<T extends AbstractWiktionaryParser> implements FunctionCallback<T> {
238 public boolean onWikiFunction(final WikiTokenizer wikiTokenizer, final String name, final List<String> args,
239 final Map<String, String> namedArgs,
241 final AppendAndIndexWikiCallback<T> appendAndIndexWikiCallback) {
// Named args get special treatment before the normal path below.
242 if (!namedArgs.isEmpty()) {
// Emit "{" + name, then each positional arg prefixed with "|", then "}".
245 appendAndIndexWikiCallback.builder.append("{");
246 appendAndIndexWikiCallback.builder.append(name);
247 for (int i = 0; i < args.size(); ++i) {
248 appendAndIndexWikiCallback.builder.append("|").append(args.get(i));
250 appendAndIndexWikiCallback.builder.append("}");
255 // ------------------------------------------------------------------
/**
 * Callback for the "l" and "term" functions: dispatches the link's display text to the
 * index selected by its language code, then appends the optional transliteration, the
 * optional gloss and any leftover named args.
 */
257 static final class l_term implements FunctionCallback<EnParser> {
259 public boolean onWikiFunction(final WikiTokenizer wikiTokenizer, final String name, final List<String> args,
260 final Map<String, String> namedArgs,
261 final EnParser parser,
262 final AppendAndIndexWikiCallback<EnParser> appendAndIndexWikiCallback) {
264 // for {{l}}, lang is arg 0, but not for {{term}}
265 if (name.equals("term")) {
// The entry type for the display text depends on the parser state; any state other than
// the two listed below throws IllegalStateException.
269 final EntryTypeName entryTypeName;
270 switch (parser.state) {
271 case TRANSLATION_LINE: entryTypeName = EntryTypeName.WIKTIONARY_TRANSLATION_OTHER_TEXT; break;
272 case ENGLISH_DEF_OF_FOREIGN: entryTypeName = EntryTypeName.WIKTIONARY_ENGLISH_DEF_WIKI_LINK; break;
273 default: throw new IllegalStateException("Invalid enum value: " + parser.state);
// Only "en" selects the English index; an empty code and every other code select the
// foreign index.
// NOTE(review): args.get(0) is unchecked, unlike the ListUtil.get calls below - confirm
// that args can never be empty here.
276 final String langCode = args.get(0);
277 final IndexBuilder indexBuilder;
278 if ("".equals(langCode)) {
279 indexBuilder = parser.foreignIndexBuilder;
280 } else if ("en".equals(langCode)) {
281 indexBuilder = parser.enIndexBuilder;
283 indexBuilder = parser.foreignIndexBuilder;
// Display text: positional arg 2 when non-empty, otherwise positional arg 1.
286 String displayText = ListUtil.get(args, 2, "");
287 if (displayText.equals("")) {
288 displayText = ListUtil.get(args, 1, null);
// A missing display text is reported through the warning below.
291 if (displayText != null) {
292 appendAndIndexWikiCallback.dispatch(displayText, indexBuilder, entryTypeName);
294 EnParser.LOG.warning("no display text: " + wikiTokenizer.token());
// The "tr" named arg is dispatched in parentheses as a transliteration.
297 final String tr = namedArgs.remove("tr");
299 appendAndIndexWikiCallback.builder.append(" (");
300 appendAndIndexWikiCallback.dispatch(tr, indexBuilder, EntryTypeName.WIKTIONARY_TRANSLITERATION);
301 appendAndIndexWikiCallback.builder.append(")");
// Positional arg 3 is the gloss; when non-empty it is dispatched to the English index.
304 final String gloss = ListUtil.get(args, 3, "");
305 if (!gloss.equals("")) {
306 appendAndIndexWikiCallback.builder.append(" (");
307 appendAndIndexWikiCallback.dispatch(gloss, parser.enIndexBuilder, EntryTypeName.WIKTIONARY_ENGLISH_DEF);
308 appendAndIndexWikiCallback.builder.append(")");
// Named args that survive the USELESS_WIKI_ARGS filter are echoed as " {name...}".
311 namedArgs.keySet().removeAll(EnParser.USELESS_WIKI_ARGS);
312 if (!namedArgs.isEmpty()) {
313 appendAndIndexWikiCallback.builder.append(" {").append(name);
314 EnParser.appendNamedArgs(namedArgs, appendAndIndexWikiCallback);
315 appendAndIndexWikiCallback.builder.append("}");
322 // ------------------------------------------------------------------
/**
 * Callback that dispatches positional arg 0 as WIKTIONARY_TRANSLATION_OTHER_TEXT and
 * then handles an optional "tr" named arg.
 * NOTE(review): the type parameter T is declared but the class implements
 * FunctionCallback&lt;EnParser&gt;, so T is unused.
 */
324 static final class AppendArg0<T extends AbstractWiktionaryParser> implements FunctionCallback<EnParser> {
326 public boolean onWikiFunction(final WikiTokenizer wikiTokenizer, final String name, final List<String> args,
327 final Map<String, String> namedArgs,
328 final EnParser parser,
329 final AppendAndIndexWikiCallback<EnParser> appendAndIndexWikiCallback) {
// Guard: exactly one positional arg and no named args are expected.
330 if (args.size() != 1 || !namedArgs.isEmpty()) {
333 appendAndIndexWikiCallback.dispatch(args.get(0), EntryTypeName.WIKTIONARY_TRANSLATION_OTHER_TEXT);
// NOTE(review): if the guard above bails out whenever namedArgs is non-empty, "tr" can
// never be present at this point - confirm whether this handling is reachable.
335 final String tr = namedArgs.remove("tr");
337 appendAndIndexWikiCallback.builder.append(" (");
338 appendAndIndexWikiCallback.dispatch(tr, EntryTypeName.WIKTIONARY_TRANSLATION_OTHER_TEXT);
339 appendAndIndexWikiCallback.builder.append(")");
// The transliteration is also recorded in the parser's wordForms collection.
340 parser.wordForms.add(tr);
347 // ------------------------------------------------------------------
/**
 * Callback for the "italbrac" function: dispatches positional arg 0 between "(" and ")".
 */
349 static final class italbrac<T extends AbstractWiktionaryParser> implements FunctionCallback<T> {
351 public boolean onWikiFunction(final WikiTokenizer wikiTokenizer, final String name, final List<String> args,
352 final Map<String, String> namedArgs,
354 final AppendAndIndexWikiCallback<T> appendAndIndexWikiCallback) {
// Guard: exactly one positional arg and no named args are expected.
355 if (args.size() != 1 || !namedArgs.isEmpty()) {
// The argument is dispatched as WIKTIONARY_TRANSLATION_OTHER_TEXT inside parentheses.
358 appendAndIndexWikiCallback.builder.append("(");
359 appendAndIndexWikiCallback.dispatch(args.get(0), EntryTypeName.WIKTIONARY_TRANSLATION_OTHER_TEXT);
360 appendAndIndexWikiCallback.builder.append(")");
365 // ------------------------------------------------------------------
/**
 * Callback for the "gloss" function: dispatches positional arg 0 between "(" and ")".
 */
367 static final class gloss<T extends AbstractWiktionaryParser> implements FunctionCallback<T> {
369 public boolean onWikiFunction(final WikiTokenizer wikiTokenizer, final String name, final List<String> args,
370 final Map<String, String> namedArgs,
372 final AppendAndIndexWikiCallback<T> appendAndIndexWikiCallback) {
// Guard: exactly one positional arg and no named args are expected.
373 if (args.size() != 1 || !namedArgs.isEmpty()) {
// The argument is dispatched as WIKTIONARY_TRANSLATION_OTHER_TEXT inside parentheses.
376 appendAndIndexWikiCallback.builder.append("(");
377 appendAndIndexWikiCallback.dispatch(args.get(0), EntryTypeName.WIKTIONARY_TRANSLATION_OTHER_TEXT);
378 appendAndIndexWikiCallback.builder.append(")");
383 // ------------------------------------------------------------------
/**
 * Callback mapped to functions such as "trreq", "rfdef" and "attention".
 * Presumably contributes nothing to the output - TODO confirm against the method body.
 */
385 static final class Ignore<T extends AbstractWiktionaryParser> implements FunctionCallback<T> {
387 public boolean onWikiFunction(final WikiTokenizer wikiTokenizer, final String name, final List<String> args,
388 final Map<String, String> namedArgs,
390 final AppendAndIndexWikiCallback<T> appendAndIndexWikiCallback) {
395 // ------------------------------------------------------------------
/**
 * Callback for the "not used" function: appends the literal text "(not used)",
 * regardless of the arguments supplied.
 */
397 static final class not_used<T extends AbstractWiktionaryParser> implements FunctionCallback<T> {
399 public boolean onWikiFunction(final WikiTokenizer wikiTokenizer, final String name, final List<String> args,
400 final Map<String, String> namedArgs,
402 final AppendAndIndexWikiCallback<T> appendAndIndexWikiCallback) {
403 appendAndIndexWikiCallback.builder.append("(not used)");
409 // ------------------------------------------------------------------
/**
 * Callback that appends the function's own name to the output (mapped to "...").
 */
411 static final class AppendName<T extends AbstractWiktionaryParser> implements FunctionCallback<T> {
413 public boolean onWikiFunction(final WikiTokenizer wikiTokenizer, final String name, final List<String> args,
414 final Map<String, String> namedArgs,
416 final AppendAndIndexWikiCallback<T> appendAndIndexWikiCallback) {
// Guard: no positional and no named args are expected.
417 if (!args.isEmpty() || !namedArgs.isEmpty()) {
420 appendAndIndexWikiCallback.builder.append(name);
425 // --------------------------------------------------------------------
426 // --------------------------------------------------------------------
/**
 * Callback for the "... of" family ("form of", "plural of", ...). It flags the entry as
 * a form of another word, echoes the function as "{...}" through NAME_AND_ARGS, and files
 * the current indexed entry under the base form in the foreign index.
 */
429 static final class FormOf implements FunctionCallback<EnParser> {
431 public boolean onWikiFunction(final WikiTokenizer wikiTokenizer, final String name, final List<String> args,
432 final Map<String, String> namedArgs,
433 final EnParser parser,
434 final AppendAndIndexWikiCallback<EnParser> appendAndIndexWikiCallback) {
435 parser.entryIsFormOfSomething = true;
// The form name is normally the function name; for the generic "form of" it is taken
// from, and removed from, positional arg 0.
436 String formName = name;
437 if (name.equals("form of")) {
438 formName = ListUtil.remove(args, 0, null);
440 if (formName == null) {
441 EnParser.LOG.warning("Missing form name: " + parser.title);
442 formName = "form of";
// Base form: positional arg 1 when non-empty, otherwise positional arg 0. Positional
// args are then removed from the list before the rest is handed to NAME_AND_ARGS.
444 String baseForm = ListUtil.get(args, 1, "");
445 if ("".equals(baseForm)) {
446 baseForm = ListUtil.get(args, 0, null);
447 ListUtil.remove(args, 1, "");
449 ListUtil.remove(args, 0, null);
451 namedArgs.keySet().removeAll(EnParser.USELESS_WIKI_ARGS);
// Echo the form name and whatever args remain, wrapped in braces.
453 appendAndIndexWikiCallback.builder.append("{");
454 NAME_AND_ARGS.onWikiFunction(wikiTokenizer, formName, args, namedArgs, parser, appendAndIndexWikiCallback);
455 appendAndIndexWikiCallback.builder.append("}");
// With both a base form and a current indexed entry, the entry is added to the foreign
// index under the base form.
456 if (baseForm != null && appendAndIndexWikiCallback.indexedEntry != null) {
457 parser.foreignIndexBuilder.addEntryWithString(appendAndIndexWikiCallback.indexedEntry, baseForm, EntryTypeName.WIKTIONARY_BASE_FORM_MULTI);
459 // null baseForm happens in Danish.
460 EnParser.LOG.warning("Null baseform: " + parser.title);
// Shared FormOf instance, separate from the one stored in DEFAULT.
466 static final EnFunctionCallbacks.FormOf FORM_OF = new FormOf();
469 // --------------------------------------------------------------------
470 // --------------------------------------------------------------------
/**
 * Callback for the "wikipedia" function. The "lang" named arg is discarded before the
 * argument counts are examined.
 */
472 static final class wikipedia<T extends AbstractWiktionaryParser> implements FunctionCallback<T> {
474 public boolean onWikiFunction(final WikiTokenizer wikiTokenizer, final String name, final List<String> args,
475 final Map<String, String> namedArgs,
477 final AppendAndIndexWikiCallback<T> appendAndIndexWikiCallback) {
478 namedArgs.remove("lang");
// Two cases are distinguished: more than one positional arg or any leftover named arg,
// versus exactly one positional arg.
479 if (args.size() > 1 || !namedArgs.isEmpty()) {
482 } else if (args.size() == 1) {
/**
 * Callback for the "infl" and "head" functions. It appends, in order: the headword
 * (only if the parser has not appended a title yet), the gender args, the
 * transliteration, the part of speech, the inflection name/value pairs and any leftover
 * named args. The transliteration, inflection values and named-arg values are also
 * recorded in parser.wordForms.
 */
490 static final class InflOrHead implements FunctionCallback<EnParser> {
492 public boolean onWikiFunction(final WikiTokenizer wikiTokenizer, final String name, final List<String> args,
493 final Map<String, String> namedArgs,
494 final EnParser parser,
495 final AppendAndIndexWikiCallback<EnParser> appendAndIndexWikiCallback) {
496 // See: http://en.wiktionary.org/wiki/Template:infl
497 // TODO: Actually these functions should start a new WordPOS:
498 // See: http://en.wiktionary.org/wiki/quattro
// NOTE(review): langCode is read but not used anywhere in the visible code.
499 final String langCode = ListUtil.get(args, 0);
// The headword comes from the "head" named arg, with "title" as an alternative source.
500 String head = namedArgs.remove("head");
502 head = namedArgs.remove("title"); // Bug
508 namedArgs.keySet().removeAll(EnParser.USELESS_WIKI_ARGS);
// Pull out the named args handled explicitly; "gender" is an alternative spelling of "g".
510 final String tr = namedArgs.remove("tr");
511 String g = namedArgs.remove("g");
513 g = namedArgs.remove("gender");
515 final String g2 = namedArgs.remove("g2");
516 final String g3 = namedArgs.remove("g3");
518 // We might have already taken care of this in a generic way...
519 if (!parser.titleAppended) {
520 appendAndIndexWikiCallback.dispatch(head, EntryTypeName.WIKTIONARY_TITLE_MULTI);
521 parser.titleAppended = true;
// Genders are emitted as " {g|g2|g3}".
525 appendAndIndexWikiCallback.builder.append(" {").append(g);
527 appendAndIndexWikiCallback.builder.append("|").append(g2);
530 appendAndIndexWikiCallback.builder.append("|").append(g3);
532 appendAndIndexWikiCallback.builder.append("}");
// The transliteration is dispatched in parentheses and recorded as a word form.
536 appendAndIndexWikiCallback.builder.append(" (");
537 appendAndIndexWikiCallback.dispatch(tr, EntryTypeName.WIKTIONARY_TITLE_MULTI);
538 appendAndIndexWikiCallback.builder.append(")");
539 parser.wordForms.add(tr);
// Positional arg 1 is the part of speech, appended in parentheses.
542 final String pos = ListUtil.get(args, 1);
544 appendAndIndexWikiCallback.builder.append(" (").append(pos).append(")");
// Positional args from index 2 onward are consumed as (inflection name, inflection value)
// pairs; a non-empty value is dispatched after ": " and recorded as a word form.
546 for (int i = 2; i < args.size(); i += 2) {
547 final String inflName = ListUtil.get(args, i);
548 final String inflValue = ListUtil.get(args, i + 1);
549 appendAndIndexWikiCallback.builder.append(", ");
550 appendAndIndexWikiCallback.dispatch(inflName, null, null);
551 if (inflValue != null && inflValue.length() > 0) {
552 appendAndIndexWikiCallback.builder.append(": ");
553 appendAndIndexWikiCallback.dispatch(inflValue, null, null);
554 parser.wordForms.add(inflValue);
// Leftover named args are emitted as " key=value"; the value is first converted with
// WikiTokenizer.toPlainText and is also recorded as a word form.
557 for (final String key : namedArgs.keySet()) {
558 final String value = WikiTokenizer.toPlainText(namedArgs.get(key));
559 appendAndIndexWikiCallback.builder.append(" ");
560 appendAndIndexWikiCallback.dispatch(key, null, null);
561 appendAndIndexWikiCallback.builder.append("=");
562 appendAndIndexWikiCallback.dispatch(value, null, null);
563 parser.wordForms.add(value);
// Map the "it-noun" function to its dedicated callback.
571 DEFAULT.put("it-noun", new it_noun());
/**
 * Callback for the "it-noun" function. Positional args are read as: 0 = base,
 * 1 = gender, 2 = singular suffix, 3 = plural suffix. It appends
 * " singular {gender}, plural {pl}" and records both forms in parser.wordForms.
 */
573 static final class it_noun implements FunctionCallback<EnParser> {
575 public boolean onWikiFunction(final WikiTokenizer wikiTokenizer, final String name, final List<String> args,
576 final Map<String, String> namedArgs,
577 final EnParser parser,
578 final AppendAndIndexWikiCallback<EnParser> appendAndIndexWikiCallback) {
// Mark the title as handled so that other callbacks do not append it again.
579 parser.titleAppended = true;
580 final String base = ListUtil.get(args, 0);
581 final String gender = ListUtil.get(args, 1);
// NOTE(review): if ListUtil.get falls back to the null default for a missing suffix,
// the string concatenation below yields a literal "null" suffix (e.g. "basenull") -
// confirm whether "" was the intended default.
582 final String singular = base + ListUtil.get(args, 2, null);
583 final String plural = base + ListUtil.get(args, 3, null);
584 appendAndIndexWikiCallback.builder.append(" ");
585 appendAndIndexWikiCallback.dispatch(singular, null, null);
586 appendAndIndexWikiCallback.builder.append(" {").append(gender).append("}, ");
587 appendAndIndexWikiCallback.dispatch(plural, null, null);
588 appendAndIndexWikiCallback.builder.append(" {pl}");
589 parser.wordForms.add(singular);
590 parser.wordForms.add(plural);
// Named args, or more than four positional args, are reported only after the output
// above has already been appended.
591 if (!namedArgs.isEmpty() || args.size() > 4) {
592 EnParser.LOG.warning("Invalid it-noun: " + wikiTokenizer.token());
// Map the "it-proper noun" function to its dedicated callback.
599 DEFAULT.put("it-proper noun", new it_proper_noun());
/**
 * Callback for the "it-proper noun" function.
 * NOTE(review): what it emits could not be determined from the signature alone -
 * confirm against the method body before relying on it.
 */
601 static final class it_proper_noun implements FunctionCallback<EnParser> {
603 public boolean onWikiFunction(final WikiTokenizer wikiTokenizer, final String name, final List<String> args,
604 final Map<String, String> namedArgs,
605 final EnParser parser,
606 final AppendAndIndexWikiCallback<EnParser> appendAndIndexWikiCallback) {
611 // -----------------------------------------------------------------------
613 // -----------------------------------------------------------------------