1 // Copyright 2012 Google Inc. All Rights Reserved.
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
7 // http://www.apache.org/licenses/LICENSE-2.0
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
15 package com.hughes.android.dictionary.parser.wiktionary;
17 import java.util.Arrays;
18 import java.util.LinkedHashMap;
19 import java.util.LinkedHashSet;
20 import java.util.List;
23 import java.util.concurrent.atomic.AtomicInteger;
25 import com.hughes.android.dictionary.engine.EntryTypeName;
26 import com.hughes.android.dictionary.engine.IndexBuilder;
27 import com.hughes.android.dictionary.parser.WikiTokenizer;
28 import com.hughes.android.dictionary.parser.wiktionary.AbstractWiktionaryParser.AppendAndIndexWikiCallback;
29 import com.hughes.android.dictionary.parser.wiktionary.AbstractWiktionaryParser.NameAndArgs;
30 import com.hughes.util.ListUtil;
32 class EnFunctionCallbacks {
/** Callbacks for {@link EnParser}, keyed by wiki function (template) name, in registration order. */
static final Map<String,FunctionCallback<EnParser>> DEFAULT = new LinkedHashMap<String, FunctionCallback<EnParser>>();

/** Callbacks typed against {@link AbstractWiktionaryParser}, keyed by wiki function (template) name. */
static final Map<String,FunctionCallback<AbstractWiktionaryParser>> DEFAULT_GENERIC = new LinkedHashMap<String, FunctionCallback<AbstractWiktionaryParser>>();
static {
  // Only the plain {{t}} translation template is registered in the generic table.
  DEFAULT_GENERIC.put("t", new TranslationCallback<AbstractWiktionaryParser>());
}
43 FunctionCallback<EnParser> callback = new TranslationCallback<EnParser>();
44 DEFAULT.put("t", callback);
45 DEFAULT.put("t+", callback);
46 DEFAULT.put("t-", callback);
47 DEFAULT.put("tø", callback);
48 DEFAULT.put("apdx-t", callback);
50 callback = new EncodingCallback();
51 Set<String> encodings = new LinkedHashSet<String>(Arrays.asList(
53 "sd-Arab", "ku-Arab", "Arab", "unicode", "Laoo", "ur-Arab", "Thai",
54 "fa-Arab", "Khmr", "Cyrl", "IPAchar", "ug-Arab", "ko-inline",
55 "Jpan", "Kore", "Hebr", "rfscript", "Beng", "Mong", "Knda", "Cyrs",
56 "yue-tsj", "Mlym", "Tfng", "Grek", "yue-yue-j"));
57 for (final String encoding : encodings) {
58 DEFAULT.put(encoding, callback);
61 callback = new l_term();
62 DEFAULT.put("l", callback);
63 DEFAULT.put("term", callback);
65 callback = new Gender();
66 DEFAULT.put("m", callback);
67 DEFAULT.put("f", callback);
68 DEFAULT.put("n", callback);
69 DEFAULT.put("p", callback);
70 DEFAULT.put("g", callback);
72 callback = new AppendArg0();
74 callback = new Ignore();
75 DEFAULT.put("trreq", callback);
76 DEFAULT.put("t-image", callback);
77 DEFAULT.put("defn", callback);
78 DEFAULT.put("rfdef", callback);
79 DEFAULT.put("rfdate", callback);
80 DEFAULT.put("rfex", callback);
81 DEFAULT.put("rfquote", callback);
82 DEFAULT.put("attention", callback);
83 DEFAULT.put("zh-attention", callback);
86 callback = new FormOf();
87 DEFAULT.put("form of", callback);
88 DEFAULT.put("conjugation of", callback);
89 DEFAULT.put("participle of", callback);
90 DEFAULT.put("present participle of", callback);
91 DEFAULT.put("past participle of", callback);
92 DEFAULT.put("feminine past participle of", callback);
93 DEFAULT.put("gerund of", callback);
94 DEFAULT.put("feminine of", callback);
95 DEFAULT.put("plural of", callback);
96 DEFAULT.put("feminine plural of", callback);
97 DEFAULT.put("inflected form of", callback);
98 DEFAULT.put("alternative form of", callback);
99 DEFAULT.put("dated form of", callback);
100 DEFAULT.put("apocopic form of", callback);
102 callback = new InflOrHead();
103 DEFAULT.put("infl", callback);
104 DEFAULT.put("head", callback);
106 callback = new AppendName();
107 DEFAULT.put("...", callback);
109 DEFAULT.put("qualifier", new QualifierCallback<EnParser>());
110 DEFAULT.put("italbrac", new italbrac());
111 DEFAULT.put("gloss", new gloss());
112 DEFAULT.put("not used", new not_used());
113 DEFAULT.put("wikipedia", new wikipedia());
// Shared NameAndArgs callback. FormOf delegates to it (passing the resolved form name and
// the remaining args) between the "{" and "}" it appends; presumably it renders the
// name and arguments -- see AbstractWiktionaryParser.NameAndArgs to confirm.
static final NameAndArgs<EnParser> NAME_AND_ARGS = new NameAndArgs<EnParser>();
118 // ------------------------------------------------------------------
120 static final class TranslationCallback<T extends AbstractWiktionaryParser> implements FunctionCallback<T> {
122 public boolean onWikiFunction(final WikiTokenizer wikiTokenizer, final String name, final List<String> args,
123 final Map<String, String> namedArgs, final T parser,
124 final AppendAndIndexWikiCallback<T> appendAndIndexWikiCallback) {
126 final String transliteration = namedArgs.remove("tr");
127 final String alt = namedArgs.remove("alt");
128 namedArgs.keySet().removeAll(EnParser.USELESS_WIKI_ARGS);
129 if (args.size() < 2) {
130 if (!name.equals("ttbc")) {
131 EnParser.LOG.warning("{{t...}} with wrong args: title=" + parser.title + ", " + wikiTokenizer.token());
135 final String langCode = ListUtil.get(args, 0);
136 if (!appendAndIndexWikiCallback.langCodeToTCount.containsKey(langCode)) {
137 appendAndIndexWikiCallback.langCodeToTCount.put(langCode, new AtomicInteger());
139 appendAndIndexWikiCallback.langCodeToTCount.get(langCode).incrementAndGet();
140 final String word = ListUtil.get(args, 1);
141 appendAndIndexWikiCallback.dispatch(alt != null ? alt : word, EntryTypeName.WIKTIONARY_TITLE_MULTI);
144 if (args.size() > 2) {
145 appendAndIndexWikiCallback.builder.append(" {");
146 for (int i = 2; i < args.size(); ++i) {
148 appendAndIndexWikiCallback.builder.append("|");
150 appendAndIndexWikiCallback.builder.append(args.get(i));
152 appendAndIndexWikiCallback.builder.append("}");
155 if (transliteration != null) {
156 appendAndIndexWikiCallback.builder.append(" (");
157 appendAndIndexWikiCallback.dispatch(transliteration, EntryTypeName.WIKTIONARY_TRANSLITERATION);
158 appendAndIndexWikiCallback.builder.append(")");
162 // If alt wasn't null, we appended alt instead of the actual word
163 // we're filing under..
164 appendAndIndexWikiCallback.builder.append(" (");
165 appendAndIndexWikiCallback.dispatch(word, EntryTypeName.WIKTIONARY_TITLE_MULTI);
166 appendAndIndexWikiCallback.builder.append(")");
169 // Catch-all for anything else...
170 if (!namedArgs.isEmpty()) {
171 appendAndIndexWikiCallback.builder.append(" {");
172 EnParser.appendNamedArgs(namedArgs, appendAndIndexWikiCallback);
173 appendAndIndexWikiCallback.builder.append("}");
180 // ------------------------------------------------------------------
182 static final class QualifierCallback<T extends AbstractWiktionaryParser> implements FunctionCallback<T> {
184 public boolean onWikiFunction(final WikiTokenizer wikiTokenizer, final String name, final List<String> args,
185 final Map<String, String> namedArgs,
187 final AppendAndIndexWikiCallback<T> appendAndIndexWikiCallback) {
188 if (args.size() != 1 || !namedArgs.isEmpty()) {
189 EnParser.LOG.warning("weird qualifier: ");
192 String qualifier = args.get(0);
193 appendAndIndexWikiCallback.builder.append("(");
194 appendAndIndexWikiCallback.dispatch(qualifier, null);
195 appendAndIndexWikiCallback.builder.append(")");
200 // ------------------------------------------------------------------
202 static final class EncodingCallback<T extends AbstractWiktionaryParser> implements FunctionCallback<T> {
204 public boolean onWikiFunction(final WikiTokenizer wikiTokenizer, final String name, final List<String> args,
205 final Map<String, String> namedArgs,
207 final AppendAndIndexWikiCallback<T> appendAndIndexWikiCallback) {
208 if (!namedArgs.isEmpty()) {
209 EnParser.LOG.warning("weird encoding: " + wikiTokenizer.token());
211 if (args.size() == 0) {
212 // Things like "{{Jpan}}" exist.
216 for (int i = 0; i < args.size(); ++i) {
218 appendAndIndexWikiCallback.builder.append(", ");
220 final String arg = args.get(i);
221 // if (arg.equals(parser.title)) {
222 // parser.titleAppended = true;
224 appendAndIndexWikiCallback.dispatch(arg, appendAndIndexWikiCallback.entryTypeName);
231 // ------------------------------------------------------------------
233 static final class Gender<T extends AbstractWiktionaryParser> implements FunctionCallback<T> {
235 public boolean onWikiFunction(final WikiTokenizer wikiTokenizer, final String name, final List<String> args,
236 final Map<String, String> namedArgs,
238 final AppendAndIndexWikiCallback<T> appendAndIndexWikiCallback) {
239 if (!namedArgs.isEmpty()) {
242 appendAndIndexWikiCallback.builder.append("{");
243 appendAndIndexWikiCallback.builder.append(name);
244 for (int i = 0; i < args.size(); ++i) {
245 appendAndIndexWikiCallback.builder.append("|").append(args.get(i));
247 appendAndIndexWikiCallback.builder.append("}");
252 // ------------------------------------------------------------------
254 static final class l_term implements FunctionCallback<EnParser> {
256 public boolean onWikiFunction(final WikiTokenizer wikiTokenizer, final String name, final List<String> args,
257 final Map<String, String> namedArgs,
258 final EnParser parser,
259 final AppendAndIndexWikiCallback<EnParser> appendAndIndexWikiCallback) {
261 // for {{l}}, lang is arg 0, but not for {{term}}
262 if (name.equals("term")) {
266 final EntryTypeName entryTypeName;
267 switch (parser.state) {
268 case TRANSLATION_LINE: entryTypeName = EntryTypeName.WIKTIONARY_TRANSLATION_OTHER_TEXT; break;
269 case ENGLISH_DEF_OF_FOREIGN: entryTypeName = EntryTypeName.WIKTIONARY_ENGLISH_DEF_WIKI_LINK; break;
270 default: throw new IllegalStateException("Invalid enum value: " + parser.state);
273 final String langCode = args.get(0);
274 final IndexBuilder indexBuilder;
275 if ("".equals(langCode)) {
276 indexBuilder = parser.foreignIndexBuilder;
277 } else if ("en".equals(langCode)) {
278 indexBuilder = parser.enIndexBuilder;
280 indexBuilder = parser.foreignIndexBuilder;
283 String displayText = ListUtil.get(args, 2, "");
284 if (displayText.equals("")) {
285 displayText = ListUtil.get(args, 1, null);
288 if (displayText != null) {
289 appendAndIndexWikiCallback.dispatch(displayText, indexBuilder, entryTypeName);
291 EnParser.LOG.warning("no display text: " + wikiTokenizer.token());
294 final String tr = namedArgs.remove("tr");
296 appendAndIndexWikiCallback.builder.append(" (");
297 appendAndIndexWikiCallback.dispatch(tr, indexBuilder, EntryTypeName.WIKTIONARY_TRANSLITERATION);
298 appendAndIndexWikiCallback.builder.append(")");
301 final String gloss = ListUtil.get(args, 3, "");
302 if (!gloss.equals("")) {
303 appendAndIndexWikiCallback.builder.append(" (");
304 appendAndIndexWikiCallback.dispatch(gloss, parser.enIndexBuilder, EntryTypeName.WIKTIONARY_ENGLISH_DEF);
305 appendAndIndexWikiCallback.builder.append(")");
308 namedArgs.keySet().removeAll(EnParser.USELESS_WIKI_ARGS);
309 if (!namedArgs.isEmpty()) {
310 appendAndIndexWikiCallback.builder.append(" {").append(name);
311 EnParser.appendNamedArgs(namedArgs, appendAndIndexWikiCallback);
312 appendAndIndexWikiCallback.builder.append("}");
319 // ------------------------------------------------------------------
321 static final class AppendArg0<T extends AbstractWiktionaryParser> implements FunctionCallback<EnParser> {
323 public boolean onWikiFunction(final WikiTokenizer wikiTokenizer, final String name, final List<String> args,
324 final Map<String, String> namedArgs,
325 final EnParser parser,
326 final AppendAndIndexWikiCallback<EnParser> appendAndIndexWikiCallback) {
327 if (args.size() != 1 || !namedArgs.isEmpty()) {
330 appendAndIndexWikiCallback.dispatch(args.get(0), EntryTypeName.WIKTIONARY_TRANSLATION_OTHER_TEXT);
332 final String tr = namedArgs.remove("tr");
334 appendAndIndexWikiCallback.builder.append(" (");
335 appendAndIndexWikiCallback.dispatch(tr, EntryTypeName.WIKTIONARY_TRANSLATION_OTHER_TEXT);
336 appendAndIndexWikiCallback.builder.append(")");
337 parser.wordForms.add(tr);
344 // ------------------------------------------------------------------
346 static final class italbrac<T extends AbstractWiktionaryParser> implements FunctionCallback<T> {
348 public boolean onWikiFunction(final WikiTokenizer wikiTokenizer, final String name, final List<String> args,
349 final Map<String, String> namedArgs,
351 final AppendAndIndexWikiCallback<T> appendAndIndexWikiCallback) {
352 if (args.size() != 1 || !namedArgs.isEmpty()) {
355 appendAndIndexWikiCallback.builder.append("(");
356 appendAndIndexWikiCallback.dispatch(args.get(0), EntryTypeName.WIKTIONARY_TRANSLATION_OTHER_TEXT);
357 appendAndIndexWikiCallback.builder.append(")");
362 // ------------------------------------------------------------------
364 static final class gloss<T extends AbstractWiktionaryParser> implements FunctionCallback<T> {
366 public boolean onWikiFunction(final WikiTokenizer wikiTokenizer, final String name, final List<String> args,
367 final Map<String, String> namedArgs,
369 final AppendAndIndexWikiCallback<T> appendAndIndexWikiCallback) {
370 if (args.size() != 1 || !namedArgs.isEmpty()) {
373 appendAndIndexWikiCallback.builder.append("(");
374 appendAndIndexWikiCallback.dispatch(args.get(0), EntryTypeName.WIKTIONARY_TRANSLATION_OTHER_TEXT);
375 appendAndIndexWikiCallback.builder.append(")");
380 // ------------------------------------------------------------------
382 static final class Ignore<T extends AbstractWiktionaryParser> implements FunctionCallback<T> {
384 public boolean onWikiFunction(final WikiTokenizer wikiTokenizer, final String name, final List<String> args,
385 final Map<String, String> namedArgs,
387 final AppendAndIndexWikiCallback<T> appendAndIndexWikiCallback) {
392 // ------------------------------------------------------------------
394 static final class not_used<T extends AbstractWiktionaryParser> implements FunctionCallback<T> {
396 public boolean onWikiFunction(final WikiTokenizer wikiTokenizer, final String name, final List<String> args,
397 final Map<String, String> namedArgs,
399 final AppendAndIndexWikiCallback<T> appendAndIndexWikiCallback) {
400 appendAndIndexWikiCallback.builder.append("(not used)");
406 // ------------------------------------------------------------------
408 static final class AppendName<T extends AbstractWiktionaryParser> implements FunctionCallback<T> {
410 public boolean onWikiFunction(final WikiTokenizer wikiTokenizer, final String name, final List<String> args,
411 final Map<String, String> namedArgs,
413 final AppendAndIndexWikiCallback<T> appendAndIndexWikiCallback) {
414 if (!args.isEmpty() || !namedArgs.isEmpty()) {
417 appendAndIndexWikiCallback.builder.append(name);
422 // --------------------------------------------------------------------
423 // --------------------------------------------------------------------
426 static final class FormOf implements FunctionCallback<EnParser> {
428 public boolean onWikiFunction(final WikiTokenizer wikiTokenizer, final String name, final List<String> args,
429 final Map<String, String> namedArgs,
430 final EnParser parser,
431 final AppendAndIndexWikiCallback<EnParser> appendAndIndexWikiCallback) {
432 parser.entryIsFormOfSomething = true;
433 String formName = name;
434 if (name.equals("form of")) {
435 formName = ListUtil.remove(args, 0, null);
437 if (formName == null) {
438 EnParser.LOG.warning("Missing form name: " + parser.title);
439 formName = "form of";
441 String baseForm = ListUtil.get(args, 1, "");
442 if ("".equals(baseForm)) {
443 baseForm = ListUtil.get(args, 0, null);
444 ListUtil.remove(args, 1, "");
446 ListUtil.remove(args, 0, null);
448 namedArgs.keySet().removeAll(EnParser.USELESS_WIKI_ARGS);
450 appendAndIndexWikiCallback.builder.append("{");
451 NAME_AND_ARGS.onWikiFunction(wikiTokenizer, formName, args, namedArgs, parser, appendAndIndexWikiCallback);
452 appendAndIndexWikiCallback.builder.append("}");
453 if (baseForm != null && appendAndIndexWikiCallback.indexedEntry != null) {
454 parser.foreignIndexBuilder.addEntryWithString(appendAndIndexWikiCallback.indexedEntry, baseForm, EntryTypeName.WIKTIONARY_BASE_FORM_MULTI);
456 // null baseForm happens in Danish.
457 EnParser.LOG.warning("Null baseform: " + parser.title);
// Shared FormOf instance; its users are not visible in this part of the file.
static final EnFunctionCallbacks.FormOf FORM_OF = new FormOf();
466 // --------------------------------------------------------------------
467 // --------------------------------------------------------------------
469 static final class wikipedia<T extends AbstractWiktionaryParser> implements FunctionCallback<T> {
471 public boolean onWikiFunction(final WikiTokenizer wikiTokenizer, final String name, final List<String> args,
472 final Map<String, String> namedArgs,
474 final AppendAndIndexWikiCallback<T> appendAndIndexWikiCallback) {
475 namedArgs.remove("lang");
476 if (args.size() > 1 || !namedArgs.isEmpty()) {
479 } else if (args.size() == 1) {
487 static final class InflOrHead implements FunctionCallback<EnParser> {
489 public boolean onWikiFunction(final WikiTokenizer wikiTokenizer, final String name, final List<String> args,
490 final Map<String, String> namedArgs,
491 final EnParser parser,
492 final AppendAndIndexWikiCallback<EnParser> appendAndIndexWikiCallback) {
493 // See: http://en.wiktionary.org/wiki/Template:infl
494 // TODO: Actually these functions should start a new WordPOS:
495 // See: http://en.wiktionary.org/wiki/quattro
496 final String langCode = ListUtil.get(args, 0);
497 String head = namedArgs.remove("head");
499 head = namedArgs.remove("title"); // Bug
505 namedArgs.keySet().removeAll(EnParser.USELESS_WIKI_ARGS);
507 final String tr = namedArgs.remove("tr");
508 String g = namedArgs.remove("g");
510 g = namedArgs.remove("gender");
512 final String g2 = namedArgs.remove("g2");
513 final String g3 = namedArgs.remove("g3");
515 // We might have already taken care of this in a generic way...
516 if (!parser.titleAppended) {
517 appendAndIndexWikiCallback.dispatch(head, EntryTypeName.WIKTIONARY_TITLE_MULTI);
518 parser.titleAppended = true;
522 appendAndIndexWikiCallback.builder.append(" {").append(g);
524 appendAndIndexWikiCallback.builder.append("|").append(g2);
527 appendAndIndexWikiCallback.builder.append("|").append(g3);
529 appendAndIndexWikiCallback.builder.append("}");
533 appendAndIndexWikiCallback.builder.append(" (");
534 appendAndIndexWikiCallback.dispatch(tr, EntryTypeName.WIKTIONARY_TITLE_MULTI);
535 appendAndIndexWikiCallback.builder.append(")");
536 parser.wordForms.add(tr);
539 final String pos = ListUtil.get(args, 1);
541 appendAndIndexWikiCallback.builder.append(" (").append(pos).append(")");
543 for (int i = 2; i < args.size(); i += 2) {
544 final String inflName = ListUtil.get(args, i);
545 final String inflValue = ListUtil.get(args, i + 1);
546 appendAndIndexWikiCallback.builder.append(", ");
547 appendAndIndexWikiCallback.dispatch(inflName, null, null);
548 if (inflValue != null && inflValue.length() > 0) {
549 appendAndIndexWikiCallback.builder.append(": ");
550 appendAndIndexWikiCallback.dispatch(inflValue, null, null);
551 parser.wordForms.add(inflValue);
554 for (final String key : namedArgs.keySet()) {
555 final String value = WikiTokenizer.toPlainText(namedArgs.get(key));
556 appendAndIndexWikiCallback.builder.append(" ");
557 appendAndIndexWikiCallback.dispatch(key, null, null);
558 appendAndIndexWikiCallback.builder.append("=");
559 appendAndIndexWikiCallback.dispatch(value, null, null);
560 parser.wordForms.add(value);
568 DEFAULT.put("it-noun", new it_noun());
570 static final class it_noun implements FunctionCallback<EnParser> {
572 public boolean onWikiFunction(final WikiTokenizer wikiTokenizer, final String name, final List<String> args,
573 final Map<String, String> namedArgs,
574 final EnParser parser,
575 final AppendAndIndexWikiCallback<EnParser> appendAndIndexWikiCallback) {
576 parser.titleAppended = true;
577 final String base = ListUtil.get(args, 0);
578 final String gender = ListUtil.get(args, 1);
579 final String singular = base + ListUtil.get(args, 2, null);
580 final String plural = base + ListUtil.get(args, 3, null);
581 appendAndIndexWikiCallback.builder.append(" ");
582 appendAndIndexWikiCallback.dispatch(singular, null, null);
583 appendAndIndexWikiCallback.builder.append(" {").append(gender).append("}, ");
584 appendAndIndexWikiCallback.dispatch(plural, null, null);
585 appendAndIndexWikiCallback.builder.append(" {pl}");
586 parser.wordForms.add(singular);
587 parser.wordForms.add(plural);
588 if (!namedArgs.isEmpty() || args.size() > 4) {
589 EnParser.LOG.warning("Invalid it-noun: " + wikiTokenizer.token());
596 DEFAULT.put("it-proper noun", new it_proper_noun());
598 static final class it_proper_noun implements FunctionCallback<EnParser> {
600 public boolean onWikiFunction(final WikiTokenizer wikiTokenizer, final String name, final List<String> args,
601 final Map<String, String> namedArgs,
602 final EnParser parser,
603 final AppendAndIndexWikiCallback<EnParser> appendAndIndexWikiCallback) {