1 // Copyright 2012 Google Inc. All Rights Reserved.
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
7 // http://www.apache.org/licenses/LICENSE-2.0
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
15 package com.hughes.android.dictionary.parser.wiktionary;
17 import java.util.Arrays;
18 import java.util.LinkedHashMap;
19 import java.util.LinkedHashSet;
20 import java.util.List;
23 import java.util.concurrent.atomic.AtomicInteger;
25 import com.hughes.android.dictionary.engine.EntryTypeName;
26 import com.hughes.android.dictionary.engine.IndexBuilder;
27 import com.hughes.android.dictionary.parser.WikiTokenizer;
28 import com.hughes.android.dictionary.parser.wiktionary.AbstractWiktionaryParser.AppendAndIndexWikiCallback;
29 import com.hughes.android.dictionary.parser.wiktionary.AbstractWiktionaryParser.NameAndArgs;
30 import com.hughes.util.ListUtil;
32 class EnFunctionCallbacks {
/**
 * Registry of wiki template name -> callback used with EnParser.
 * Backed by a LinkedHashMap, so iteration follows registration order.
 * A single callback instance is often registered under several template names.
 */
34 static final Map<String,FunctionCallback<EnParser>> DEFAULT = new LinkedHashMap<String, FunctionCallback<EnParser>>();
// Translation templates: every name below shares one TranslationCallback instance.
37 FunctionCallback<EnParser> callback = new TranslationCallback();
38 DEFAULT.put("t", callback);
39 DEFAULT.put("t+", callback);
40 DEFAULT.put("t-", callback);
41 DEFAULT.put("tø", callback);
42 DEFAULT.put("apdx-t", callback);
// Script/encoding templates: every name in the set below maps to one EncodingCallback instance.
// The set is only iterated here to populate DEFAULT.
44 callback = new EncodingCallback();
45 Set<String> encodings = new LinkedHashSet<String>(Arrays.asList(
47 "sd-Arab", "ku-Arab", "Arab", "unicode", "Laoo", "ur-Arab", "Thai",
48 "fa-Arab", "Khmr", "Cyrl", "IPAchar", "ug-Arab", "ko-inline",
49 "Jpan", "Kore", "Hebr", "rfscript", "Beng", "Mong", "Knda", "Cyrs",
50 "yue-tsj", "Mlym", "Tfng", "Grek", "yue-yue-j"));
51 for (final String encoding : encodings) {
52 DEFAULT.put(encoding, callback);
// Link templates "l" and "term" share one l_term instance.
55 callback = new l_term();
56 DEFAULT.put("l", callback);
57 DEFAULT.put("term", callback);
// Gender marker templates share one Gender instance.
59 callback = new Gender();
60 DEFAULT.put("m", callback);
61 DEFAULT.put("f", callback);
62 DEFAULT.put("n", callback);
63 DEFAULT.put("p", callback);
64 DEFAULT.put("g", callback);
// NOTE(review): no DEFAULT.put(...) for this AppendArg0 instance is visible before
// "callback" is reassigned on the next line (embedded line 67 is absent from this
// view) - confirm whether AppendArg0 is registered under any template name.
66 callback = new AppendArg0();
// Templates handled by the Ignore callback.
68 callback = new Ignore();
69 DEFAULT.put("trreq", callback);
70 DEFAULT.put("t-image", callback);
71 DEFAULT.put("defn", callback);
72 DEFAULT.put("rfdef", callback);
73 DEFAULT.put("rfdate", callback);
74 DEFAULT.put("rfex", callback);
75 DEFAULT.put("rfquote", callback);
76 DEFAULT.put("attention", callback);
77 DEFAULT.put("zh-attention", callback);
// "... of" templates share one FormOf instance; FormOf special-cases the name "form of".
80 callback = new FormOf();
81 DEFAULT.put("form of", callback);
82 DEFAULT.put("conjugation of", callback);
83 DEFAULT.put("participle of", callback);
84 DEFAULT.put("present participle of", callback);
85 DEFAULT.put("past participle of", callback);
86 DEFAULT.put("feminine past participle of", callback);
87 DEFAULT.put("gerund of", callback);
88 DEFAULT.put("feminine of", callback);
89 DEFAULT.put("plural of", callback);
90 DEFAULT.put("feminine plural of", callback);
91 DEFAULT.put("inflected form of", callback);
92 DEFAULT.put("alternative form of", callback);
93 DEFAULT.put("dated form of", callback);
94 DEFAULT.put("apocopic form of", callback);
// "infl" and "head" share one InflOrHead instance.
96 callback = new InflOrHead();
97 DEFAULT.put("infl", callback);
98 DEFAULT.put("head", callback);
// The literal template name "..." is handled by AppendName.
100 callback = new AppendName();
101 DEFAULT.put("...", callback);
// Templates that each get their own dedicated callback instance.
103 DEFAULT.put("qualifier", new QualifierCallback());
104 DEFAULT.put("italbrac", new italbrac());
105 DEFAULT.put("gloss", new gloss());
106 DEFAULT.put("not used", new not_used());
107 DEFAULT.put("wikipedia", new wikipedia());
// Shared NameAndArgs callback instance. FormOf.onWikiFunction invokes it between the
// "{" and "}" it appends to the builder.
110 static final NameAndArgs<EnParser> NAME_AND_ARGS = new NameAndArgs<EnParser>();
112 // ------------------------------------------------------------------
/**
 * Callback for the translation templates registered in DEFAULT under "t", "t+", "t-",
 * "tø" and "apdx-t".
 *
 * Positional arg 0 is read as the language code and arg 1 as the word. Positional args
 * from index 2 on are appended verbatim inside " {...}" with "|" delimiters. The named
 * args "tr" and "alt" are removed and handled explicitly. Named args left over after
 * dropping EnParser.USELESS_WIKI_ARGS are appended through EnParser.appendNamedArgs.
 *
 * NOTE(review): the embedded line numbers skip several lines inside this method (for
 * example 125-126, 139, 151-153), so guard conditions, block ends and return statements
 * are not all visible here - confirm against the complete file.
 */
114 static final class TranslationCallback implements FunctionCallback<EnParser> {
116 public boolean onWikiFunction(final WikiTokenizer wikiTokenizer, final String name, final List<String> args,
117 final Map<String, String> namedArgs, final EnParser parser,
118 final AppendAndIndexWikiCallback<EnParser> appendAndIndexWikiCallback) {
// "tr" and "alt" are pulled out first so they never reach the catch-all at the end.
120 final String transliteration = namedArgs.remove("tr");
121 final String alt = namedArgs.remove("alt");
122 namedArgs.keySet().removeAll(EnParser.USELESS_WIKI_ARGS);
// Fewer than two positional args is logged together with the page title.
// NOTE(review): the end of this guard is not visible - presumably an early exit; confirm.
123 if (args.size() < 2) {
124 EnParser.LOG.warning("{{t...}} with wrong args: title=" + parser.title);
// Per-language usage counter: create the AtomicInteger on first sight, then increment it.
127 final String langCode = ListUtil.get(args, 0);
128 if (!appendAndIndexWikiCallback.langCodeToTCount.containsKey(langCode)) {
129 appendAndIndexWikiCallback.langCodeToTCount.put(langCode, new AtomicInteger());
131 appendAndIndexWikiCallback.langCodeToTCount.get(langCode).incrementAndGet();
// The displayed text is "alt" when present, otherwise the word itself.
132 final String word = ListUtil.get(args, 1);
133 appendAndIndexWikiCallback.dispatch(alt != null ? alt : word, EntryTypeName.WIKTIONARY_TITLE_MULTI);
// Extra positional args (index 2 and up) are appended raw, not dispatched.
136 if (args.size() > 2) {
137 appendAndIndexWikiCallback.builder.append(" {");
138 for (int i = 2; i < args.size(); ++i) {
// NOTE(review): the condition guarding this delimiter (embedded line 139) is not visible.
140 appendAndIndexWikiCallback.builder.append("|");
142 appendAndIndexWikiCallback.builder.append(args.get(i));
144 appendAndIndexWikiCallback.builder.append("}");
// Transliteration, when given, is dispatched inside parentheses with its own entry type.
147 if (transliteration != null) {
148 appendAndIndexWikiCallback.builder.append(" (");
149 appendAndIndexWikiCallback.dispatch(transliteration, EntryTypeName.WIKTIONARY_TRANSLITERATION);
150 appendAndIndexWikiCallback.builder.append(")");
// NOTE(review): the guard for the block below (embedded lines 151-153) is not visible;
// per the comment that follows it presumably tests alt != null - confirm.
154 // If alt wasn't null, we appended alt instead of the actual word
155 // we're filing under..
156 appendAndIndexWikiCallback.builder.append(" (");
157 appendAndIndexWikiCallback.dispatch(word, EntryTypeName.WIKTIONARY_TITLE_MULTI);
158 appendAndIndexWikiCallback.builder.append(")");
161 // Catch-all for anything else...
162 if (!namedArgs.isEmpty()) {
163 appendAndIndexWikiCallback.builder.append(" {");
164 EnParser.appendNamedArgs(namedArgs, appendAndIndexWikiCallback);
165 appendAndIndexWikiCallback.builder.append("}");
172 // ------------------------------------------------------------------
/**
 * Callback registered in DEFAULT under "qualifier". Dispatches positional arg 0 wrapped
 * in parentheses, passing null as the entry type.
 */
174 static final class QualifierCallback implements FunctionCallback<EnParser> {
176 public boolean onWikiFunction(final WikiTokenizer wikiTokenizer, final String name, final List<String> args,
177 final Map<String, String> namedArgs,
178 final EnParser parser,
179 final AppendAndIndexWikiCallback<EnParser> appendAndIndexWikiCallback) {
// Anything other than exactly one positional arg and no named args is logged.
// NOTE(review): the message ends at "weird qualifier: " with nothing appended, so the log
// carries no context. The end of this guard (embedded lines 182-183) is not visible -
// confirm it exits before args.get(0) below, which would throw on an empty list.
180 if (args.size() != 1 || !namedArgs.isEmpty()) {
181 EnParser.LOG.warning("weird qualifier: ");
184 String qualifier = args.get(0);
185 appendAndIndexWikiCallback.builder.append("(");
186 appendAndIndexWikiCallback.dispatch(qualifier, null);
187 appendAndIndexWikiCallback.builder.append(")");
192 // ------------------------------------------------------------------
/**
 * Callback shared by the script/encoding template names registered in DEFAULT
 * (for example "Arab", "Cyrl", "Jpan"). Each positional arg is dispatched with the entry
 * type currently held by appendAndIndexWikiCallback.entryTypeName. Named args are not
 * expected and trigger a warning that includes the current token.
 */
194 static final class EncodingCallback implements FunctionCallback<EnParser> {
196 public boolean onWikiFunction(final WikiTokenizer wikiTokenizer, final String name, final List<String> args,
197 final Map<String, String> namedArgs,
198 final EnParser parser,
199 final AppendAndIndexWikiCallback<EnParser> appendAndIndexWikiCallback) {
200 if (!namedArgs.isEmpty()) {
201 EnParser.LOG.warning("weird encoding: " + wikiTokenizer.token());
// NOTE(review): what happens for the zero-arg case is not visible (embedded lines 205-207).
203 if (args.size() == 0) {
204 // Things like "{{Jpan}}" exist.
208 for (int i = 0; i < args.size(); ++i) {
// NOTE(review): the condition guarding this separator (embedded line 209) is not visible.
210 appendAndIndexWikiCallback.builder.append(", ");
212 final String arg = args.get(i);
213 // if (arg.equals(parser.title)) {
214 // parser.titleAppended = true;
216 appendAndIndexWikiCallback.dispatch(arg, appendAndIndexWikiCallback.entryTypeName);
223 // ------------------------------------------------------------------
/**
 * Callback registered in DEFAULT under "m", "f", "n", "p" and "g". Appends the template
 * name followed by each positional arg prefixed with "|", all wrapped in "{" and "}".
 * The text is appended to the builder directly and is not dispatched.
 */
225 static final class Gender implements FunctionCallback<EnParser> {
227 public boolean onWikiFunction(final WikiTokenizer wikiTokenizer, final String name, final List<String> args,
228 final Map<String, String> namedArgs,
229 final EnParser parser,
230 final AppendAndIndexWikiCallback<EnParser> appendAndIndexWikiCallback) {
// NOTE(review): the body of this named-args guard (embedded lines 232-233) is not visible.
231 if (!namedArgs.isEmpty()) {
234 appendAndIndexWikiCallback.builder.append("{");
235 appendAndIndexWikiCallback.builder.append(name);
236 for (int i = 0; i < args.size(); ++i) {
237 appendAndIndexWikiCallback.builder.append("|").append(args.get(i));
239 appendAndIndexWikiCallback.builder.append("}");
244 // ------------------------------------------------------------------
/**
 * Callback registered in DEFAULT under "l" and "term".
 *
 * Visible behaviour: chooses an entry type from parser.state, chooses an index builder
 * from the language code in positional arg 0, dispatches the display text (arg 2, or
 * arg 1 when arg 2 is empty), then appends the "tr" named arg and the gloss (arg 3) in
 * parentheses, and finally any named args left after dropping EnParser.USELESS_WIKI_ARGS.
 *
 * Throws IllegalStateException when parser.state is neither TRANSLATION_LINE nor
 * ENGLISH_DEF_OF_FOREIGN.
 */
246 static final class l_term implements FunctionCallback<EnParser> {
248 public boolean onWikiFunction(final WikiTokenizer wikiTokenizer, final String name, final List<String> args,
249 final Map<String, String> namedArgs,
250 final EnParser parser,
251 final AppendAndIndexWikiCallback<EnParser> appendAndIndexWikiCallback) {
253 // for {{l}}, lang is arg 0, but not for {{term}}
// NOTE(review): the body of this branch (embedded lines 255-257) is not visible; it
// presumably adjusts args so that index 0 can be read as the language below - confirm.
254 if (name.equals("term")) {
258 final EntryTypeName entryTypeName;
259 switch (parser.state) {
260 case TRANSLATION_LINE: entryTypeName = EntryTypeName.WIKTIONARY_TRANSLATION_OTHER_TEXT; break;
261 case ENGLISH_DEF_OF_FOREIGN: entryTypeName = EntryTypeName.WIKTIONARY_ENGLISH_DEF_WIKI_LINK; break;
262 default: throw new IllegalStateException("Invalid enum value: " + parser.state);
// Only "en" selects the English index; both the empty code and every other code go to
// the foreign index.
265 final String langCode = args.get(0);
266 final IndexBuilder indexBuilder;
267 if ("".equals(langCode)) {
268 indexBuilder = parser.foreignIndexBuilder;
269 } else if ("en".equals(langCode)) {
270 indexBuilder = parser.enIndexBuilder;
272 indexBuilder = parser.foreignIndexBuilder;
// Prefer arg 2 as display text; when it is empty fall back to arg 1 (null default).
275 String displayText = ListUtil.get(args, 2, "");
276 if (displayText.equals("")) {
277 displayText = ListUtil.get(args, 1, null);
280 if (displayText != null) {
281 appendAndIndexWikiCallback.dispatch(displayText, indexBuilder, entryTypeName);
283 EnParser.LOG.warning("no display text: " + wikiTokenizer.token());
// Transliteration is dispatched in parentheses against the same index builder.
// NOTE(review): the guard for this block (embedded line 287) is not visible.
286 final String tr = namedArgs.remove("tr");
288 appendAndIndexWikiCallback.builder.append(" (");
289 appendAndIndexWikiCallback.dispatch(tr, indexBuilder, EntryTypeName.WIKTIONARY_TRANSLITERATION);
290 appendAndIndexWikiCallback.builder.append(")");
// The gloss is always indexed in the English index.
293 final String gloss = ListUtil.get(args, 3, "");
294 if (!gloss.equals("")) {
295 appendAndIndexWikiCallback.builder.append(" (");
296 appendAndIndexWikiCallback.dispatch(gloss, parser.enIndexBuilder, EntryTypeName.WIKTIONARY_ENGLISH_DEF);
297 appendAndIndexWikiCallback.builder.append(")");
// Leftover named args are appended after the template name inside " {...}".
300 namedArgs.keySet().removeAll(EnParser.USELESS_WIKI_ARGS);
301 if (!namedArgs.isEmpty()) {
302 appendAndIndexWikiCallback.builder.append(" {").append(name);
303 EnParser.appendNamedArgs(namedArgs, appendAndIndexWikiCallback);
304 appendAndIndexWikiCallback.builder.append("}");
311 // ------------------------------------------------------------------
/**
 * Callback that dispatches positional arg 0 as WIKTIONARY_TRANSLATION_OTHER_TEXT, then
 * dispatches the "tr" named arg in parentheses with the same entry type and records it
 * in parser.wordForms.
 */
313 static final class AppendArg0 implements FunctionCallback<EnParser> {
315 public boolean onWikiFunction(final WikiTokenizer wikiTokenizer, final String name, final List<String> args,
316 final Map<String, String> namedArgs,
317 final EnParser parser,
318 final AppendAndIndexWikiCallback<EnParser> appendAndIndexWikiCallback) {
// NOTE(review): the body of this guard (embedded lines 320-321) is not visible. If it
// exits whenever namedArgs is non-empty, the "tr" handling below can never see a value -
// confirm.
319 if (args.size() != 1 || !namedArgs.isEmpty()) {
322 appendAndIndexWikiCallback.dispatch(args.get(0), EntryTypeName.WIKTIONARY_TRANSLATION_OTHER_TEXT);
// NOTE(review): the guard for the block below (embedded line 325) is not visible.
324 final String tr = namedArgs.remove("tr");
326 appendAndIndexWikiCallback.builder.append(" (");
327 appendAndIndexWikiCallback.dispatch(tr, EntryTypeName.WIKTIONARY_TRANSLATION_OTHER_TEXT);
328 appendAndIndexWikiCallback.builder.append(")");
329 parser.wordForms.add(tr);
336 // ------------------------------------------------------------------
/**
 * Callback registered in DEFAULT under "italbrac". Dispatches positional arg 0 as
 * WIKTIONARY_TRANSLATION_OTHER_TEXT wrapped in parentheses.
 */
338 static final class italbrac implements FunctionCallback<EnParser> {
340 public boolean onWikiFunction(final WikiTokenizer wikiTokenizer, final String name, final List<String> args,
341 final Map<String, String> namedArgs,
342 final EnParser parser,
343 final AppendAndIndexWikiCallback<EnParser> appendAndIndexWikiCallback) {
// Expects exactly one positional arg and no named args.
// NOTE(review): the body of this guard (embedded lines 345-346) is not visible.
344 if (args.size() != 1 || !namedArgs.isEmpty()) {
347 appendAndIndexWikiCallback.builder.append("(");
348 appendAndIndexWikiCallback.dispatch(args.get(0), EntryTypeName.WIKTIONARY_TRANSLATION_OTHER_TEXT);
349 appendAndIndexWikiCallback.builder.append(")");
354 // ------------------------------------------------------------------
/**
 * Callback registered in DEFAULT under "gloss". Dispatches positional arg 0 as
 * WIKTIONARY_TRANSLATION_OTHER_TEXT wrapped in parentheses. The visible statements are
 * the same as those of italbrac.
 */
356 static final class gloss implements FunctionCallback<EnParser> {
358 public boolean onWikiFunction(final WikiTokenizer wikiTokenizer, final String name, final List<String> args,
359 final Map<String, String> namedArgs,
360 final EnParser parser,
361 final AppendAndIndexWikiCallback<EnParser> appendAndIndexWikiCallback) {
// Expects exactly one positional arg and no named args.
// NOTE(review): the body of this guard (embedded lines 363-364) is not visible.
362 if (args.size() != 1 || !namedArgs.isEmpty()) {
365 appendAndIndexWikiCallback.builder.append("(");
366 appendAndIndexWikiCallback.dispatch(args.get(0), EntryTypeName.WIKTIONARY_TRANSLATION_OTHER_TEXT);
367 appendAndIndexWikiCallback.builder.append(")");
372 // ------------------------------------------------------------------
/**
 * Callback registered in DEFAULT under "trreq", "t-image", "defn", "rfdef", "rfdate",
 * "rfex", "rfquote", "attention" and "zh-attention".
 *
 * NOTE(review): the method body (embedded lines 380-383) is not visible in this view;
 * judging by the class name it presumably produces no output - confirm.
 */
374 static final class Ignore implements FunctionCallback<EnParser> {
376 public boolean onWikiFunction(final WikiTokenizer wikiTokenizer, final String name, final List<String> args,
377 final Map<String, String> namedArgs,
378 final EnParser parser,
379 final AppendAndIndexWikiCallback<EnParser> appendAndIndexWikiCallback) {
384 // ------------------------------------------------------------------
/**
 * Callback registered in DEFAULT under "not used". Appends the literal text
 * "(not used)" to the builder; no argument is read in the visible code.
 */
386 static final class not_used implements FunctionCallback<EnParser> {
388 public boolean onWikiFunction(final WikiTokenizer wikiTokenizer, final String name, final List<String> args,
389 final Map<String, String> namedArgs,
390 final EnParser parser,
391 final AppendAndIndexWikiCallback<EnParser> appendAndIndexWikiCallback) {
392 appendAndIndexWikiCallback.builder.append("(not used)");
398 // ------------------------------------------------------------------
/**
 * Callback registered in DEFAULT under "...". Appends the template name itself to the
 * builder without dispatching it.
 */
400 static final class AppendName implements FunctionCallback<EnParser> {
402 public boolean onWikiFunction(final WikiTokenizer wikiTokenizer, final String name, final List<String> args,
403 final Map<String, String> namedArgs,
404 final EnParser parser,
405 final AppendAndIndexWikiCallback<EnParser> appendAndIndexWikiCallback) {
// The template is expected to carry no args at all.
// NOTE(review): the body of this guard (embedded lines 407-408) is not visible.
406 if (!args.isEmpty() || !namedArgs.isEmpty()) {
409 appendAndIndexWikiCallback.builder.append(name);
414 // --------------------------------------------------------------------
415 // --------------------------------------------------------------------
/**
 * Callback shared by the "... of" templates registered in DEFAULT ("form of",
 * "plural of", "past participle of", and so on).
 *
 * Marks the entry as a form of another word, appends the form name and remaining args
 * inside "{...}" through NAME_AND_ARGS, and files the current indexed entry under the
 * base form in the foreign index with WIKTIONARY_BASE_FORM_MULTI.
 *
 * Note that this method removes elements from the caller-supplied args list.
 */
418 static final class FormOf implements FunctionCallback<EnParser> {
420 public boolean onWikiFunction(final WikiTokenizer wikiTokenizer, final String name, final List<String> args,
421 final Map<String, String> namedArgs,
422 final EnParser parser,
423 final AppendAndIndexWikiCallback<EnParser> appendAndIndexWikiCallback) {
424 parser.entryIsFormOfSomething = true;
// For the generic "form of" template the form name is taken from (and removed from)
// positional arg 0; every other template name is used as the form name directly.
425 String formName = name;
426 if (name.equals("form of")) {
427 formName = ListUtil.remove(args, 0, null);
429 if (formName == null) {
430 EnParser.LOG.warning("Missing form name: " + parser.title);
431 formName = "form of";
// Base form: arg 1 when non-empty, otherwise arg 0 (null default).
// NOTE(review): embedded lines 437 and 439 are not visible, so it is unclear which of
// the two remove calls below belongs to which branch - confirm.
433 String baseForm = ListUtil.get(args, 1, "");
434 if ("".equals(baseForm)) {
435 baseForm = ListUtil.get(args, 0, null);
436 ListUtil.remove(args, 1, "");
438 ListUtil.remove(args, 0, null);
440 namedArgs.keySet().removeAll(EnParser.USELESS_WIKI_ARGS);
442 appendAndIndexWikiCallback.builder.append("{");
443 NAME_AND_ARGS.onWikiFunction(wikiTokenizer, formName, args, namedArgs, parser, appendAndIndexWikiCallback);
444 appendAndIndexWikiCallback.builder.append("}");
// Index the entry under its base form only when both the base form and the entry exist.
445 if (baseForm != null && appendAndIndexWikiCallback.indexedEntry != null) {
446 parser.foreignIndexBuilder.addEntryWithString(appendAndIndexWikiCallback.indexedEntry, baseForm, EntryTypeName.WIKTIONARY_BASE_FORM_MULTI);
448 // null baseForm happens in Danish.
449 EnParser.LOG.warning("Null baseform: " + parser.title);
// Standalone FormOf instance, separate from the one registered in DEFAULT.
455 static final EnFunctionCallbacks.FormOf FORM_OF = new FormOf();
458 // --------------------------------------------------------------------
459 // --------------------------------------------------------------------
/**
 * Callback registered in DEFAULT under "wikipedia". Discards the "lang" named arg and
 * then branches on the number of positional args and on whether named args remain.
 *
 * NOTE(review): the bodies of both branches (embedded lines 469-470 and 472-478) are
 * not visible in this view, so the outcome of each case cannot be stated here.
 */
461 static final class wikipedia implements FunctionCallback<EnParser> {
463 public boolean onWikiFunction(final WikiTokenizer wikiTokenizer, final String name, final List<String> args,
464 final Map<String, String> namedArgs,
465 final EnParser parser,
466 final AppendAndIndexWikiCallback<EnParser> appendAndIndexWikiCallback) {
467 namedArgs.remove("lang");
468 if (args.size() > 1 || !namedArgs.isEmpty()) {
471 } else if (args.size() == 1) {
/**
 * Callback registered in DEFAULT under "infl" and "head".
 *
 * Visible behaviour: dispatches the headword as WIKTIONARY_TITLE_MULTI, appends gender
 * markers inside " {...}", the transliteration in parentheses, the part of speech
 * (positional arg 1) in parentheses, then positional args from index 2 on as name/value
 * pairs, and finally every remaining named arg as " key=value". The transliteration,
 * the inflection values and the named-arg values are added to parser.wordForms.
 *
 * NOTE(review): many guard lines inside this method are not visible (for example
 * embedded lines 488, 490-493, 500, 505-508, 510, 513, 519, 527) - confirm the exact
 * conditions against the complete file.
 */
479 static final class InflOrHead implements FunctionCallback<EnParser> {
481 public boolean onWikiFunction(final WikiTokenizer wikiTokenizer, final String name, final List<String> args,
482 final Map<String, String> namedArgs,
483 final EnParser parser,
484 final AppendAndIndexWikiCallback<EnParser> appendAndIndexWikiCallback) {
485 // See: http://en.wiktionary.org/wiki/Template:infl
486 final String langCode = ListUtil.get(args, 0);
// Headword comes from the "head" named arg, with "title" read as an alternative.
487 String head = namedArgs.remove("head");
489 head = namedArgs.remove("title"); // Bug
494 parser.titleAppended = true;
496 namedArgs.keySet().removeAll(EnParser.USELESS_WIKI_ARGS);
// Pull out the named args that get dedicated rendering; "gender" is read as an
// alternative to "g".
498 final String tr = namedArgs.remove("tr");
499 String g = namedArgs.remove("g");
501 g = namedArgs.remove("gender");
503 final String g2 = namedArgs.remove("g2");
504 final String g3 = namedArgs.remove("g3");
506 appendAndIndexWikiCallback.dispatch(head, EntryTypeName.WIKTIONARY_TITLE_MULTI);
// Genders are appended raw as " {g|g2|g3}".
509 appendAndIndexWikiCallback.builder.append(" {").append(g);
511 appendAndIndexWikiCallback.builder.append("|").append(g2);
514 appendAndIndexWikiCallback.builder.append("|").append(g3);
516 appendAndIndexWikiCallback.builder.append("}");
// Transliteration is indexed as a title and also recorded as a word form.
520 appendAndIndexWikiCallback.builder.append(" (");
521 appendAndIndexWikiCallback.dispatch(tr, EntryTypeName.WIKTIONARY_TITLE_MULTI);
522 appendAndIndexWikiCallback.builder.append(")");
523 parser.wordForms.add(tr);
526 final String pos = ListUtil.get(args, 1);
528 appendAndIndexWikiCallback.builder.append(" (").append(pos).append(")");
// Positional args from index 2 on are consumed two at a time: inflection name, then
// inflection value. Only non-empty values are appended and recorded as word forms.
530 for (int i = 2; i < args.size(); i += 2) {
531 final String inflName = ListUtil.get(args, i);
532 final String inflValue = ListUtil.get(args, i + 1);
533 appendAndIndexWikiCallback.builder.append(", ");
534 appendAndIndexWikiCallback.dispatch(inflName, null, null);
535 if (inflValue != null && inflValue.length() > 0) {
536 appendAndIndexWikiCallback.builder.append(": ");
537 appendAndIndexWikiCallback.dispatch(inflValue, null, null);
538 parser.wordForms.add(inflValue);
// Every named arg still present is rendered as " key=value"; the value is first
// converted with WikiTokenizer.toPlainText and is also recorded as a word form.
541 for (final String key : namedArgs.keySet()) {
542 final String value = WikiTokenizer.toPlainText(namedArgs.get(key));
543 appendAndIndexWikiCallback.builder.append(" ");
544 appendAndIndexWikiCallback.dispatch(key, null, null);
545 appendAndIndexWikiCallback.builder.append("=");
546 appendAndIndexWikiCallback.dispatch(value, null, null);
547 parser.wordForms.add(value);
// Registers the it_noun callback under the template name "it-noun".
555 DEFAULT.put("it-noun", new it_noun());
/**
 * Callback for the "it-noun" template. Positional args are read as: 0 = base (stem),
 * 1 = gender, 2 = singular ending, 3 = plural ending. Appends
 * " singular {gender}, plural {pl}" and records both forms in parser.wordForms.
 * Named args or more than four positional args are logged as invalid after the output
 * has already been appended.
 */
557 static final class it_noun implements FunctionCallback<EnParser> {
559 public boolean onWikiFunction(final WikiTokenizer wikiTokenizer, final String name, final List<String> args,
560 final Map<String, String> namedArgs,
561 final EnParser parser,
562 final AppendAndIndexWikiCallback<EnParser> appendAndIndexWikiCallback) {
563 parser.titleAppended = true;
564 final String base = ListUtil.get(args, 0);
565 final String gender = ListUtil.get(args, 1);
// NOTE(review): if ListUtil.get hands back the supplied null default when the index is
// absent, string concatenation turns it into the text "null", so the form would end in
// "null" - confirm and consider an empty-string default.
566 final String singular = base + ListUtil.get(args, 2, null);
567 final String plural = base + ListUtil.get(args, 3, null);
568 appendAndIndexWikiCallback.builder.append(" ");
569 appendAndIndexWikiCallback.dispatch(singular, null, null);
570 appendAndIndexWikiCallback.builder.append(" {").append(gender).append("}, ");
571 appendAndIndexWikiCallback.dispatch(plural, null, null);
572 appendAndIndexWikiCallback.builder.append(" {pl}");
573 parser.wordForms.add(singular);
574 parser.wordForms.add(plural);
575 if (!namedArgs.isEmpty() || args.size() > 4) {
576 EnParser.LOG.warning("Invalid it-noun: " + wikiTokenizer.token());
// Registers the it_proper_noun callback under the template name "it-proper noun".
583 DEFAULT.put("it-proper noun", new it_proper_noun());
585 static final class it_proper_noun implements FunctionCallback<EnParser> {
587 public boolean onWikiFunction(final WikiTokenizer wikiTokenizer, final String name, final List<String> args,
588 final Map<String, String> namedArgs,
589 final EnParser parser,
590 final AppendAndIndexWikiCallback<EnParser> appendAndIndexWikiCallback) {