1 // Copyright 2012 Google Inc. All Rights Reserved.
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
7 // http://www.apache.org/licenses/LICENSE-2.0
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
15 package com.hughes.android.dictionary.parser.enwiktionary;
17 import java.util.Arrays;
18 import java.util.LinkedHashMap;
19 import java.util.LinkedHashSet;
20 import java.util.List;
23 import java.util.logging.Logger;
25 import com.hughes.android.dictionary.engine.EntryTypeName;
26 import com.hughes.android.dictionary.engine.IndexBuilder;
27 import com.hughes.android.dictionary.parser.WikiTokenizer;
28 import com.hughes.util.ListUtil;
/**
 * Holds the default table of wiki template-function callbacks, keyed by
 * function name, together with the callback implementations that are
 * registered in that table.
 */
30 public final class FunctionCallbacksDefault {
// Logger used by the callbacks for warnings. Note that it is created under
// EnWiktionaryXmlParser's class name, not this class's name.
32 static final Logger LOG = Logger.getLogger(EnWiktionaryXmlParser.class.getName());
// Function name -> callback. Backed by a LinkedHashMap, so iteration follows
// the order in which the names were registered.
34 static final Map<String,FunctionCallback> DEFAULT = new LinkedHashMap<String, FunctionCallback>();
// Registration of the default callbacks. The local variable "callback" is
// reused so that several function names can share one callback instance.

// Translation templates: all of these names share one TranslationCallback.
37 FunctionCallback callback = new TranslationCallback();
38 DEFAULT.put("t", callback);
39 DEFAULT.put("t+", callback);
40 DEFAULT.put("t-", callback);
41 DEFAULT.put("tø", callback);
42 DEFAULT.put("apdx-t", callback);
44 DEFAULT.put("qualifier", new QualifierCallback());
// Every name in the "encodings" set below is mapped to the same
// EncodingCallback instance. A LinkedHashSet is used, so the names are
// registered in the order they are listed.
46 callback = new EncodingCallback();
47 Set<String> encodings = new LinkedHashSet<String>(Arrays.asList(
49 "sd-Arab", "ku-Arab", "Arab", "unicode", "Laoo", "ur-Arab", "Thai",
50 "fa-Arab", "Khmr", "Cyrl", "IPAchar", "ug-Arab", "ko-inline",
51 "Jpan", "Kore", "Hebr", "rfscript", "Beng", "Mong", "Knda", "Cyrs",
52 "yue-tsj", "Mlym", "Tfng", "Grek", "yue-yue-j"));
53 for (final String encoding : encodings) {
54 DEFAULT.put(encoding, callback);
// "l" and "term" share one l_term instance; l_term itself distinguishes the
// two by the function name it receives.
57 callback = new l_term();
58 DEFAULT.put("l", callback);
59 DEFAULT.put("term", callback);
// Gender templates share one Gender instance.
61 callback = new Gender();
62 DEFAULT.put("m", callback);
63 DEFAULT.put("f", callback);
64 DEFAULT.put("n", callback);
65 DEFAULT.put("p", callback);
66 DEFAULT.put("g", callback);
68 DEFAULT.put("italbrac", new italbrac());
69 DEFAULT.put("gloss", new gloss());
// NOTE(review): this AppendArg0 instance is replaced by the Ignore instance
// in the next statement without being registered under any name here.
// Confirm whether a DEFAULT.put(...) for it is missing or the assignment is
// leftover dead code.
71 callback = new AppendArg0();
// Names registered with the Ignore callback.
73 callback = new Ignore();
74 DEFAULT.put("trreq", callback);
75 DEFAULT.put("t-image", callback);
76 DEFAULT.put("defn", callback);
77 DEFAULT.put("rfdef", callback);
78 DEFAULT.put("rfdate", callback);
79 DEFAULT.put("rfex", callback);
80 DEFAULT.put("rfquote", callback);
81 DEFAULT.put("attention", callback);
82 DEFAULT.put("zh-attention", callback);
84 DEFAULT.put("not used", new not_used());
85 DEFAULT.put("form of", new FormOf());
86 DEFAULT.put("wikipedia", new wikipedia());
// "infl" and "head" share one InflOrHead instance.
88 callback = new InflOrHead();
89 DEFAULT.put("infl", callback);
90 DEFAULT.put("head", callback);
// The literal function name "..." is handled by AppendName, which appends
// the function name itself to the output.
92 callback = new AppendName();
93 DEFAULT.put("...", callback);
/**
 * Callback that writes a function call back out as text: the function name,
 * then each non-empty positional argument preceded by "|", then the named
 * arguments via appendNamedArgs.
 */
98 static final class NameAndArgs implements FunctionCallback {
100 public boolean onWikiFunction(final WikiTokenizer wikiTokenizer, final String name, final List<String> args,
101 final Map<String, String> namedArgs, final EnWiktionaryXmlParser parser,
102 final AppendAndIndexWikiCallback appendAndIndexWikiCallback) {
104 appendAndIndexWikiCallback.builder.append(name);
105 for (int i = 0; i < args.size(); ++i) {
// Empty positional arguments are skipped entirely: no "|" is emitted for them.
106 if (args.get(i).length() > 0) {
107 appendAndIndexWikiCallback.builder.append("|");
// The argument text goes through dispatch() (with null for the two trailing
// parameters) rather than being appended to the builder as raw text.
108 appendAndIndexWikiCallback.dispatch(args.get(i), null, null);
111 appendNamedArgs(namedArgs, appendAndIndexWikiCallback);
115 static NameAndArgs NAME_AND_ARGS = new NameAndArgs();
/**
 * Emits every named argument as "|key=value". Key and value are each passed
 * through dispatch() (with null for the two trailing parameters); the "|"
 * and "=" separators are appended directly to the builder.
 *
 * @param namedArgs the named arguments to emit, in the map's iteration order
 * @param appendAndIndexWikiCallback receives the appended text and the dispatched key/value text
 */
117 private static void appendNamedArgs(final Map<String, String> namedArgs,
118 final AppendAndIndexWikiCallback appendAndIndexWikiCallback) {
119 for (final Map.Entry<String, String> entry : namedArgs.entrySet()) {
120 appendAndIndexWikiCallback.builder.append("|");
121 appendAndIndexWikiCallback.dispatch(entry.getKey(), null, null);
122 appendAndIndexWikiCallback.builder.append("=");
123 appendAndIndexWikiCallback.dispatch(entry.getValue(), null, null);
127 // ------------------------------------------------------------------
/**
 * Callback registered for the translation templates ("t", "t+", "t-", ...).
 * Emits the word (positional argument 1), then " {gender}" when
 * ListUtil.get(args, 2) yields a non-null value, then " (transliteration)"
 * when a "tr" named argument was supplied.
 */
129 static final class TranslationCallback implements FunctionCallback {
131 public boolean onWikiFunction(final WikiTokenizer wikiTokenizer, final String name, final List<String> args,
132 final Map<String, String> namedArgs, final EnWiktionaryXmlParser parser,
133 final AppendAndIndexWikiCallback appendAndIndexWikiCallback) {
// Both statements below mutate the caller-supplied namedArgs map: "tr" is
// taken out, and every key listed in USELESS_WIKI_ARGS is dropped.
135 final String transliteration = namedArgs.remove("tr");
136 namedArgs.keySet().removeAll(EnWiktionaryXmlParser.USELESS_WIKI_ARGS);
// At least a language code and a word are expected.
137 if (args.size() < 2) {
138 LOG.warning("{{t...}} with wrong args: title=" + parser.title);
// NOTE(review): langCode is read here but is not used by any statement
// visible in this callback.
141 final String langCode = ListUtil.get(args, 0);
142 final String word = ListUtil.get(args, 1);
// Presumably null when there is no third positional argument (the code
// below tests for null) -- confirm against ListUtil.get.
143 final String gender = ListUtil.get(args, 2);
144 // TODO: deal with second (and third...) gender, and alt.
// The word is dispatched as a WIKTIONARY_TITLE_MULTI entry.
146 appendAndIndexWikiCallback.dispatch(word, EntryTypeName.WIKTIONARY_TITLE_MULTI);
148 if (gender != null) {
149 appendAndIndexWikiCallback.builder.append(String.format(" {%s}", gender));
// The transliteration is wrapped in parentheses and dispatched as a
// WIKTIONARY_TRANSLITERATION entry.
151 if (transliteration != null) {
152 appendAndIndexWikiCallback.builder.append(" (");
153 appendAndIndexWikiCallback.dispatch(transliteration, EntryTypeName.WIKTIONARY_TRANSLITERATION);
154 appendAndIndexWikiCallback.builder.append(")");
161 // ------------------------------------------------------------------
/**
 * Callback for "qualifier": emits its positional argument 0 wrapped in
 * parentheses, after checking that the call has exactly one positional
 * argument and no named arguments.
 */
163 static final class QualifierCallback implements FunctionCallback {
165 public boolean onWikiFunction(final WikiTokenizer wikiTokenizer, final String name, final List<String> args,
166 final Map<String, String> namedArgs,
167 final EnWiktionaryXmlParser parser,
168 final AppendAndIndexWikiCallback appendAndIndexWikiCallback) {
169 if (args.size() != 1 || !namedArgs.isEmpty()) {
// NOTE(review): the message ends after the colon -- neither the token nor
// the page title is appended, so the log line does not identify the
// offending input. Consider appending wikiTokenizer.token() as
// EncodingCallback does.
170 LOG.warning("weird qualifier: ");
173 String qualifier = args.get(0);
174 appendAndIndexWikiCallback.builder.append("(");
// Uses the two-argument dispatch() with a null second argument.
175 appendAndIndexWikiCallback.dispatch(qualifier, null);
176 appendAndIndexWikiCallback.builder.append(")");
181 // ------------------------------------------------------------------
/**
 * Callback shared by the script/encoding template names. Passes positional
 * argument 0 through dispatch(), using the entry type currently stored on
 * the callback (appendAndIndexWikiCallback.entryTypeName).
 */
183 static final class EncodingCallback implements FunctionCallback {
185 public boolean onWikiFunction(final WikiTokenizer wikiTokenizer, final String name, final List<String> args,
186 final Map<String, String> namedArgs,
187 final EnWiktionaryXmlParser parser,
188 final AppendAndIndexWikiCallback appendAndIndexWikiCallback) {
// Anything other than exactly one positional argument and no named
// arguments is logged together with the offending token.
189 if (args.size() != 1 || !namedArgs.isEmpty()) {
190 LOG.warning("weird encoding: " + wikiTokenizer.token());
// Argument-less calls are special-cased before args.get(0) is reached.
192 if (args.size() == 0) {
193 // Things like "{{Jpan}}" exist.
196 final String wikiText = args.get(0);
197 appendAndIndexWikiCallback.dispatch(wikiText, appendAndIndexWikiCallback.entryTypeName);
202 // ------------------------------------------------------------------
/**
 * Callback for the gender templates. Emits "{" followed by the function
 * name, then "|" plus the argument for each positional argument, then "}".
 * The arguments are appended to the builder as raw text; they are not
 * passed through dispatch().
 */
204 static final class Gender implements FunctionCallback {
206 public boolean onWikiFunction(final WikiTokenizer wikiTokenizer, final String name, final List<String> args,
207 final Map<String, String> namedArgs,
208 final EnWiktionaryXmlParser parser,
209 final AppendAndIndexWikiCallback appendAndIndexWikiCallback) {
// Calls that carry named arguments are treated separately from the plain
// positional form handled below.
210 if (!namedArgs.isEmpty()) {
213 appendAndIndexWikiCallback.builder.append("{");
214 appendAndIndexWikiCallback.builder.append(name);
215 for (int i = 0; i < args.size(); ++i) {
216 appendAndIndexWikiCallback.builder.append("|").append(args.get(i));
218 appendAndIndexWikiCallback.builder.append("}");
223 // ------------------------------------------------------------------
/**
 * Callback shared by "l" and "term". Emits the display text (positional
 * argument 2, or argument 1 when argument 2 is empty), then an optional
 * transliteration and an optional gloss (argument 3), each in parentheses,
 * and finally any remaining named arguments as " {name|key=value...}".
 */
225 static final class l_term implements FunctionCallback {
227 public boolean onWikiFunction(final WikiTokenizer wikiTokenizer, final String name, final List<String> args,
228 final Map<String, String> namedArgs,
229 final EnWiktionaryXmlParser parser,
230 final AppendAndIndexWikiCallback appendAndIndexWikiCallback) {
232 // for {{l}}, lang is arg 0, but not for {{term}}
233 if (name.equals("term")) {
// The entry type for the display text depends on the parser state; any
// state other than the two listed is rejected with IllegalStateException.
237 final EntryTypeName entryTypeName;
238 switch (parser.state) {
239 case TRANSLATION_LINE: entryTypeName = EntryTypeName.WIKTIONARY_TRANSLATION_OTHER_TEXT; break;
240 case ENGLISH_DEF_OF_FOREIGN: entryTypeName = EntryTypeName.WIKTIONARY_ENGLISH_DEF_WIKI_LINK; break;
241 default: throw new IllegalStateException("Invalid enum value: " + parser.state);
// NOTE(review): args.get(0) is not guarded by a size check in the visible
// code; List.get throws IndexOutOfBoundsException on an empty list.
244 final String langCode = args.get(0);
// Only the language code "en" selects the English index builder; the empty
// code and the remaining assignment both select the foreign index builder.
245 final IndexBuilder indexBuilder;
246 if ("".equals(langCode)) {
247 indexBuilder = parser.foreignIndexBuilder;
248 } else if ("en".equals(langCode)) {
249 indexBuilder = parser.enIndexBuilder;
251 indexBuilder = parser.foreignIndexBuilder;
// Display text: argument 2 when non-empty, otherwise argument 1 (null if
// that is unavailable too).
254 String displayText = ListUtil.get(args, 2, "");
255 if (displayText.equals("")) {
256 displayText = ListUtil.get(args, 1, null);
259 if (displayText != null) {
260 appendAndIndexWikiCallback.dispatch(displayText, indexBuilder, entryTypeName);
262 LOG.warning("no display text: " + wikiTokenizer.token());
// Takes "tr" out of the caller-supplied namedArgs map; it is emitted in
// parentheses and dispatched as a WIKTIONARY_TRANSLITERATION entry.
265 final String tr = namedArgs.remove("tr");
267 appendAndIndexWikiCallback.builder.append(" (");
268 appendAndIndexWikiCallback.dispatch(tr, indexBuilder, EntryTypeName.WIKTIONARY_TRANSLITERATION);
269 appendAndIndexWikiCallback.builder.append(")");
// The gloss (argument 3) is always dispatched against the English index
// builder as a WIKTIONARY_ENGLISH_DEF entry, whatever langCode was.
272 final String gloss = ListUtil.get(args, 3, "");
273 if (!gloss.equals("")) {
274 appendAndIndexWikiCallback.builder.append(" (");
275 appendAndIndexWikiCallback.dispatch(gloss, parser.enIndexBuilder, EntryTypeName.WIKTIONARY_ENGLISH_DEF);
276 appendAndIndexWikiCallback.builder.append(")");
// Whatever named arguments remain after dropping the USELESS_WIKI_ARGS keys
// are echoed after the function name inside braces.
279 namedArgs.keySet().removeAll(EnWiktionaryXmlParser.USELESS_WIKI_ARGS);
280 if (!namedArgs.isEmpty()) {
281 appendAndIndexWikiCallback.builder.append(" {").append(name);
282 appendNamedArgs(namedArgs, appendAndIndexWikiCallback);
283 appendAndIndexWikiCallback.builder.append("}");
290 // ------------------------------------------------------------------
/**
 * Callback that dispatches positional argument 0 as a
 * WIKTIONARY_TRANSLATION_OTHER_TEXT entry, with no surrounding punctuation.
 */
292 static final class AppendArg0 implements FunctionCallback {
294 public boolean onWikiFunction(final WikiTokenizer wikiTokenizer, final String name, final List<String> args,
295 final Map<String, String> namedArgs,
296 final EnWiktionaryXmlParser parser,
297 final AppendAndIndexWikiCallback appendAndIndexWikiCallback) {
// Guard for any call shape other than exactly one positional argument and
// no named arguments.
298 if (args.size() != 1 || !namedArgs.isEmpty()) {
301 appendAndIndexWikiCallback.dispatch(args.get(0), EntryTypeName.WIKTIONARY_TRANSLATION_OTHER_TEXT);
302 // TODO: transliteration
307 // ------------------------------------------------------------------
/**
 * Callback for "italbrac": emits positional argument 0 wrapped in
 * parentheses, dispatching it as a WIKTIONARY_TRANSLATION_OTHER_TEXT entry.
 */
309 static final class italbrac implements FunctionCallback {
311 public boolean onWikiFunction(final WikiTokenizer wikiTokenizer, final String name, final List<String> args,
312 final Map<String, String> namedArgs,
313 final EnWiktionaryXmlParser parser,
314 final AppendAndIndexWikiCallback appendAndIndexWikiCallback) {
// Guard for any call shape other than exactly one positional argument and
// no named arguments.
315 if (args.size() != 1 || !namedArgs.isEmpty()) {
318 appendAndIndexWikiCallback.builder.append("(");
319 appendAndIndexWikiCallback.dispatch(args.get(0), EntryTypeName.WIKTIONARY_TRANSLATION_OTHER_TEXT);
320 appendAndIndexWikiCallback.builder.append(")");
325 // ------------------------------------------------------------------
/**
 * Callback for "gloss": emits positional argument 0 wrapped in parentheses,
 * dispatching it as a WIKTIONARY_TRANSLATION_OTHER_TEXT entry. The visible
 * statements are the same as those of the italbrac callback.
 */
327 static final class gloss implements FunctionCallback {
329 public boolean onWikiFunction(final WikiTokenizer wikiTokenizer, final String name, final List<String> args,
330 final Map<String, String> namedArgs,
331 final EnWiktionaryXmlParser parser,
332 final AppendAndIndexWikiCallback appendAndIndexWikiCallback) {
// Guard for any call shape other than exactly one positional argument and
// no named arguments.
333 if (args.size() != 1 || !namedArgs.isEmpty()) {
336 appendAndIndexWikiCallback.builder.append("(");
337 appendAndIndexWikiCallback.dispatch(args.get(0), EntryTypeName.WIKTIONARY_TRANSLATION_OTHER_TEXT);
338 appendAndIndexWikiCallback.builder.append(")");
343 // ------------------------------------------------------------------
/**
 * Callback registered for template names such as "trreq", "rfdef" and
 * "attention". It appends nothing to the builder and dispatches nothing.
 * NOTE(review): presumably it exists so these templates are consumed without
 * producing output -- confirm how callers interpret its return value.
 */
345 static final class Ignore implements FunctionCallback {
347 public boolean onWikiFunction(final WikiTokenizer wikiTokenizer, final String name, final List<String> args,
348 final Map<String, String> namedArgs,
349 final EnWiktionaryXmlParser parser,
350 final AppendAndIndexWikiCallback appendAndIndexWikiCallback) {
355 // ------------------------------------------------------------------
/**
 * Callback for "not used": appends the fixed text "(not used)" to the
 * builder. Neither the positional nor the named arguments are consulted.
 */
357 static final class not_used implements FunctionCallback {
359 public boolean onWikiFunction(final WikiTokenizer wikiTokenizer, final String name, final List<String> args,
360 final Map<String, String> namedArgs,
361 final EnWiktionaryXmlParser parser,
362 final AppendAndIndexWikiCallback appendAndIndexWikiCallback) {
363 appendAndIndexWikiCallback.builder.append("(not used)");
369 // ------------------------------------------------------------------
/**
 * Callback that appends the function name itself to the builder (it is
 * registered for the literal name "..."). Intended for calls that carry no
 * arguments at all.
 */
371 static final class AppendName implements FunctionCallback {
373 public boolean onWikiFunction(final WikiTokenizer wikiTokenizer, final String name, final List<String> args,
374 final Map<String, String> namedArgs,
375 final EnWiktionaryXmlParser parser,
376 final AppendAndIndexWikiCallback appendAndIndexWikiCallback) {
// Guard for calls that carry any positional or named argument.
377 if (!args.isEmpty() || !namedArgs.isEmpty()) {
380 appendAndIndexWikiCallback.builder.append(name);
385 // --------------------------------------------------------------------
386 // --------------------------------------------------------------------
/**
 * Callback for "form of" style templates. Emits "{formName|args...}" via
 * NAME_AND_ARGS and, when both a base form and an indexed entry are
 * available, adds the base form to the foreign index as a
 * WIKTIONARY_BASE_FORM_MULTI entry.
 *
 * Note that this callback mutates its inputs: positional arguments are
 * removed from args and the USELESS_WIKI_ARGS keys are removed from
 * namedArgs before the remaining arguments are echoed.
 */
389 static final class FormOf implements FunctionCallback {
391 public boolean onWikiFunction(final WikiTokenizer wikiTokenizer, final String name, final List<String> args,
392 final Map<String, String> namedArgs,
393 final EnWiktionaryXmlParser parser,
394 final AppendAndIndexWikiCallback appendAndIndexWikiCallback) {
// For the generic "form of" template the form name is the first positional
// argument (taken out of args); otherwise the template name is the form name.
395 String formName = name;
396 if (name.equals("form of")) {
397 formName = ListUtil.remove(args, 0, null);
// Fall back to the literal "form of" when no form name could be obtained.
399 if (formName == null) {
400 LOG.warning("Missing form name: " + parser.title);
401 formName = "form of";
// Base form: positional argument 1 when non-empty, otherwise argument 0
// (null if that is unavailable too). The ListUtil.remove calls take
// positional arguments out of args before the rest is echoed below.
403 String baseForm = ListUtil.get(args, 1, "");
404 if ("".equals(baseForm)) {
405 baseForm = ListUtil.get(args, 0, null);
406 ListUtil.remove(args, 1, "");
408 ListUtil.remove(args, 0, null);
410 namedArgs.keySet().removeAll(EnWiktionaryXmlParser.USELESS_WIKI_ARGS);
// Echo the form name and whatever arguments remain, wrapped in braces.
412 appendAndIndexWikiCallback.builder.append("{");
413 NAME_AND_ARGS.onWikiFunction(wikiTokenizer, formName, args, namedArgs, parser, appendAndIndexWikiCallback);
414 appendAndIndexWikiCallback.builder.append("}");
// Index the base form only when there is an entry to attach it to.
415 if (baseForm != null && appendAndIndexWikiCallback.indexedEntry != null) {
416 parser.foreignIndexBuilder.addEntryWithString(appendAndIndexWikiCallback.indexedEntry, baseForm, EntryTypeName.WIKTIONARY_BASE_FORM_MULTI);
418 // null baseForm happens in Danish.
419 LOG.warning("Null baseform: " + parser.title);
// Shared FormOf instance. Note that the "form of" entry in DEFAULT is
// registered with its own separately constructed FormOf, not this one.
425 static final FormOf FORM_OF = new FormOf();
428 // --------------------------------------------------------------------
429 // --------------------------------------------------------------------
/**
 * Callback for "wikipedia". Discards the "lang" named argument (mutating the
 * caller-supplied namedArgs map) and then branches on the shape of the call.
 */
431 static final class wikipedia implements FunctionCallback {
433 public boolean onWikiFunction(final WikiTokenizer wikiTokenizer, final String name, final List<String> args,
434 final Map<String, String> namedArgs,
435 final EnWiktionaryXmlParser parser,
436 final AppendAndIndexWikiCallback appendAndIndexWikiCallback) {
437 namedArgs.remove("lang");
// First case: more than one positional argument, or named arguments other
// than "lang" are present.
438 if (args.size() > 1 || !namedArgs.isEmpty()) {
// Second case: exactly one positional argument and no remaining named
// arguments.
441 } else if (args.size() == 1) {
/**
 * Callback shared by "infl" and "head" (headword-line templates). Emits the
 * head word, optional genders in braces, optional transliteration and
 * part of speech in parentheses, then the inflection name/value pairs and
 * any remaining named arguments. Transliteration, inflection values and
 * named-argument values are also added to parser.wordForms.
 *
 * Note that this callback removes the keys it consumes ("head", "title",
 * "tr", "g", "gender", "g2", "g3" and the USELESS_WIKI_ARGS keys) from the
 * caller-supplied namedArgs map.
 */
449 static final class InflOrHead implements FunctionCallback {
451 public boolean onWikiFunction(final WikiTokenizer wikiTokenizer, final String name, final List<String> args,
452 final Map<String, String> namedArgs,
453 final EnWiktionaryXmlParser parser,
454 final AppendAndIndexWikiCallback appendAndIndexWikiCallback) {
455 // See: http://en.wiktionary.org/wiki/Template:infl
// NOTE(review): langCode is read here but is not used by any statement
// visible in this callback.
456 final String langCode = ListUtil.get(args, 0);
// The head word comes from the "head" named argument, with "title" as an
// alternative source.
457 String head = namedArgs.remove("head");
459 head = namedArgs.remove("title"); // Bug
// Records on the parser that the title has been emitted.
464 parser.titleAppended = true;
466 namedArgs.keySet().removeAll(EnWiktionaryXmlParser.USELESS_WIKI_ARGS);
468 final String tr = namedArgs.remove("tr");
// Gender comes from "g", with "gender" as an alternative key; "g2" and "g3"
// carry additional genders.
469 String g = namedArgs.remove("g");
471 g = namedArgs.remove("gender");
473 final String g2 = namedArgs.remove("g2");
474 final String g3 = namedArgs.remove("g3");
476 appendAndIndexWikiCallback.dispatch(head, EntryTypeName.WIKTIONARY_TITLE_MULTI);
// Genders are written as " {g|g2|g3}".
479 appendAndIndexWikiCallback.builder.append(" {").append(g);
481 appendAndIndexWikiCallback.builder.append("|").append(g2);
484 appendAndIndexWikiCallback.builder.append("|").append(g3);
486 appendAndIndexWikiCallback.builder.append("}");
// NOTE(review): the transliteration is dispatched as WIKTIONARY_TITLE_MULTI
// here, whereas TranslationCallback and l_term dispatch theirs as
// WIKTIONARY_TRANSLITERATION -- confirm this is intentional.
490 appendAndIndexWikiCallback.builder.append(" (");
491 appendAndIndexWikiCallback.dispatch(tr, EntryTypeName.WIKTIONARY_TITLE_MULTI);
492 appendAndIndexWikiCallback.builder.append(")");
493 parser.wordForms.add(tr);
// Positional argument 1 is the part of speech, appended as raw text.
496 final String pos = ListUtil.get(args, 1);
498 appendAndIndexWikiCallback.builder.append(" (").append(pos).append(")");
// Positional arguments from index 2 on are consumed in (name, value) pairs,
// hence the step of 2. The name is always emitted; the value only when it
// is non-null and non-empty.
500 for (int i = 2; i < args.size(); i += 2) {
501 final String inflName = ListUtil.get(args, i);
502 final String inflValue = ListUtil.get(args, i + 1);
503 appendAndIndexWikiCallback.builder.append(", ");
504 appendAndIndexWikiCallback.dispatch(inflName, null, null);
505 if (inflValue != null && inflValue.length() > 0) {
506 appendAndIndexWikiCallback.builder.append(": ");
507 appendAndIndexWikiCallback.dispatch(inflValue, null, null);
508 parser.wordForms.add(inflValue);
// Named arguments not consumed above are emitted as " key=value". The value
// is converted with WikiTokenizer.toPlainText before being dispatched and
// recorded as a word form.
511 for (final String key : namedArgs.keySet()) {
512 final String value = WikiTokenizer.toPlainText(namedArgs.get(key));
513 appendAndIndexWikiCallback.builder.append(" ");
514 appendAndIndexWikiCallback.dispatch(key, null, null);
515 appendAndIndexWikiCallback.builder.append("=");
516 appendAndIndexWikiCallback.dispatch(value, null, null);
517 parser.wordForms.add(value);
// Registers the handler for the "it-noun" template.
525 DEFAULT.put("it-noun", new it_noun());
/**
 * Callback for "it-noun". Positional arguments are read as: 0 = base (stem),
 * 1 = gender, 2 = singular ending, 3 = plural ending. Emits
 * " singular {gender}, plural {pl}" and records both forms in
 * parser.wordForms.
 */
527 static final class it_noun implements FunctionCallback {
529 public boolean onWikiFunction(final WikiTokenizer wikiTokenizer, final String name, final List<String> args,
530 final Map<String, String> namedArgs,
531 final EnWiktionaryXmlParser parser,
532 final AppendAndIndexWikiCallback appendAndIndexWikiCallback) {
// Records on the parser that the title has been emitted.
533 parser.titleAppended = true;
534 final String base = ListUtil.get(args, 0);
535 final String gender = ListUtil.get(args, 1);
// NOTE(review): string concatenation turns a null operand into the text
// "null". If ListUtil.get returns the supplied default (null) for a missing
// ending -- or null for a missing base -- the resulting form contains the
// literal "null" and is still emitted and added to wordForms. Confirm
// against ListUtil.get; an empty-string default would avoid this.
536 final String singular = base + ListUtil.get(args, 2, null);
537 final String plural = base + ListUtil.get(args, 3, null);
538 appendAndIndexWikiCallback.builder.append(" ");
539 appendAndIndexWikiCallback.dispatch(singular, null, null);
540 appendAndIndexWikiCallback.builder.append(" {").append(gender).append("}, ");
541 appendAndIndexWikiCallback.dispatch(plural, null, null);
542 appendAndIndexWikiCallback.builder.append(" {pl}");
543 parser.wordForms.add(singular);
544 parser.wordForms.add(plural);
// The shape check runs after the output has already been written, so an
// unexpected call is logged but its text is still emitted.
545 if (!namedArgs.isEmpty() || args.size() > 4) {
546 LOG.warning("Invalid it-noun: " + wikiTokenizer.token());
// Registers the handler for the "it-proper noun" template.
553 DEFAULT.put("it-proper noun", new it_proper_noun());
555 static final class it_proper_noun implements FunctionCallback {
557 public boolean onWikiFunction(final WikiTokenizer wikiTokenizer, final String name, final List<String> args,
558 final Map<String, String> namedArgs,
559 final EnWiktionaryXmlParser parser,
560 final AppendAndIndexWikiCallback appendAndIndexWikiCallback) {