1 // Copyright 2012 Google Inc. All Rights Reserved.
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
7 // http://www.apache.org/licenses/LICENSE-2.0
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
15 package com.hughes.android.dictionary.parser.wiktionary;
17 import java.util.Arrays;
18 import java.util.LinkedHashSet;
19 import java.util.List;
22 import java.util.HashSet;
23 import java.util.regex.Pattern;
25 import com.hughes.android.dictionary.engine.IndexBuilder;
26 import com.hughes.android.dictionary.engine.IndexedEntry;
27 import com.hughes.android.dictionary.engine.PairEntry;
28 import com.hughes.android.dictionary.engine.PairEntry.Pair;
29 import com.hughes.android.dictionary.parser.WikiTokenizer;
30 import com.hughes.android.dictionary.parser.wiktionary.EnFunctionCallbacks.TranslationCallback;
31 import com.hughes.util.ListUtil;
/**
 * Wiktionary parser that gathers translation templates for two language
 * slots (index 0 and 1) and records them as {@link PairEntry} pairs, feeding
 * the {@link IndexBuilder} that belongs to each slot.
 *
 * NOTE(review): every line in this copy of the file starts with a stray
 * number and the braces do not balance, so short lines appear to have been
 * dropped during extraction. Restore the file from version control before
 * relying on it.
 */
33 public final class EnTranslationToTranslationParser extends AbstractWiktionaryParser {
// Index builders by language slot; onT() uses indexBuilders.get(p) for the
// slot whose pattern matched.
35 final List<IndexBuilder> indexBuilders;
// langCodePatterns[p] is matched against the first positional argument of a
// translation template to decide which slot the translation belongs to.
36 final Pattern[] langCodePatterns;
// State of the entry currently being collected; all three are assigned in
// startEntry(). onT() and parseSection() treat builders == null as
// "no entry is open".
38 PairEntry pairEntry = null;
39 IndexedEntry indexedEntry = null;
40 StringBuilder[] builders = null;
// Every pair emitted so far; finishEntry() consults it to skip duplicates.
41 HashSet<Pair> allPairs = new HashSet<Pair>();
// Name of this parser. Where it is looked up is not visible in this file.
43 public static final String NAME = "EnTranslationToTranslation";
// Template names that are handled as translation templates.
45 final Set<String> Ts = new LinkedHashSet<String>(Arrays.asList("t", "t+",
46 "t-", "tø", "apdx-t", "ttbc"));
48 public EnTranslationToTranslationParser(final List<IndexBuilder> indexBuilders,
49 final Pattern[] langCodePatterns) {
50 this.indexBuilders = indexBuilders;
51 this.langCodePatterns = langCodePatterns;
/**
 * Removes from {@code namedArgs} every key contained in
 * {@code EnParser.USELESS_WIKI_ARGS}.
 *
 * @param namedArgs named template arguments; modified in place, because
 *                  removing from the key-set view removes the map entries
 */
55 void removeUselessArgs(Map<String, String> namedArgs) {
56 namedArgs.keySet().removeAll(EnParser.USELESS_WIKI_ARGS);
/**
 * Tokenizes the wiki text of one section and reacts to the translation
 * templates found in it.
 *
 * @param heading section heading; not read by any line visible here
 * @param text    wiki text of the section, handed to WikiTokenizer.dispatch
 */
60 void parseSection(String heading, String text) {
// NOTE(review): the body of this guard is not visible in this copy;
// presumably it returns early for ignorable titles. Confirm.
61 if (EnParser.isIgnorableTitle(title)) {
64 final WikiTokenizer.Callback callback = new WikiTokenizer.DoNothingCallback() {
// Called for each template; dispatches on the template name.
66 public void onFunction(WikiTokenizer wikiTokenizer, String name,
67 List<String> functionPositionArgs,
68 Map<String, String> functionNamedArgs) {
69 //System.out.println(wikiTokenizer.token());
// NOTE(review): the body of this branch is not visible; presumably it hands
// the translation template to onT(). Confirm.
70 if (Ts.contains(name)) {
// These three template names open a new entry; the raw token is passed on
// as the second argument of startEntry().
72 } else if (name.equals("trans-top") || name.equals("checktrans-top") || name.equals("checktrans")) {
73 startEntry(title, wikiTokenizer.token());
// NOTE(review): the body of this branch is not visible; presumably it
// closes the open entry via finishEntry(). Confirm.
74 } else if (name.equals("trans-bottom")) {
// Re-dispatches the text of a list item through this same callback, so
// templates inside list items reach onFunction() as well.
80 public void onListItem(WikiTokenizer wikiTokenizer) {
81 WikiTokenizer.dispatch(wikiTokenizer.listItemWikiText(), false, this);
84 WikiTokenizer.dispatch(text, true, callback);
// builders is assigned in startEntry(), so a non-null value here is reported
// as an entry that was opened but never ended.
// NOTE(review): the rest of this branch is not visible in this copy.
86 if (builders != null) {
87 LOG.warning("unended translations: " + title);
// Callback registered below for every translation template name in Ts.
92 final TranslationCallback<EnTranslationToTranslationParser> translationCallback = new TranslationCallback<EnTranslationToTranslationParser>();
// Callback that onT() points at the current slot's builder and index builder
// before delegating the template to it.
// NOTE(review): the constructor arguments are not visible in this copy.
94 final AppendAndIndexWikiCallback<EnTranslationToTranslationParser> appendAndIndexWikiCallback = new AppendAndIndexWikiCallback<EnTranslationToTranslationParser>(
// Route every translation template name to the shared translationCallback.
97 for (final String t : Ts) {
98 appendAndIndexWikiCallback.functionCallbacks.put(t, translationCallback);
/**
 * Handles one translation template: picks the language slot(s) whose pattern
 * matches the template's language code and lets appendAndIndexWikiCallback
 * append the translation to that slot's builder.
 *
 * @param wikiTokenizer tokenizer currently positioned on the template
 */
102 private void onT(WikiTokenizer wikiTokenizer) {
// No entry is open: warn and open one, passing the marker string
// "QUICKDIC_OUTSIDE" where a template token would normally go.
103 if (builders == null) {
104 LOG.warning("{{t...}} section outside of {{trans-top}}: " + title);
105 startEntry(title, "QUICKDIC_OUTSIDE");
// The language code is the first positional argument of the template.
108 final List<String> args = wikiTokenizer.functionPositionArgs();
109 final String langCode = ListUtil.get(args, 0);
// NOTE(review): what follows the warning is not visible in this copy;
// presumably the method returns here, since langCode is dereferenced below.
110 if (langCode == null) {
111 LOG.warning("Missing langCode: " + wikiTokenizer.token());
// Check both language slots; each slot whose pattern matches receives the
// translation.
114 for (int p = 0; p < 2; ++p) {
115 if (langCodePatterns[p].matcher(langCode).matches()) {
116 appendAndIndexWikiCallback.builder = builders[p];
// Separate this translation from text already collected for the slot.
117 if (appendAndIndexWikiCallback.builder.length() > 0) {
118 appendAndIndexWikiCallback.builder.append(", ");
// Select the slot's index builder, then delegate the template itself.
120 appendAndIndexWikiCallback.indexBuilder = indexBuilders.get(p);
121 appendAndIndexWikiCallback.onFunction(wikiTokenizer,
122 wikiTokenizer.functionName(), wikiTokenizer.functionPositionArgs(),
123 wikiTokenizer.functionNamedArgs());
/**
 * Opens a new entry: creates a fresh PairEntry, IndexedEntry and one empty
 * builder per language slot, and points appendAndIndexWikiCallback at the
 * new IndexedEntry.
 *
 * @param title page title; used only in the warning message in the visible
 *              lines
 * @param func  text describing what triggered the entry; used only in the
 *              warning message in the visible lines
 */
128 void startEntry(final String title, final String func) {
// A previous entry is still open.
// NOTE(review): the rest of this branch is not visible in this copy;
// presumably it finishes the previous entry first. Confirm.
129 if (pairEntry != null) {
130 LOG.warning("startEntry() twice: " + title + ", " + func);
// entrySource is not declared in this file's visible lines; presumably it is
// inherited from the superclass.
134 pairEntry = new PairEntry(entrySource);
135 indexedEntry = new IndexedEntry(pairEntry);
// One builder per language slot (index 0 and 1).
136 builders = new StringBuilder[] { new StringBuilder(), new StringBuilder() };
137 appendAndIndexWikiCallback.indexedEntry = indexedEntry;
/**
 * Closes the open entry: if both language slots collected text and the
 * resulting pair has not been emitted before, adds the pair to the entry and
 * marks the indexed entry valid.
 *
 * NOTE(review): the end of this method is not visible in this copy, so any
 * reset of the per-entry state cannot be confirmed from here.
 *
 * @param title page title; used only in the warning message in the visible
 *              lines
 */
140 void finishEntry(final String title) {
// NOTE(review): the log message says "finalizeEntry()" although the method
// is named finishEntry(). What follows the warning is not visible here;
// presumably the method returns, since builders is dereferenced below.
141 if (pairEntry == null) {
142 LOG.warning("finalizeEntry() twice: " + title);
145 final String lang1 = builders[0].toString();
146 final String lang2 = builders[1].toString();
// Keep the entry only when both slots collected some text.
147 if (lang1.length() > 0 && lang2.length() > 0) {
148 final Pair newPair = new Pair(lang1, lang2);
149 // brute-force approach to prevent adding duplicates
150 if (!allPairs.contains(newPair))
// A second Pair built from the same two strings is added to the entry,
// rather than newPair itself.
152 allPairs.add(newPair);
153 pairEntry.pairs.add(new Pair(lang1, lang2));
154 indexedEntry.isValid = true;