X-Git-Url: http://gitweb.fperrin.net/?a=blobdiff_plain;f=src%2Fcom%2Fhughes%2Fandroid%2Fdictionary%2Fparser%2Fwiktionary%2FEnTranslationToTranslationParser.java;h=67f451a781d486f15debf18b78fb9bb2998c9c93;hb=5a1b9f8a37d03dc675e1d895817422e9743a5b5b;hp=8025021aea1460d1f37a41716f349a8d5202560d;hpb=90247c9eb280bd2b55f9b2b2816bad03a0821a7f;p=DictionaryPC.git diff --git a/src/com/hughes/android/dictionary/parser/wiktionary/EnTranslationToTranslationParser.java b/src/com/hughes/android/dictionary/parser/wiktionary/EnTranslationToTranslationParser.java index 8025021..67f451a 100644 --- a/src/com/hughes/android/dictionary/parser/wiktionary/EnTranslationToTranslationParser.java +++ b/src/com/hughes/android/dictionary/parser/wiktionary/EnTranslationToTranslationParser.java @@ -15,6 +15,7 @@ package com.hughes.android.dictionary.parser.wiktionary; import java.util.Arrays; +import java.util.HashSet; import java.util.LinkedHashSet; import java.util.List; import java.util.Map; @@ -24,132 +25,138 @@ import java.util.regex.Pattern; import com.hughes.android.dictionary.engine.IndexBuilder; import com.hughes.android.dictionary.engine.IndexedEntry; import com.hughes.android.dictionary.engine.PairEntry; -import com.hughes.android.dictionary.engine.PairEntry.Pair; import com.hughes.android.dictionary.parser.WikiTokenizer; import com.hughes.android.dictionary.parser.wiktionary.EnFunctionCallbacks.TranslationCallback; import com.hughes.util.ListUtil; public final class EnTranslationToTranslationParser extends AbstractWiktionaryParser { - + final List indexBuilders; final Pattern[] langCodePatterns; PairEntry pairEntry = null; IndexedEntry indexedEntry = null; - StringBuilder[] builders = null; - - public static final String NAME = "EnTranslationToTranslation"; - - final Set Ts = new LinkedHashSet(Arrays.asList("t", "t+", - "t-", "tø", "apdx-t", "ttbc")); - + StringBuilder[] builders = null; + HashSet allPairs = new HashSet<>(); + + public static final String NAME = "EnTranslationToTranslation"; + + final Set Ts = new LinkedHashSet(Arrays.asList("t", "t+", + "t-", "tø", "apdx-t", "ttbc")); + public EnTranslationToTranslationParser(final List indexBuilders, - final Pattern[] langCodePatterns) { - this.indexBuilders = indexBuilders; - this.langCodePatterns = langCodePatterns; + final Pattern[] langCodePatterns) { + this.indexBuilders = indexBuilders; + this.langCodePatterns = langCodePatterns; } - + @Override void removeUselessArgs(Map namedArgs) { - namedArgs.keySet().removeAll(EnParser.USELESS_WIKI_ARGS); + namedArgs.keySet().removeAll(EnParser.USELESS_WIKI_ARGS); } - + @Override void parseSection(String heading, String text) { - if (EnParser.isIgnorableTitle(title)) { - return; - } - final WikiTokenizer.Callback callback = new WikiTokenizer.DoNothingCallback() { - @Override - public void onFunction(WikiTokenizer wikiTokenizer, String name, - List functionPositionArgs, - Map functionNamedArgs) { - //System.out.println(wikiTokenizer.token()); - if (Ts.contains(name)) { - onT(wikiTokenizer); - } else if (name.equals("trans-top") || name.equals("checktrans-top") || name.equals("checktrans")) { - startEntry(title, wikiTokenizer.token()); - } else if (name.equals("trans-bottom")) { - finishEntry(title); - } + if (EnParser.isIgnorableTitle(title)) { + return; } + final WikiTokenizer.Callback callback = new WikiTokenizer.DoNothingCallback() { + @Override + public void onFunction(WikiTokenizer wikiTokenizer, String name, + List functionPositionArgs, + Map functionNamedArgs) { + //System.out.println(wikiTokenizer.token()); + if (Ts.contains(name)) { + onT(wikiTokenizer); + } else if (name.equals("trans-top") || name.equals("checktrans-top") || name.equals("checktrans")) { + startEntry(title, wikiTokenizer.token()); + } else if (name.equals("trans-bottom")) { + finishEntry(title); + } + } - @Override - public void onListItem(WikiTokenizer wikiTokenizer) { - WikiTokenizer.dispatch(wikiTokenizer.listItemWikiText(), false, this); + @Override + public void onListItem(WikiTokenizer wikiTokenizer) { + WikiTokenizer.dispatch(wikiTokenizer.listItemWikiText(), false, this); + } + }; + WikiTokenizer.dispatch(text, true, callback); + + if (builders != null) { + LOG.warning("unended translations: " + title); + finishEntry(title); } - }; - WikiTokenizer.dispatch(text, true, callback); - - if (builders != null) { - LOG.warning("unended translations: " + title); - finishEntry(title); - } - } - - final TranslationCallback translationCallback = new TranslationCallback(); - - final AppendAndIndexWikiCallback appendAndIndexWikiCallback = new AppendAndIndexWikiCallback( - this); - { - for (final String t : Ts) { - appendAndIndexWikiCallback.functionCallbacks.put(t, translationCallback); } - } - - private void onT(WikiTokenizer wikiTokenizer) { - if (builders == null) { - LOG.warning("{{t...}} section outside of {{trans-top}}: " + title); - startEntry(title, "QUICKDIC_OUTSIDE"); - } - - final List args = wikiTokenizer.functionPositionArgs(); - final String langCode = ListUtil.get(args, 0); - if (langCode == null) { - LOG.warning("Missing langCode: " + wikiTokenizer.token()); - return; + + final TranslationCallback translationCallback = new TranslationCallback(); + + final AppendAndIndexWikiCallback appendAndIndexWikiCallback = new AppendAndIndexWikiCallback( + this); + { + for (final String t : Ts) { + appendAndIndexWikiCallback.functionCallbacks.put(t, translationCallback); + } } - for (int p = 0; p < 2; ++p) { - if (langCodePatterns[p].matcher(langCode).matches()) { - appendAndIndexWikiCallback.builder = builders[p]; - if (appendAndIndexWikiCallback.builder.length() > 0) { - appendAndIndexWikiCallback.builder.append(", "); + + private void onT(WikiTokenizer wikiTokenizer) { + if (builders == null) { + LOG.warning("{{t...}} section outside of {{trans-top}}: " + title); + startEntry(title, "QUICKDIC_OUTSIDE"); + } + + final List args = wikiTokenizer.functionPositionArgs(); + final String langCode = ListUtil.get(args, 0); + if (langCode == null) { + LOG.warning("Missing langCode: " + wikiTokenizer.token()); + return; + } + for (int p = 0; p < 2; ++p) { + if (langCodePatterns[p].matcher(langCode).matches()) { + appendAndIndexWikiCallback.builder = builders[p]; + if (appendAndIndexWikiCallback.builder.length() > 0) { + appendAndIndexWikiCallback.builder.append(", "); + } + appendAndIndexWikiCallback.indexBuilder = indexBuilders.get(p); + appendAndIndexWikiCallback.onFunction(wikiTokenizer, + wikiTokenizer.functionName(), wikiTokenizer.functionPositionArgs(), + wikiTokenizer.functionNamedArgs()); + } } - appendAndIndexWikiCallback.indexBuilder = indexBuilders.get(p); - appendAndIndexWikiCallback.onFunction(wikiTokenizer, - wikiTokenizer.functionName(), wikiTokenizer.functionPositionArgs(), - wikiTokenizer.functionNamedArgs()); - } } - } void startEntry(final String title, final String func) { - if (pairEntry != null) { - LOG.warning("startEntry() twice: " + title + ", " + func); - finishEntry(title); - } - - pairEntry = new PairEntry(entrySource); - indexedEntry = new IndexedEntry(pairEntry); - builders = new StringBuilder[] { new StringBuilder(), new StringBuilder() }; - appendAndIndexWikiCallback.indexedEntry = indexedEntry; + if (pairEntry != null) { + LOG.warning("startEntry() twice: " + title + ", " + func); + finishEntry(title); + } + + pairEntry = new PairEntry(entrySource); + indexedEntry = new IndexedEntry(pairEntry); + builders = new StringBuilder[] { new StringBuilder(), new StringBuilder() }; + appendAndIndexWikiCallback.indexedEntry = indexedEntry; } - + void finishEntry(final String title) { - if (pairEntry == null) { - LOG.warning("finalizeEntry() twice: " + title); - return; - } - final String lang1 = builders[0].toString(); - final String lang2 = builders[1].toString(); - if (lang1.length() > 0 && lang2.length() > 0) { - pairEntry.pairs.add(new Pair(lang1, lang2)); - indexedEntry.isValid = true; - } - - pairEntry = null; - indexedEntry = null; - builders = null; + if (pairEntry == null) { + LOG.warning("finalizeEntry() twice: " + title); + return; + } + final String lang1 = builders[0].toString(); + final String lang2 = builders[1].toString(); + if (lang1.length() > 0 && lang2.length() > 0) { + final PairEntry.Pair newPair = new PairEntry.Pair(lang1, lang2); + // brute-force approach to prevent adding duplicates + if (!allPairs.contains(newPair)) + { + allPairs.add(newPair); + pairEntry.pairs.add(new PairEntry.Pair(lang1, lang2)); + indexedEntry.isValid = true; + } + } + + pairEntry = null; + indexedEntry = null; + builders = null; } - } \ No newline at end of file +}