gitweb.fperrin.net Git - DictionaryPC.git/blobdiff - src/com/hughes/android/dictionary/parser/wiktionary/EnTranslationToTranslationParser.java
Minor automated code simplifications.
[DictionaryPC.git] / src / com / hughes / android / dictionary / parser / wiktionary / EnTranslationToTranslationParser.java
index d088266e9fd45a4952fd7f2ddaddf7fb5845be32..042f0fac3289280cb91db44ac86f5404d09fba6d 100644 (file)
@@ -15,6 +15,7 @@
 package com.hughes.android.dictionary.parser.wiktionary;
 
 import java.util.Arrays;
+import java.util.HashSet;
 import java.util.LinkedHashSet;
 import java.util.List;
 import java.util.Map;
@@ -24,104 +25,138 @@ import java.util.regex.Pattern;
 import com.hughes.android.dictionary.engine.IndexBuilder;
 import com.hughes.android.dictionary.engine.IndexedEntry;
 import com.hughes.android.dictionary.engine.PairEntry;
-import com.hughes.android.dictionary.engine.PairEntry.Pair;
 import com.hughes.android.dictionary.parser.WikiTokenizer;
 import com.hughes.android.dictionary.parser.wiktionary.EnFunctionCallbacks.TranslationCallback;
 import com.hughes.util.ListUtil;
 
 public final class EnTranslationToTranslationParser extends AbstractWiktionaryParser {
-  
-    final IndexBuilder[] indexBuilders;
+
+    final List<IndexBuilder> indexBuilders;
     final Pattern[] langCodePatterns;
 
     PairEntry pairEntry = null;
     IndexedEntry indexedEntry = null;
-    StringBuilder[] builders = null; 
-    
-  final Set<String> Ts = new LinkedHashSet<String>(Arrays.asList("t", "t+",
-      "t-", "tø", "apdx-t", "ttbc"));
-    
-    public EnTranslationToTranslationParser(final IndexBuilder[] indexBuilders,
-        final Pattern[] langCodePatterns) {
-      this.indexBuilders = indexBuilders;
-      this.langCodePatterns = langCodePatterns;
+    StringBuilder[] builders = null;
+    final HashSet<PairEntry.Pair> allPairs = new HashSet<>();
+
+    public static final String NAME = "EnTranslationToTranslation";
+
+    final Set<String> Ts = new LinkedHashSet<>(Arrays.asList("t", "t+",
+            "t-", "tø", "apdx-t", "ttbc"));
+
+    public EnTranslationToTranslationParser(final List<IndexBuilder> indexBuilders,
+                                            final Pattern[] langCodePatterns) {
+        this.indexBuilders = indexBuilders;
+        this.langCodePatterns = langCodePatterns;
     }
-    
+
     @Override
     void removeUselessArgs(Map<String, String> namedArgs) {
-      namedArgs.keySet().removeAll(EnParser.USELESS_WIKI_ARGS);
+        namedArgs.keySet().removeAll(EnParser.USELESS_WIKI_ARGS);
     }
-    
+
     @Override
     void parseSection(String heading, String text) {
-      if (EnParser.isIgnorableTitle(title)) {
-        return;
-      }
-      final WikiTokenizer wikiTokenizer = new WikiTokenizer(text);
-      while (wikiTokenizer.nextToken() != null) {
-        if (wikiTokenizer.isFunction()) {
-          final String name = wikiTokenizer.functionName();
-          if (Ts.contains(name)) {
-            onT(wikiTokenizer);
-          } else if (name.equals("trans-top")) {
-            startEntry(title, wikiTokenizer.token());
-          } else if (name.equals("trans-bottom")) {
+        if (EnParser.isIgnorableTitle(title)) {
+            return;
+        }
+        final WikiTokenizer.Callback callback = new WikiTokenizer.DoNothingCallback() {
+            @Override
+            public void onFunction(WikiTokenizer wikiTokenizer, String name,
+                                   List<String> functionPositionArgs,
+                                   Map<String, String> functionNamedArgs) {
+                //System.out.println(wikiTokenizer.token());
+                if (Ts.contains(name)) {
+                    onT(wikiTokenizer);
+                } else if (name.equals("trans-top") || name.equals("checktrans-top") || name.equals("checktrans")) {
+                    startEntry(title, wikiTokenizer.token());
+                } else if (name.equals("trans-bottom")) {
+                    finishEntry(title);
+                }
+            }
+
+            @Override
+            public void onListItem(WikiTokenizer wikiTokenizer) {
+                WikiTokenizer.dispatch(wikiTokenizer.listItemWikiText(), false, this);
+            }
+        };
+        WikiTokenizer.dispatch(text, true, callback);
+
+        if (builders != null) {
+            LOG.warning("unended translations: " + title);
             finishEntry(title);
-          }
         }
-      }
     }
-    
-    final TranslationCallback<EnTranslationToTranslationParser> translationCallback = new TranslationCallback<EnTranslationToTranslationParser>();
-    
-  final AppendAndIndexWikiCallback<EnTranslationToTranslationParser> appendAndIndexWikiCallback = new AppendAndIndexWikiCallback<EnTranslationToTranslationParser>(
-      this);
-  {
-    for (final String t : Ts) {
-      appendAndIndexWikiCallback.functionCallbacks.put(t, translationCallback);
+
+    final TranslationCallback<EnTranslationToTranslationParser> translationCallback = new TranslationCallback<>();
+
+    final AppendAndIndexWikiCallback<EnTranslationToTranslationParser> appendAndIndexWikiCallback = new AppendAndIndexWikiCallback<>(
+            this);
+    {
+        for (final String t : Ts) {
+            appendAndIndexWikiCallback.functionCallbacks.put(t, translationCallback);
+        }
     }
-  }
-    
-  private void onT(WikiTokenizer wikiTokenizer) {
-    final List<String> args = wikiTokenizer.functionPositionArgs();
-    final String langCode = ListUtil.get(args, 0);
-    for (int p = 0; p < 2; ++p) {
-      if (langCodePatterns[p].matcher(langCode).matches()) {
-        appendAndIndexWikiCallback.builder = builders[p];
-        appendAndIndexWikiCallback.indexBuilder = indexBuilders[p];
-        appendAndIndexWikiCallback.onFunction(wikiTokenizer,
-            wikiTokenizer.functionName(), wikiTokenizer.functionPositionArgs(),
-            wikiTokenizer.functionNamedArgs());
-      }
+
+    private void onT(WikiTokenizer wikiTokenizer) {
+        if (builders == null) {
+            LOG.warning("{{t...}} section outside of {{trans-top}}: " + title);
+            startEntry(title, "QUICKDIC_OUTSIDE");
+        }
+
+        final List<String> args = wikiTokenizer.functionPositionArgs();
+        final String langCode = ListUtil.get(args, 0);
+        if (langCode == null) {
+            LOG.warning("Missing langCode: " + wikiTokenizer.token());
+            return;
+        }
+        for (int p = 0; p < 2; ++p) {
+            if (langCodePatterns[p].matcher(langCode).matches()) {
+                appendAndIndexWikiCallback.builder = builders[p];
+                if (appendAndIndexWikiCallback.builder.length() > 0) {
+                    appendAndIndexWikiCallback.builder.append(", ");
+                }
+                appendAndIndexWikiCallback.indexBuilder = indexBuilders.get(p);
+                appendAndIndexWikiCallback.onFunction(wikiTokenizer,
+                                                      wikiTokenizer.functionName(), wikiTokenizer.functionPositionArgs(),
+                                                      wikiTokenizer.functionNamedArgs());
+            }
+        }
     }
-  }
 
     void startEntry(final String title, final String func) {
-      if (pairEntry != null) {
-        LOG.warning("startEntry() twice" + func);
-        finishEntry(title);
-      }
-      
-      pairEntry = new PairEntry(entrySource);
-      indexedEntry = new IndexedEntry(pairEntry);
-      builders = new StringBuilder[] { new StringBuilder(), new StringBuilder() }; 
+        if (pairEntry != null) {
+            LOG.warning("startEntry() twice: " + title + ", " + func);
+            finishEntry(title);
+        }
+
+        pairEntry = new PairEntry(entrySource);
+        indexedEntry = new IndexedEntry(pairEntry);
+        builders = new StringBuilder[] { new StringBuilder(), new StringBuilder() };
+        appendAndIndexWikiCallback.indexedEntry = indexedEntry;
     }
-    
+
     void finishEntry(final String title) {
-      if (pairEntry == null) {
-        LOG.warning("finalizeEntry() twice" + title);
-        return;
-      }
-      final String lang1 = builders[0].toString();
-      final String lang2 = builders[1].toString();
-      if (lang1.length() > 0 && lang2.length() > 0) {
-        pairEntry.pairs.add(new Pair(lang1, lang2));
-        indexedEntry.isValid = true;
-      }
-      
-      pairEntry = null;
-      indexedEntry = null;
-      builders = null;
+        if (pairEntry == null) {
+            LOG.warning("finalizeEntry() twice: " + title);
+            return;
+        }
+        final String lang1 = builders[0].toString();
+        final String lang2 = builders[1].toString();
+        if (lang1.length() > 0 && lang2.length() > 0) {
+            final PairEntry.Pair newPair = new PairEntry.Pair(lang1, lang2);
+            // brute-force approach to prevent adding duplicates
+            if (!allPairs.contains(newPair))
+            {
+                allPairs.add(newPair);
+                pairEntry.pairs.add(new PairEntry.Pair(lang1, lang2));
+                indexedEntry.isValid = true;
+            }
+        }
+
+        pairEntry = null;
+        indexedEntry = null;
+        builders = null;
     }
 
-  }
\ No newline at end of file
+}