gitweb.fperrin.net Git - DictionaryPC.git/blobdiff - src/com/hughes/android/dictionary/parser/wiktionary/EnParser.java
Minor automated code simplifications.
[DictionaryPC.git] / src / com / hughes / android / dictionary / parser / wiktionary / EnParser.java
index db7adf1c30f54a53e98fc96da35a14da8649e3c8..b60235c23991744392ce0799f485829595448107 100644 (file)
@@ -28,128 +28,127 @@ import com.hughes.android.dictionary.parser.WikiTokenizer;
 
 public abstract class EnParser extends AbstractWiktionaryParser {
 
-  // TODO: process {{ttbc}} lines
-  
-  public static final Pattern partOfSpeechHeader = Pattern.compile(
-      "Noun|Verb|Adjective|Adverb|Pronoun|Conjunction|Interjection|" +
-      "Preposition|Proper noun|Article|Prepositional phrase|Acronym|" +
-      "Abbreviation|Initialism|Contraction|Prefix|Suffix|Symbol|Letter|" +
-      "Ligature|Idiom|Phrase|\\{\\{acronym\\}\\}|\\{\\{initialism\\}\\}|" +
-      "\\{\\{abbreviation\\}\\}|" +
-      // These are @deprecated:
-      "Noun form|Verb form|Adjective form|Nominal phrase|Noun phrase|" +
-      "Verb phrase|Transitive verb|Intransitive verb|Reflexive verb|" +
-      // These are extras I found:
-      "Determiner|Numeral|Number|Cardinal number|Ordinal number|Proverb|" +
-      "Particle|Interjection|Pronominal adverb" +
-      "Han character|Hanzi|Hanja|Kanji|Katakana character|Syllable");
-  
-  // Might only want to remove "lang" if it's equal to "zh", for example.
-  static final Set<String> USELESS_WIKI_ARGS = new LinkedHashSet<String>(
-      Arrays.asList(
-          "lang",
-          "sc",
-          "sort",
-          "cat",
-          "cat2",
-          "xs",
-          "nodot"));
-
-  static boolean isIgnorableTitle(final String title) {
-    return title.startsWith("Wiktionary:") ||
-        title.startsWith("Template:") ||
-        title.startsWith("Appendix:") ||
-        title.startsWith("Category:") ||
-        title.startsWith("Index:") ||
-        title.startsWith("MediaWiki:") ||
-        title.startsWith("TransWiki:") ||
-        title.startsWith("Citations:") ||
-        title.startsWith("Concordance:") ||
-        title.startsWith("Help:");
-  }
-  
-  final IndexBuilder enIndexBuilder;
-  final IndexBuilder foreignIndexBuilder;
-  final Pattern langPattern;
-  final Pattern langCodePattern;
-  final boolean swap;
-  
-  // State used while parsing.
-  enum State {
-    TRANSLATION_LINE,
-    ENGLISH_DEF_OF_FOREIGN,
-    ENGLISH_EXAMPLE,
-    FOREIGN_EXAMPLE,
-  }
-  State state = null;
-
-  public boolean entryIsFormOfSomething = false;
-  final Collection<String> wordForms = new ArrayList<String>();
-  boolean titleAppended = false;
-
-
-  final AppendAndIndexWikiCallback<EnParser> appendAndIndexWikiCallback = new AppendAndIndexCallback(this);
-  {
-    appendAndIndexWikiCallback.functionCallbacks.putAll(EnFunctionCallbacks.DEFAULT);
-    for (final String key : new ArrayList<String>(appendAndIndexWikiCallback.functionCallbacks.keySet())) {
-        // Don't handle the it-conj functions here.
-        if (key.startsWith("it-conj")) {
-            appendAndIndexWikiCallback.functionCallbacks.remove(key);
+    // TODO: process {{ttbc}} lines
+
+    public static final Pattern partOfSpeechHeader = Pattern.compile(
+                "Noun|Verb|Adjective|Adverb|Pronoun|Conjunction|Interjection|" +
+                "Preposition|Proper noun|Article|Prepositional phrase|Acronym|" +
+                "Abbreviation|Initialism|Contraction|Prefix|Suffix|Symbol|Letter|" +
+                "Ligature|Idiom|Phrase|\\{\\{acronym\\}\\}|\\{\\{initialism\\}\\}|" +
+                "\\{\\{abbreviation\\}\\}|" +
+                // These are @deprecated:
+                "Noun form|Verb form|Adjective form|Nominal phrase|Noun phrase|" +
+                "Verb phrase|Transitive verb|Intransitive verb|Reflexive verb|" +
+                // These are extras I found:
+                "Determiner|Numeral|Number|Cardinal number|Ordinal number|Proverb|" +
+                "Particle|Interjection|Pronominal adverb|" +
+                "Han character|Hanzi|Hanja|Kanji|Katakana character|Syllable");
+
+    static final Set<String> USELESS_WIKI_ARGS = new LinkedHashSet<>(
+            Arrays.asList(
+                    "lang",
+                    "sc",
+                    "sort",
+                    "cat",
+                    "cat2",
+                    "xs",
+                    "nodot"));
+
+    static boolean isIgnorableTitle(final String title) {
+        return title.startsWith("Wiktionary:") ||
+               title.startsWith("Template:") ||
+               title.startsWith("Appendix:") ||
+               title.startsWith("Category:") ||
+               title.startsWith("Index:") ||
+               title.startsWith("MediaWiki:") ||
+               title.startsWith("TransWiki:") ||
+               title.startsWith("Citations:") ||
+               title.startsWith("Concordance:") ||
+               title.startsWith("Help:");
+    }
+
+    final IndexBuilder enIndexBuilder;
+    final IndexBuilder foreignIndexBuilder;
+    final Pattern langPattern;
+    final Pattern langCodePattern;
+    final boolean swap;
+
+    // State used while parsing.
+    enum State {
+        TRANSLATION_LINE,
+        ENGLISH_DEF_OF_FOREIGN,
+        ENGLISH_EXAMPLE,
+        FOREIGN_EXAMPLE,
+    }
+    State state = null;
+
+    public boolean entryIsFormOfSomething = false;
+    final Collection<String> wordForms = new ArrayList<>();
+    boolean titleAppended = false;
+
+
+    final AppendAndIndexWikiCallback<EnParser> appendAndIndexWikiCallback = new AppendAndIndexCallback(this);
+    {
+        appendAndIndexWikiCallback.functionCallbacks.putAll(EnFunctionCallbacks.DEFAULT);
+        for (final String key : new ArrayList<>(appendAndIndexWikiCallback.functionCallbacks.keySet())) {
+            // Don't handle the it-conj functions here.
+            if (key.startsWith("it-conj")) {
+                appendAndIndexWikiCallback.functionCallbacks.remove(key);
+            }
         }
     }
-  }
-  
-  EnParser(final IndexBuilder enIndexBuilder, final IndexBuilder otherIndexBuilder, final Pattern langPattern, final Pattern langCodePattern, final boolean swap) {
-    this.enIndexBuilder = enIndexBuilder;
-    this.foreignIndexBuilder = otherIndexBuilder;
-    this.langPattern = langPattern;
-    this.langCodePattern = langCodePattern;
-    this.swap = swap;
-  }
-
-  @Override
-  void removeUselessArgs(Map<String, String> namedArgs) {
-    namedArgs.keySet().removeAll(USELESS_WIKI_ARGS);
-  }
-  
-  static class AppendAndIndexCallback extends AppendAndIndexWikiCallback<EnParser> {
-
-    public AppendAndIndexCallback(EnParser parser) {
-      super(parser);
+
+    EnParser(final IndexBuilder enIndexBuilder, final IndexBuilder otherIndexBuilder, final Pattern langPattern, final Pattern langCodePattern, final boolean swap) {
+        this.enIndexBuilder = enIndexBuilder;
+        this.foreignIndexBuilder = otherIndexBuilder;
+        this.langPattern = langPattern;
+        this.langCodePattern = langCodePattern;
+        this.swap = swap;
     }
 
     @Override
-    public void onWikiLink(WikiTokenizer wikiTokenizer) {
-      final String text = wikiTokenizer.wikiLinkText();
-      final String link = wikiTokenizer.wikiLinkDest();
-      if (link != null) {
-        if (link.contains("#English")) {
-          dispatch(text, parser.enIndexBuilder, EntryTypeName.WIKTIONARY_ENGLISH_DEF_WIKI_LINK);
-        } else if (link.contains("#") && parser.langPattern.matcher(link).find()) {
-          dispatch(text, parser.foreignIndexBuilder, EntryTypeName.WIKTIONARY_ENGLISH_DEF_OTHER_LANG);
-        } else if (link.equals("plural")) {
-          builder.append(text);
-        } else {
-          //LOG.warning("Special link: " + englishTokenizer.token());
-          dispatch(text, EntryTypeName.WIKTIONARY_ENGLISH_DEF_WIKI_LINK);
+    void removeUselessArgs(Map<String, String> namedArgs) {
+        namedArgs.keySet().removeAll(USELESS_WIKI_ARGS);
+    }
+
+    static class AppendAndIndexCallback extends AppendAndIndexWikiCallback<EnParser> {
+
+        public AppendAndIndexCallback(EnParser parser) {
+            super(parser);
         }
-      } else {
-        // link == null
-        final EntryTypeName entryTypeName;
-        switch (parser.state) {
-        case TRANSLATION_LINE:
-          entryTypeName = EntryTypeName.WIKTIONARY_TRANSLATION_WIKI_TEXT;
-          break;
-        case ENGLISH_DEF_OF_FOREIGN:
-          entryTypeName = EntryTypeName.WIKTIONARY_ENGLISH_DEF_WIKI_LINK;
-          break;
-          default:
-            throw new IllegalStateException("Invalid enum value: " + parser.state);
+
+        @Override
+        public void onWikiLink(WikiTokenizer wikiTokenizer) {
+            final String text = wikiTokenizer.wikiLinkText();
+            final String link = wikiTokenizer.wikiLinkDest();
+            if (link != null) {
+                if (link.contains("#English")) {
+                    dispatch(text, parser.enIndexBuilder, EntryTypeName.WIKTIONARY_ENGLISH_DEF_WIKI_LINK);
+                } else if (link.contains("#") && parser.langPattern.matcher(link).find()) {
+                    dispatch(text, parser.foreignIndexBuilder, EntryTypeName.WIKTIONARY_ENGLISH_DEF_OTHER_LANG);
+                } else if (link.equals("plural")) {
+                    builder.append(text);
+                } else {
+                    //LOG.warning("Special link: " + englishTokenizer.token());
+                    dispatch(text, EntryTypeName.WIKTIONARY_ENGLISH_DEF_WIKI_LINK);
+                }
+            } else {
+                // link == null
+                final EntryTypeName entryTypeName;
+                switch (parser.state) {
+                case TRANSLATION_LINE:
+                    entryTypeName = EntryTypeName.WIKTIONARY_TRANSLATION_WIKI_TEXT;
+                    break;
+                case ENGLISH_DEF_OF_FOREIGN:
+                    entryTypeName = EntryTypeName.WIKTIONARY_ENGLISH_DEF_WIKI_LINK;
+                    break;
+                default:
+                    throw new IllegalStateException("Invalid enum value: " + parser.state);
+                }
+                dispatch(text, entryTypeName);
+            }
         }
-        dispatch(text, entryTypeName);
-      }
+
     }
-    
-  }
 
 }