]> gitweb.fperrin.net Git - DictionaryPC.git/blobdiff - src/com/hughes/android/dictionary/parser/wiktionary/EnToTranslationParser.java
Minor automated code simplifications.
[DictionaryPC.git] / src / com / hughes / android / dictionary / parser / wiktionary / EnToTranslationParser.java
index c1280558b10e62795be4c1753fd5d351b04d849b..73b2db697ce86a8a92179b30baaaeefac38e5dea 100644 (file)
@@ -27,205 +27,205 @@ import com.hughes.android.dictionary.parser.WikiTokenizer;
 public final class EnToTranslationParser extends EnParser {
 
     public EnToTranslationParser(final IndexBuilder enIndexBuilder,
-        final IndexBuilder otherIndexBuilder, final Pattern langPattern,
-        final Pattern langCodePattern, final boolean swap) {
-      super(enIndexBuilder, otherIndexBuilder, langPattern, langCodePattern, swap);
+                                 final IndexBuilder otherIndexBuilder, final Pattern langPattern,
+                                 final Pattern langCodePattern, final boolean swap) {
+        super(enIndexBuilder, otherIndexBuilder, langPattern, langCodePattern, swap);
     }
 
     @Override
     void parseSection(String heading, String text) {
-      if (isIgnorableTitle(title)) {
-        return;
-      }
-      heading = heading.replaceAll("=", "").trim(); 
-      if (!heading.contains("English")) {
-        return;
-      }
-
-      String pos = null;
-      int posDepth = -1;
-
-      final WikiTokenizer wikiTokenizer = new WikiTokenizer(text);
-      while (wikiTokenizer.nextToken() != null) {
-        
-        if (wikiTokenizer.isHeading()) {
-          final String headerName = wikiTokenizer.headingWikiText();
-          
-          if (wikiTokenizer.headingDepth() <= posDepth) {
-            pos = null;
-            posDepth = -1;
-          }
-          
-          if (partOfSpeechHeader.matcher(headerName).matches()) {
-            posDepth = wikiTokenizer.headingDepth();
-            pos = wikiTokenizer.headingWikiText();
-            // TODO: if we're inside the POS section, we should handle the first title line...
-            
-          } else if (headerName.equals("Translations")) {
-            if (pos == null) {
-              LOG.info("Translations without POS (but using anyway): " + title);
+        if (isIgnorableTitle(title)) {
+            return;
+        }
+        heading = heading.replace("=", "").trim();
+        if (!heading.contains("English")) {
+            return;
+        }
+
+        String pos = null;
+        int posDepth = -1;
+
+        final WikiTokenizer wikiTokenizer = new WikiTokenizer(text);
+        while (wikiTokenizer.nextToken() != null) {
+
+            if (wikiTokenizer.isHeading()) {
+                final String headerName = wikiTokenizer.headingWikiText();
+
+                if (wikiTokenizer.headingDepth() <= posDepth) {
+                    pos = null;
+                    posDepth = -1;
+                }
+
+                if (partOfSpeechHeader.matcher(headerName).matches()) {
+                    posDepth = wikiTokenizer.headingDepth();
+                    pos = wikiTokenizer.headingWikiText();
+                    // TODO: if we're inside the POS section, we should handle the first title line...
+
+                } else if (headerName.equals("Translations")) {
+                    if (pos == null) {
+                        LOG.info("Translations without POS (but using anyway): " + title);
+                    }
+                    doTranslations(wikiTokenizer, pos);
+                } else if (headerName.equals("Pronunciation")) {
+                    //doPronunciation(wikiLineReader);
+                }
+            } else if (wikiTokenizer.isFunction()) {
+                final String name = wikiTokenizer.functionName();
+                if (name.equals("head") && pos == null) {
+                    LOG.warning("{{head}} without POS: " + title);
+                }
             }
-            doTranslations(wikiTokenizer, pos);
-          } else if (headerName.equals("Pronunciation")) {
-            //doPronunciation(wikiLineReader);
-          }
-        } else if (wikiTokenizer.isFunction()) {
-          final String name = wikiTokenizer.functionName();
-          if (name.equals("head") && pos == null) {
-            LOG.warning("{{head}} without POS: " + title);
-          }
         }
-      }
     }
 
     private void doTranslations(final WikiTokenizer wikiTokenizer, final String pos) {
-      if (title.equals("absolutely")) {
-        //System.out.println();
-      }
-      
-      String topLevelLang = null;
-      String sense = null;
-      boolean done = false;
-      while (wikiTokenizer.nextToken() != null) {
-        if (wikiTokenizer.isHeading()) {
-          wikiTokenizer.returnToLineStart();
-          return;
-        }
-        if (done) {
-          continue;
+        if (title.equals("absolutely")) {
+            //System.out.println();
         }
-        
-        // Check whether we care about this line:
-        
-        if (wikiTokenizer.isFunction()) {
-          final String functionName = wikiTokenizer.functionName();
-          final List<String> positionArgs = wikiTokenizer.functionPositionArgs();
-          
-          if (functionName.equals("trans-top")) {
-            sense = null;
-            if (wikiTokenizer.functionPositionArgs().size() >= 1) {
-              sense = positionArgs.get(0);
-              sense = WikiTokenizer.toPlainText(sense);
-              //LOG.info("Sense: " + sense);
+
+        String topLevelLang = null;
+        String sense = null;
+        boolean done = false;
+        while (wikiTokenizer.nextToken() != null) {
+            if (wikiTokenizer.isHeading()) {
+                wikiTokenizer.returnToLineStart();
+                return;
+            }
+            if (done) {
+                continue;
             }
-          } else if (functionName.equals("trans-bottom")) {
-            sense = null;
-          } else if (functionName.equals("trans-mid")) {
-          } else if (functionName.equals("trans-see")) {
-           incrementCount("WARNING:trans-see");
-          } else if (functionName.startsWith("picdic")) {
-          } else if (functionName.startsWith("checktrans")) {
-            done = true;
-          } else if (functionName.startsWith("ttbc")) {
-            wikiTokenizer.nextLine();
-            // TODO: would be great to handle ttbc
-            // TODO: Check this: done = true;
-          } else {
-            LOG.warning("Unexpected translation wikifunction: " + wikiTokenizer.token() + ", title=" + title);
-          }
-        } else if (wikiTokenizer.isListItem()) {
-          final String line = wikiTokenizer.listItemWikiText();
-          // This line could produce an output...
-          
+
+            // Check whether we care about this line:
+
+            if (wikiTokenizer.isFunction()) {
+                final String functionName = wikiTokenizer.functionName();
+                final List<String> positionArgs = wikiTokenizer.functionPositionArgs();
+
+                if (functionName.equals("trans-top")) {
+                    sense = null;
+                    if (wikiTokenizer.functionPositionArgs().size() >= 1) {
+                        sense = positionArgs.get(0);
+                        sense = WikiTokenizer.toPlainText(sense);
+                        //LOG.info("Sense: " + sense);
+                    }
+                } else if (functionName.equals("trans-bottom")) {
+                    sense = null;
+                } else if (functionName.equals("trans-mid")) {
+                } else if (functionName.equals("trans-see")) {
+                    incrementCount("WARNING:trans-see");
+                } else if (functionName.startsWith("picdic")) {
+                } else if (functionName.startsWith("checktrans")) {
+                    done = true;
+                } else if (functionName.startsWith("ttbc")) {
+                    wikiTokenizer.nextLine();
+                    // TODO: would be great to handle ttbc
+                    // TODO: Check this: done = true;
+                } else {
+                    LOG.warning("Unexpected translation wikifunction: " + wikiTokenizer.token() + ", title=" + title);
+                }
+            } else if (wikiTokenizer.isListItem()) {
+                final String line = wikiTokenizer.listItemWikiText();
+                // This line could produce an output...
+
 //          if (line.contains("ich hoan dich gear")) {
 //            //System.out.println();
 //          }
-          
-          // First strip the language and check whether it matches.
-          // And hold onto it for sub-lines.
-          final int colonIndex = line.indexOf(":");
-          if (colonIndex == -1) {
-            continue;
-          }
-          
-          final String lang = trim(WikiTokenizer.toPlainText(line.substring(0, colonIndex)));
-          incrementCount("tCount:" + lang);
-          final boolean appendLang;
-          if (wikiTokenizer.listItemPrefix().length() == 1) {
-            topLevelLang = lang;
-            final boolean thisFind = langPattern.matcher(lang).find();
-            if (!thisFind) {
-              continue;
-            }
-            appendLang = !langPattern.matcher(lang).matches();
-          } else if (topLevelLang == null) {
-            continue;
-          } else {
-            // Two-level -- the only way we won't append is if this second level matches exactly.
-            if (!langPattern.matcher(lang).matches() && !langPattern.matcher(topLevelLang).find()) {
-              continue;
+
+                // First strip the language and check whether it matches.
+                // And hold onto it for sub-lines.
+                final int colonIndex = line.indexOf(":");
+                if (colonIndex == -1) {
+                    continue;
+                }
+
+                final String lang = trim(WikiTokenizer.toPlainText(line.substring(0, colonIndex)));
+                incrementCount("tCount:" + lang);
+                final boolean appendLang;
+                if (wikiTokenizer.listItemPrefix().length() == 1) {
+                    topLevelLang = lang;
+                    final boolean thisFind = langPattern.matcher(lang).find();
+                    if (!thisFind) {
+                        continue;
+                    }
+                    appendLang = !langPattern.matcher(lang).matches();
+                } else if (topLevelLang == null) {
+                    continue;
+                } else {
+                    // Two-level -- the only way we won't append is if this second level matches exactly.
+                    if (!langPattern.matcher(lang).matches() && !langPattern.matcher(topLevelLang).find()) {
+                        continue;
+                    }
+                    appendLang = !langPattern.matcher(lang).matches();
+                }
+
+                String rest = line.substring(colonIndex + 1).trim();
+                if (rest.length() > 0) {
+                    doTranslationLine(line, appendLang ? lang : null, pos, sense, rest);
+                }
+
+            } else if (wikiTokenizer.remainderStartsWith("''See''")) {
+                wikiTokenizer.nextLine();
+                incrementCount("WARNING: ''See''" );
+                LOG.fine("Skipping See line: " + wikiTokenizer.token());
+            } else if (wikiTokenizer.isWikiLink()) {
+                final String wikiLink = wikiTokenizer.wikiLinkText();
+                if (wikiLink.contains(":") && wikiLink.contains(title)) {
+                } else if (wikiLink.contains("Category:")) {
+                } else  {
+                    incrementCount("WARNING: Unexpected wikiLink" );
+                    LOG.warning("Unexpected wikiLink: " + wikiTokenizer.token() + ", title=" + title);
+                }
+            } else if (wikiTokenizer.isNewline() || wikiTokenizer.isMarkup() || wikiTokenizer.isComment()) {
+            } else {
+                final String token = wikiTokenizer.token();
+                if (token.equals("----")) {
+                } else {
+                    LOG.warning("Unexpected translation token: " + wikiTokenizer.token() + ", title=" + title);
+                    incrementCount("WARNING: Unexpected translation token" );
+                }
             }
-            appendLang = !langPattern.matcher(lang).matches();
-          }
-          
-          String rest = line.substring(colonIndex + 1).trim();
-          if (rest.length() > 0) {
-            doTranslationLine(line, appendLang ? lang : null, pos, sense, rest);
-          }
-          
-        } else if (wikiTokenizer.remainderStartsWith("''See''")) {
-          wikiTokenizer.nextLine();
-          incrementCount("WARNING: ''See''" );
-          LOG.fine("Skipping See line: " + wikiTokenizer.token());
-        } else if (wikiTokenizer.isWikiLink()) {
-          final String wikiLink = wikiTokenizer.wikiLinkText();
-          if (wikiLink.contains(":") && wikiLink.contains(title)) {
-          } else if (wikiLink.contains("Category:")) {
-          } else  {
-            incrementCount("WARNING: Unexpected wikiLink" );
-            LOG.warning("Unexpected wikiLink: " + wikiTokenizer.token() + ", title=" + title);
-          }
-        } else if (wikiTokenizer.isNewline() || wikiTokenizer.isMarkup() || wikiTokenizer.isComment()) {
-        } else {
-          final String token = wikiTokenizer.token();
-          if (token.equals("----")) { 
-          } else {
-            LOG.warning("Unexpected translation token: " + wikiTokenizer.token() + ", title=" + title);
-            incrementCount("WARNING: Unexpected translation token" );
-          }
+
         }
-        
-      }
     }
-    
+
     private void doTranslationLine(final String line, final String lang, final String pos, final String sense, final String rest) {
-      state = State.TRANSLATION_LINE;
-      // Good chance we'll actually file this one...
-      final PairEntry pairEntry = new PairEntry(entrySource);
-      final IndexedEntry indexedEntry = new IndexedEntry(pairEntry);
-      indexedEntry.isValid = true;
-      
-      final StringBuilder foreignText = new StringBuilder();
-      appendAndIndexWikiCallback.reset(foreignText, indexedEntry);
-      appendAndIndexWikiCallback.dispatch(rest, foreignIndexBuilder, EntryTypeName.WIKTIONARY_TRANSLATION_OTHER_TEXT);
-      
-      if (foreignText.length() == 0) {
-        LOG.warning("Empty foreignText: " + line);
-        incrementCount("WARNING: Empty foreignText" );
-        return;
-      }
-      
-      if (lang != null) {
-        foreignText.insert(0, String.format("(%s) ", lang));
-      }
-      
-      StringBuilder englishText = new StringBuilder();
-      
-      englishText.append(title);
-      if (sense != null) {
-        englishText.append(" (").append(sense).append(")");
-        enIndexBuilder.addEntryWithString(indexedEntry, sense, EntryTypeName.WIKTIONARY_TRANSLATION_SENSE);
-      }
-      if (pos != null) {
-        englishText.append(" (").append(pos.toLowerCase()).append(")");
-      }
-      enIndexBuilder.addEntryWithString(indexedEntry, title, EntryTypeName.WIKTIONARY_TITLE_MULTI);
-      
-      final Pair pair = new Pair(trim(englishText.toString()), trim(foreignText.toString()), swap);
-      pairEntry.pairs.add(pair);
-      if (!pairsAdded.add(pair.toString())) {
-        LOG.warning("Duplicate pair: " + pair.toString());
-        incrementCount("WARNING: Duplicate pair" );
-      }
+        state = State.TRANSLATION_LINE;
+        // Good chance we'll actually file this one...
+        final PairEntry pairEntry = new PairEntry(entrySource);
+        final IndexedEntry indexedEntry = new IndexedEntry(pairEntry);
+        indexedEntry.isValid = true;
+
+        final StringBuilder foreignText = new StringBuilder();
+        appendAndIndexWikiCallback.reset(foreignText, indexedEntry);
+        appendAndIndexWikiCallback.dispatch(rest, foreignIndexBuilder, EntryTypeName.WIKTIONARY_TRANSLATION_OTHER_TEXT);
+
+        if (foreignText.length() == 0) {
+            LOG.warning("Empty foreignText: " + line);
+            incrementCount("WARNING: Empty foreignText" );
+            return;
+        }
+
+        if (lang != null) {
+            foreignText.insert(0, String.format("(%s) ", lang));
+        }
+
+        StringBuilder englishText = new StringBuilder();
+
+        englishText.append(title);
+        if (sense != null) {
+            englishText.append(" (").append(sense).append(")");
+            enIndexBuilder.addEntryWithString(indexedEntry, sense, EntryTypeName.WIKTIONARY_TRANSLATION_SENSE);
+        }
+        if (pos != null) {
+            englishText.append(" (").append(pos.toLowerCase()).append(")");
+        }
+        enIndexBuilder.addEntryWithString(indexedEntry, title, EntryTypeName.WIKTIONARY_TITLE_MULTI);
+
+        final Pair pair = new Pair(trim(englishText.toString()), trim(foreignText.toString()), swap);
+        pairEntry.pairs.add(pair);
+        if (!pairsAdded.add(pair.toString())) {
+            LOG.warning("Duplicate pair: " + pair);
+            incrementCount("WARNING: Duplicate pair" );
+        }
     }
-  }  // EnToTranslationParser
\ No newline at end of file
+}  // EnToTranslationParser