gitweb.fperrin.net Git - DictionaryPC.git/blobdiff - src/com/hughes/android/dictionary/parser/wiktionary/WholeSectionToHtmlParser.java
Synonyms, antonyms.
[DictionaryPC.git] / src / com / hughes / android / dictionary / parser / wiktionary / WholeSectionToHtmlParser.java
index f7a249507c9c75ad04f8e6a20774686f35d17028..0b4bc0dd2be0acf34cf248185654febe4487ab61 100644 (file)
@@ -1,6 +1,7 @@
 
 package com.hughes.android.dictionary.parser.wiktionary;
 
+import com.hughes.android.dictionary.engine.EntryTypeName;
 import com.hughes.android.dictionary.engine.HtmlEntry;
 import com.hughes.android.dictionary.engine.IndexBuilder;
 import com.hughes.android.dictionary.engine.IndexBuilder.TokenData;
@@ -9,7 +10,6 @@ import com.hughes.android.dictionary.parser.WikiTokenizer;
 import com.hughes.util.StringUtil;
 
 import org.apache.commons.lang3.StringEscapeUtils;
-import org.apache.commons.lang3.StringUtils;
 
 import java.util.ArrayList;
 import java.util.LinkedHashMap;
@@ -23,6 +23,7 @@ public class WholeSectionToHtmlParser extends AbstractWiktionaryParser {
 
     interface LangConfig {
         boolean skipSection(final String name);
+        EntryTypeName sectionNameToEntryType(String sectionName);
         boolean skipWikiLink(final WikiTokenizer wikiTokenizer);
         String adjustWikiLink(String wikiLinkDest);
         void addFunctionCallbacks(
@@ -36,6 +37,24 @@ public class WholeSectionToHtmlParser extends AbstractWiktionaryParser {
             public boolean skipSection(String headingText) {
                 return enSkipSections.matcher(headingText).matches();
             }
+            
+            @Override
+            public EntryTypeName sectionNameToEntryType(String sectionName) { // Maps a section heading to the entry type used when onWikiLink indexes links in that section.
+                if (sectionName.equalsIgnoreCase("Synonyms")) { // Heading comparison ignores case.
+                    return EntryTypeName.SYNONYM_MULTI;
+                }
+                if (sectionName.equalsIgnoreCase("Antonyms")) {
+                    return EntryTypeName.ANTONYM_MULTI;
+                }
+                if (EnParser.partOfSpeechHeader.matcher(sectionName).matches()) { // Whole heading must match the part-of-speech pattern.
+                    // We need to put it in the other index, too.
+                    return null; // For now, same result as an unrecognized heading.
+                }
+                if (sectionName.equalsIgnoreCase("Derived Terms")) { // Explicit placeholder branch; result equals the default below.
+                    return null;
+                }
+                return null; // null: onWikiLink skips its addEntryWithString call for links in this section.
+            }
 
             @Override
             public boolean skipWikiLink(WikiTokenizer wikiTokenizer) {
@@ -58,6 +77,37 @@ public class WholeSectionToHtmlParser extends AbstractWiktionaryParser {
                     Map<String, FunctionCallback<WholeSectionToHtmlParser>> functionCallbacks) {
                 EnFunctionCallbacks.addGenericCallbacks(functionCallbacks);
             }});
+        
+        final LangConfig basicLangConfig = new LangConfig() { // Minimal config; registered below for the "FR", "DE" and "IT" keys.
+            @Override
+            public boolean skipSection(String headingText) {
+                return false; // No section is ever skipped.
+            }
+            @Override
+            public EntryTypeName sectionNameToEntryType(String sectionName) {
+                return EntryTypeName.WIKTIONARY_MENTIONED; // Same entry type regardless of the heading text.
+            }
+            @Override
+            public boolean skipWikiLink(WikiTokenizer wikiTokenizer) {
+                final String wikiText = wikiTokenizer.wikiLinkText();
+                if (wikiText.startsWith("Category:")) { // Only links whose text starts with "Category:" are skipped.
+                    return true;
+                }
+                return false;
+            }
+            @Override
+            public String adjustWikiLink(String wikiLinkDest) {
+                return wikiLinkDest; // Destination is returned unchanged.
+            }
+
+            @Override
+            public void addFunctionCallbacks(
+                    Map<String, FunctionCallback<WholeSectionToHtmlParser>> functionCallbacks) {
+            } // Empty body: no callbacks are added to the map.
+        };
+        isoToLangConfig.put("FR", basicLangConfig);
+        isoToLangConfig.put("DE", basicLangConfig);
+        isoToLangConfig.put("IT", basicLangConfig);
     }
 
     final IndexBuilder titleIndexBuilder;
@@ -70,11 +120,14 @@ public class WholeSectionToHtmlParser extends AbstractWiktionaryParser {
         this.langConfig = isoToLangConfig.get(wiktionaryIso);
         this.skipLangIso = skipLangIso;
     }
+    
+    IndexedEntry indexedEntry = null; // Assigned at the start of parseSection and reset to null at its end.
 
     @Override
-    void parseSection(String heading, String text) {
-        HtmlEntry htmlEntry = new HtmlEntry(entrySource, StringEscapeUtils.escapeHtml3(title));
-        IndexedEntry indexedEntry = new IndexedEntry(htmlEntry);
+    public void parseSection(String heading, String text) {
+        assert entrySource != null;
+        final HtmlEntry htmlEntry = new HtmlEntry(entrySource, StringEscapeUtils.escapeHtml3(title));
+        indexedEntry = new IndexedEntry(htmlEntry);
 
         final AppendAndIndexWikiCallback<WholeSectionToHtmlParser> callback = new AppendCallback(
                 this);
@@ -93,12 +146,21 @@ public class WholeSectionToHtmlParser extends AbstractWiktionaryParser {
         tokenData.htmlEntries.add(htmlEntry);
         // titleIndexBuilder.addEntryWithString(indexedEntry, title,
         // EntryTypeName.WIKTIONARY_TITLE_MULTI_DETAIL);
+        
+        indexedEntry = null;
     }
 
     @Override
     void removeUselessArgs(Map<String, String> namedArgs) {
     }
     
+    @Override
+    public void addLinkToCurrentEntry(String token, EntryTypeName entryTypeName) { // Adds token to the title index under entryTypeName, pointing at the entry currently held in indexedEntry.
+        titleIndexBuilder.addEntryWithString(indexedEntry, token, entryTypeName); // NOTE(review): indexedEntry is null outside parseSection -- confirm callers only invoke this while a section is being parsed.
+    }
+
+
+
     static final Pattern ALL_ASCII = Pattern.compile("[\\p{ASCII}]*"); // Zero or more ASCII characters; as a full match it also accepts the empty string.
 
     class AppendCallback extends AppendAndIndexWikiCallback<WholeSectionToHtmlParser> {
@@ -131,6 +193,10 @@ public class WholeSectionToHtmlParser extends AbstractWiktionaryParser {
             } else {
                 linkDest = wikiTokenizer.wikiLinkText();
             }
+            if (sectionEntryTypeName != null) {
+                // TODO: inside a definition, this could be the wrong language.
+                titleIndexBuilder.addEntryWithString(indexedEntry, wikiTokenizer.wikiLinkText(), sectionEntryTypeName);
+            }
             if (linkDest != null) {
                 builder.append(String.format("<a href=\"%s\">", linkDest));
                 super.onWikiLink(wikiTokenizer);
@@ -157,10 +223,13 @@ public class WholeSectionToHtmlParser extends AbstractWiktionaryParser {
         @Override
         public void onNewline(WikiTokenizer wikiTokenizer) {
         }
+        
+        EntryTypeName sectionEntryTypeName; // Set by onHeading from langConfig.sectionNameToEntryType; when null, onWikiLink does not index the link.
 
         @Override
         public void onHeading(WikiTokenizer wikiTokenizer) {
             final String headingText = wikiTokenizer.headingWikiText();
+            sectionEntryTypeName = langConfig.sectionNameToEntryType(headingText);
             final int depth = wikiTokenizer.headingDepth();
             if (langConfig.skipSection(headingText)) {
                 while ((wikiTokenizer = wikiTokenizer.nextToken()) != null) {