]> gitweb.fperrin.net Git - DictionaryPC.git/blobdiff - src/com/hughes/android/dictionary/parser/wiktionary/WholeSectionToHtmlParser.java
Fix compilation.
[DictionaryPC.git] / src / com / hughes / android / dictionary / parser / wiktionary / WholeSectionToHtmlParser.java
index 0066d3bc7aeed2dabbb85dccad7d89df848be65d..2b719db747b3b5f64edf4c8f938d333c9da04073 100644 (file)
@@ -1,6 +1,15 @@
 
 package com.hughes.android.dictionary.parser.wiktionary;
 
+import java.net.URI;
+import java.util.ArrayList;
+import java.util.LinkedHashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.regex.Pattern;
+
+import org.apache.commons.text.StringEscapeUtils;
+
 import com.hughes.android.dictionary.engine.EntryTypeName;
 import com.hughes.android.dictionary.engine.HtmlEntry;
 import com.hughes.android.dictionary.engine.IndexBuilder;
@@ -9,15 +18,6 @@ import com.hughes.android.dictionary.engine.IndexedEntry;
 import com.hughes.android.dictionary.parser.WikiTokenizer;
 import com.hughes.util.StringUtil;
 
-import org.apache.commons.lang3.StringEscapeUtils;
-
-import java.net.URI;
-import java.util.ArrayList;
-import java.util.LinkedHashMap;
-import java.util.List;
-import java.util.Map;
-import java.util.regex.Pattern;
-
 public class WholeSectionToHtmlParser extends AbstractWiktionaryParser {
 
     public static final String NAME = "WholeSectionToHtmlParser";
@@ -30,7 +30,7 @@ public class WholeSectionToHtmlParser extends AbstractWiktionaryParser {
         void addFunctionCallbacks(
             Map<String, FunctionCallback<WholeSectionToHtmlParser>> functionCallbacks);
     }
-    static final Map<String,LangConfig> isoToLangConfig = new LinkedHashMap<String,LangConfig>();
+    static final Map<String,LangConfig> isoToLangConfig = new LinkedHashMap<>();
     static {
         final Pattern enSkipSections = Pattern.compile(".*(Translations|Anagrams|References).*");
         isoToLangConfig.put("EN", new LangConfig() {
@@ -47,23 +47,18 @@ public class WholeSectionToHtmlParser extends AbstractWiktionaryParser {
                 if (sectionName.equalsIgnoreCase("Antonyms")) {
                     return EntryTypeName.ANTONYM_MULTI;
                 }
-                if (EnParser.partOfSpeechHeader.matcher(sectionName).matches()) {
-                    // We need to put it in the other index, too (probably)
-                    return null;
-                }
-                if (sectionName.equalsIgnoreCase("Derived Terms")) {
-                    return null;
-                }
+                // We need to put it in the other index, too (probably) ?
+                // EnParser.partOfSpeechHeader.matcher(sectionName).matches()
+
+                // Needs special handling?
+                // sectionName.equalsIgnoreCase("Derived Terms")
                 return null;
             }
 
             @Override
             public boolean skipWikiLink(WikiTokenizer wikiTokenizer) {
                 final String wikiText = wikiTokenizer.wikiLinkText();
-                if (wikiText.startsWith("Category:")) {
-                    return true;
-                }
-                return false;
+                return wikiText.startsWith("Category:");
             }
             @Override
             public String adjustWikiLink(String wikiLinkDest, String wikiLinkText) {
@@ -108,10 +103,7 @@ public class WholeSectionToHtmlParser extends AbstractWiktionaryParser {
             @Override
             public boolean skipWikiLink(WikiTokenizer wikiTokenizer) {
                 final String wikiText = wikiTokenizer.wikiLinkText();
-                if (wikiText.startsWith("Categoría:")) {
-                    return true;
-                }
-                return false;
+                return wikiText.startsWith("Categoría:");
             }
             @Override
             public String adjustWikiLink(String wikiLinkDest, String wikiLinkText) {
@@ -135,6 +127,51 @@ public class WholeSectionToHtmlParser extends AbstractWiktionaryParser {
             }
         });
 
+        final Pattern ptSkipSections = Pattern.compile(".*Tradução.*");
+        isoToLangConfig.put("PT", new LangConfig() {
+            @Override
+            public boolean skipSection(String headingText) {
+                return ptSkipSections.matcher(headingText).matches(); // Portuguese pattern (".*Tradução.*"), not the Spanish one
+            }
+
+            @Override
+            public EntryTypeName sectionNameToEntryType(String sectionName) {
+                if (sectionName.equalsIgnoreCase("Sinônimo") || sectionName.equalsIgnoreCase("Sinônimos")) {
+                    return EntryTypeName.SYNONYM_MULTI;
+                }
+                if (sectionName.equalsIgnoreCase("Antônimo") || sectionName.equalsIgnoreCase("Antônimos")) {
+                    return EntryTypeName.ANTONYM_MULTI;
+                }
+                return null;
+            }
+
+            @Override
+            public boolean skipWikiLink(WikiTokenizer wikiTokenizer) {
+                final String wikiText = wikiTokenizer.wikiLinkText();
+                return wikiText.startsWith("Categoria:");
+            }
+            @Override
+            public String adjustWikiLink(String wikiLinkDest, String wikiLinkText) {
+                if (wikiLinkDest.startsWith("w:") || wikiLinkDest.startsWith("Image:")) {
+                    return null;
+                }
+                final int hashPos = wikiLinkDest.indexOf("#");
+                if (hashPos != -1) {
+                    wikiLinkDest = wikiLinkDest.substring(0, hashPos);
+                    if (wikiLinkDest.isEmpty()) {
+                        wikiLinkDest = wikiLinkText;
+                    }
+                }
+                return wikiLinkDest;
+            }
+
+            @Override
+            public void addFunctionCallbacks(
+                Map<String, FunctionCallback<WholeSectionToHtmlParser>> functionCallbacks) {
+                // TODO: need Portuguese variant
+            }
+        });
+
         final Pattern deSkipSections = Pattern.compile(".*(Übersetzungen|Referenzen|Quellen).*");
         isoToLangConfig.put("DE", new LangConfig() {
             @Override
@@ -156,10 +193,7 @@ public class WholeSectionToHtmlParser extends AbstractWiktionaryParser {
             @Override
             public boolean skipWikiLink(WikiTokenizer wikiTokenizer) {
                 final String wikiText = wikiTokenizer.wikiLinkText();
-                if (wikiText.startsWith("Kategorie:")) {
-                    return true;
-                }
-                return false;
+                return wikiText.startsWith("Kategorie:");
             }
             @Override
             public String adjustWikiLink(String wikiLinkDest, String wikiLinkText) {
@@ -204,10 +238,7 @@ public class WholeSectionToHtmlParser extends AbstractWiktionaryParser {
             @Override
             public boolean skipWikiLink(WikiTokenizer wikiTokenizer) {
                 final String wikiText = wikiTokenizer.wikiLinkText();
-                if (wikiText.startsWith("Categoria:")) {
-                    return true;
-                }
-                return false;
+                return wikiText.startsWith("Categoria:");
             }
             @Override
             public String adjustWikiLink(String wikiLinkDest, String wikiLinkText) {
@@ -253,10 +284,7 @@ public class WholeSectionToHtmlParser extends AbstractWiktionaryParser {
             @Override
             public boolean skipWikiLink(WikiTokenizer wikiTokenizer) {
                 final String wikiText = wikiTokenizer.wikiLinkText();
-                if (wikiText.startsWith("Catégorie:")) {
-                    return true;
-                }
-                return false;
+                return wikiText.startsWith("Catégorie:");
             }
             @Override
             public String adjustWikiLink(String wikiLinkDest, String wikiLinkText) {
@@ -316,11 +344,19 @@ public class WholeSectionToHtmlParser extends AbstractWiktionaryParser {
 
         if (webUrlTemplate != null) {
             final String webUrl = String.format(webUrlTemplate, title);
+            String asciiWebUrl = null;
             // URI.create can raise an exception e.g. if webUrl contains %, just ignore those cases.
             try {
-                callback.builder.append(String.format("<p> <a href=\"%s\">%s</a>", URI.create(webUrl).toASCIIString(), escapeHtmlLiteral(webUrl)));
+                asciiWebUrl = URI.create(webUrl).toASCIIString();
             } catch (Exception e) {
             }
+            if (asciiWebUrl != null) {
+                callback.builder.append("<p> <a href=\"");
+                callback.builder.append(asciiWebUrl);
+                callback.builder.append("\">");
+                callback.builder.append(escapeHtmlLiteral(webUrl));
+                callback.builder.append("</a>");
+            }
         }
         htmlEntry.html = callback.builder.toString();
         indexedEntry.isValid = true;
@@ -389,9 +425,11 @@ public class WholeSectionToHtmlParser extends AbstractWiktionaryParser {
                 titleIndexBuilder.addEntryWithString(indexedEntry, wikiTokenizer.wikiLinkText(), sectionEntryTypeName);
             }
             if (!StringUtil.isNullOrEmpty(linkDest)) {
-                builder.append(String.format("<a href=\"%s\">", HtmlEntry.formatQuickdicUrl("", linkDest)));
+                builder.append("<a href=\"");
+                builder.append(HtmlEntry.formatQuickdicUrl("", linkDest));
+                builder.append("\">");
                 super.onWikiLink(wikiTokenizer);
-                builder.append(String.format("</a>"));
+                builder.append("</a>");
             } else {
                 super.onWikiLink(wikiTokenizer);
             }
@@ -436,12 +474,16 @@ public class WholeSectionToHtmlParser extends AbstractWiktionaryParser {
                 }
                 return;
             }
-            builder.append(String.format("\n<h%d>", depth));
+            builder.append("\n<h");
+            builder.append(depth);
+            builder.append('>');
             dispatch(headingText, null);
-            builder.append(String.format("</h%d>\n", depth));
+            builder.append("</h");
+            builder.append(depth);
+            builder.append(">\n");
         }
 
-        final List<Character> listPrefixStack = new ArrayList<Character>();
+        final List<Character> listPrefixStack = new ArrayList<>();
 
         @Override
         public void onListItem(WikiTokenizer wikiTokenizer) {
@@ -450,8 +492,9 @@ public class WholeSectionToHtmlParser extends AbstractWiktionaryParser {
             }
             final String prefix = wikiTokenizer.listItemPrefix();
             while (listPrefixStack.size() < prefix.length()) {
-                builder.append(String.format("<%s>",
-                                             WikiTokenizer.getListTag(prefix.charAt(listPrefixStack.size()))));
+                builder.append('<');
+                builder.append(WikiTokenizer.getListTag(prefix.charAt(listPrefixStack.size())));
+                builder.append('>');
                 listPrefixStack.add(prefix.charAt(listPrefixStack.size()));
             }
             builder.append("<li>");
@@ -475,7 +518,9 @@ public class WholeSectionToHtmlParser extends AbstractWiktionaryParser {
             }
             while (listPrefixStack.size() > nextListHeader.length()) {
                 final char prefixChar = listPrefixStack.remove(listPrefixStack.size() - 1);
-                builder.append(String.format("</%s>\n", WikiTokenizer.getListTag(prefixChar)));
+                builder.append("</");
+                builder.append(WikiTokenizer.getListTag(prefixChar));
+                builder.append(">\n");
             }
         }