gitweb.fperrin.net Git - DictionaryPC.git/blobdiff - src/com/hughes/android/dictionary/parser/wiktionary/WholeSectionToHtmlParser.java
Fix compile warnings.
[DictionaryPC.git] / src / com / hughes / android / dictionary / parser / wiktionary / WholeSectionToHtmlParser.java
index e861b9ddabd3da127833f83b0db3ada33c77f97e..63e507ee9a97d9433efbc863f094480c5175dc96 100644 (file)
@@ -9,7 +9,7 @@ import com.hughes.android.dictionary.engine.IndexedEntry;
 import com.hughes.android.dictionary.parser.WikiTokenizer;
 import com.hughes.util.StringUtil;
 
-import org.apache.commons.lang3.StringEscapeUtils;
+import org.apache.commons.text.StringEscapeUtils;
 
 import java.net.URI;
 import java.util.ArrayList;
@@ -28,7 +28,7 @@ public class WholeSectionToHtmlParser extends AbstractWiktionaryParser {
         boolean skipWikiLink(final WikiTokenizer wikiTokenizer);
         String adjustWikiLink(String wikiLinkDest, final String wikiLinkText);
         void addFunctionCallbacks(
-                Map<String, FunctionCallback<WholeSectionToHtmlParser>> functionCallbacks);
+            Map<String, FunctionCallback<WholeSectionToHtmlParser>> functionCallbacks);
     }
     static final Map<String,LangConfig> isoToLangConfig = new LinkedHashMap<String,LangConfig>();
     static {
@@ -38,7 +38,7 @@ public class WholeSectionToHtmlParser extends AbstractWiktionaryParser {
             public boolean skipSection(String headingText) {
                 return enSkipSections.matcher(headingText).matches();
             }
-            
+
             @Override
             public EntryTypeName sectionNameToEntryType(String sectionName) {
                 if (sectionName.equalsIgnoreCase("Synonyms")) {
@@ -56,7 +56,7 @@ public class WholeSectionToHtmlParser extends AbstractWiktionaryParser {
                 }
                 return null;
             }
-            
+
             @Override
             public boolean skipWikiLink(WikiTokenizer wikiTokenizer) {
                 final String wikiText = wikiTokenizer.wikiLinkText();
@@ -82,11 +82,11 @@ public class WholeSectionToHtmlParser extends AbstractWiktionaryParser {
 
             @Override
             public void addFunctionCallbacks(
-                    Map<String, FunctionCallback<WholeSectionToHtmlParser>> functionCallbacks) {
+                Map<String, FunctionCallback<WholeSectionToHtmlParser>> functionCallbacks) {
                 EnFunctionCallbacks.addGenericCallbacks(functionCallbacks);
             }
         });
-        
+
         final Pattern esSkipSections = Pattern.compile(".*(Traducciones|Locuciones).*");
         isoToLangConfig.put("ES", new LangConfig() {
             @Override
@@ -130,18 +130,66 @@ public class WholeSectionToHtmlParser extends AbstractWiktionaryParser {
 
             @Override
             public void addFunctionCallbacks(
-                    Map<String, FunctionCallback<WholeSectionToHtmlParser>> functionCallbacks) {
+                Map<String, FunctionCallback<WholeSectionToHtmlParser>> functionCallbacks) {
                 // TODO: need Spanish variant
             }
         });
 
+        final Pattern ptSkipSections = Pattern.compile(".*Tradução.*"); // headings of PT sections to skip
+        isoToLangConfig.put("PT", new LangConfig() {
+            @Override
+            public boolean skipSection(String headingText) {
+                return ptSkipSections.matcher(headingText).matches(); // must be the PT pattern, not the Spanish esSkipSections
+            }
+
+            @Override
+            public EntryTypeName sectionNameToEntryType(String sectionName) { // null when the heading is neither synonyms nor antonyms
+                if (sectionName.equalsIgnoreCase("Sinônimo") || sectionName.equalsIgnoreCase("Sinônimos")) {
+                    return EntryTypeName.SYNONYM_MULTI;
+                }
+                if (sectionName.equalsIgnoreCase("Antônimo") || sectionName.equalsIgnoreCase("Antônimos")) {
+                    return EntryTypeName.ANTONYM_MULTI;
+                }
+                return null;
+            }
+
+            @Override
+            public boolean skipWikiLink(WikiTokenizer wikiTokenizer) { // true for links whose text starts with "Categoria:"
+                final String wikiText = wikiTokenizer.wikiLinkText();
+                if (wikiText.startsWith("Categoria:")) {
+                    return true;
+                }
+                return false;
+            }
+            @Override
+            public String adjustWikiLink(String wikiLinkDest, String wikiLinkText) {
+                if (wikiLinkDest.startsWith("w:") || wikiLinkDest.startsWith("Image:")) {
+                    return null; // "w:" and "Image:" destinations yield no link target
+                }
+                final int hashPos = wikiLinkDest.indexOf("#");
+                if (hashPos != -1) {
+                    wikiLinkDest = wikiLinkDest.substring(0, hashPos); // drop the "#fragment" part
+                    if (wikiLinkDest.isEmpty()) {
+                        wikiLinkDest = wikiLinkText; // fragment-only link: fall back to the link text
+                    }
+                }
+                return wikiLinkDest;
+            }
+
+            @Override
+            public void addFunctionCallbacks(
+                Map<String, FunctionCallback<WholeSectionToHtmlParser>> functionCallbacks) {
+                // TODO: need Portuguese variant
+            }
+        });
+
         final Pattern deSkipSections = Pattern.compile(".*(Übersetzungen|Referenzen|Quellen).*");
         isoToLangConfig.put("DE", new LangConfig() {
             @Override
             public boolean skipSection(String headingText) {
                 return deSkipSections.matcher(headingText).matches();
             }
-            
+
             @Override
             public EntryTypeName sectionNameToEntryType(String sectionName) {
                 if (sectionName.equalsIgnoreCase("Synonyme")) {
@@ -152,7 +200,7 @@ public class WholeSectionToHtmlParser extends AbstractWiktionaryParser {
                 }
                 return null;
             }
-            
+
             @Override
             public boolean skipWikiLink(WikiTokenizer wikiTokenizer) {
                 final String wikiText = wikiTokenizer.wikiLinkText();
@@ -178,18 +226,18 @@ public class WholeSectionToHtmlParser extends AbstractWiktionaryParser {
 
             @Override
             public void addFunctionCallbacks(
-                    Map<String, FunctionCallback<WholeSectionToHtmlParser>> functionCallbacks) {
+                Map<String, FunctionCallback<WholeSectionToHtmlParser>> functionCallbacks) {
                 DeFunctionCallbacks.addGenericCallbacks(functionCallbacks);
             }
         });
-        
+
         final Pattern itSkipSections = Pattern.compile(".*(Traduzione|Note / Riferimenti).*");
         isoToLangConfig.put("IT", new LangConfig() {
             @Override
             public boolean skipSection(String headingText) {
                 return itSkipSections.matcher(headingText).matches();
             }
-            
+
             @Override
             public EntryTypeName sectionNameToEntryType(String sectionName) {
                 if (sectionName.equalsIgnoreCase("Sinonimi")) {
@@ -200,7 +248,7 @@ public class WholeSectionToHtmlParser extends AbstractWiktionaryParser {
                 }
                 return null;
             }
-            
+
             @Override
             public boolean skipWikiLink(WikiTokenizer wikiTokenizer) {
                 final String wikiText = wikiTokenizer.wikiLinkText();
@@ -226,7 +274,7 @@ public class WholeSectionToHtmlParser extends AbstractWiktionaryParser {
 
             @Override
             public void addFunctionCallbacks(
-                    Map<String, FunctionCallback<WholeSectionToHtmlParser>> functionCallbacks) {
+                Map<String, FunctionCallback<WholeSectionToHtmlParser>> functionCallbacks) {
                 ItFunctionCallbacks.addGenericCallbacks(functionCallbacks);
             }
         });
@@ -238,7 +286,7 @@ public class WholeSectionToHtmlParser extends AbstractWiktionaryParser {
             public boolean skipSection(String headingText) {
                 return frSkipSections.matcher(headingText).matches();
             }
-            
+
             @Override
             public EntryTypeName sectionNameToEntryType(String sectionName) {
                 if (sectionName.equalsIgnoreCase("Synonymes")) {
@@ -249,7 +297,7 @@ public class WholeSectionToHtmlParser extends AbstractWiktionaryParser {
                 }
                 return null;
             }
-            
+
             @Override
             public boolean skipWikiLink(WikiTokenizer wikiTokenizer) {
                 final String wikiText = wikiTokenizer.wikiLinkText();
@@ -275,7 +323,7 @@ public class WholeSectionToHtmlParser extends AbstractWiktionaryParser {
 
             @Override
             public void addFunctionCallbacks(
-                    Map<String, FunctionCallback<WholeSectionToHtmlParser>> functionCallbacks) {
+                Map<String, FunctionCallback<WholeSectionToHtmlParser>> functionCallbacks) {
                 FrFunctionCallbacks.addGenericCallbacks(functionCallbacks);
             }
         });
@@ -286,10 +334,10 @@ public class WholeSectionToHtmlParser extends AbstractWiktionaryParser {
     final String skipLangIso;
     final LangConfig langConfig;
     final String webUrlTemplate;
-    
+
 
     public WholeSectionToHtmlParser(final IndexBuilder titleIndexBuilder, final IndexBuilder defIndexBuilder, final String wiktionaryIso, final String skipLangIso,
-            final String webUrlTemplate) {
+                                    final String webUrlTemplate) {
         this.titleIndexBuilder = titleIndexBuilder;
         this.defIndexBuilder = defIndexBuilder;
         assert isoToLangConfig.containsKey(wiktionaryIso): wiktionaryIso;
@@ -297,7 +345,7 @@ public class WholeSectionToHtmlParser extends AbstractWiktionaryParser {
         this.skipLangIso = skipLangIso;
         this.webUrlTemplate = webUrlTemplate;
     }
-    
+
     IndexedEntry indexedEntry = null;
 
     @Override
@@ -307,7 +355,7 @@ public class WholeSectionToHtmlParser extends AbstractWiktionaryParser {
         indexedEntry = new IndexedEntry(htmlEntry);
 
         final AppendAndIndexWikiCallback<WholeSectionToHtmlParser> callback = new AppendCallback(
-                this);
+            this);
         langConfig.addFunctionCallbacks(callback.functionCallbacks);
 
         callback.builder = new StringBuilder();
@@ -316,11 +364,11 @@ public class WholeSectionToHtmlParser extends AbstractWiktionaryParser {
 
         if (webUrlTemplate != null) {
             final String webUrl = String.format(webUrlTemplate, title);
-           // URI.create can raise an exception e.g. if webUrl contains %, just ignore those cases.
-           try {
-            callback.builder.append(String.format("<p> <a href=\"%s\">%s</a>", URI.create(webUrl).toASCIIString(), escapeHtmlLiteral(webUrl)));
-           } catch (Exception e)
-           {}
+            // URI.create can raise an exception e.g. if webUrl contains %, just ignore those cases.
+            try {
+                callback.builder.append(String.format("<p> <a href=\"%s\">%s</a>", URI.create(webUrl).toASCIIString(), escapeHtmlLiteral(webUrl)));
+            } catch (Exception e) {
+            }
         }
         htmlEntry.html = callback.builder.toString();
         indexedEntry.isValid = true;
@@ -332,26 +380,26 @@ public class WholeSectionToHtmlParser extends AbstractWiktionaryParser {
         tokenData.htmlEntries.add(htmlEntry);
         // titleIndexBuilder.addEntryWithString(indexedEntry, title,
         // EntryTypeName.WIKTIONARY_TITLE_MULTI_DETAIL);
-        
+
         indexedEntry = null;
     }
 
     @Override
     void removeUselessArgs(Map<String, String> namedArgs) {
     }
-    
+
     @Override
     public void addLinkToCurrentEntry(String token, final String lang, EntryTypeName entryTypeName) {
         if (lang == null || lang.equals(skipLangIso)) {
             titleIndexBuilder.addEntryWithString(indexedEntry, token, entryTypeName);
         }
     }
-    
+
     public static String escapeHtmlLiteral(final String plainText) {
         final String htmlEscaped = StringEscapeUtils.escapeHtml3(plainText);
         if (StringUtil.isAscii(htmlEscaped)) {
             return htmlEscaped;
-        } else { 
+        } else {
             return StringUtil.escapeUnicodeToPureHtml(plainText);
         }
 
@@ -399,7 +447,7 @@ public class WholeSectionToHtmlParser extends AbstractWiktionaryParser {
 
         @Override
         public void onFunction(WikiTokenizer wikiTokenizer, String name,
-                List<String> args, Map<String, String> namedArgs) {
+                               List<String> args, Map<String, String> namedArgs) {
             if (skipLangIso.equalsIgnoreCase(namedArgs.get("lang"))) {
                 namedArgs.remove("lang");
             }
@@ -414,7 +462,7 @@ public class WholeSectionToHtmlParser extends AbstractWiktionaryParser {
         @Override
         public void onNewline(WikiTokenizer wikiTokenizer) {
         }
-        
+
         EntryTypeName sectionEntryTypeName;
         IndexBuilder currentIndexBuilder;
 
@@ -451,7 +499,7 @@ public class WholeSectionToHtmlParser extends AbstractWiktionaryParser {
             final String prefix = wikiTokenizer.listItemPrefix();
             while (listPrefixStack.size() < prefix.length()) {
                 builder.append(String.format("<%s>",
-                        WikiTokenizer.getListTag(prefix.charAt(listPrefixStack.size()))));
+                                             WikiTokenizer.getListTag(prefix.charAt(listPrefixStack.size()))));
                 listPrefixStack.add(prefix.charAt(listPrefixStack.size()));
             }
             builder.append("<li>");