gitweb.fperrin.net Git - DictionaryPC.git/blobdiff - src/com/hughes/android/dictionary/parser/wiktionary/WholeSectionToHtmlParser.java
Put links into HtmlEntry.
[DictionaryPC.git] / src / com / hughes / android / dictionary / parser / wiktionary / WholeSectionToHtmlParser.java
index 2cb6ec000bff7635a3ea0215e797c75984be2d9b..0a702d0bcb30125cd9872a244118cdfb0b3f0abc 100644 (file)
@@ -1,11 +1,13 @@
 
 package com.hughes.android.dictionary.parser.wiktionary;
 
+import com.hughes.android.dictionary.engine.EntryTypeName;
 import com.hughes.android.dictionary.engine.HtmlEntry;
 import com.hughes.android.dictionary.engine.IndexBuilder;
 import com.hughes.android.dictionary.engine.IndexBuilder.TokenData;
 import com.hughes.android.dictionary.engine.IndexedEntry;
 import com.hughes.android.dictionary.parser.WikiTokenizer;
+import com.hughes.util.StringUtil;
 
 import org.apache.commons.lang3.StringEscapeUtils;
 
@@ -68,11 +70,14 @@ public class WholeSectionToHtmlParser extends AbstractWiktionaryParser {
         this.langConfig = isoToLangConfig.get(wiktionaryIso);
         this.skipLangIso = skipLangIso;
     }
+    
+    IndexedEntry indexedEntry = null; // Assigned at the start of parseSection and reset to null at its end; read by addLinkToCurrentEntry.
 
     @Override
-    void parseSection(String heading, String text) {
-        HtmlEntry htmlEntry = new HtmlEntry(entrySource, StringEscapeUtils.escapeHtml3(title));
-        IndexedEntry indexedEntry = new IndexedEntry(htmlEntry);
+    public void parseSection(String heading, String text) {
+        assert entrySource != null;
+        final HtmlEntry htmlEntry = new HtmlEntry(entrySource, StringEscapeUtils.escapeHtml3(title));
+        indexedEntry = new IndexedEntry(htmlEntry);
 
         final AppendAndIndexWikiCallback<WholeSectionToHtmlParser> callback = new AppendCallback(
                 this);
@@ -91,11 +96,22 @@ public class WholeSectionToHtmlParser extends AbstractWiktionaryParser {
         tokenData.htmlEntries.add(htmlEntry);
         // titleIndexBuilder.addEntryWithString(indexedEntry, title,
         // EntryTypeName.WIKTIONARY_TITLE_MULTI_DETAIL);
+        
+        indexedEntry = null;
     }
 
     @Override
     void removeUselessArgs(Map<String, String> namedArgs) { // Empty override: namedArgs is left unmodified.
     }
+    
+    @Override
+    public void addLinkToCurrentEntry(String token, EntryTypeName entryTypeName) { // Registers token, with the given type, against the entry held in the indexedEntry field.
+        titleIndexBuilder.addEntryWithString(indexedEntry, token, entryTypeName); // NOTE(review): indexedEntry is null outside parseSection; presumably this is only called while a section is being parsed - confirm.
+    }
+
+
+
+    static final Pattern ALL_ASCII = Pattern.compile("[\\p{ASCII}]*"); // Full-matches only strings made up entirely of ASCII characters (the empty string included).
 
     class AppendCallback extends AppendAndIndexWikiCallback<WholeSectionToHtmlParser> {
         public AppendCallback(WholeSectionToHtmlParser parser) {
@@ -104,7 +120,12 @@ public class WholeSectionToHtmlParser extends AbstractWiktionaryParser {
 
         @Override
         public void onPlainText(String plainText) {
-            super.onPlainText(StringEscapeUtils.escapeHtml3(plainText));
+            final String htmlEscaped = StringEscapeUtils.escapeHtml3(plainText); // HTML-escape the raw text first.
+            if (ALL_ASCII.matcher(htmlEscaped).matches()) { // Escaped form is pure ASCII: forward it as-is.
+                super.onPlainText(htmlEscaped);
+            } else { // Non-ASCII characters survived escapeHtml3.
+                super.onPlainText(StringUtil.escapeToPureHtmlUnicode(plainText)); // Re-escapes the ORIGINAL text, not htmlEscaped; presumably yields ASCII-only HTML - confirm in StringUtil.
+            }
         }
 
         @Override