package com.hughes.android.dictionary.parser.wiktionary;
+import com.hughes.android.dictionary.engine.EntryTypeName;
import com.hughes.android.dictionary.engine.HtmlEntry;
import com.hughes.android.dictionary.engine.IndexBuilder;
import com.hughes.android.dictionary.engine.IndexBuilder.TokenData;
import com.hughes.android.dictionary.engine.IndexedEntry;
import com.hughes.android.dictionary.parser.WikiTokenizer;
+import com.hughes.util.StringUtil;
import org.apache.commons.lang3.StringEscapeUtils;
this.langConfig = isoToLangConfig.get(wiktionaryIso);
this.skipLangIso = skipLangIso;
}
+
+ IndexedEntry indexedEntry = null; // Entry for the section being parsed: set and cleared by parseSection, read by addLinkToCurrentEntry.
@Override
- void parseSection(String heading, String text) {
- HtmlEntry htmlEntry = new HtmlEntry(entrySource, StringEscapeUtils.escapeHtml3(title));
- IndexedEntry indexedEntry = new IndexedEntry(htmlEntry);
+ public void parseSection(String heading, String text) { // Creates an HtmlEntry for the current title and adds it to tokenData.htmlEntries.
+ assert entrySource != null;
+ final HtmlEntry htmlEntry = new HtmlEntry(entrySource, StringEscapeUtils.escapeHtml3(title)); // title is HTML-escaped before it is passed to HtmlEntry
+ indexedEntry = new IndexedEntry(htmlEntry); // now a field (previously a local) so addLinkToCurrentEntry can reach the entry being built
final AppendAndIndexWikiCallback<WholeSectionToHtmlParser> callback = new AppendCallback(
this);
tokenData.htmlEntries.add(htmlEntry);
// titleIndexBuilder.addEntryWithString(indexedEntry, title,
// EntryTypeName.WIKTIONARY_TITLE_MULTI_DETAIL);
+
+ indexedEntry = null; // NOTE(review): not in a finally block, so the field stays set if the body above throws - confirm that is acceptable
}
@Override
void removeUselessArgs(Map<String, String> namedArgs) {
// No-op: this override leaves namedArgs unmodified.
}
+
+ @Override
+ public void addLinkToCurrentEntry(String token, EntryTypeName entryTypeName) { // Forwards token and entryTypeName to titleIndexBuilder.addEntryWithString for the current indexedEntry.
+ titleIndexBuilder.addEntryWithString(indexedEntry, token, entryTypeName); // NOTE(review): indexedEntry is null outside parseSection - confirm this is never called then
+ }
+
+
+
+ static final Pattern ALL_ASCII = Pattern.compile("[\\p{ASCII}]*"); // Full match means every character is ASCII (the empty string also matches).
class AppendCallback extends AppendAndIndexWikiCallback<WholeSectionToHtmlParser> {
public AppendCallback(WholeSectionToHtmlParser parser) {
@Override
public void onPlainText(String plainText) { // HTML-escapes plainText and forwards the result to the superclass handler.
- super.onPlainText(StringEscapeUtils.escapeHtml3(plainText));
+ final String htmlEscaped = StringEscapeUtils.escapeHtml3(plainText);
+ if (ALL_ASCII.matcher(htmlEscaped).matches()) { // escaped text is pure ASCII: forward it exactly as the previous version did
+ super.onPlainText(htmlEscaped);
+ } else { // non-ASCII characters remain after escapeHtml3
+ super.onPlainText(StringUtil.escapeToPureHtmlUnicode(plainText)); // NOTE(review): receives the raw plainText, not htmlEscaped - confirm escapeToPureHtmlUnicode also escapes HTML metacharacters
+ }
}
@Override