import com.hughes.android.dictionary.engine.IndexBuilder.TokenData;
import com.hughes.android.dictionary.engine.IndexedEntry;
import com.hughes.android.dictionary.parser.WikiTokenizer;
+import com.hughes.util.StringUtil;
import org.apache.commons.lang3.StringEscapeUtils;
+import org.apache.commons.lang3.StringUtils;
import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.regex.Pattern;
public class WholeSectionToHtmlParser extends AbstractWiktionaryParser {
-
+
// Public name constant for this parser; its value is the class's simple name.
+ public static final String NAME = "WholeSectionToHtmlParser";
+
/**
 * Language-specific hooks for this parser. The constructor looks up the
 * instance to use in {@code isoToLangConfig} by Wiktionary ISO code.
 */
interface LangConfig {
// Presumably returns true when the section with this name should be left out -- confirm against implementations.
boolean skipSection(final String name);
// Presumably returns true when the wiki link being tokenized should be dropped -- confirm against implementations.
boolean skipWikiLink(final WikiTokenizer wikiTokenizer);
// Maps a wiki link destination to the destination that should be used instead.
// NOTE(review): the exact contract (e.g. whether null is a legal result) is not visible here -- confirm.
String adjustWikiLink(String wikiLinkDest);
// Gives the language a chance to add entries to the supplied callback map; the parser
// invokes this with its callback's functionCallbacks map.
+ void addFunctionCallbacks(
+ Map<String, FunctionCallback<WholeSectionToHtmlParser>> functionCallbacks);
}
// LangConfig instances keyed by Wiktionary ISO code; the constructor resolves its langConfig from this map.
static final Map<String,LangConfig> isoToLangConfig = new LinkedHashMap<String,LangConfig>();
static {
return null;
}
return wikiLinkDest;
+ }
+
+ @Override
+ public void addFunctionCallbacks(
+ Map<String, FunctionCallback<WholeSectionToHtmlParser>> functionCallbacks) {
+ EnFunctionCallbacks.addGenericCallbacks(functionCallbacks);
}});
}
- public static final String NAME = "WholeSectionToHtmlParser";
-
// Index builder handed to the constructor and stored unchanged.
final IndexBuilder titleIndexBuilder;
// Language code compared case-insensitively with a template's "lang" named argument in onFunction.
+ final String skipLangIso;
// Language hooks resolved from isoToLangConfig for the constructor's wiktionaryIso argument.
final LangConfig langConfig;
/**
 * Creates a parser that uses the language configuration stored in
 * {@code isoToLangConfig} under {@code wiktionaryIso}.
 *
 * @param titleIndexBuilder stored as-is in the field of the same name
 * @param wiktionaryIso key used to look up the {@code LangConfig}
 * @param skipLangIso stored as-is in the field of the same name; it is not validated here
 */
- public WholeSectionToHtmlParser(final IndexBuilder titleIndexBuilder, final String wiktionaryIso) {
+ public WholeSectionToHtmlParser(final IndexBuilder titleIndexBuilder, final String wiktionaryIso, final String skipLangIso) {
this.titleIndexBuilder = titleIndexBuilder;
// Only enforced when assertions are enabled; without them an unknown code leaves langConfig null.
assert isoToLangConfig.containsKey(wiktionaryIso): wiktionaryIso;
this.langConfig = isoToLangConfig.get(wiktionaryIso);
+ this.skipLangIso = skipLangIso;
}
@Override
final AppendAndIndexWikiCallback<WholeSectionToHtmlParser> callback = new AppendCallback(
this);
+ langConfig.addFunctionCallbacks(callback.functionCallbacks);
callback.builder = new StringBuilder();
callback.indexedEntry = indexedEntry;
// As shown here the body is empty, so namedArgs is left unchanged by this override.
@Override
void removeUselessArgs(Map<String, String> namedArgs) {
}
+
// Matches only strings made up entirely of ASCII characters (the empty string included).
+ static final Pattern ALL_ASCII = Pattern.compile("[\\p{ASCII}]*");
class AppendCallback extends AppendAndIndexWikiCallback<WholeSectionToHtmlParser> {
public AppendCallback(WholeSectionToHtmlParser parser) {
/**
 * HTML-escapes plain text before passing it to the superclass handler.
 * If the escaped text is pure ASCII it is forwarded as-is; otherwise the
 * result of StringUtil.escapeToPureHtmlUnicode is forwarded instead.
 */
@Override
public void onPlainText(String plainText) {
- super.onPlainText(StringEscapeUtils.escapeHtml3(plainText));
+ final String htmlEscaped = StringEscapeUtils.escapeHtml3(plainText);
+ if (ALL_ASCII.matcher(htmlEscaped).matches()) {
+ super.onPlainText(htmlEscaped);
+ } else {
// NOTE(review): this branch is given the raw plainText, not htmlEscaped. Confirm that
// escapeToPureHtmlUnicode also escapes HTML metacharacters such as '<' and '&'.
+ super.onPlainText(StringUtil.escapeToPureHtmlUnicode(plainText));
+ }
}
@Override
/**
 * Forwards a template (function) invocation to the superclass handler after
 * removing the "lang" named argument when its value equals skipLangIso,
 * ignoring case.
 *
 * @param wikiTokenizer passed through unchanged to the superclass
 * @param name passed through unchanged to the superclass
 * @param args passed through unchanged to the superclass
 * @param namedArgs named arguments; the "lang" entry may be removed in place
 */
@Override
public void onFunction(WikiTokenizer wikiTokenizer, String name,
List<String> args, Map<String, String> namedArgs) {
// skipLangIso comes from the constructor without a null check; guard it so a parser
// built without a language to skip does not throw NullPointerException here.
// equalsIgnoreCase(null) is false, so a missing "lang" argument needs no extra check.
+ if (skipLangIso != null && skipLangIso.equalsIgnoreCase(namedArgs.get("lang"))) {
+ namedArgs.remove("lang");
+ }
super.onFunction(wikiTokenizer, name, args, namedArgs);
}