import java.util.regex.Pattern;
public class WholeSectionToHtmlParser extends AbstractWiktionaryParser {
-
+
// String constant holding this parser's name (same text as the class name).
// This patch moves the declaration up from below the static initializer.
+ public static final String NAME = "WholeSectionToHtmlParser";
+
/**
 * Per-language hooks used by this parser. Implementations are registered in
 * {@code isoToLangConfig} under an ISO code key and selected in the constructor.
 */
interface LangConfig {
/**
 * Skip decision for a section, given its name (the EN implementation names
 * the argument {@code headingText}). The call site is not visible in this excerpt.
 */
boolean skipSection(final String name);
/**
 * When this returns {@code true}, the wiki-link handler in this class returns
 * immediately, before anything is appended for the link.
 */
boolean skipWikiLink(final WikiTokenizer wikiTokenizer);
/**
 * Maps an explicit wiki-link destination to the value placed in the anchor's
 * {@code href}. A {@code null} result makes the link handler fall back to the
 * superclass handling without an anchor tag.
 */
+ String adjustWikiLink(String wikiLinkDest);
}
// Registry of language configurations keyed by ISO code; the constructor
// looks its wiktionaryIso argument up here.
static final Map<String,LangConfig> isoToLangConfig = new LinkedHashMap<String,LangConfig>();
static {
- final Pattern enSkipSections = Pattern.compile(".*Translations.*");
// The alternation is grouped so the leading and trailing ".*" apply to every
// keyword. Ungrouped, "|" binds loosest and the pattern splits into
// ".*Translations", "Anagrams" and "References.*", which a full match treats
// differently for each keyword.
+ final Pattern enSkipSections = Pattern.compile(".*(?:Translations|Anagrams|References).*");
isoToLangConfig.put("EN", new LangConfig() {
@Override
public boolean skipSection(String headingText) {
return true;
}
// NOTE(review): this excerpt appears to omit lines around the two return
// statements -- confirm against the full file.
return false;
+ }
+ @Override
+ public String adjustWikiLink(String wikiLinkDest) {
// Destinations starting with "w:" or "Image:" get no link target (null).
+ if (wikiLinkDest.startsWith("w:") || wikiLinkDest.startsWith("Image:")) {
+ return null;
+ }
+ return wikiLinkDest;
}});
}
- public static final String NAME = "WholeSectionToHtmlParser";
-
// Index builder handed to the constructor; only stored by the code visible here.
final IndexBuilder titleIndexBuilder;
// Language code compared (ignoring case) with the "lang" named argument in onFunction.
+ final String skipLangIso;
// Configuration selected from isoToLangConfig for the constructor's wiktionaryIso.
final LangConfig langConfig;
/**
 * Stores the collaborators and picks the language configuration.
 *
 * @param titleIndexBuilder stored in the field of the same name
 * @param wiktionaryIso key into {@code isoToLangConfig}
 * @param skipLangIso stored in the field of the same name (parameter added by this patch)
 */
- public WholeSectionToHtmlParser(final IndexBuilder titleIndexBuilder, final String wiktionaryIso) {
+ public WholeSectionToHtmlParser(final IndexBuilder titleIndexBuilder, final String wiktionaryIso, final String skipLangIso) {
this.titleIndexBuilder = titleIndexBuilder;
// Only an assertion guards against an unregistered code, so with assertions
// disabled the lookup below yields null for an unknown wiktionaryIso.
assert isoToLangConfig.containsKey(wiktionaryIso): wiktionaryIso;
this.langConfig = isoToLangConfig.get(wiktionaryIso);
+ this.skipLangIso = skipLangIso;
}
@Override
// The language configuration may veto the link: nothing is appended and the
// superclass handler is not called.
if (langConfig.skipWikiLink(wikiTokenizer)) {
return;
}
- super.onWikiLink(wikiTokenizer);
// An explicit destination goes through adjustWikiLink, which may return null.
// Without an explicit destination the link text is used as the target, unadjusted.
// NOTE(review): confirm the unadjusted fallback is intended for prefixed link text.
+ String linkDest;
+ if (wikiTokenizer.wikiLinkDest() != null) {
+ linkDest = langConfig.adjustWikiLink(wikiTokenizer.wikiLinkDest());
+ } else {
+ linkDest = wikiTokenizer.wikiLinkText();
+ }
// With a target, the superclass handling is bracketed by an opening and a
// closing anchor tag appended to builder.
// NOTE(review): linkDest is placed in the href attribute without escaping --
// confirm it cannot contain a double quote.
+ if (linkDest != null) {
+ builder.append(String.format("<a href=\"%s\">", linkDest));
+ super.onWikiLink(wikiTokenizer);
+ builder.append(String.format("</a>"));
+ } else {
// No target: same superclass handling, no anchor.
+ super.onWikiLink(wikiTokenizer);
+ }
}
/**
 * Removes the {@code "lang"} named argument when its value equals
 * {@code skipLangIso} ignoring case, then delegates to the superclass handler.
 * The removal is done on the caller's {@code namedArgs} map in place.
 *
 * @param wikiTokenizer passed through to the superclass
 * @param name passed through to the superclass
 * @param args passed through to the superclass
 * @param namedArgs named arguments; may lose its {@code "lang"} entry
 */
@Override
public void onFunction(WikiTokenizer wikiTokenizer, String name,
List<String> args, Map<String, String> namedArgs) {
// The looked-up value is the receiver so that a null skipLangIso simply
// compares unequal instead of throwing a NullPointerException.
+ final String lang = namedArgs.get("lang");
+ if (lang != null && lang.equalsIgnoreCase(skipLangIso)) {
+ namedArgs.remove("lang");
+ }
super.onFunction(wikiTokenizer, name, args, namedArgs);
}