import com.hughes.android.dictionary.engine.IndexedEntry;
import com.hughes.android.dictionary.parser.WikiTokenizer;
import com.hughes.util.StringUtil;
+import com.sun.xml.internal.rngom.util.Uri;
import org.apache.commons.lang3.StringEscapeUtils;
/**
 * Per-language hooks that the section-to-HTML parser consults while walking wiki text.
 * NOTE(review): the contracts described here are read off the implementations and call
 * sites visible in this change; confirm against the rest of the file.
 */
interface LangConfig {
// Asked once per heading with the heading text; presumably true means the section is dropped.
boolean skipSection(final String name);
// Asked once per heading. The result is the entry type under which wiki-link text in that
// section gets indexed; a null result switches that indexing off.
+ EntryTypeName sectionNameToEntryType(String sectionName);
boolean skipWikiLink(final WikiTokenizer wikiTokenizer);
// Maps a wiki-link destination to the destination used for the generated anchor.
// A null result makes the link handler take its non-anchor branch.
- String adjustWikiLink(String wikiLinkDest);
+ String adjustWikiLink(String wikiLinkDest, final String wikiLinkText);
// Receives the callback map so the config can add its entries to it.
void addFunctionCallbacks(
Map<String, FunctionCallback<WholeSectionToHtmlParser>> functionCallbacks);
}
/**
 * Reports whether the heading text matches {@code enSkipSections}.
 * {@code matches()} is used, so — assuming {@code enSkipSections} is a
 * java.util.regex.Pattern — the whole heading has to match, not just a part of it.
 */
public boolean skipSection(String headingText) {
return enSkipSections.matcher(headingText).matches();
}
-
+
+ /**
+  * Chooses the index entry type for wiki links found under the given heading.
+  *
+  * @param sectionName heading text of the section being entered
+  * @return SYNONYM_MULTI for "Synonyms" and ANTONYM_MULTI for "Antonyms" (both compared
+  *         case-insensitively); null for every other heading
+  */
+ @Override
+ public EntryTypeName sectionNameToEntryType(String sectionName) {
+     final EntryTypeName entryType;
+     if (sectionName.equalsIgnoreCase("Synonyms")) {
+         entryType = EntryTypeName.SYNONYM_MULTI;
+     } else if (sectionName.equalsIgnoreCase("Antonyms")) {
+         entryType = EntryTypeName.ANTONYM_MULTI;
+     } else if (EnParser.partOfSpeechHeader.matcher(sectionName).matches()
+             || sectionName.equalsIgnoreCase("Derived Terms")) {
+         // Recognised headings that have no entry type yet. Part-of-speech sections
+         // still need to be put in the other index, too.
+         entryType = null;
+     } else {
+         entryType = null;
+     }
+     return entryType;
+ }
+
/**
 * Never skips a wiki link: the result is always false.
 * The link text is read into {@code wikiText}, but that local is not used afterwards.
 */
@Override
public boolean skipWikiLink(WikiTokenizer wikiTokenizer) {
final String wikiText = wikiTokenizer.wikiLinkText();
return false;
}
/**
 * Normalises a wiki-link destination.
 * Destinations starting with "w:" or "Image:" yield null. Otherwise everything from the
 * first '#' onward is cut off; if nothing is left after the cut, the link text is
 * returned in place of the destination.
 * NOTE(review): a destination that is only "#fragment" looks like a same-page anchor, so
 * falling back to the link text may not name the intended page — confirm.
 *
 * @param wikiLinkDest raw destination of the link
 * @param wikiLinkText text of the link, used only as the fallback described above
 * @return the adjusted destination, or null for the rejected prefixes
 */
@Override
- public String adjustWikiLink(String wikiLinkDest) {
+ public String adjustWikiLink(String wikiLinkDest, String wikiLinkText) {
if (wikiLinkDest.startsWith("w:") || wikiLinkDest.startsWith("Image:")) {
return null;
}
+ final int hashPos = wikiLinkDest.indexOf("#");
+ if (hashPos != -1) {
+ wikiLinkDest = wikiLinkDest.substring(0, hashPos);
+ if (wikiLinkDest.isEmpty()) {
+ wikiLinkDest = wikiLinkText;
+ }
+ }
return wikiLinkDest;
}
/**
 * Hands the callback map to {@code EnFunctionCallbacks.addGenericCallbacks}; nothing else
 * is registered here.
 */
public void addFunctionCallbacks(
Map<String, FunctionCallback<WholeSectionToHtmlParser>> functionCallbacks) {
EnFunctionCallbacks.addGenericCallbacks(functionCallbacks);
- }});
+ }
+ });
final LangConfig basicLangConfig = new LangConfig() {
/** Basic config: no section is skipped, whatever the heading text. */
@Override
public boolean skipSection(String headingText) {
return false;
}
-
/** Basic config: every section maps to WIKTIONARY_MENTIONED; the heading is not inspected. */
+ @Override
+ public EntryTypeName sectionNameToEntryType(String sectionName) {
+ return EntryTypeName.WIKTIONARY_MENTIONED;
+ }
/**
 * Basic config: never skips a wiki link (always false).
 * The link text is read into {@code wikiText}, but that local is not used afterwards.
 */
@Override
public boolean skipWikiLink(WikiTokenizer wikiTokenizer) {
final String wikiText = wikiTokenizer.wikiLinkText();
return false;
}
/** Basic config: the destination is returned unchanged and the link text is ignored. */
@Override
- public String adjustWikiLink(String wikiLinkDest) {
+ public String adjustWikiLink(String wikiLinkDest, final String wikiLinkText) {
return wikiLinkDest;
}
}
// Parser configuration, all assigned once in the constructor.
// titleIndexBuilder: receives the index entries added in the code shown here.
// defIndexBuilder: stored, but not referenced elsewhere in the code visible in this change.
// langConfig: looked up in isoToLangConfig by the Wiktionary language code.
// webUrlTemplate: String.format template that is given the entry title; may be null.
final IndexBuilder titleIndexBuilder;
+ final IndexBuilder defIndexBuilder;
final String skipLangIso;
final LangConfig langConfig;
+ final String webUrlTemplate;
+
/**
 * Stores the index builders and settings, and selects the language config for
 * {@code wiktionaryIso} from {@code isoToLangConfig}.
 * NOTE(review): the lookup is guarded only by an {@code assert}; with assertions disabled
 * an unknown language code leaves {@code langConfig} null.
 *
 * @param titleIndexBuilder index builder that receives the entries added by this parser
 * @param defIndexBuilder stored as-is; no use is visible in this change
 * @param wiktionaryIso key into {@code isoToLangConfig}
 * @param skipLangIso stored as-is
 * @param webUrlTemplate String.format template given the entry title; null turns the
 *        appended web link off
 */
- public WholeSectionToHtmlParser(final IndexBuilder titleIndexBuilder, final String wiktionaryIso, final String skipLangIso) {
+ public WholeSectionToHtmlParser(final IndexBuilder titleIndexBuilder, final IndexBuilder defIndexBuilder, final String wiktionaryIso, final String skipLangIso,
+ final String webUrlTemplate) {
this.titleIndexBuilder = titleIndexBuilder;
+ this.defIndexBuilder = defIndexBuilder;
assert isoToLangConfig.containsKey(wiktionaryIso): wiktionaryIso;
this.langConfig = isoToLangConfig.get(wiktionaryIso);
this.skipLangIso = skipLangIso;
+ this.webUrlTemplate = webUrlTemplate;
}
// Entry for the section being parsed: replaced on each parseSection call and read by
// addLinkToCurrentEntry. Stays null until the first section is parsed.
IndexedEntry indexedEntry = null;
@Override
public void parseSection(String heading, String text) {
assert entrySource != null;
- final HtmlEntry htmlEntry = new HtmlEntry(entrySource, StringEscapeUtils.escapeHtml3(title));
+ final HtmlEntry htmlEntry = new HtmlEntry(entrySource, title);
indexedEntry = new IndexedEntry(htmlEntry);
final AppendAndIndexWikiCallback<WholeSectionToHtmlParser> callback = new AppendCallback(
callback.indexedEntry = indexedEntry;
callback.dispatch(text, null);
+ if (webUrlTemplate != null) {
+ final String webUrl = String.format(webUrlTemplate, title);
+ callback.builder.append(String.format("<p> <a href=\"%s\">%s</a>", Uri.escapeDisallowedChars(webUrl), escapeHtmlLiteral(webUrl)));
+ }
htmlEntry.html = callback.builder.toString();
indexedEntry.isValid = true;
/**
 * Adds the current {@code indexedEntry} to {@code titleIndexBuilder} under the given token.
 *
 * @param token string the entry is indexed under
 * @param entryTypeName entry type passed along with the token
 */
public void addLinkToCurrentEntry(String token, EntryTypeName entryTypeName) {
titleIndexBuilder.addEntryWithString(indexedEntry, token, entryTypeName);
}
+
+ /**
+  * Escapes plain text for inclusion in generated HTML.
+  * The text is first run through {@code StringEscapeUtils.escapeHtml3}. When
+  * {@code StringUtil.isAscii} accepts that result it is returned; otherwise the original,
+  * unescaped text is handed to {@code StringUtil.escapeToPureHtmlUnicode} instead.
+  *
+  * @param plainText text to escape
+  * @return the escaped form chosen as described above
+  */
+ public static String escapeHtmlLiteral(final String plainText) {
+     final String entityEscaped = StringEscapeUtils.escapeHtml3(plainText);
+     return StringUtil.isAscii(entityEscaped)
+             ? entityEscaped
+             : StringUtil.escapeToPureHtmlUnicode(plainText);
+ }
- static final Pattern ALL_ASCII = Pattern.compile("[\\p{ASCII}]*");
class AppendCallback extends AppendAndIndexWikiCallback<WholeSectionToHtmlParser> {
public AppendCallback(WholeSectionToHtmlParser parser) {
/**
 * Escapes the plain text with {@code escapeHtmlLiteral} and forwards the result to the
 * superclass handler.
 */
@Override
public void onPlainText(String plainText) {
- final String htmlEscaped = StringEscapeUtils.escapeHtml3(plainText);
- if (ALL_ASCII.matcher(htmlEscaped).matches()) {
- super.onPlainText(htmlEscaped);
- } else {
- super.onPlainText(StringUtil.escapeToPureHtmlUnicode(plainText));
- }
+ super.onPlainText(escapeHtmlLiteral(plainText));
}
@Override
}
String linkDest;
if (wikiTokenizer.wikiLinkDest() != null) {
- linkDest = langConfig.adjustWikiLink(wikiTokenizer.wikiLinkDest());
+ linkDest = langConfig.adjustWikiLink(wikiTokenizer.wikiLinkDest(), wikiTokenizer.wikiLinkText());
} else {
linkDest = wikiTokenizer.wikiLinkText();
}
+ if (sectionEntryTypeName != null) {
+ // TODO: inside a definition, this could be the wrong language.
+ titleIndexBuilder.addEntryWithString(indexedEntry, wikiTokenizer.wikiLinkText(), sectionEntryTypeName);
+ }
if (linkDest != null) {
- builder.append(String.format("<a href=\"%s\">", linkDest));
+ builder.append(String.format("<a href=\"%s\">", HtmlEntry.formatQuickdicUrl("", linkDest)));
super.onWikiLink(wikiTokenizer);
builder.append(String.format("</a>"));
} else {
/** Newline tokens are ignored: this override has an empty body. */
@Override
public void onNewline(WikiTokenizer wikiTokenizer) {
}
+
// sectionEntryTypeName: entry type for the section being parsed; reassigned from
// langConfig on every heading and may be null.
// currentIndexBuilder: declared here but not referenced in the code visible in this change.
+ EntryTypeName sectionEntryTypeName;
+ IndexBuilder currentIndexBuilder;
@Override
public void onHeading(WikiTokenizer wikiTokenizer) {
final String headingText = wikiTokenizer.headingWikiText();
+ sectionEntryTypeName = langConfig.sectionNameToEntryType(headingText);
final int depth = wikiTokenizer.headingDepth();
if (langConfig.skipSection(headingText)) {
while ((wikiTokenizer = wikiTokenizer.nextToken()) != null) {