import com.hughes.android.dictionary.engine.IndexedEntry;
import com.hughes.android.dictionary.parser.WikiTokenizer;
import com.hughes.util.StringUtil;
-import com.sun.xml.internal.rngom.util.Uri;
import org.apache.commons.lang3.StringEscapeUtils;
+import java.net.URI;
import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.List;
}
static final Map<String,LangConfig> isoToLangConfig = new LinkedHashMap<String,LangConfig>();
static {
- final Pattern enSkipSections = Pattern.compile(".*Translations|Anagrams|References.*");
+ final Pattern enSkipSections = Pattern.compile(".*(Translations|Anagrams|References).*");
isoToLangConfig.put("EN", new LangConfig() {
@Override
public boolean skipSection(String headingText) {
return EntryTypeName.ANTONYM_MULTI;
}
if (EnParser.partOfSpeechHeader.matcher(sectionName).matches()) {
- // We need to put it in the other index, too.
+ // We need to put it in the other index, too (probably)
return null;
}
if (sectionName.equalsIgnoreCase("Derived Terms")) {
}
});
- final LangConfig basicLangConfig = new LangConfig() {
+ final Pattern deSkipSections = Pattern.compile(".*(Übersetzungen|Referenzen|Quellen).*"); // Matches headings that contain any one of the listed section names.
+ isoToLangConfig.put("DE", new LangConfig() { // Configuration registered under the "DE" key.
@Override
public boolean skipSection(String headingText) {
+ return deSkipSections.matcher(headingText).matches(); // true when the whole heading matches deSkipSections.
+ }
+
+ @Override
+ public EntryTypeName sectionNameToEntryType(String sectionName) { // Case-insensitive lookup; null when the section name is not one of the two handled below.
+ if (sectionName.equalsIgnoreCase("Synonyme")) {
+ return EntryTypeName.SYNONYM_MULTI;
+ }
+ if (sectionName.equalsIgnoreCase("Gegenwörter")) {
+ return EntryTypeName.ANTONYM_MULTI;
+ }
+ return null;
+ }
+
+ @Override
+ public boolean skipWikiLink(WikiTokenizer wikiTokenizer) { // true only for links whose text starts with the prefix tested below.
+ final String wikiText = wikiTokenizer.wikiLinkText();
+ if (wikiText.startsWith("???Category:")) { // NOTE(review): the "???" prefix looks like a placeholder - confirm the intended prefix for this language.
+ return true;
+ }
return false;
}
+ @Override
+ public String adjustWikiLink(String wikiLinkDest, String wikiLinkText) { // Returns the destination to link to, or null for "w:" / "Image:" destinations.
+ if (wikiLinkDest.startsWith("w:") || wikiLinkDest.startsWith("Image:")) {
+ return null;
+ }
+ final int hashPos = wikiLinkDest.indexOf("#");
+ if (hashPos != -1) {
+ wikiLinkDest = wikiLinkDest.substring(0, hashPos); // Drop everything from the first '#' onwards.
+ if (wikiLinkDest.isEmpty()) {
+ wikiLinkDest = wikiLinkText; // Destination was only a "#..." part: fall back to the link text.
+ }
+ }
+ return wikiLinkDest;
+ }
+
+ @Override
+ public void addFunctionCallbacks(
+ Map<String, FunctionCallback<WholeSectionToHtmlParser>> functionCallbacks) {
+ DeFunctionCallbacks.addGenericCallbacks(functionCallbacks); // Delegates registration to DeFunctionCallbacks.
+ }
+ });
+
+ final Pattern itSkipSections = Pattern.compile(".*(Traduzione|Note / Riferimenti).*"); // Matches headings that contain either of the listed section names.
+ isoToLangConfig.put("IT", new LangConfig() { // Configuration registered under the "IT" key.
+ @Override
+ public boolean skipSection(String headingText) {
+ return itSkipSections.matcher(headingText).matches(); // true when the whole heading matches itSkipSections.
+ }
+
@Override
public EntryTypeName sectionNameToEntryType(String sectionName) {
- return EntryTypeName.WIKTIONARY_MENTIONED;
+ if (sectionName.equalsIgnoreCase("Sinonimi")) { // Case-insensitive lookup; falls through to null when neither name matches.
+ return EntryTypeName.SYNONYM_MULTI;
+ }
+ if (sectionName.equalsIgnoreCase("Antonimi/Contrari")) {
+ return EntryTypeName.ANTONYM_MULTI;
+ }
+ return null;
}
+
@Override
public boolean skipWikiLink(WikiTokenizer wikiTokenizer) {
final String wikiText = wikiTokenizer.wikiLinkText();
- if (wikiText.startsWith("Category:")) {
+ if (wikiText.startsWith("???Category:")) { // NOTE(review): the "???" prefix looks like a placeholder - confirm the intended prefix for this language.
return true;
}
return false;
}
@Override
- public String adjustWikiLink(String wikiLinkDest, final String wikiLinkText) {
+ public String adjustWikiLink(String wikiLinkDest, String wikiLinkText) { // Returns the destination to link to, or null for "w:" / "Image:" destinations.
+ if (wikiLinkDest.startsWith("w:") || wikiLinkDest.startsWith("Image:")) {
+ return null;
+ }
+ final int hashPos = wikiLinkDest.indexOf("#");
+ if (hashPos != -1) {
+ wikiLinkDest = wikiLinkDest.substring(0, hashPos); // Drop everything from the first '#' onwards.
+ if (wikiLinkDest.isEmpty()) {
+ wikiLinkDest = wikiLinkText; // Destination was only a "#..." part: fall back to the link text.
+ }
+ }
+ return wikiLinkDest;
+ }
+
+ @Override
+ public void addFunctionCallbacks(
+ Map<String, FunctionCallback<WholeSectionToHtmlParser>> functionCallbacks) {
+ ItFunctionCallbacks.addGenericCallbacks(functionCallbacks); // Delegates registration to ItFunctionCallbacks.
+ }
+ });
+
+
+ final Pattern frSkipSections = Pattern.compile(".*(Traductions).*"); // Matches headings that contain "Traductions".
+ isoToLangConfig.put("FR", new LangConfig() { // Configuration registered under the "FR" key.
+ @Override
+ public boolean skipSection(String headingText) {
+ return frSkipSections.matcher(headingText).matches(); // true when the whole heading matches frSkipSections.
+ }
+
+ @Override
+ public EntryTypeName sectionNameToEntryType(String sectionName) { // Only "Synonymes" (case-insensitive) is mapped; every other name yields null.
+ if (sectionName.equalsIgnoreCase("Synonymes")) {
+ return EntryTypeName.SYNONYM_MULTI;
+ }
+ return null;
+ }
+
+ @Override
+ public boolean skipWikiLink(WikiTokenizer wikiTokenizer) {
+ return false; // No wiki link is skipped for this configuration.
+ }
+ @Override
+ public String adjustWikiLink(String wikiLinkDest, String wikiLinkText) { // Returns the destination to link to, or null for "w:" / "Image:" destinations.
+ if (wikiLinkDest.startsWith("w:") || wikiLinkDest.startsWith("Image:")) {
+ return null;
+ }
+ final int hashPos = wikiLinkDest.indexOf("#");
+ if (hashPos != -1) {
+ wikiLinkDest = wikiLinkDest.substring(0, hashPos); // Drop everything from the first '#' onwards.
+ if (wikiLinkDest.isEmpty()) {
+ wikiLinkDest = wikiLinkText; // Destination was only a "#..." part: fall back to the link text.
+ }
+ }
return wikiLinkDest;
}
@Override
public void addFunctionCallbacks(
Map<String, FunctionCallback<WholeSectionToHtmlParser>> functionCallbacks) {
+ FrFunctionCallbacks.addGenericCallbacks(functionCallbacks); // Delegates registration to FrFunctionCallbacks.
}
- };
- isoToLangConfig.put("FR", basicLangConfig);
- isoToLangConfig.put("DE", basicLangConfig);
- isoToLangConfig.put("IT", basicLangConfig);
+ });
}
final IndexBuilder titleIndexBuilder;
if (webUrlTemplate != null) {
final String webUrl = String.format(webUrlTemplate, title);
- callback.builder.append(String.format("<p> <a href=\"%s\">%s</a>", Uri.escapeDisallowedChars(webUrl), escapeHtmlLiteral(webUrl)));
+ // URI.create rejects strings that are not valid URIs (for example a '%' that does not
+ // start a valid escape sequence) by throwing IllegalArgumentException. Only that case
+ // is tolerated here; any other failure is allowed to propagate instead of being hidden.
+ try {
+ callback.builder.append(String.format("<p> <a href=\"%s\">%s</a>", URI.create(webUrl).toString(), escapeHtmlLiteral(webUrl)));
+ } catch (IllegalArgumentException ignored) {
+ // Deliberate best effort: nothing is appended, so the entry is emitted without a web link.
+ }
}
htmlEntry.html = callback.builder.toString();
indexedEntry.isValid = true;
final TokenData tokenData = titleIndexBuilder.getOrCreateTokenData(title);
+ tokenData.hasMainEntry = true;
htmlEntry.addToDictionary(titleIndexBuilder.index.dict);
tokenData.htmlEntries.add(htmlEntry);
}
@Override
- public void addLinkToCurrentEntry(String token, EntryTypeName entryTypeName) {
- titleIndexBuilder.addEntryWithString(indexedEntry, token, entryTypeName);
+ public void addLinkToCurrentEntry(String token, final String lang, EntryTypeName entryTypeName) {
+ // Index the token only when the link carries no language or its language equals skipLangIso.
+ final boolean untaggedOrSkipLang = lang == null || lang.equals(skipLangIso);
+ if (!untaggedOrSkipLang) {
+ return;
+ }
+ titleIndexBuilder.addEntryWithString(indexedEntry, token, entryTypeName);
}
public static String escapeHtmlLiteral(final String plainText) {
if (StringUtil.isAscii(htmlEscaped)) {
return htmlEscaped;
} else {
- return StringUtil.escapeToPureHtmlUnicode(plainText);
+ return StringUtil.escapeUnicodeToPureHtml(plainText);
}
}
// TODO: inside a definition, this could be the wrong language.
titleIndexBuilder.addEntryWithString(indexedEntry, wikiTokenizer.wikiLinkText(), sectionEntryTypeName);
}
- if (linkDest != null) {
+ if (!StringUtil.isNullOrEmpty(linkDest)) {
builder.append(String.format("<a href=\"%s\">", HtmlEntry.formatQuickdicUrl("", linkDest)));
super.onWikiLink(wikiTokenizer);
builder.append(String.format("</a>"));
sectionEntryTypeName = langConfig.sectionNameToEntryType(headingText);
final int depth = wikiTokenizer.headingDepth();
if (langConfig.skipSection(headingText)) {
+ //System.out.println("Skipping section:" + headingText);
while ((wikiTokenizer = wikiTokenizer.nextToken()) != null) {
if (wikiTokenizer.isHeading() && wikiTokenizer.headingDepth() <= depth) {
+ // System.out.println("Resume on: " + wikiTokenizer.token());
wikiTokenizer.returnToLineStart();
return;
+ } else {
+ // System.out.println("Skipped: " + wikiTokenizer.token());
}
}
return;