package com.hughes.android.dictionary.parser.wiktionary;
+import com.hughes.android.dictionary.engine.EntryTypeName;
import com.hughes.android.dictionary.engine.HtmlEntry;
import com.hughes.android.dictionary.engine.IndexBuilder;
import com.hughes.android.dictionary.engine.IndexBuilder.TokenData;
import com.hughes.util.StringUtil;
import org.apache.commons.lang3.StringEscapeUtils;
-import org.apache.commons.lang3.StringUtils;
import java.util.ArrayList;
import java.util.LinkedHashMap;
interface LangConfig {
boolean skipSection(final String name);
+ EntryTypeName sectionNameToEntryType(String sectionName);
boolean skipWikiLink(final WikiTokenizer wikiTokenizer);
String adjustWikiLink(String wikiLinkDest);
void addFunctionCallbacks(
public boolean skipSection(String headingText) {
    // A section is skipped when enSkipSections matches its heading text.
    final boolean headingIsSkipped = enSkipSections.matcher(headingText).matches();
    return headingIsSkipped;
}
+
+ /**
+  * Maps a section heading to the entry type used when indexing links found in that section.
+  *
+  * @param sectionName the heading text of the section
+  * @return SYNONYM_MULTI for "Synonyms", ANTONYM_MULTI for "Antonyms" (both compared
+  *         case-insensitively), otherwise null
+  */
+ @Override
+ public EntryTypeName sectionNameToEntryType(String sectionName) {
+     final EntryTypeName entryType;
+     if (sectionName.equalsIgnoreCase("Synonyms")) {
+         entryType = EntryTypeName.SYNONYM_MULTI;
+     } else if (sectionName.equalsIgnoreCase("Antonyms")) {
+         entryType = EntryTypeName.ANTONYM_MULTI;
+     } else if (EnParser.partOfSpeechHeader.matcher(sectionName).matches()) {
+         // We need to put it in the other index, too.
+         entryType = null;
+     } else if (sectionName.equalsIgnoreCase("Derived Terms")) {
+         // Placeholder branch: currently yields the same result as the default.
+         entryType = null;
+     } else {
+         entryType = null;
+     }
+     return entryType;
+ }
@Override
public boolean skipWikiLink(WikiTokenizer wikiTokenizer) {
Map<String, FunctionCallback<WholeSectionToHtmlParser>> functionCallbacks) {
EnFunctionCallbacks.addGenericCallbacks(functionCallbacks);
}});
+
+ // Minimal configuration: no section filtering, no link rewriting, no callbacks.
+ final LangConfig basicLangConfig = new LangConfig() {
+     /** Never skips a section, whatever its heading. */
+     @Override
+     public boolean skipSection(String headingText) {
+         return false;
+     }
+
+     /** Every section maps to WIKTIONARY_MENTIONED, regardless of its name. */
+     @Override
+     public EntryTypeName sectionNameToEntryType(String sectionName) {
+         return EntryTypeName.WIKTIONARY_MENTIONED;
+     }
+
+     /** Skips only links whose text starts with "Category:". */
+     @Override
+     public boolean skipWikiLink(WikiTokenizer wikiTokenizer) {
+         return wikiTokenizer.wikiLinkText().startsWith("Category:");
+     }
+
+     /** Returns the link destination unchanged. */
+     @Override
+     public String adjustWikiLink(String wikiLinkDest) {
+         return wikiLinkDest;
+     }
+
+     /** Registers no function callbacks. */
+     @Override
+     public void addFunctionCallbacks(
+             Map<String, FunctionCallback<WholeSectionToHtmlParser>> functionCallbacks) {
+     }
+ };
+ // Register the shared basic config under each of these Wiktionary ISO codes.
+ for (final String iso : new String[] {"FR", "DE", "IT"}) {
+     isoToLangConfig.put(iso, basicLangConfig);
+ }
}
final IndexBuilder titleIndexBuilder;
this.langConfig = isoToLangConfig.get(wiktionaryIso);
this.skipLangIso = skipLangIso;
}
+
+ IndexedEntry indexedEntry = null; // NOTE(review): assigned in parseSection and reset to null at its end; appears to be null otherwise - confirm.
@Override
- void parseSection(String heading, String text) {
- HtmlEntry htmlEntry = new HtmlEntry(entrySource, StringEscapeUtils.escapeHtml3(title));
- IndexedEntry indexedEntry = new IndexedEntry(htmlEntry);
+ public void parseSection(String heading, String text) {
+ assert entrySource != null;
+ final HtmlEntry htmlEntry = new HtmlEntry(entrySource, StringEscapeUtils.escapeHtml3(title));
+ indexedEntry = new IndexedEntry(htmlEntry);
final AppendAndIndexWikiCallback<WholeSectionToHtmlParser> callback = new AppendCallback(
this);
tokenData.htmlEntries.add(htmlEntry);
// titleIndexBuilder.addEntryWithString(indexedEntry, title,
// EntryTypeName.WIKTIONARY_TITLE_MULTI_DETAIL);
+
+ indexedEntry = null;
}
@Override
void removeUselessArgs(Map<String, String> namedArgs) {
    // No-op override: namedArgs is left unmodified.
}
+ /**
+  * Adds the entry currently held in indexedEntry to the title index under the given token.
+  *
+  * @param token the string to index the entry under
+  * @param entryTypeName the entry type recorded for this index row
+  */
+ @Override
+ public void addLinkToCurrentEntry(String token, EntryTypeName entryTypeName) {
+     // NOTE(review): indexedEntry looks to be non-null only while parseSection runs;
+     // confirm this is never invoked outside of it.
+     final IndexedEntry currentEntry = indexedEntry;
+     titleIndexBuilder.addEntryWithString(currentEntry, token, entryTypeName);
+ }
+
+
+
// Pattern for zero or more characters that are all in the ASCII range (\p{ASCII}).
static final Pattern ALL_ASCII = Pattern.compile("[\\p{ASCII}]*");
class AppendCallback extends AppendAndIndexWikiCallback<WholeSectionToHtmlParser> {
} else {
linkDest = wikiTokenizer.wikiLinkText();
}
+ if (sectionEntryTypeName != null) {
+ // TODO: inside a definition, this could be the wrong language.
+ titleIndexBuilder.addEntryWithString(indexedEntry, wikiTokenizer.wikiLinkText(), sectionEntryTypeName);
+ }
if (linkDest != null) {
builder.append(String.format("<a href=\"%s\">", linkDest));
super.onWikiLink(wikiTokenizer);
@Override
public void onNewline(WikiTokenizer wikiTokenizer) {
    // No-op: this override does nothing for a newline token.
}
+
+ EntryTypeName sectionEntryTypeName; // Set in onHeading from langConfig.sectionNameToEntryType; when non-null, onWikiLink indexes link text under it.
@Override
public void onHeading(WikiTokenizer wikiTokenizer) {
final String headingText = wikiTokenizer.headingWikiText();
+ sectionEntryTypeName = langConfig.sectionNameToEntryType(headingText);
final int depth = wikiTokenizer.headingDepth();
if (langConfig.skipSection(headingText)) {
while ((wikiTokenizer = wikiTokenizer.nextToken()) != null) {