+
+ /**
+  * Language-specific decisions about the sections and links of a wiki page.
+  * Implementations are registered by ISO code in {@code isoToLangConfig}.
+  */
+ interface LangConfig {
+     /**
+      * Tells whether the section with the given heading should be skipped.
+      *
+      * @param name the heading text of the section
+      * @return {@code true} if the section is to be skipped
+      */
+     boolean skipSection(String name);
+
+     /**
+      * Maps a section name to an entry type.
+      *
+      * @param sectionName the section name to look up
+      * @return the matching entry type, or {@code null} when the name has no mapping
+      */
+     EntryTypeName sectionNameToEntryType(String sectionName);
+
+     /**
+      * Tells whether the wiki link the tokenizer is currently positioned on should be skipped.
+      *
+      * @param wikiTokenizer tokenizer giving access to the current wiki link
+      * @return {@code true} if the link is to be skipped
+      */
+     boolean skipWikiLink(WikiTokenizer wikiTokenizer);
+
+     /**
+      * Rewrites the destination of a wiki link.
+      *
+      * @param wikiLinkDest the raw link destination
+      * @param wikiLinkText the visible text of the link
+      * @return the destination to use; implementations may return {@code null}
+      *         (presumably meaning "do not link" -- confirm against the caller)
+      */
+     String adjustWikiLink(String wikiLinkDest, String wikiLinkText);
+
+     /**
+      * Adds this language's function callbacks to the supplied map.
+      *
+      * @param functionCallbacks map, keyed by name, that receives the callbacks
+      */
+     void addFunctionCallbacks(
+             Map<String, FunctionCallback<WholeSectionToHtmlParser>> functionCallbacks);
+ }
+ static final Map<String,LangConfig> isoToLangConfig = new LinkedHashMap<String,LangConfig>(); // Upper-case ISO code ("EN", "DE", "IT", "FR") -> LangConfig, filled by the static initializer below; LinkedHashMap keeps registration order.
+ static {
+ // English configuration: headings containing any of these words are skipped.
+ final Pattern enSkipSections = Pattern.compile(".*(Translations|Anagrams|References).*");
+ isoToLangConfig.put("EN", new LangConfig() {
+     /** Returns true when the whole heading matches the English skip pattern. */
+     @Override
+     public boolean skipSection(String headingText) {
+         final boolean skipped = enSkipSections.matcher(headingText).matches();
+         return skipped;
+     }
+
+     /**
+      * Maps "Synonyms" / "Antonyms" (case-insensitive) to their entry types;
+      * every other section name yields {@code null}.
+      */
+     @Override
+     public EntryTypeName sectionNameToEntryType(String sectionName) {
+         final EntryTypeName entryType;
+         if (sectionName.equalsIgnoreCase("Synonyms")) {
+             entryType = EntryTypeName.SYNONYM_MULTI;
+         } else if (sectionName.equalsIgnoreCase("Antonyms")) {
+             entryType = EntryTypeName.ANTONYM_MULTI;
+         } else if (EnParser.partOfSpeechHeader.matcher(sectionName).matches()
+                 || sectionName.equalsIgnoreCase("Derived Terms")) {
+             // Recognised, but not mapped to an entry type yet.
+             // Part-of-speech sections probably need to go in the other index, too.
+             entryType = null;
+         } else {
+             entryType = null;
+         }
+         return entryType;
+     }
+
+     /** Skips links whose text starts with "Category:". */
+     @Override
+     public boolean skipWikiLink(WikiTokenizer wikiTokenizer) {
+         return wikiTokenizer.wikiLinkText().startsWith("Category:");
+     }
+
+     /**
+      * Returns {@code null} for destinations starting with "w:" or "Image:".
+      * Otherwise cuts the destination at the first '#'; if nothing is left,
+      * the link text is used as the destination instead.
+      */
+     @Override
+     public String adjustWikiLink(String wikiLinkDest, String wikiLinkText) {
+         final boolean rejectedPrefix =
+                 wikiLinkDest.startsWith("w:") || wikiLinkDest.startsWith("Image:");
+         if (rejectedPrefix) {
+             return null;
+         }
+         final int anchorStart = wikiLinkDest.indexOf('#');
+         if (anchorStart == -1) {
+             return wikiLinkDest;
+         }
+         final String page = wikiLinkDest.substring(0, anchorStart);
+         return page.isEmpty() ? wikiLinkText : page;
+     }
+
+     /** Delegates to the generic English callbacks. */
+     @Override
+     public void addFunctionCallbacks(
+             Map<String, FunctionCallback<WholeSectionToHtmlParser>> functionCallbacks) {
+         EnFunctionCallbacks.addGenericCallbacks(functionCallbacks);
+     }
+ });
+
+ // German configuration: headings containing any of these words are skipped.
+ final Pattern deSkipSections = Pattern.compile(".*(Übersetzungen|Referenzen|Quellen).*");
+ isoToLangConfig.put("DE", new LangConfig() {
+     /** Returns true when the whole heading matches the German skip pattern. */
+     @Override
+     public boolean skipSection(String headingText) {
+         return deSkipSections.matcher(headingText).matches();
+     }
+
+     /**
+      * Maps "Synonyme" / "Gegenwörter" (case-insensitive) to their entry types;
+      * every other section name yields {@code null}.
+      */
+     @Override
+     public EntryTypeName sectionNameToEntryType(String sectionName) {
+         if (sectionName.equalsIgnoreCase("Synonyme")) {
+             return EntryTypeName.SYNONYM_MULTI;
+         }
+         if (sectionName.equalsIgnoreCase("Gegenwörter")) {
+             return EntryTypeName.ANTONYM_MULTI;
+         }
+         return null;
+     }
+
+     /**
+      * Skips category links.
+      *
+      * The previous prefix "???Category:" was an unfinished placeholder that no
+      * real link starts with, so category links were never skipped. German wikis
+      * name the namespace "Kategorie"; MediaWiki also accepts the canonical
+      * "Category" on every wiki, so both prefixes are checked.
+      */
+     @Override
+     public boolean skipWikiLink(WikiTokenizer wikiTokenizer) {
+         final String wikiText = wikiTokenizer.wikiLinkText();
+         return wikiText.startsWith("Kategorie:") || wikiText.startsWith("Category:");
+     }
+
+     /**
+      * Returns {@code null} for destinations starting with "w:" or "Image:".
+      * Otherwise cuts the destination at the first '#'; if nothing is left,
+      * the link text is used as the destination instead.
+      */
+     @Override
+     public String adjustWikiLink(String wikiLinkDest, String wikiLinkText) {
+         if (wikiLinkDest.startsWith("w:") || wikiLinkDest.startsWith("Image:")) {
+             return null;
+         }
+         final int hashPos = wikiLinkDest.indexOf("#");
+         if (hashPos != -1) {
+             wikiLinkDest = wikiLinkDest.substring(0, hashPos);
+             if (wikiLinkDest.isEmpty()) {
+                 // Pure "#anchor" link: fall back to the visible text.
+                 wikiLinkDest = wikiLinkText;
+             }
+         }
+         return wikiLinkDest;
+     }
+
+     /** Delegates to the generic German callbacks. */
+     @Override
+     public void addFunctionCallbacks(
+             Map<String, FunctionCallback<WholeSectionToHtmlParser>> functionCallbacks) {
+         DeFunctionCallbacks.addGenericCallbacks(functionCallbacks);
+     }
+ });
+
+ // Italian configuration: headings containing any of these phrases are skipped.
+ final Pattern itSkipSections = Pattern.compile(".*(Traduzione|Note / Riferimenti).*");
+ isoToLangConfig.put("IT", new LangConfig() {
+     /** Returns true when the whole heading matches the Italian skip pattern. */
+     @Override
+     public boolean skipSection(String headingText) {
+         return itSkipSections.matcher(headingText).matches();
+     }
+
+     /**
+      * Maps "Sinonimi" / "Antonimi/Contrari" (case-insensitive) to their entry
+      * types; every other section name yields {@code null}.
+      */
+     @Override
+     public EntryTypeName sectionNameToEntryType(String sectionName) {
+         if (sectionName.equalsIgnoreCase("Sinonimi")) {
+             return EntryTypeName.SYNONYM_MULTI;
+         }
+         if (sectionName.equalsIgnoreCase("Antonimi/Contrari")) {
+             return EntryTypeName.ANTONYM_MULTI;
+         }
+         return null;
+     }
+
+     /**
+      * Skips category links.
+      *
+      * The previous prefix "???Category:" was an unfinished placeholder that no
+      * real link starts with, so category links were never skipped. Italian wikis
+      * name the namespace "Categoria"; MediaWiki also accepts the canonical
+      * "Category" on every wiki, so both prefixes are checked.
+      */
+     @Override
+     public boolean skipWikiLink(WikiTokenizer wikiTokenizer) {
+         final String wikiText = wikiTokenizer.wikiLinkText();
+         return wikiText.startsWith("Categoria:") || wikiText.startsWith("Category:");
+     }
+
+     /**
+      * Returns {@code null} for destinations starting with "w:" or "Image:".
+      * Otherwise cuts the destination at the first '#'; if nothing is left,
+      * the link text is used as the destination instead.
+      */
+     @Override
+     public String adjustWikiLink(String wikiLinkDest, String wikiLinkText) {
+         if (wikiLinkDest.startsWith("w:") || wikiLinkDest.startsWith("Image:")) {
+             return null;
+         }
+         final int hashPos = wikiLinkDest.indexOf("#");
+         if (hashPos != -1) {
+             wikiLinkDest = wikiLinkDest.substring(0, hashPos);
+             if (wikiLinkDest.isEmpty()) {
+                 // Pure "#anchor" link: fall back to the visible text.
+                 wikiLinkDest = wikiLinkText;
+             }
+         }
+         return wikiLinkDest;
+     }
+
+     /** Delegates to the generic Italian callbacks. */
+     @Override
+     public void addFunctionCallbacks(
+             Map<String, FunctionCallback<WholeSectionToHtmlParser>> functionCallbacks) {
+         ItFunctionCallbacks.addGenericCallbacks(functionCallbacks);
+     }
+ });
+
+
+ // French configuration: headings containing "Traductions" are skipped.
+ final Pattern frSkipSections = Pattern.compile(".*(Traductions).*");
+ isoToLangConfig.put("FR", new LangConfig() {
+     /** Returns true when the whole heading matches the French skip pattern. */
+     @Override
+     public boolean skipSection(String headingText) {
+         final boolean skipped = frSkipSections.matcher(headingText).matches();
+         return skipped;
+     }
+
+     /**
+      * Maps "Synonymes" (case-insensitive) to the synonym entry type;
+      * every other section name yields {@code null}.
+      */
+     @Override
+     public EntryTypeName sectionNameToEntryType(String sectionName) {
+         return sectionName.equalsIgnoreCase("Synonymes")
+                 ? EntryTypeName.SYNONYM_MULTI
+                 : null;
+     }
+
+     /** No wiki link is skipped for French. */
+     @Override
+     public boolean skipWikiLink(WikiTokenizer wikiTokenizer) {
+         return false;
+     }
+
+     /**
+      * Returns {@code null} for destinations starting with "w:" or "Image:".
+      * Otherwise cuts the destination at the first '#'; if nothing is left,
+      * the link text is used as the destination instead.
+      */
+     @Override
+     public String adjustWikiLink(String wikiLinkDest, String wikiLinkText) {
+         final boolean rejectedPrefix =
+                 wikiLinkDest.startsWith("w:") || wikiLinkDest.startsWith("Image:");
+         if (rejectedPrefix) {
+             return null;
+         }
+         final int anchorStart = wikiLinkDest.indexOf('#');
+         if (anchorStart == -1) {
+             return wikiLinkDest;
+         }
+         final String page = wikiLinkDest.substring(0, anchorStart);
+         return page.isEmpty() ? wikiLinkText : page;
+     }
+
+     /** Delegates to the generic French callbacks. */
+     @Override
+     public void addFunctionCallbacks(
+             Map<String, FunctionCallback<WholeSectionToHtmlParser>> functionCallbacks) {
+         FrFunctionCallbacks.addGenericCallbacks(functionCallbacks);
+     }
+ });
+ }