X-Git-Url: http://gitweb.fperrin.net/?p=DictionaryPC.git;a=blobdiff_plain;f=src%2Fcom%2Fhughes%2Fandroid%2Fdictionary%2Fparser%2Fwiktionary%2FEnParser.java;h=b60235c23991744392ce0799f485829595448107;hp=60325058e9d55061310c265fd1336820a2155acb;hb=2fc669d88306d563fc9c899d8d91b25d591692ea;hpb=cb8c8722c1d928396d118cf420505bda8776b308 diff --git a/src/com/hughes/android/dictionary/parser/wiktionary/EnParser.java b/src/com/hughes/android/dictionary/parser/wiktionary/EnParser.java index 6032505..b60235c 100644 --- a/src/com/hughes/android/dictionary/parser/wiktionary/EnParser.java +++ b/src/com/hughes/android/dictionary/parser/wiktionary/EnParser.java @@ -17,1232 +17,138 @@ package com.hughes.android.dictionary.parser.wiktionary; import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; -import java.util.LinkedHashMap; import java.util.LinkedHashSet; -import java.util.List; import java.util.Map; import java.util.Set; -import java.util.concurrent.atomic.AtomicInteger; import java.util.regex.Pattern; import com.hughes.android.dictionary.engine.EntryTypeName; import com.hughes.android.dictionary.engine.IndexBuilder; -import com.hughes.android.dictionary.engine.IndexedEntry; -import com.hughes.android.dictionary.engine.PairEntry; -import com.hughes.android.dictionary.engine.PairEntry.Pair; import com.hughes.android.dictionary.parser.WikiTokenizer; -import com.hughes.util.ListUtil; public abstract class EnParser extends AbstractWiktionaryParser { - // TODO: process {{ttbc}} lines - - static final Pattern partOfSpeechHeader = Pattern.compile( - "Noun|Verb|Adjective|Adverb|Pronoun|Conjunction|Interjection|" + - "Preposition|Proper noun|Article|Prepositional phrase|Acronym|" + - "Abbreviation|Initialism|Contraction|Prefix|Suffix|Symbol|Letter|" + - "Ligature|Idiom|Phrase|\\{\\{acronym\\}\\}|\\{\\{initialism\\}\\}|" + - "\\{\\{abbreviation\\}\\}|" + - // These are @deprecated: - "Noun form|Verb form|Adjective form|Nominal phrase|Noun phrase|" + - "Verb phrase|Transitive verb|Intransitive verb|Reflexive verb|" + - // These are extras I found: - "Determiner|Numeral|Number|Cardinal number|Ordinal number|Proverb|" + - "Particle|Interjection|Pronominal adverb" + - "Han character|Hanzi|Hanja|Kanji|Katakana character|Syllable"); - - // Might only want to remove "lang" if it's equal to "zh", for example. - static final Set USELESS_WIKI_ARGS = new LinkedHashSet( - Arrays.asList( - "lang", - "sc", - "sort", - "cat", - "cat2", - "xs", - "nodot")); - - static boolean isIgnorableTitle(final String title) { - return title.startsWith("Wiktionary:") || - title.startsWith("Template:") || - title.startsWith("Appendix:") || - title.startsWith("Category:") || - title.startsWith("Index:") || - title.startsWith("MediaWiki:") || - title.startsWith("TransWiki:") || - title.startsWith("Citations:") || - title.startsWith("Concordance:") || - title.startsWith("Help:"); - } - - final IndexBuilder enIndexBuilder; - final IndexBuilder foreignIndexBuilder; - final Pattern langPattern; - final Pattern langCodePattern; - final boolean swap; - - // State used while parsing. - enum State { - TRANSLATION_LINE, - ENGLISH_DEF_OF_FOREIGN, - ENGLISH_EXAMPLE, - FOREIGN_EXAMPLE, - } - State state = null; - - public boolean entryIsFormOfSomething = false; - final Collection wordForms = new ArrayList(); - boolean titleAppended = false; - - - final AppendAndIndexWikiCallback appendAndIndexWikiCallback = new AppendAndIndexWikiCallback(this); - { - appendAndIndexWikiCallback.functionCallbacks.putAll(FunctionCallbacks.DEFAULT); - } - - EnParser(final IndexBuilder enIndexBuilder, final IndexBuilder otherIndexBuilder, final Pattern langPattern, final Pattern langCodePattern, final boolean swap) { - this.enIndexBuilder = enIndexBuilder; - this.foreignIndexBuilder = otherIndexBuilder; - this.langPattern = langPattern; - this.langCodePattern = langCodePattern; - this.swap = swap; - } - - // -------------------------------------------------------------------- - - static final class EnToTranslationParser extends EnParser { - - EnToTranslationParser(final IndexBuilder enIndexBuilder, - final IndexBuilder otherIndexBuilder, final Pattern langPattern, - final Pattern langCodePattern, final boolean swap) { - super(enIndexBuilder, otherIndexBuilder, langPattern, langCodePattern, swap); - } - - @Override - void parseSection(String heading, String text) { - if (isIgnorableTitle(title)) { - return; - } - heading = heading.replaceAll("=", "").trim(); - if (!heading.contains("English")) { - return; - } - - String pos = null; - int posDepth = -1; - - final WikiTokenizer wikiTokenizer = new WikiTokenizer(text); - while (wikiTokenizer.nextToken() != null) { - - if (wikiTokenizer.isHeading()) { - final String headerName = wikiTokenizer.headingWikiText(); - - if (wikiTokenizer.headingDepth() <= posDepth) { - pos = null; - posDepth = -1; - } - - if (partOfSpeechHeader.matcher(headerName).matches()) { - posDepth = wikiTokenizer.headingDepth(); - pos = wikiTokenizer.headingWikiText(); - // TODO: if we're inside the POS section, we should handle the first title line... - - } else if (headerName.equals("Translations")) { - if (pos == null) { - LOG.info("Translations without POS (but using anyway): " + title); + // TODO: process {{ttbc}} lines + + public static final Pattern partOfSpeechHeader = Pattern.compile( + "Noun|Verb|Adjective|Adverb|Pronoun|Conjunction|Interjection|" + + "Preposition|Proper noun|Article|Prepositional phrase|Acronym|" + + "Abbreviation|Initialism|Contraction|Prefix|Suffix|Symbol|Letter|" + + "Ligature|Idiom|Phrase|\\{\\{acronym\\}\\}|\\{\\{initialism\\}\\}|" + + "\\{\\{abbreviation\\}\\}|" + + // These are @deprecated: + "Noun form|Verb form|Adjective form|Nominal phrase|Noun phrase|" + + "Verb phrase|Transitive verb|Intransitive verb|Reflexive verb|" + + // These are extras I found: + "Determiner|Numeral|Number|Cardinal number|Ordinal number|Proverb|" + + "Particle|Interjection|Pronominal adverb|" + + "Han character|Hanzi|Hanja|Kanji|Katakana character|Syllable"); + + static final Set USELESS_WIKI_ARGS = new LinkedHashSet<>( + Arrays.asList( + "lang", + "sc", + "sort", + "cat", + "cat2", + "xs", + "nodot")); + + static boolean isIgnorableTitle(final String title) { + return title.startsWith("Wiktionary:") || + title.startsWith("Template:") || + title.startsWith("Appendix:") || + title.startsWith("Category:") || + title.startsWith("Index:") || + title.startsWith("MediaWiki:") || + title.startsWith("TransWiki:") || + title.startsWith("Citations:") || + title.startsWith("Concordance:") || + title.startsWith("Help:"); + } + + final IndexBuilder enIndexBuilder; + final IndexBuilder foreignIndexBuilder; + final Pattern langPattern; + final Pattern langCodePattern; + final boolean swap; + + // State used while parsing. + enum State { + TRANSLATION_LINE, + ENGLISH_DEF_OF_FOREIGN, + ENGLISH_EXAMPLE, + FOREIGN_EXAMPLE, + } + State state = null; + + public boolean entryIsFormOfSomething = false; + final Collection wordForms = new ArrayList<>(); + boolean titleAppended = false; + + + final AppendAndIndexWikiCallback appendAndIndexWikiCallback = new AppendAndIndexCallback(this); + { + appendAndIndexWikiCallback.functionCallbacks.putAll(EnFunctionCallbacks.DEFAULT); + for (final String key : new ArrayList<>(appendAndIndexWikiCallback.functionCallbacks.keySet())) { + // Don't handle the it-conj functions here. + if (key.startsWith("it-conj")) { + appendAndIndexWikiCallback.functionCallbacks.remove(key); } - doTranslations(wikiTokenizer, pos); - } else if (headerName.equals("Pronunciation")) { - //doPronunciation(wikiLineReader); - } - } else if (wikiTokenizer.isFunction()) { - final String name = wikiTokenizer.functionName(); - if (name.equals("head") && pos == null) { - LOG.warning("{{head}} without POS: " + title); - } } - } } - private void doTranslations(final WikiTokenizer wikiTokenizer, final String pos) { - if (title.equals("absolutely")) { - //System.out.println(); - } - - String topLevelLang = null; - String sense = null; - boolean done = false; - while (wikiTokenizer.nextToken() != null) { - if (wikiTokenizer.isHeading()) { - wikiTokenizer.returnToLineStart(); - return; - } - if (done) { - continue; - } - - // Check whether we care about this line: - - if (wikiTokenizer.isFunction()) { - final String functionName = wikiTokenizer.functionName(); - final List positionArgs = wikiTokenizer.functionPositionArgs(); - - if (functionName.equals("trans-top")) { - sense = null; - if (wikiTokenizer.functionPositionArgs().size() >= 1) { - sense = positionArgs.get(0); - // TODO: could emphasize words in [[brackets]] inside sense. - sense = WikiTokenizer.toPlainText(sense); - //LOG.info("Sense: " + sense); + EnParser(final IndexBuilder enIndexBuilder, final IndexBuilder otherIndexBuilder, final Pattern langPattern, final Pattern langCodePattern, final boolean swap) { + this.enIndexBuilder = enIndexBuilder; + this.foreignIndexBuilder = otherIndexBuilder; + this.langPattern = langPattern; + this.langCodePattern = langCodePattern; + this.swap = swap; + } + + @Override + void removeUselessArgs(Map namedArgs) { + namedArgs.keySet().removeAll(USELESS_WIKI_ARGS); + } + + static class AppendAndIndexCallback extends AppendAndIndexWikiCallback { + + public AppendAndIndexCallback(EnParser parser) { + super(parser); + } + + @Override + public void onWikiLink(WikiTokenizer wikiTokenizer) { + final String text = wikiTokenizer.wikiLinkText(); + final String link = wikiTokenizer.wikiLinkDest(); + if (link != null) { + if (link.contains("#English")) { + dispatch(text, parser.enIndexBuilder, EntryTypeName.WIKTIONARY_ENGLISH_DEF_WIKI_LINK); + } else if (link.contains("#") && parser.langPattern.matcher(link).find()) { + dispatch(text, parser.foreignIndexBuilder, EntryTypeName.WIKTIONARY_ENGLISH_DEF_OTHER_LANG); + } else if (link.equals("plural")) { + builder.append(text); + } else { + //LOG.warning("Special link: " + englishTokenizer.token()); + dispatch(text, EntryTypeName.WIKTIONARY_ENGLISH_DEF_WIKI_LINK); + } + } else { + // link == null + final EntryTypeName entryTypeName; + switch (parser.state) { + case TRANSLATION_LINE: + entryTypeName = EntryTypeName.WIKTIONARY_TRANSLATION_WIKI_TEXT; + break; + case ENGLISH_DEF_OF_FOREIGN: + entryTypeName = EntryTypeName.WIKTIONARY_ENGLISH_DEF_WIKI_LINK; + break; + default: + throw new IllegalStateException("Invalid enum value: " + parser.state); + } + dispatch(text, entryTypeName); } - } else if (functionName.equals("trans-bottom")) { - sense = null; - } else if (functionName.equals("trans-mid")) { - } else if (functionName.equals("trans-see")) { - incrementCount("WARNING:trans-see"); - } else if (functionName.startsWith("picdic")) { - } else if (functionName.startsWith("checktrans")) { - done = true; - } else if (functionName.startsWith("ttbc")) { - wikiTokenizer.nextLine(); - // TODO: would be great to handle ttbc - // TODO: Check this: done = true; - } else { - LOG.warning("Unexpected translation wikifunction: " + wikiTokenizer.token() + ", title=" + title); - } - } else if (wikiTokenizer.isListItem()) { - final String line = wikiTokenizer.listItemWikiText(); - // This line could produce an output... - -// if (line.contains("ich hoan dich gear")) { -// //System.out.println(); -// } - - // First strip the language and check whether it matches. - // And hold onto it for sub-lines. - final int colonIndex = line.indexOf(":"); - if (colonIndex == -1) { - continue; - } - - final String lang = trim(WikiTokenizer.toPlainText(line.substring(0, colonIndex))); - incrementCount("tCount:" + lang); - final boolean appendLang; - if (wikiTokenizer.listItemPrefix().length() == 1) { - topLevelLang = lang; - final boolean thisFind = langPattern.matcher(lang).find(); - if (!thisFind) { - continue; - } - appendLang = !langPattern.matcher(lang).matches(); - } else if (topLevelLang == null) { - continue; - } else { - // Two-level -- the only way we won't append is if this second level matches exactly. - if (!langPattern.matcher(lang).matches() && !langPattern.matcher(topLevelLang).find()) { - continue; - } - appendLang = !langPattern.matcher(lang).matches(); - } - - String rest = line.substring(colonIndex + 1).trim(); - if (rest.length() > 0) { - doTranslationLine(line, appendLang ? lang : null, pos, sense, rest); - } - - } else if (wikiTokenizer.remainderStartsWith("''See''")) { - wikiTokenizer.nextLine(); - incrementCount("WARNING: ''See''" ); - LOG.fine("Skipping See line: " + wikiTokenizer.token()); - } else if (wikiTokenizer.isWikiLink()) { - final String wikiLink = wikiTokenizer.wikiLinkText(); - if (wikiLink.contains(":") && wikiLink.contains(title)) { - } else if (wikiLink.contains("Category:")) { - } else { - incrementCount("WARNING: Unexpected wikiLink" ); - LOG.warning("Unexpected wikiLink: " + wikiTokenizer.token() + ", title=" + title); - } - } else if (wikiTokenizer.isNewline() || wikiTokenizer.isMarkup() || wikiTokenizer.isComment()) { - } else { - final String token = wikiTokenizer.token(); - if (token.equals("----")) { - } else { - LOG.warning("Unexpected translation token: " + wikiTokenizer.token() + ", title=" + title); - incrementCount("WARNING: Unexpected translation token" ); - } } - - } - } - - private void doTranslationLine(final String line, final String lang, final String pos, final String sense, final String rest) { - state = State.TRANSLATION_LINE; - // Good chance we'll actually file this one... - final PairEntry pairEntry = new PairEntry(entrySource); - final IndexedEntry indexedEntry = new IndexedEntry(pairEntry); - - final StringBuilder foreignText = new StringBuilder(); - appendAndIndexWikiCallback.reset(foreignText, indexedEntry); - appendAndIndexWikiCallback.dispatch(rest, foreignIndexBuilder, EntryTypeName.WIKTIONARY_TRANSLATION_OTHER_TEXT); - - if (foreignText.length() == 0) { - LOG.warning("Empty foreignText: " + line); - incrementCount("WARNING: Empty foreignText" ); - return; - } - - if (lang != null) { - foreignText.insert(0, String.format("(%s) ", lang)); - } - - StringBuilder englishText = new StringBuilder(); - - englishText.append(title); - if (sense != null) { - englishText.append(" (").append(sense).append(")"); - enIndexBuilder.addEntryWithString(indexedEntry, sense, EntryTypeName.WIKTIONARY_TRANSLATION_SENSE); - } - if (pos != null) { - englishText.append(" (").append(pos.toLowerCase()).append(")"); - } - enIndexBuilder.addEntryWithString(indexedEntry, title, EntryTypeName.WIKTIONARY_TITLE_MULTI); - - final Pair pair = new Pair(trim(englishText.toString()), trim(foreignText.toString()), swap); - pairEntry.pairs.add(pair); - if (!pairsAdded.add(pair.toString())) { - LOG.warning("Duplicate pair: " + pair.toString()); - incrementCount("WARNING: Duplicate pair" ); - } - } - } // EnToTranslationParser - - // ----------------------------------------------------------------------- - - - static final class ForeignParser extends EnParser { - ForeignParser(final IndexBuilder enIndexBuilder, - final IndexBuilder otherIndexBuilder, final Pattern langPattern, - final Pattern langCodePattern, final boolean swap) { - super(enIndexBuilder, otherIndexBuilder, langPattern, langCodePattern, swap); } - @Override - void parseSection(String heading, String text) { - if (isIgnorableTitle(title)) { - return; - } - final String lang = heading.replaceAll("=", "").trim(); - if (!langPattern.matcher(lang).find()){ - return; - } - - final WikiTokenizer wikiTokenizer = new WikiTokenizer(text); - while (wikiTokenizer.nextToken() != null) { - if (wikiTokenizer.isHeading()) { - final String headingName = wikiTokenizer.headingWikiText(); - if (headingName.equals("Translations")) { - LOG.warning("Translations not in English section: " + title); - } else if (headingName.equals("Pronunciation")) { - //doPronunciation(wikiLineReader); - } else if (partOfSpeechHeader.matcher(headingName).matches()) { - doForeignPartOfSpeech(lang, headingName, wikiTokenizer.headingDepth(), wikiTokenizer); - } - } else { - // It's not a heading. - // TODO: optimization: skip to next heading. - } - } - } - - static final class ListSection { - final String firstPrefix; - final String firstLine; - final List nextPrefixes = new ArrayList(); - final List nextLines = new ArrayList(); - - public ListSection(String firstPrefix, String firstLine) { - this.firstPrefix = firstPrefix; - this.firstLine = firstLine; - } - - @Override - public String toString() { - return firstPrefix + firstLine + "{ " + nextPrefixes + "}"; - } - } - - int foreignCount = 0; - private void doForeignPartOfSpeech(final String lang, String posHeading, final int posDepth, WikiTokenizer wikiTokenizer) { - if (++foreignCount % 1000 == 0) { - LOG.info("***" + lang + ", " + title + ", pos=" + posHeading + ", foreignCount=" + foreignCount); - } - if (title.equals("6")) { - System.out.println(); - } - - final StringBuilder foreignBuilder = new StringBuilder(); - final List listSections = new ArrayList(); - - appendAndIndexWikiCallback.reset(foreignBuilder, null); - this.state = State.ENGLISH_DEF_OF_FOREIGN; // TODO: this is wrong, need new category.... - titleAppended = false; - wordForms.clear(); - - try { - - ListSection lastListSection = null; - - int currentHeadingDepth = posDepth; - while (wikiTokenizer.nextToken() != null) { - if (wikiTokenizer.isHeading()) { - currentHeadingDepth = wikiTokenizer.headingDepth(); - - if (currentHeadingDepth <= posDepth) { - wikiTokenizer.returnToLineStart(); - return; - } - } - - if (currentHeadingDepth > posDepth) { - // TODO: deal with other neat info sections - continue; - } - - if (wikiTokenizer.isFunction()) { - final String name = wikiTokenizer.functionName(); - final List args = wikiTokenizer.functionPositionArgs(); - final Map namedArgs = wikiTokenizer.functionNamedArgs(); - // First line is generally a repeat of the title with some extra information. - // We need to build up the left side (foreign text, tokens) separately from the - // right side (English). The left-side may get paired with multiple right sides. - // The left side should get filed under every form of the word in question (singular, plural). - - // For verbs, the conjugation comes later on in a deeper section. - // Ideally, we'd want to file every English entry with the verb - // under every verb form coming from the conjugation. - // Ie. under "fa": see: "make :: fare" and "do :: fare" - // But then where should we put the conjugation table? - // I think just under fare. But then we need a way to link to the entry (actually the row, since entries doesn't show up!) - // for the conjugation table from "fa". - // Would like to be able to link to a lang#token. - - appendAndIndexWikiCallback.onFunction(wikiTokenizer, name, args, namedArgs); - - } else if (wikiTokenizer.isListItem()) { - final String prefix = wikiTokenizer.listItemPrefix(); - if (lastListSection != null && - prefix.startsWith(lastListSection.firstPrefix) && - prefix.length() > lastListSection.firstPrefix.length()) { - lastListSection.nextPrefixes.add(prefix); - lastListSection.nextLines.add(wikiTokenizer.listItemWikiText()); - } else { - lastListSection = new ListSection(prefix, wikiTokenizer.listItemWikiText()); - listSections.add(lastListSection); - } - } else if (lastListSection != null) { - // Don't append anything after the lists, because there's crap. - } else if (wikiTokenizer.isWikiLink()) { - // Unindexed! - foreignBuilder.append(wikiTokenizer.wikiLinkText()); - - } else if (wikiTokenizer.isPlainText()) { - // Unindexed! - foreignBuilder.append(wikiTokenizer.token()); - - } else if (wikiTokenizer.isMarkup() || wikiTokenizer.isNewline() || wikiTokenizer.isComment()) { - // Do nothing. - } else { - LOG.warning("Unexpected token: " + wikiTokenizer.token()); - } - } - - } finally { - // Here's where we exit. - // Should we make an entry even if there are no foreign list items? - String foreign = foreignBuilder.toString().trim(); - if (!titleAppended && !foreign.toLowerCase().startsWith(title.toLowerCase())) { - foreign = String.format("%s %s", title, foreign); - } - if (!langPattern.matcher(lang).matches()) { - foreign = String.format("(%s) %s", lang, foreign); - } - for (final ListSection listSection : listSections) { - doForeignListSection(foreign, title, wordForms, listSection); - } - } - } - - - - private void doForeignListSection(final String foreignText, String title, final Collection forms, final ListSection listSection) { - state = State.ENGLISH_DEF_OF_FOREIGN; - final String prefix = listSection.firstPrefix; - if (prefix.length() > 1) { - // Could just get looser and say that any prefix longer than first is a sublist. - LOG.warning("Prefix too long: " + listSection); - return; - } - - final PairEntry pairEntry = new PairEntry(entrySource); - final IndexedEntry indexedEntry = new IndexedEntry(pairEntry); - - entryIsFormOfSomething = false; - final StringBuilder englishBuilder = new StringBuilder(); - final String mainLine = listSection.firstLine; - appendAndIndexWikiCallback.reset(englishBuilder, indexedEntry); - appendAndIndexWikiCallback.dispatch(mainLine, enIndexBuilder, EntryTypeName.WIKTIONARY_ENGLISH_DEF); - - final String english = trim(englishBuilder.toString()); - if (english.length() > 0) { - final Pair pair = new Pair(english, trim(foreignText), this.swap); - pairEntry.pairs.add(pair); - foreignIndexBuilder.addEntryWithString(indexedEntry, title, entryIsFormOfSomething ? EntryTypeName.WIKTIONARY_IS_FORM_OF_SOMETHING_ELSE : EntryTypeName.WIKTIONARY_TITLE_MULTI); - for (final String form : forms) { - foreignIndexBuilder.addEntryWithString(indexedEntry, form, EntryTypeName.WIKTIONARY_INFLECTED_FORM_MULTI); - } - } - - // Do examples. - String lastForeign = null; - for (int i = 0; i < listSection.nextPrefixes.size(); ++i) { - final String nextPrefix = listSection.nextPrefixes.get(i); - final String nextLine = listSection.nextLines.get(i); - - // TODO: This splitting is not sensitive to wiki code. - int dash = nextLine.indexOf("—"); - int mdashLen = 7; - if (dash == -1) { - dash = nextLine.indexOf("—"); - mdashLen = 1; - } - if (dash == -1) { - dash = nextLine.indexOf(" - "); - mdashLen = 3; - } - - if ((nextPrefix.equals("#:") || nextPrefix.equals("##:")) && dash != -1) { - final String foreignEx = nextLine.substring(0, dash); - final String englishEx = nextLine.substring(dash + mdashLen); - final Pair pair = new Pair(formatAndIndexExampleString(englishEx, enIndexBuilder, indexedEntry), formatAndIndexExampleString(foreignEx, foreignIndexBuilder, indexedEntry), swap); - if (pair.lang1 != "--" && pair.lang1 != "--") { - pairEntry.pairs.add(pair); - } - lastForeign = null; - } else if (nextPrefix.equals("#:") || nextPrefix.equals("##:")){ - final Pair pair = new Pair("--", formatAndIndexExampleString(nextLine, null, indexedEntry), swap); - lastForeign = nextLine; - if (pair.lang1 != "--" && pair.lang1 != "--") { - pairEntry.pairs.add(pair); - } - } else if (nextPrefix.equals("#::") || nextPrefix.equals("#**")) { - if (lastForeign != null && pairEntry.pairs.size() > 0) { - pairEntry.pairs.remove(pairEntry.pairs.size() - 1); - final Pair pair = new Pair(formatAndIndexExampleString(nextLine, enIndexBuilder, indexedEntry), formatAndIndexExampleString(lastForeign, foreignIndexBuilder, indexedEntry), swap); - if (pair.lang1 != "--" || pair.lang2 != "--") { - pairEntry.pairs.add(pair); - } - lastForeign = null; - } else { - LOG.warning("TODO: English example with no foreign: " + title + ", " + nextLine); - final Pair pair = new Pair("--", formatAndIndexExampleString(nextLine, null, indexedEntry), swap); - if (pair.lang1 != "--" || pair.lang2 != "--") { - pairEntry.pairs.add(pair); - } - } - } else if (nextPrefix.equals("#*")) { - // Can't really index these. - final Pair pair = new Pair("--", formatAndIndexExampleString(nextLine, null, indexedEntry), swap); - lastForeign = nextLine; - if (pair.lang1 != "--" || pair.lang2 != "--") { - pairEntry.pairs.add(pair); - } - } else if (nextPrefix.equals("#::*") || nextPrefix.equals("##") || nextPrefix.equals("#*:") || nextPrefix.equals("#:*") || true) { - final Pair pair = new Pair("--", formatAndIndexExampleString(nextLine, null, indexedEntry), swap); - if (pair.lang1 != "--" || pair.lang2 != "--") { - pairEntry.pairs.add(pair); - } -// } else { -// assert false; - } - } - } - - private String formatAndIndexExampleString(final String example, final IndexBuilder indexBuilder, final IndexedEntry indexedEntry) { - // TODO: -// if (wikiTokenizer.token().equals("'''")) { -// insideTripleQuotes = !insideTripleQuotes; -// } - final StringBuilder builder = new StringBuilder(); - appendAndIndexWikiCallback.reset(builder, indexedEntry); - appendAndIndexWikiCallback.entryTypeName = EntryTypeName.WIKTIONARY_EXAMPLE; - appendAndIndexWikiCallback.entryTypeNameSticks = true; - try { - // TODO: this is a hack needed because we don't safely split on the dash. - appendAndIndexWikiCallback.dispatch(example, indexBuilder, EntryTypeName.WIKTIONARY_EXAMPLE); - } catch (AssertionError e) { - return "--"; - } - final String result = trim(builder.toString()); - return result.length() > 0 ? result : "--"; - } - - - private void itConjAre(List args, Map namedArgs) { - final String base = args.get(0); - final String aux = args.get(1); - - putIfMissing(namedArgs, "inf", base + "are"); - putIfMissing(namedArgs, "aux", aux); - putIfMissing(namedArgs, "ger", base + "ando"); - putIfMissing(namedArgs, "presp", base + "ante"); - putIfMissing(namedArgs, "pastp", base + "ato"); - // Present - putIfMissing(namedArgs, "pres1s", base + "o"); - putIfMissing(namedArgs, "pres2s", base + "i"); - putIfMissing(namedArgs, "pres3s", base + "a"); - putIfMissing(namedArgs, "pres1p", base + "iamo"); - putIfMissing(namedArgs, "pres2p", base + "ate"); - putIfMissing(namedArgs, "pres3p", base + "ano"); - // Imperfect - putIfMissing(namedArgs, "imperf1s", base + "avo"); - putIfMissing(namedArgs, "imperf2s", base + "avi"); - putIfMissing(namedArgs, "imperf3s", base + "ava"); - putIfMissing(namedArgs, "imperf1p", base + "avamo"); - putIfMissing(namedArgs, "imperf2p", base + "avate"); - putIfMissing(namedArgs, "imperf3p", base + "avano"); - // Passato remoto - putIfMissing(namedArgs, "prem1s", base + "ai"); - putIfMissing(namedArgs, "prem2s", base + "asti"); - putIfMissing(namedArgs, "prem3s", base + "ò"); - putIfMissing(namedArgs, "prem1p", base + "ammo"); - putIfMissing(namedArgs, "prem2p", base + "aste"); - putIfMissing(namedArgs, "prem3p", base + "arono"); - // Future - putIfMissing(namedArgs, "fut1s", base + "erò"); - putIfMissing(namedArgs, "fut2s", base + "erai"); - putIfMissing(namedArgs, "fut3s", base + "erà"); - putIfMissing(namedArgs, "fut1p", base + "eremo"); - putIfMissing(namedArgs, "fut2p", base + "erete"); - putIfMissing(namedArgs, "fut3p", base + "eranno"); - // Conditional - putIfMissing(namedArgs, "cond1s", base + "erei"); - putIfMissing(namedArgs, "cond2s", base + "eresti"); - putIfMissing(namedArgs, "cond3s", base + "erebbe"); - putIfMissing(namedArgs, "cond1p", base + "eremmo"); - putIfMissing(namedArgs, "cond2p", base + "ereste"); - putIfMissing(namedArgs, "cond3p", base + "erebbero"); - // Subjunctive / congiuntivo - putIfMissing(namedArgs, "sub123s", base + "i"); - putIfMissing(namedArgs, "sub1p", base + "iamo"); - putIfMissing(namedArgs, "sub2p", base + "iate"); - putIfMissing(namedArgs, "sub3p", base + "ino"); - // Imperfect subjunctive - putIfMissing(namedArgs, "impsub12s", base + "assi"); - putIfMissing(namedArgs, "impsub3s", base + "asse"); - putIfMissing(namedArgs, "impsub1p", base + "assimo"); - putIfMissing(namedArgs, "impsub2p", base + "aste"); - putIfMissing(namedArgs, "impsub3p", base + "assero"); - // Imperative - putIfMissing(namedArgs, "imp2s", base + "a"); - putIfMissing(namedArgs, "imp3s", base + "i"); - putIfMissing(namedArgs, "imp1p", base + "iamo"); - putIfMissing(namedArgs, "imp2p", base + "ate"); - putIfMissing(namedArgs, "imp3p", base + "ino"); - - - itConj(args, namedArgs); - } - - - private void itConj(List args, Map namedArgs) { - // TODO Auto-generated method stub - - } - - - private static void putIfMissing(final Map namedArgs, final String key, - final String value) { - final String oldValue = namedArgs.get(key); - if (oldValue == null || oldValue.length() == 0) { - namedArgs.put(key, value); - } - } - - // TODO: check how ='' and =| are manifested.... - // TODO: get this right in -are - private static void putOrNullify(final Map namedArgs, final String key, - final String value) { - final String oldValue = namedArgs.get(key); - if (oldValue == null/* || oldValue.length() == 0*/) { - namedArgs.put(key, value); - } else { - if (oldValue.equals("''")) { - namedArgs.put(key, ""); - } - } - } - - } // ForeignParser - - // ----------------------------------------------------------------------- - - static class FunctionCallbacks { - - static final Map> DEFAULT = new LinkedHashMap>(); - - static { - FunctionCallback callback = new TranslationCallback(); - DEFAULT.put("t", callback); - DEFAULT.put("t+", callback); - DEFAULT.put("t-", callback); - DEFAULT.put("tø", callback); - DEFAULT.put("apdx-t", callback); - - callback = new EncodingCallback(); - Set encodings = new LinkedHashSet(Arrays.asList( - "zh-ts", "zh-tsp", - "sd-Arab", "ku-Arab", "Arab", "unicode", "Laoo", "ur-Arab", "Thai", - "fa-Arab", "Khmr", "Cyrl", "IPAchar", "ug-Arab", "ko-inline", - "Jpan", "Kore", "Hebr", "rfscript", "Beng", "Mong", "Knda", "Cyrs", - "yue-tsj", "Mlym", "Tfng", "Grek", "yue-yue-j")); - for (final String encoding : encodings) { - DEFAULT.put(encoding, callback); - } - - callback = new l_term(); - DEFAULT.put("l", callback); - DEFAULT.put("term", callback); - - callback = new Gender(); - DEFAULT.put("m", callback); - DEFAULT.put("f", callback); - DEFAULT.put("n", callback); - DEFAULT.put("p", callback); - DEFAULT.put("g", callback); - - callback = new AppendArg0(); - - callback = new Ignore(); - DEFAULT.put("trreq", callback); - DEFAULT.put("t-image", callback); - DEFAULT.put("defn", callback); - DEFAULT.put("rfdef", callback); - DEFAULT.put("rfdate", callback); - DEFAULT.put("rfex", callback); - DEFAULT.put("rfquote", callback); - DEFAULT.put("attention", callback); - DEFAULT.put("zh-attention", callback); - - - callback = new FormOf(); - DEFAULT.put("form of", callback); - DEFAULT.put("conjugation of", callback); - DEFAULT.put("participle of", callback); - DEFAULT.put("present participle of", callback); - DEFAULT.put("past participle of", callback); - DEFAULT.put("feminine past participle of", callback); - DEFAULT.put("gerund of", callback); - DEFAULT.put("feminine of", callback); - DEFAULT.put("plural of", callback); - DEFAULT.put("feminine plural of", callback); - DEFAULT.put("inflected form of", callback); - DEFAULT.put("alternative form of", callback); - DEFAULT.put("dated form of", callback); - DEFAULT.put("apocopic form of", callback); - - callback = new InflOrHead(); - DEFAULT.put("infl", callback); - DEFAULT.put("head", callback); - - callback = new AppendName(); - DEFAULT.put("...", callback); - - DEFAULT.put("qualifier", new QualifierCallback()); - DEFAULT.put("italbrac", new italbrac()); - DEFAULT.put("gloss", new gloss()); - DEFAULT.put("not used", new not_used()); - DEFAULT.put("wikipedia", new wikipedia()); - } - - static final NameAndArgs NAME_AND_ARGS = new NameAndArgs(); - - // ------------------------------------------------------------------ - - static final class TranslationCallback implements FunctionCallback { - @Override - public boolean onWikiFunction(final WikiTokenizer wikiTokenizer, final String name, final List args, - final Map namedArgs, final EnParser parser, - final AppendAndIndexWikiCallback appendAndIndexWikiCallback) { - - final String transliteration = namedArgs.remove("tr"); - final String alt = namedArgs.remove("alt"); - namedArgs.keySet().removeAll(USELESS_WIKI_ARGS); - if (args.size() < 2) { - LOG.warning("{{t...}} with wrong args: title=" + parser.title); - return false; - } - final String langCode = ListUtil.get(args, 0); - if (!appendAndIndexWikiCallback.langCodeToTCount.containsKey(langCode)) { - appendAndIndexWikiCallback.langCodeToTCount.put(langCode, new AtomicInteger()); - } - appendAndIndexWikiCallback.langCodeToTCount.get(langCode).incrementAndGet(); - final String word = ListUtil.get(args, 1); - appendAndIndexWikiCallback.dispatch(alt != null ? alt : word, EntryTypeName.WIKTIONARY_TITLE_MULTI); - - // Genders... - if (args.size() > 2) { - appendAndIndexWikiCallback.builder.append(" {"); - for (int i = 2; i < args.size(); ++i) { - if (i > 2) { - appendAndIndexWikiCallback.builder.append("|"); - } - appendAndIndexWikiCallback.builder.append(args.get(i)); - } - appendAndIndexWikiCallback.builder.append("}"); - } - - if (transliteration != null) { - appendAndIndexWikiCallback.builder.append(" ("); - appendAndIndexWikiCallback.dispatch(transliteration, EntryTypeName.WIKTIONARY_TRANSLITERATION); - appendAndIndexWikiCallback.builder.append(")"); - } - - if (alt != null) { - // If alt wasn't null, we appended alt instead of the actual word - // we're filing under.. - appendAndIndexWikiCallback.builder.append(" ("); - appendAndIndexWikiCallback.dispatch(word, EntryTypeName.WIKTIONARY_TITLE_MULTI); - appendAndIndexWikiCallback.builder.append(")"); - } - - // Catch-all for anything else... - if (!namedArgs.isEmpty()) { - appendAndIndexWikiCallback.builder.append(" {"); - appendNamedArgs(namedArgs, appendAndIndexWikiCallback); - appendAndIndexWikiCallback.builder.append("}"); - } - - return true; - } - } - - // ------------------------------------------------------------------ - - static final class QualifierCallback implements FunctionCallback { - @Override - public boolean onWikiFunction(final WikiTokenizer wikiTokenizer, final String name, final List args, - final Map namedArgs, - final EnParser parser, - final AppendAndIndexWikiCallback appendAndIndexWikiCallback) { - if (args.size() != 1 || !namedArgs.isEmpty()) { - LOG.warning("weird qualifier: "); - return false; - } - String qualifier = args.get(0); - appendAndIndexWikiCallback.builder.append("("); - appendAndIndexWikiCallback.dispatch(qualifier, null); - appendAndIndexWikiCallback.builder.append(")"); - return true; - } - } - - // ------------------------------------------------------------------ - - static final class EncodingCallback implements FunctionCallback { - @Override - public boolean onWikiFunction(final WikiTokenizer wikiTokenizer, final String name, final List args, - final Map namedArgs, - final EnParser parser, - final AppendAndIndexWikiCallback appendAndIndexWikiCallback) { - if (!namedArgs.isEmpty()) { - LOG.warning("weird encoding: " + wikiTokenizer.token()); - } - if (args.size() == 0) { - // Things like "{{Jpan}}" exist. - return true; - } - - for (int i = 0; i < args.size(); ++i) { - if (i > 0) { - appendAndIndexWikiCallback.builder.append(", "); - } - final String arg = args.get(i); -// if (arg.equals(parser.title)) { -// parser.titleAppended = true; -// } - appendAndIndexWikiCallback.dispatch(arg, appendAndIndexWikiCallback.entryTypeName); - } - - return true; - } - } - - // ------------------------------------------------------------------ - - static final class Gender implements FunctionCallback { - @Override - public boolean onWikiFunction(final WikiTokenizer wikiTokenizer, final String name, final List args, - final Map namedArgs, - final EnParser parser, - final AppendAndIndexWikiCallback appendAndIndexWikiCallback) { - if (!namedArgs.isEmpty()) { - return false; - } - appendAndIndexWikiCallback.builder.append("{"); - appendAndIndexWikiCallback.builder.append(name); - for (int i = 0; i < args.size(); ++i) { - appendAndIndexWikiCallback.builder.append("|").append(args.get(i)); - } - appendAndIndexWikiCallback.builder.append("}"); - return true; - } - } - - // ------------------------------------------------------------------ - - static final class l_term implements FunctionCallback { - @Override - public boolean onWikiFunction(final WikiTokenizer wikiTokenizer, final String name, final List args, - final Map namedArgs, - final EnParser parser, - final AppendAndIndexWikiCallback appendAndIndexWikiCallback) { - - // for {{l}}, lang is arg 0, but not for {{term}} - if (name.equals("term")) { - args.add(0, ""); - } - - final EntryTypeName entryTypeName; - switch (parser.state) { - case TRANSLATION_LINE: entryTypeName = EntryTypeName.WIKTIONARY_TRANSLATION_OTHER_TEXT; break; - case ENGLISH_DEF_OF_FOREIGN: entryTypeName = EntryTypeName.WIKTIONARY_ENGLISH_DEF_WIKI_LINK; break; - default: throw new IllegalStateException("Invalid enum value: " + parser.state); - } - - final String langCode = args.get(0); - final IndexBuilder indexBuilder; - if ("".equals(langCode)) { - indexBuilder = parser.foreignIndexBuilder; - } else if ("en".equals(langCode)) { - indexBuilder = parser.enIndexBuilder; - } else { - indexBuilder = parser.foreignIndexBuilder; - } - - String displayText = ListUtil.get(args, 2, ""); - if (displayText.equals("")) { - displayText = ListUtil.get(args, 1, null); - } - - if (displayText != null) { - appendAndIndexWikiCallback.dispatch(displayText, indexBuilder, entryTypeName); - } else { - LOG.warning("no display text: " + wikiTokenizer.token()); - } - - final String tr = namedArgs.remove("tr"); - if (tr != null) { - appendAndIndexWikiCallback.builder.append(" ("); - appendAndIndexWikiCallback.dispatch(tr, indexBuilder, EntryTypeName.WIKTIONARY_TRANSLITERATION); - appendAndIndexWikiCallback.builder.append(")"); - } - - final String gloss = ListUtil.get(args, 3, ""); - if (!gloss.equals("")) { - appendAndIndexWikiCallback.builder.append(" ("); - appendAndIndexWikiCallback.dispatch(gloss, parser.enIndexBuilder, EntryTypeName.WIKTIONARY_ENGLISH_DEF); - appendAndIndexWikiCallback.builder.append(")"); - } - - namedArgs.keySet().removeAll(USELESS_WIKI_ARGS); - if (!namedArgs.isEmpty()) { - appendAndIndexWikiCallback.builder.append(" {").append(name); - appendNamedArgs(namedArgs, appendAndIndexWikiCallback); - appendAndIndexWikiCallback.builder.append("}"); - } - - return true; - } - } - - // ------------------------------------------------------------------ - - static final class AppendArg0 implements FunctionCallback { - @Override - public boolean onWikiFunction(final WikiTokenizer wikiTokenizer, final String name, final List args, - final Map namedArgs, - final EnParser parser, - final AppendAndIndexWikiCallback appendAndIndexWikiCallback) { - if (args.size() != 1 || !namedArgs.isEmpty()) { - return false; - } - appendAndIndexWikiCallback.dispatch(args.get(0), EntryTypeName.WIKTIONARY_TRANSLATION_OTHER_TEXT); - // TODO: transliteration - return true; - } - } - - // ------------------------------------------------------------------ - - static final class italbrac implements FunctionCallback { - @Override - public boolean onWikiFunction(final WikiTokenizer wikiTokenizer, final String name, final List args, - final Map namedArgs, - final EnParser parser, - final AppendAndIndexWikiCallback appendAndIndexWikiCallback) { - if (args.size() != 1 || !namedArgs.isEmpty()) { - return false; - } - appendAndIndexWikiCallback.builder.append("("); - appendAndIndexWikiCallback.dispatch(args.get(0), EntryTypeName.WIKTIONARY_TRANSLATION_OTHER_TEXT); - appendAndIndexWikiCallback.builder.append(")"); - return true; - } - } - - // ------------------------------------------------------------------ - - static final class gloss implements FunctionCallback { - @Override - public boolean onWikiFunction(final WikiTokenizer wikiTokenizer, final String name, final List args, - final Map namedArgs, - final EnParser parser, - final AppendAndIndexWikiCallback appendAndIndexWikiCallback) { - if (args.size() != 1 || !namedArgs.isEmpty()) { - return false; - } - appendAndIndexWikiCallback.builder.append("("); - appendAndIndexWikiCallback.dispatch(args.get(0), EntryTypeName.WIKTIONARY_TRANSLATION_OTHER_TEXT); - appendAndIndexWikiCallback.builder.append(")"); - return true; - } - } - - // ------------------------------------------------------------------ - - static final class Ignore implements FunctionCallback { - @Override - public boolean onWikiFunction(final WikiTokenizer wikiTokenizer, final String name, final List args, - final Map namedArgs, - final EnParser parser, - final AppendAndIndexWikiCallback appendAndIndexWikiCallback) { - return true; - } - } - - // ------------------------------------------------------------------ - - static final class not_used implements FunctionCallback { - @Override - public boolean onWikiFunction(final WikiTokenizer wikiTokenizer, final String name, final List args, - final Map namedArgs, - final EnParser parser, - final AppendAndIndexWikiCallback appendAndIndexWikiCallback) { - appendAndIndexWikiCallback.builder.append("(not used)"); - return true; - } - } - - - // ------------------------------------------------------------------ - - static final class AppendName implements FunctionCallback { - @Override - public boolean onWikiFunction(final WikiTokenizer wikiTokenizer, final String name, final List args, - final Map namedArgs, - final EnParser parser, - final AppendAndIndexWikiCallback appendAndIndexWikiCallback) { - if (!args.isEmpty() || !namedArgs.isEmpty()) { - return false; - } - appendAndIndexWikiCallback.builder.append(name); - return true; - } - } - - // -------------------------------------------------------------------- - // -------------------------------------------------------------------- - - - static final class FormOf implements FunctionCallback { - @Override - public boolean onWikiFunction(final WikiTokenizer wikiTokenizer, final String name, final List args, - final Map namedArgs, - final EnParser parser, - final AppendAndIndexWikiCallback appendAndIndexWikiCallback) { - parser.entryIsFormOfSomething = true; - String formName = name; - if (name.equals("form of")) { - formName = ListUtil.remove(args, 0, null); - } - if (formName == null) { - LOG.warning("Missing form name: " + parser.title); - formName = "form of"; - } - String baseForm = ListUtil.get(args, 1, ""); - if ("".equals(baseForm)) { - baseForm = ListUtil.get(args, 0, null); - ListUtil.remove(args, 1, ""); - } else { - ListUtil.remove(args, 0, null); - } - namedArgs.keySet().removeAll(USELESS_WIKI_ARGS); - - appendAndIndexWikiCallback.builder.append("{"); - NAME_AND_ARGS.onWikiFunction(wikiTokenizer, formName, args, namedArgs, parser, appendAndIndexWikiCallback); - appendAndIndexWikiCallback.builder.append("}"); - if (baseForm != null && appendAndIndexWikiCallback.indexedEntry != null) { - parser.foreignIndexBuilder.addEntryWithString(appendAndIndexWikiCallback.indexedEntry, baseForm, EntryTypeName.WIKTIONARY_BASE_FORM_MULTI); - } else { - // null baseForm happens in Danish. - LOG.warning("Null baseform: " + parser.title); - } - return true; - } - } - - static final FormOf FORM_OF = new FormOf(); - - - // -------------------------------------------------------------------- - // -------------------------------------------------------------------- - - static final class wikipedia implements FunctionCallback { - @Override - public boolean onWikiFunction(final WikiTokenizer wikiTokenizer, final String name, final List args, - final Map namedArgs, - final EnParser parser, - final AppendAndIndexWikiCallback appendAndIndexWikiCallback) { - namedArgs.remove("lang"); - if (args.size() > 1 || !namedArgs.isEmpty()) { - // Unindexed! - return false; - } else if (args.size() == 1) { - return false; - } else { - return true; - } - } - } - - static final class InflOrHead implements FunctionCallback { - @Override - public boolean onWikiFunction(final WikiTokenizer wikiTokenizer, final String name, final List args, - final Map namedArgs, - final EnParser parser, - final AppendAndIndexWikiCallback appendAndIndexWikiCallback) { - // See: http://en.wiktionary.org/wiki/Template:infl - final String langCode = ListUtil.get(args, 0); - String head = namedArgs.remove("head"); - if (head == null) { - head = namedArgs.remove("title"); // Bug - } - if (head == null) { - head = parser.title; - } - parser.titleAppended = true; - - namedArgs.keySet().removeAll(USELESS_WIKI_ARGS); - - final String tr = namedArgs.remove("tr"); - String g = namedArgs.remove("g"); - if (g == null) { - g = namedArgs.remove("gender"); - } - final String g2 = namedArgs.remove("g2"); - final String g3 = namedArgs.remove("g3"); - - appendAndIndexWikiCallback.dispatch(head, EntryTypeName.WIKTIONARY_TITLE_MULTI); - - if (g != null) { - appendAndIndexWikiCallback.builder.append(" {").append(g); - if (g2 != null) { - appendAndIndexWikiCallback.builder.append("|").append(g2); - } - if (g3 != null) { - appendAndIndexWikiCallback.builder.append("|").append(g3); - } - appendAndIndexWikiCallback.builder.append("}"); - } - - if (tr != null) { - appendAndIndexWikiCallback.builder.append(" ("); - appendAndIndexWikiCallback.dispatch(tr, EntryTypeName.WIKTIONARY_TITLE_MULTI); - appendAndIndexWikiCallback.builder.append(")"); - parser.wordForms.add(tr); - } - - final String pos = ListUtil.get(args, 1); - if (pos != null) { - appendAndIndexWikiCallback.builder.append(" (").append(pos).append(")"); - } - for (int i = 2; i < args.size(); i += 2) { - final String inflName = ListUtil.get(args, i); - final String inflValue = ListUtil.get(args, i + 1); - appendAndIndexWikiCallback.builder.append(", "); - appendAndIndexWikiCallback.dispatch(inflName, null, null); - if (inflValue != null && inflValue.length() > 0) { - appendAndIndexWikiCallback.builder.append(": "); - appendAndIndexWikiCallback.dispatch(inflValue, null, null); - parser.wordForms.add(inflValue); - } - } - for (final String key : namedArgs.keySet()) { - final String value = WikiTokenizer.toPlainText(namedArgs.get(key)); - appendAndIndexWikiCallback.builder.append(" "); - appendAndIndexWikiCallback.dispatch(key, null, null); - appendAndIndexWikiCallback.builder.append("="); - appendAndIndexWikiCallback.dispatch(value, null, null); - parser.wordForms.add(value); - } - return true; - } - } - - - static { - DEFAULT.put("it-noun", new it_noun()); - } - static final class it_noun implements FunctionCallback { - @Override - public boolean onWikiFunction(final WikiTokenizer wikiTokenizer, final String name, final List args, - final Map namedArgs, - final EnParser parser, - final AppendAndIndexWikiCallback appendAndIndexWikiCallback) { - parser.titleAppended = true; - final String base = ListUtil.get(args, 0); - final String gender = ListUtil.get(args, 1); - final String singular = base + ListUtil.get(args, 2, null); - final String plural = base + ListUtil.get(args, 3, null); - appendAndIndexWikiCallback.builder.append(" "); - appendAndIndexWikiCallback.dispatch(singular, null, null); - appendAndIndexWikiCallback.builder.append(" {").append(gender).append("}, "); - appendAndIndexWikiCallback.dispatch(plural, null, null); - appendAndIndexWikiCallback.builder.append(" {pl}"); - parser.wordForms.add(singular); - parser.wordForms.add(plural); - if (!namedArgs.isEmpty() || args.size() > 4) { - LOG.warning("Invalid it-noun: " + wikiTokenizer.token()); - } - return true; - } - } - - static { - DEFAULT.put("it-proper noun", new it_proper_noun()); - } - static final class it_proper_noun implements FunctionCallback { - @Override - public boolean onWikiFunction(final WikiTokenizer wikiTokenizer, final String name, final List args, - final Map namedArgs, - final EnParser parser, - final AppendAndIndexWikiCallback appendAndIndexWikiCallback) { - return false; - } - } - - } - - }