X-Git-Url: http://gitweb.fperrin.net/?a=blobdiff_plain;f=src%2Fcom%2Fhughes%2Fandroid%2Fdictionary%2Fparser%2Fwiktionary%2FEnForeignParser.java;h=8954f3784472fe328bebffeb556f1e1357ad2c2b;hb=2fc669d88306d563fc9c899d8d91b25d591692ea;hp=7dd933e38815ee1d595fb50777653f617120c705;hpb=8df3f0a9fdbae32221cc7d552b70f80f40872d0c;p=DictionaryPC.git diff --git a/src/com/hughes/android/dictionary/parser/wiktionary/EnForeignParser.java b/src/com/hughes/android/dictionary/parser/wiktionary/EnForeignParser.java index 7dd933e..8954f37 100644 --- a/src/com/hughes/android/dictionary/parser/wiktionary/EnForeignParser.java +++ b/src/com/hughes/android/dictionary/parser/wiktionary/EnForeignParser.java @@ -24,318 +24,317 @@ import com.hughes.android.dictionary.engine.EntryTypeName; import com.hughes.android.dictionary.engine.IndexBuilder; import com.hughes.android.dictionary.engine.IndexedEntry; import com.hughes.android.dictionary.engine.PairEntry; -import com.hughes.android.dictionary.engine.PairEntry.Pair; import com.hughes.android.dictionary.parser.WikiTokenizer; public final class EnForeignParser extends EnParser { public EnForeignParser(final IndexBuilder enIndexBuilder, - final IndexBuilder otherIndexBuilder, final Pattern langPattern, - final Pattern langCodePattern, final boolean swap) { - super(enIndexBuilder, otherIndexBuilder, langPattern, langCodePattern, swap); + final IndexBuilder otherIndexBuilder, final Pattern langPattern, + final Pattern langCodePattern, final boolean swap) { + super(enIndexBuilder, otherIndexBuilder, langPattern, langCodePattern, swap); } @Override void parseSection(String heading, String text) { - if (isIgnorableTitle(title)) { - return; - } - final String lang = heading.replace("=", "").trim(); - if (!langPattern.matcher(lang).find()){ - return; - } - - final WikiTokenizer wikiTokenizer = new WikiTokenizer(text); - while (wikiTokenizer.nextToken() != null) { - if (wikiTokenizer.isHeading()) { - final String headingName = wikiTokenizer.headingWikiText(); - if (headingName.equals("Translations")) { - LOG.warning("Translations not in English section: " + title); - incrementCount("WARNING: Translations not in English section"); - } else if (headingName.equals("Pronunciation")) { - //doPronunciation(wikiLineReader); - } else if (headingName.startsWith(" {{S|")) { - // HACK to support parsing frwiktionary - String[] parts = headingName.split("\\|"); - if (parts.length > 2 && langCodePattern.matcher(parts[2]).find() && - (parts.length < 4 || !parts[3].startsWith("flexion"))) { - doForeignPartOfSpeech(lang, headingName, wikiTokenizer.headingDepth(), wikiTokenizer); + if (isIgnorableTitle(title)) { + return; + } + final String lang = heading.replace("=", "").trim(); + if (!langPattern.matcher(lang).find()) { + return; + } + + final WikiTokenizer wikiTokenizer = new WikiTokenizer(text); + while (wikiTokenizer.nextToken() != null) { + if (wikiTokenizer.isHeading()) { + final String headingName = wikiTokenizer.headingWikiText(); + if (headingName.equals("Translations")) { + LOG.warning("Translations not in English section: " + title); + incrementCount("WARNING: Translations not in English section"); + } else if (headingName.equals("Pronunciation")) { + //doPronunciation(wikiLineReader); + } else if (headingName.startsWith(" {{S|")) { + // HACK to support parsing frwiktionary + String[] parts = headingName.split("\\|"); + if (parts.length > 2 && langCodePattern.matcher(parts[2]).find() && + (parts.length < 4 || !parts[3].startsWith("flexion"))) { + doForeignPartOfSpeech(lang, headingName, wikiTokenizer.headingDepth(), wikiTokenizer); + } + } else if (partOfSpeechHeader.matcher(headingName).matches()) { + doForeignPartOfSpeech(lang, headingName, wikiTokenizer.headingDepth(), wikiTokenizer); + } + } else { + // It's not a heading. + // TODO: optimization: skip to next heading. } - } else if (partOfSpeechHeader.matcher(headingName).matches()) { - doForeignPartOfSpeech(lang, headingName, wikiTokenizer.headingDepth(), wikiTokenizer); - } - } else { - // It's not a heading. - // TODO: optimization: skip to next heading. } - } } - + static final class ListSection { - final String firstPrefix; - final String firstLine; - final List nextPrefixes = new ArrayList(); - final List nextLines = new ArrayList(); - - public ListSection(String firstPrefix, String firstLine) { - this.firstPrefix = firstPrefix; - this.firstLine = firstLine; - } - - @Override - public String toString() { - return firstPrefix + firstLine + "{ " + nextPrefixes + "}"; - } + final String firstPrefix; + final String firstLine; + final List nextPrefixes = new ArrayList<>(); + final List nextLines = new ArrayList<>(); + + public ListSection(String firstPrefix, String firstLine) { + this.firstPrefix = firstPrefix; + this.firstLine = firstLine; + } + + @Override + public String toString() { + return firstPrefix + firstLine + "{ " + nextPrefixes + "}"; + } } int foreignCount = 0; private void doForeignPartOfSpeech(final String lang, String posHeading, final int posDepth, WikiTokenizer wikiTokenizer) { - if (++foreignCount % 1000 == 0) { - LOG.info("***" + lang + ", " + title + ", pos=" + posHeading + ", foreignCount=" + foreignCount); - } - if (title.equals("6")) { - System.out.println(); - } - - final StringBuilder foreignBuilder = new StringBuilder(); - final List listSections = new ArrayList(); - - appendAndIndexWikiCallback.reset(foreignBuilder, null); - this.state = State.ENGLISH_DEF_OF_FOREIGN; // TODO: this is wrong, need new category.... - titleAppended = false; - wordForms.clear(); - - try { - - EnForeignParser.ListSection lastListSection = null; - - int currentHeadingDepth = posDepth; - while (wikiTokenizer.nextToken() != null) { - if (wikiTokenizer.isHeading()) { - currentHeadingDepth = wikiTokenizer.headingDepth(); - - if (currentHeadingDepth <= posDepth) { - wikiTokenizer.returnToLineStart(); - return; - } - } // heading - - if (currentHeadingDepth > posDepth) { - // TODO: deal with other neat info sections inside POS - continue; - } - - if (wikiTokenizer.isFunction()) { - final String name = wikiTokenizer.functionName(); - final List args = wikiTokenizer.functionPositionArgs(); - final Map namedArgs = wikiTokenizer.functionNamedArgs(); - // First line is generally a repeat of the title with some extra information. - // We need to build up the left side (foreign text, tokens) separately from the - // right side (English). The left-side may get paired with multiple right sides. - // The left side should get filed under every form of the word in question (singular, plural). - - // For verbs, the conjugation comes later on in a deeper section. - // Ideally, we'd want to file every English entry with the verb - // under every verb form coming from the conjugation. - // Ie. under "fa": see: "make :: fare" and "do :: fare" - // But then where should we put the conjugation table? - // I think just under fare. But then we need a way to link to the entry (actually the row, since entries doesn't show up!) - // for the conjugation table from "fa". - // Would like to be able to link to a lang#token. - - - String head = namedArgs.remove("head"); - final String tr = namedArgs.remove("tr"); - if (head == null && tr != null && !titleAppended) { - head = title; - } - if (head != null) { - final String form = appendAndIndexWikiCallback.dispatch(head, EntryTypeName.WIKTIONARY_TITLE_MULTI); - wordForms.add(form); - appendAndIndexWikiCallback.builder.append(" "); - titleAppended = true; - } - if (tr != null) { - appendAndIndexWikiCallback.builder.append(" ("); - final String form = appendAndIndexWikiCallback.dispatch(tr, EntryTypeName.WIKTIONARY_TRANSLITERATION); - wordForms.add(form); - appendAndIndexWikiCallback.builder.append(") "); - } - - appendAndIndexWikiCallback.onFunction(wikiTokenizer, name, args, namedArgs); - - } else if (wikiTokenizer.isListItem()) { - final String prefix = wikiTokenizer.listItemPrefix(); - if (lastListSection != null && - prefix.startsWith(lastListSection.firstPrefix) && - prefix.length() > lastListSection.firstPrefix.length()) { - lastListSection.nextPrefixes.add(prefix); - lastListSection.nextLines.add(wikiTokenizer.listItemWikiText()); - } else { - lastListSection = new ListSection(prefix, wikiTokenizer.listItemWikiText()); - listSections.add(lastListSection); - } - } else if (lastListSection != null) { - // Don't append anything after the lists, because there's crap. - } else if (wikiTokenizer.isWikiLink()) { - // Unindexed! - foreignBuilder.append(wikiTokenizer.wikiLinkText()); - - } else if (wikiTokenizer.isPlainText()) { - // Unindexed! - foreignBuilder.append(wikiTokenizer.token()); - } else if (wikiTokenizer.isHtml()) { - if (!wikiTokenizer.token().startsWith("")) { - foreignBuilder.append(wikiTokenizer.token()); - } - } else if (wikiTokenizer.isMarkup() || - wikiTokenizer.isNewline() || - wikiTokenizer.isComment()) { - // Do nothing. - } else { - LOG.warning("Unexpected token: " + wikiTokenizer.token()); - assert !wikiTokenizer.errors().isEmpty(); + if (++foreignCount % 1000 == 0) { + LOG.info("***" + lang + ", " + title + ", pos=" + posHeading + ", foreignCount=" + foreignCount); } - } - - } finally { - // Here's where we exit. - // Should we make an entry even if there are no foreign list items? - String foreign = foreignBuilder.toString().trim(); - if (!titleAppended && !foreign.toLowerCase().startsWith(title.toLowerCase())) { - foreign = String.format("%s %s", title, foreign); + if (title.equals("6")) { + System.out.println(); } - if (!langPattern.matcher(lang).matches()) { - foreign = String.format("(%s) %s", lang, foreign); - } - for (final EnForeignParser.ListSection listSection : listSections) { - doForeignListSection(foreign, title, wordForms, listSection); + + final StringBuilder foreignBuilder = new StringBuilder(); + final List listSections = new ArrayList<>(); + + appendAndIndexWikiCallback.reset(foreignBuilder, null); + this.state = State.ENGLISH_DEF_OF_FOREIGN; // TODO: this is wrong, need new category.... + titleAppended = false; + wordForms.clear(); + + try { + + EnForeignParser.ListSection lastListSection = null; + + int currentHeadingDepth = posDepth; + while (wikiTokenizer.nextToken() != null) { + if (wikiTokenizer.isHeading()) { + currentHeadingDepth = wikiTokenizer.headingDepth(); + + if (currentHeadingDepth <= posDepth) { + wikiTokenizer.returnToLineStart(); + return; + } + } // heading + + if (currentHeadingDepth > posDepth) { + // TODO: deal with other neat info sections inside POS + continue; + } + + if (wikiTokenizer.isFunction()) { + final String name = wikiTokenizer.functionName(); + final List args = wikiTokenizer.functionPositionArgs(); + final Map namedArgs = wikiTokenizer.functionNamedArgs(); + // First line is generally a repeat of the title with some extra information. + // We need to build up the left side (foreign text, tokens) separately from the + // right side (English). The left-side may get paired with multiple right sides. + // The left side should get filed under every form of the word in question (singular, plural). + + // For verbs, the conjugation comes later on in a deeper section. + // Ideally, we'd want to file every English entry with the verb + // under every verb form coming from the conjugation. + // Ie. under "fa": see: "make :: fare" and "do :: fare" + // But then where should we put the conjugation table? + // I think just under fare. But then we need a way to link to the entry (actually the row, since entries doesn't show up!) + // for the conjugation table from "fa". + // Would like to be able to link to a lang#token. + + + String head = namedArgs.remove("head"); + final String tr = namedArgs.remove("tr"); + if (head == null && tr != null && !titleAppended) { + head = title; + } + if (head != null) { + final String form = appendAndIndexWikiCallback.dispatch(head, EntryTypeName.WIKTIONARY_TITLE_MULTI); + wordForms.add(form); + appendAndIndexWikiCallback.builder.append(" "); + titleAppended = true; + } + if (tr != null) { + appendAndIndexWikiCallback.builder.append(" ("); + final String form = appendAndIndexWikiCallback.dispatch(tr, EntryTypeName.WIKTIONARY_TRANSLITERATION); + wordForms.add(form); + appendAndIndexWikiCallback.builder.append(") "); + } + + appendAndIndexWikiCallback.onFunction(wikiTokenizer, name, args, namedArgs); + + } else if (wikiTokenizer.isListItem()) { + final String prefix = wikiTokenizer.listItemPrefix(); + if (lastListSection != null && + prefix.startsWith(lastListSection.firstPrefix) && + prefix.length() > lastListSection.firstPrefix.length()) { + lastListSection.nextPrefixes.add(prefix); + lastListSection.nextLines.add(wikiTokenizer.listItemWikiText()); + } else { + lastListSection = new ListSection(prefix, wikiTokenizer.listItemWikiText()); + listSections.add(lastListSection); + } + } else if (lastListSection != null) { + // Don't append anything after the lists, because there's crap. + } else if (wikiTokenizer.isWikiLink()) { + // Unindexed! + foreignBuilder.append(wikiTokenizer.wikiLinkText()); + + } else if (wikiTokenizer.isPlainText()) { + // Unindexed! + foreignBuilder.append(wikiTokenizer.token()); + } else if (wikiTokenizer.isHtml()) { + if (!wikiTokenizer.token().startsWith("")) { + foreignBuilder.append(wikiTokenizer.token()); + } + } else if (wikiTokenizer.isMarkup() || + wikiTokenizer.isNewline() || + wikiTokenizer.isComment()) { + // Do nothing. + } else { + LOG.warning("Unexpected token: " + wikiTokenizer.token()); + assert !wikiTokenizer.errors().isEmpty(); + } + } + + } finally { + // Here's where we exit. + // Should we make an entry even if there are no foreign list items? + String foreign = foreignBuilder.toString().trim(); + if (!titleAppended && !foreign.toLowerCase().startsWith(title.toLowerCase())) { + foreign = String.format("%s %s", title, foreign); + } + if (!langPattern.matcher(lang).matches()) { + foreign = String.format("(%s) %s", lang, foreign); + } + for (final EnForeignParser.ListSection listSection : listSections) { + doForeignListSection(foreign, title, wordForms, listSection); + } } - } } - + private void doForeignListSection(final String foreignText, String title, final Collection forms, final EnForeignParser.ListSection listSection) { - state = State.ENGLISH_DEF_OF_FOREIGN; - final String prefix = listSection.firstPrefix; - if (prefix.length() > 1) { - // Could just get looser and say that any prefix longer than first is a sublist. - LOG.warning("Prefix '" + prefix + "' too long: " + listSection); - incrementCount("WARNING: Prefix too long"); - return; - } - - final PairEntry pairEntry = new PairEntry(entrySource); - final IndexedEntry indexedEntry = new IndexedEntry(pairEntry); - indexedEntry.isValid = true; - - entryIsFormOfSomething = false; - final StringBuilder englishBuilder = new StringBuilder(); - final String mainLine = listSection.firstLine; - appendAndIndexWikiCallback.reset(englishBuilder, indexedEntry); - appendAndIndexWikiCallback.dispatch(mainLine, enIndexBuilder, EntryTypeName.WIKTIONARY_ENGLISH_DEF); - - final String english = trim(englishBuilder.toString()); - if (english.length() > 0) { - final Pair pair = new Pair(english, trim(foreignText), this.swap); - pairEntry.pairs.add(pair); - foreignIndexBuilder.addEntryWithString(indexedEntry, title, entryIsFormOfSomething ? EntryTypeName.WIKTIONARY_IS_FORM_OF_SOMETHING_ELSE : EntryTypeName.WIKTIONARY_TITLE_MULTI); - for (final String form : forms) { - foreignIndexBuilder.addEntryWithString(indexedEntry, form, EntryTypeName.WIKTIONARY_INFLECTED_FORM_MULTI); - } - } - - // Do examples. - String lastForeign = null; - for (int i = 0; i < listSection.nextPrefixes.size(); ++i) { - final String nextPrefix = listSection.nextPrefixes.get(i); - String nextLine = listSection.nextLines.get(i); - - // TODO: This splitting is not sensitive to wiki code. - int dash = nextLine.indexOf("—"); - int mdashLen = 7; - if (dash == -1) { - dash = nextLine.indexOf("—"); - mdashLen = 1; - } - if (dash == -1) { - dash = nextLine.indexOf(" - "); - mdashLen = 3; + state = State.ENGLISH_DEF_OF_FOREIGN; + final String prefix = listSection.firstPrefix; + if (prefix.length() > 1) { + // Could just get looser and say that any prefix longer than first is a sublist. + LOG.warning("Prefix '" + prefix + "' too long: " + listSection); + incrementCount("WARNING: Prefix too long"); + return; } - - if ((nextPrefix.equals("#:") || nextPrefix.equals("##:")) && dash != -1) { - final String foreignEx = nextLine.substring(0, dash); - final String englishEx = nextLine.substring(dash + mdashLen); - final Pair pair = new Pair(formatAndIndexExampleString(englishEx, enIndexBuilder, indexedEntry), formatAndIndexExampleString(foreignEx, foreignIndexBuilder, indexedEntry), swap); - if (pair.lang1 != "--" && pair.lang1 != "--") { - pairEntry.pairs.add(pair); - } - lastForeign = null; - // TODO: make #* and #*: work - } else if (nextPrefix.equals("#:") || nextPrefix.equals("##:")/* || nextPrefix.equals("#*")*/){ - final Pair pair = new Pair("--", formatAndIndexExampleString(nextLine, null, indexedEntry), swap); - lastForeign = nextLine; - if (pair.lang1 != "--" && pair.lang1 != "--") { + + final PairEntry pairEntry = new PairEntry(entrySource); + final IndexedEntry indexedEntry = new IndexedEntry(pairEntry); + indexedEntry.isValid = true; + + entryIsFormOfSomething = false; + final StringBuilder englishBuilder = new StringBuilder(); + final String mainLine = listSection.firstLine; + appendAndIndexWikiCallback.reset(englishBuilder, indexedEntry); + appendAndIndexWikiCallback.dispatch(mainLine, enIndexBuilder, EntryTypeName.WIKTIONARY_ENGLISH_DEF); + + final String english = trim(englishBuilder.toString()); + if (english.length() > 0) { + final PairEntry.Pair pair = new PairEntry.Pair(english, trim(foreignText), this.swap); pairEntry.pairs.add(pair); - } - } else if (nextPrefix.equals("#::") || nextPrefix.equals("#**")/* || nextPrefix.equals("#*:")*/) { - if (lastForeign != null && pairEntry.pairs.size() > 0) { - if (i + 1 < listSection.nextPrefixes.size()) { - // Chinese has sometimes multiple foreign lines - final String nextNextPrefix = listSection.nextPrefixes.get(i + 1); - if (nextNextPrefix.equals("#::") || nextNextPrefix.equals("#**")) { - ++i; - nextLine += "\n" + listSection.nextLines.get(i); - } + foreignIndexBuilder.addEntryWithString(indexedEntry, title, entryIsFormOfSomething ? EntryTypeName.WIKTIONARY_IS_FORM_OF_SOMETHING_ELSE : EntryTypeName.WIKTIONARY_TITLE_MULTI); + for (final String form : forms) { + foreignIndexBuilder.addEntryWithString(indexedEntry, form, EntryTypeName.WIKTIONARY_INFLECTED_FORM_MULTI); } - pairEntry.pairs.remove(pairEntry.pairs.size() - 1); - final Pair pair = new Pair(formatAndIndexExampleString(nextLine, enIndexBuilder, indexedEntry), formatAndIndexExampleString(lastForeign, foreignIndexBuilder, indexedEntry), swap); - if (pair.lang1 != "--" || pair.lang2 != "--") { - pairEntry.pairs.add(pair); + } + + // Do examples. + String lastForeign = null; + for (int i = 0; i < listSection.nextPrefixes.size(); ++i) { + final String nextPrefix = listSection.nextPrefixes.get(i); + String nextLine = listSection.nextLines.get(i); + + // TODO: This splitting is not sensitive to wiki code. + int dash = nextLine.indexOf("—"); + int mdashLen = 7; + if (dash == -1) { + dash = nextLine.indexOf("—"); + mdashLen = 1; } - lastForeign = null; - } else { - LOG.warning("TODO: English example with no foreign: " + title + ", " + nextLine); - final Pair pair = new Pair("--", formatAndIndexExampleString(nextLine, null, indexedEntry), swap); - if (pair.lang1 != "--" || pair.lang2 != "--") { - pairEntry.pairs.add(pair); + if (dash == -1) { + dash = nextLine.indexOf(" - "); + mdashLen = 3; } - } - } else if (nextPrefix.equals("#*")) { - // Can't really index these. - final Pair pair = new Pair("--", formatAndIndexExampleString(nextLine, null, indexedEntry), swap); - lastForeign = nextLine; - if (pair.lang1 != "--" || pair.lang2 != "--") { - pairEntry.pairs.add(pair); - } - } else if (nextPrefix.equals("#::*") || nextPrefix.equals("##") || nextPrefix.equals("#*:") || nextPrefix.equals("#:*") || true) { - final Pair pair = new Pair("--", formatAndIndexExampleString(nextLine, null, indexedEntry), swap); - if (pair.lang1 != "--" || pair.lang2 != "--") { - pairEntry.pairs.add(pair); - } + + if ((nextPrefix.equals("#:") || nextPrefix.equals("##:")) && dash != -1) { + final String foreignEx = nextLine.substring(0, dash); + final String englishEx = nextLine.substring(dash + mdashLen); + final PairEntry.Pair pair = new PairEntry.Pair(formatAndIndexExampleString(englishEx, enIndexBuilder, indexedEntry), formatAndIndexExampleString(foreignEx, foreignIndexBuilder, indexedEntry), swap); + if (pair.lang1 != "--" && pair.lang1 != "--") { + pairEntry.pairs.add(pair); + } + lastForeign = null; + // TODO: make #* and #*: work + } else if (nextPrefix.equals("#:") || nextPrefix.equals("##:")/* || nextPrefix.equals("#*")*/) { + final PairEntry.Pair pair = new PairEntry.Pair("--", formatAndIndexExampleString(nextLine, null, indexedEntry), swap); + lastForeign = nextLine; + if (pair.lang1 != "--" && pair.lang1 != "--") { + pairEntry.pairs.add(pair); + } + } else if (nextPrefix.equals("#::") || nextPrefix.equals("#**")/* || nextPrefix.equals("#*:")*/) { + if (lastForeign != null && pairEntry.pairs.size() > 0) { + if (i + 1 < listSection.nextPrefixes.size()) { + // Chinese has sometimes multiple foreign lines + final String nextNextPrefix = listSection.nextPrefixes.get(i + 1); + if (nextNextPrefix.equals("#::") || nextNextPrefix.equals("#**")) { + ++i; + nextLine += "\n" + listSection.nextLines.get(i); + } + } + pairEntry.pairs.remove(pairEntry.pairs.size() - 1); + final PairEntry.Pair pair = new PairEntry.Pair(formatAndIndexExampleString(nextLine, enIndexBuilder, indexedEntry), formatAndIndexExampleString(lastForeign, foreignIndexBuilder, indexedEntry), swap); + if (pair.lang1 != "--" || pair.lang2 != "--") { + pairEntry.pairs.add(pair); + } + lastForeign = null; + } else { + LOG.warning("TODO: English example with no foreign: " + title + ", " + nextLine); + final PairEntry.Pair pair = new PairEntry.Pair("--", formatAndIndexExampleString(nextLine, null, indexedEntry), swap); + if (pair.lang1 != "--" || pair.lang2 != "--") { + pairEntry.pairs.add(pair); + } + } + } else if (nextPrefix.equals("#*")) { + // Can't really index these. + final PairEntry.Pair pair = new PairEntry.Pair("--", formatAndIndexExampleString(nextLine, null, indexedEntry), swap); + lastForeign = nextLine; + if (pair.lang1 != "--" || pair.lang2 != "--") { + pairEntry.pairs.add(pair); + } + } else if (nextPrefix.equals("#::*") || nextPrefix.equals("##") || nextPrefix.equals("#*:") || nextPrefix.equals("#:*") || true) { + final PairEntry.Pair pair = new PairEntry.Pair("--", formatAndIndexExampleString(nextLine, null, indexedEntry), swap); + if (pair.lang1 != "--" || pair.lang2 != "--") { + pairEntry.pairs.add(pair); + } // } else { // assert false; + } } - } } - + private String formatAndIndexExampleString(final String example, final IndexBuilder indexBuilder, final IndexedEntry indexedEntry) { - // TODO: + // TODO: // if (wikiTokenizer.token().equals("'''")) { // insideTripleQuotes = !insideTripleQuotes; // } - final StringBuilder builder = new StringBuilder(); - appendAndIndexWikiCallback.reset(builder, indexedEntry); - appendAndIndexWikiCallback.entryTypeName = EntryTypeName.WIKTIONARY_EXAMPLE; - appendAndIndexWikiCallback.entryTypeNameSticks = true; - try { - // TODO: this is a hack needed because we don't safely split on the dash. - appendAndIndexWikiCallback.dispatch(example, indexBuilder, EntryTypeName.WIKTIONARY_EXAMPLE); - } catch (AssertionError e) { - return "--"; - } - final String result = trim(builder.toString()); - return result.length() > 0 ? result : "--"; + final StringBuilder builder = new StringBuilder(); + appendAndIndexWikiCallback.reset(builder, indexedEntry); + appendAndIndexWikiCallback.entryTypeName = EntryTypeName.WIKTIONARY_EXAMPLE; + appendAndIndexWikiCallback.entryTypeNameSticks = true; + try { + // TODO: this is a hack needed because we don't safely split on the dash. + appendAndIndexWikiCallback.dispatch(example, indexBuilder, EntryTypeName.WIKTIONARY_EXAMPLE); + } catch (AssertionError e) { + return "--"; + } + final String result = trim(builder.toString()); + return result.length() > 0 ? result : "--"; } - } // ForeignParser +} // ForeignParser