+ final String mainLine = listSection.firstLine;
+ final WikiTokenizer englishTokenizer = new WikiTokenizer(mainLine, false);
+ while (englishTokenizer.nextToken() != null) {
+ // TODO handle form of....
+ if (englishTokenizer.isPlainText()) {
+ englishBuilder.append(englishTokenizer.token());
+ enIndexBuilder.addEntryWithString(indexedEntry, englishTokenizer.token(), EntryTypeName.WIKTIONARY_ENGLISH_DEF);
+ } else if (englishTokenizer.isWikiLink()) {
+ final String text = englishTokenizer.wikiLinkText();
+ final String link = englishTokenizer.wikiLinkDest();
+ if (link != null) {
+ if (link.contains("#English")) {
+ englishBuilder.append(text);
+ enIndexBuilder.addEntryWithString(indexedEntry, text, EntryTypeName.WIKTIONARY_ENGLISH_DEF_WIKI_LINK);
+ } else if (link.contains("#") && this.langPattern.matcher(link).find()) {
+ englishBuilder.append(text);
+ foreignIndexBuilder.addEntryWithString(indexedEntry, text, EntryTypeName.WIKTIONARY_ENGLISH_DEF_OTHER_LANG);
+ } else if (link.equals("plural")) {
+ englishBuilder.append(text);
+ } else {
+ //LOG.warning("Special link: " + englishTokenizer.token());
+ enIndexBuilder.addEntryWithString(indexedEntry, text, EntryTypeName.WIKTIONARY_ENGLISH_DEF_WIKI_LINK);
+ englishBuilder.append(text);
+ }
+ } else {
+ // link == null
+ englishBuilder.append(text);
+ if (!UNINDEXED_WIKI_TEXT.matcher(text).find()) {
+ enIndexBuilder.addEntryWithString(indexedEntry, text, EntryTypeName.WIKTIONARY_ENGLISH_DEF_WIKI_LINK);
+ }
+ }
+ } else if (englishTokenizer.isFunction()) {
+ final String name = englishTokenizer.functionName();
+ final List<String> args = englishTokenizer.functionPositionArgs();
+ final Map<String,String> namedArgs = englishTokenizer.functionNamedArgs();
+
+ if (
+ name.equals("form of") ||
+ name.contains("conjugation of") ||
+ name.contains("participle of") ||
+ name.contains("gerund of") ||
+ name.contains("feminine of") ||
+ name.contains("plural of")) {
+ String formName = name;
+ if (name.equals("form of")) {
+ formName = remove(args, 0, null);
+ }
+ if (formName == null) {
+ LOG.warning("Missing form name: " + title);
+ formName = "form of";
+ }
+ String baseForm = get(args, 1, "");
+ if ("".equals(baseForm)) {
+ baseForm = get(args, 0, null);
+ remove(args, 1, "");
+ } else {
+ remove(args, 0, null);
+ }
+ namedArgs.keySet().removeAll(USELESS_WIKI_ARGS);
+ WikiTokenizer.appendFunction(englishBuilder.append("{"), formName, args, namedArgs).append("}");
+ if (baseForm != null) {
+ foreignIndexBuilder.addEntryWithString(indexedEntry, baseForm, EntryTypeName.WIKTIONARY_BASE_FORM_SINGLE, EntryTypeName.WIKTIONARY_BASE_FORM_MULTI);
+ } else {
+ // null baseForm happens in Danish.
+ LOG.warning("Null baseform: " + title);
+ }
+// } else if (name.equals("defn")) {
+ // TODO: test me!
+ // Do nothing.
+ // http://en.wiktionary.org/wiki/Wiktionary:Requests_for_deletion/Others#Template:defn
+ // Redundant, used for the same purpose as {{rfdef}}, but this
+ // doesn't produce the "This word needs a definition" text.
+ // Delete or redirect.
+ } else {
+ namedArgs.keySet().removeAll(USELESS_WIKI_ARGS);
+ if (args.size() == 0 && namedArgs.isEmpty()) {
+ englishBuilder.append("{").append(name).append("}");
+ } else {
+ WikiTokenizer.appendFunction(englishBuilder.append("{{"), name, args, namedArgs).append("}}");
+ }
+// LOG.warning("Unexpected function: " + englishTokenizer.token());
+ }
+ } else {
+ if (englishTokenizer.isComment() || englishTokenizer.isMarkup()) {
+ } else {
+ LOG.warning("Unexpected definition type: " + englishTokenizer.token());
+ }
+ }
+ }
+
+ // Emit the definition pair and the title / inflected-form index entries,
+ // but only when the tokenizer pass produced some English text.
+ final String english = trim(englishBuilder.toString());
+ if (english.length() != 0) {
+   final String trimmedForeignText = trim(foreignText);
+   pairEntry.pairs.add(new Pair(english, trimmedForeignText, this.swap));
+   foreignIndexBuilder.addEntryWithString(
+       indexedEntry,
+       title,
+       EntryTypeName.WIKTIONARY_TITLE_SINGLE,
+       EntryTypeName.WIKTIONARY_TITLE_MULTI);
+   for (final String form : forms) {
+     foreignIndexBuilder.addEntryWithString(
+         indexedEntry,
+         form,
+         EntryTypeName.WIKTIONARY_INFLECTD_FORM_SINGLE,
+         EntryTypeName.WIKTIONARY_INFLECTED_FORM_MULTI);
+   }
+ }
+
+ // Do examples.
+ // Each line listed under the definition is turned into a Pair. A foreign
+ // example and its English translation may share one line (split at a dash)
+ // or sit on consecutive lines; in the latter case lastForeign carries the
+ // foreign text over to the line that holds the translation.
+ String lastForeign = null;
+ for (int i = 0; i < listSection.nextPrefixes.size(); ++i) {
+   final String nextPrefix = listSection.nextPrefixes.get(i);
+   final String nextLine = listSection.nextLines.get(i);
+
+   // Locate the separator between foreign text and translation: the
+   // "&mdash;" entity, then a literal em dash, then a spaced hyphen. The
+   // number of characters skipped is taken from the separator that matched,
+   // so the search string and the skip length cannot drift apart.
+   String separator = "&mdash;";
+   int dash = nextLine.indexOf(separator);
+   if (dash == -1) {
+     separator = "\u2014";
+     dash = nextLine.indexOf(separator);
+   }
+   if (dash == -1) {
+     separator = " - ";
+     dash = nextLine.indexOf(separator);
+   }
+
+   final boolean isExampleLine = nextPrefix.equals("#:") || nextPrefix.equals("##:");
+   final Pair pair;
+   if (isExampleLine && dash != -1) {
+     // Foreign example and translation on the same line.
+     final String foreignEx = nextLine.substring(0, dash);
+     final String englishEx = nextLine.substring(dash + separator.length());
+     pair = new Pair(formatAndIndexExampleString(englishEx, enIndexBuilder, indexedEntry), formatAndIndexExampleString(foreignEx, foreignIndexBuilder, indexedEntry), swap);
+     lastForeign = null;
+   } else if (isExampleLine) {
+     // Example without a separator: store it with a "--" placeholder and
+     // remember the line in case the next line supplies the translation.
+     pair = new Pair("--", formatAndIndexExampleString(nextLine, null, indexedEntry), swap);
+     lastForeign = nextLine;
+   } else if (nextPrefix.equals("#::") || nextPrefix.equals("#**")) {
+     if (lastForeign != null && pairEntry.pairs.size() > 0) {
+       // Replace the most recently added pair with a combined one built from
+       // the remembered foreign line and this line.
+       pairEntry.pairs.remove(pairEntry.pairs.size() - 1);
+       pair = new Pair(formatAndIndexExampleString(nextLine, enIndexBuilder, indexedEntry), formatAndIndexExampleString(lastForeign, foreignIndexBuilder, indexedEntry), swap);
+       lastForeign = null;
+     } else {
+       LOG.warning("TODO: English example with no foreign: " + title + ", " + nextLine);
+       pair = new Pair("--", formatAndIndexExampleString(nextLine, null, indexedEntry), swap);
+     }
+   } else if (nextPrefix.equals("#*")) {
+     // Can't really index these.
+     pair = new Pair("--", formatAndIndexExampleString(nextLine, null, indexedEntry), swap);
+     lastForeign = nextLine;
+   } else {
+     // Every other prefix (e.g. "#::*", "##", "#*:", "#:*") is kept as
+     // unindexed text.
+     pair = new Pair("--", formatAndIndexExampleString(nextLine, null, indexedEntry), swap);
+   }
+
+   // Skip pairs in which both sides are just the "--" placeholder. Compare by
+   // value: != on strings tests object identity, not content.
+   if (!"--".equals(pair.lang1) || !"--".equals(pair.lang2)) {
+     pairEntry.pairs.add(pair);
+   }
+ }