- // Walk every wiki token in `rest`. Each token's display form is appended to
- // foreignText, and strings worth searching for are registered with
- // foreignIndexBuilder against indexedEntry.
- final WikiTokenizer wikiTokenizer = new WikiTokenizer(rest, false);
- while (wikiTokenizer.nextToken() != null) {
-
-   if (wikiTokenizer.isPlainText()) {
-     final String plainText = wikiTokenizer.token();
-     foreignText.append(plainText);
-     foreignIndexBuilder.addEntryWithString(indexedEntry, plainText, EntryTypeName.WIKTIONARY_TRANSLATION_OTHER_TEXT);
-
-   } else if (wikiTokenizer.isWikiLink()) {
-     final String plainText = wikiTokenizer.wikiLinkText();
-     foreignText.append(plainText);
-     // TODO: should check for English before appending.
-     foreignIndexBuilder.addEntryWithString(indexedEntry, plainText, EntryTypeName.WIKTIONARY_TRANSLATION_WIKI_TEXT);
-
-   } else if (wikiTokenizer.isFunction()) {
-     final String functionName = wikiTokenizer.functionName();
-     final List<String> args = wikiTokenizer.functionPositionArgs();
-     final Map<String,String> namedArgs = wikiTokenizer.functionNamedArgs();
-
-     // NOTE: the order of these branches is significant; the first match wins.
-     if (functionName.equals("t") || functionName.equals("t+") || functionName.equals("t-") || functionName.equals("tø") || functionName.equals("apdx-t")) {
-       // Translation templates contribute nothing here.
-       // NOTE(review): presumably handled by other code -- confirm.
-     } else if (functionName.equals("qualifier")) {
-       // Deliberately ignored.
-     } else if (encodings.contains(functionName)) {
-       // Encoding template: the text is positional arg 0.
-       if (args.isEmpty()) {
-         // Malformed template; warn instead of throwing IndexOutOfBoundsException.
-         LOG.warning("Missing text argument in {{" + functionName + "}}");
-       } else {
-         foreignText.append(args.get(0));
-         foreignIndexBuilder.addEntryWithString(indexedEntry, args.get(0), EntryTypeName.WIKTIONARY_TRANSLATION_OTHER_TEXT);
-       }
-     } else if (isGender(functionName)) {
-       appendGender(foreignText, functionName, args);
-     } else if (functionName.equals("g")) {
-       foreignText.append("{g}");
-     } else if (functionName.equals("l")) {
-       // encodes text in various langs.
-       // lang is arg 0, the text itself is arg 1.
-       if (args.size() < 2) {
-         // Malformed template; warn instead of throwing IndexOutOfBoundsException.
-         LOG.warning("Missing text argument in {{" + functionName + "}}");
-       } else {
-         foreignText.append(args.get(1));
-         foreignIndexBuilder.addEntryWithString(indexedEntry, args.get(1), EntryTypeName.WIKTIONARY_TRANSLATION_OTHER_TEXT);
-       }
-       // TODO: transliteration
-     } else if (functionName.equals("term")) {
-       // cross-reference to another dictionary
-       if (args.isEmpty()) {
-         // Malformed template; warn instead of throwing IndexOutOfBoundsException.
-         LOG.warning("Missing text argument in {{" + functionName + "}}");
-       } else {
-         foreignText.append(args.get(0));
-         foreignIndexBuilder.addEntryWithString(indexedEntry, args.get(0), EntryTypeName.WIKTIONARY_TRANSLATION_OTHER_TEXT);
-       }
-       // TODO: transliteration
-     } else if (functionName.equals("italbrac") || functionName.equals("gloss")) {
-       // TODO: put this text aside to use it.
-       if (args.isEmpty()) {
-         // Malformed template; warn instead of throwing IndexOutOfBoundsException.
-         LOG.warning("Missing text argument in {{" + functionName + "}}");
-       } else {
-         foreignText.append("[").append(args.get(0)).append("]");
-         foreignIndexBuilder.addEntryWithString(indexedEntry, args.get(0), EntryTypeName.WIKTIONARY_TRANSLATION_OTHER_TEXT);
-       }
-     } else if (functionName.equals("ttbc")) {
-       LOG.warning("Unexpected {{ttbc}}");
-     } else if (functionName.equals("trreq")) {
-       // Deliberately ignored.
-     } else if (functionName.equals("not used")) {
-       foreignText.append("(not used)");
-     } else if (functionName.equals("t-image")) {
-       // American sign language
-     } else {
-       // Unindexed! Unknown templates are re-emitted in {{...}} form after
-       // dropping the named args listed in USELESS_WIKI_ARGS.
-       namedArgs.keySet().removeAll(USELESS_WIKI_ARGS);
-       WikiTokenizer.appendFunction(foreignText.append("{{"), functionName, args, namedArgs).append("}}");
-     }
-
-   } else if (wikiTokenizer.isNewline()) {
-     // Newlines add nothing to the output.
-   } else if (wikiTokenizer.isComment()) {
-     // Comments add nothing to the output.
-   } else if (wikiTokenizer.isMarkup()) {
-     // Markup adds nothing to the output.
-   } else {
-     LOG.warning("Bad translation token: " + wikiTokenizer.token());
-   }
- } // while-token loop.
-