} else if (wikiTokenizer.remainderStartsWith("''See''")) {
wikiTokenizer.nextLine();
- LOG.fine("Skipping line: " + wikiTokenizer.token());
+ LOG.fine("Skipping See line: " + wikiTokenizer.token());
} else if (wikiTokenizer.isWikiLink()) {
final String wikiLink = wikiTokenizer.wikiLinkText();
if (wikiLink.contains(":") && wikiLink.contains(title)) {
}
- static final class Callback implements WikiTokenizer.Callback {
- final Map<String,WikiFunctionCallback> functionCallbacks;
+ static final class AppendAndIndexCallback implements WikiTokenizer.Callback {
+ public AppendAndIndexCallback( // Stores each argument below in the field of the same name.
+ final StringBuilder builder,
+ final IndexedEntry indexedEntry,
+ final IndexBuilder defaultIndexBuilder,
+ final Map<String, WikiFunctionCallback> functionCallbacks) {
+ this.indexedEntry = indexedEntry;
+ this.defaultIndexBuilder = defaultIndexBuilder;
+ this.builder = builder;
+ this.functionCallbacks = functionCallbacks;
+ }
+
final StringBuilder builder;
- final IndexBuilder defaultIndexBuilder;
final IndexedEntry indexedEntry;
+ IndexBuilder defaultIndexBuilder;
+ final Map<String,WikiFunctionCallback> functionCallbacks;
// TODO: the classes of text are wrong....
@Override
public void onPlainText(WikiTokenizer wikiTokenizer) {
+ // The only non-recursive callback. Just appends the current token to the builder, and registers it with defaultIndexBuilder for indexedEntry.
final String plainText = wikiTokenizer.token();
builder.append(plainText);
defaultIndexBuilder.addEntryWithString(indexedEntry, plainText, EntryTypeName.WIKTIONARY_TRANSLATION_OTHER_TEXT);
}
@Override
- public void onFunction(String functionName,
- List<String> functionPositionArgs, Map<String, String> functionNamedArgs) {
+ public void onFunction(final String name,
+ final List<String> args, final Map<String, String> namedArgs) {
+ final WikiFunctionCallback functionCallback = functionCallbacks.get(name);
+ if (functionCallback != null) {
+ // A callback is registered under this function name: hand the whole call to it.
+ functionCallback.onWikiFunction(name, args, namedArgs);
+ } else {
+ // Default handling when no callback is registered: rewrite every positional and named argument in place via WikiTokenizer.toPlainText, then append the function to the builder.
+ for (int i = 0; i < args.size(); ++i) {
+ args.set(i, WikiTokenizer.toPlainText(args.get(i)));
+ }
+ for (final Map.Entry<String, String> entry : namedArgs.entrySet()) {
+ entry.setValue(WikiTokenizer.toPlainText(entry.getValue()));
+ }
+ WikiTokenizer.appendFunction(builder, name, args, namedArgs);
+ }
}
@Override
while (wikiTokenizer.nextToken() != null) {
if (wikiTokenizer.isPlainText()) {
+ final String plainText = wikiTokenizer.token();
+ foreignText.append(plainText);
+ foreignIndexBuilder.addEntryWithString(indexedEntry, plainText, EntryTypeName.WIKTIONARY_TRANSLATION_OTHER_TEXT);
} else if (wikiTokenizer.isWikiLink()) {
-
+ final String plainText = wikiTokenizer.wikiLinkText();
+ foreignText.append(plainText);
+ // TODO: should check for English before appending.
+ foreignIndexBuilder.addEntryWithString(indexedEntry, plainText, EntryTypeName.WIKTIONARY_TRANSLATION_WIKI_TEXT);
+
} else if (wikiTokenizer.isFunction()) {
final String functionName = wikiTokenizer.functionName();
final List<String> args = wikiTokenizer.functionPositionArgs();
} else if (name.equals("attention") || name.equals("zh-attention")) {
// See: http://en.wiktionary.org/wiki/Template:attention
// Ignore these.
- } else if (name.equals("infl")) {
+ } else if (name.equals("infl") || name.equals("head")) {
// See: http://en.wiktionary.org/wiki/Template:infl
final String langCode = get(args, 0);
String head = namedArgs.remove("head");
// null baseForm happens in Danish.
LOG.warning("Null baseform: " + title);
}
+ } else if (name.equals("l")) {
+ // Template "l": a piece of text in some language.
+ // arg 0 is the language code; arg 1 is the text itself.
+ englishBuilder.append("").append(args.get(1));
+ final String langCode = args.get(0);
+ if ("en".equals(langCode)) {
+ enIndexBuilder.addEntryWithString(indexedEntry, args.get(1), EntryTypeName.WIKTIONARY_ENGLISH_DEF_WIKI_LINK);
+ } else {
+ foreignIndexBuilder.addEntryWithString(indexedEntry, args.get(1), EntryTypeName.WIKTIONARY_ENGLISH_DEF_OTHER_LANG);
+ }
+ // TODO: transliteration
+
+ } else if (name.equals("defn") || name.equals("rfdef")) {
+ // Do nothing.
+ // http://en.wiktionary.org/wiki/Wiktionary:Requests_for_deletion/Others#Template:defn
+ // Redundant, used for the same purpose as {{rfdef}}, but this
+ // doesn't produce the "This word needs a definition" text.
+ // Delete or redirect.
} else {
namedArgs.keySet().removeAll(USELESS_WIKI_ARGS);
if (args.size() == 0 && namedArgs.isEmpty()) {