incrementCount("WARNING: Translations not in English section");
} else if (headingName.equals("Pronunciation")) {
//doPronunciation(wikiLineReader);
+ } else if (headingName.startsWith(" {{S|")) {
+ // HACK to support parsing frwiktionary
+ String[] parts = headingName.split("\\|");
+ if (parts.length > 2 && langCodePattern.matcher(parts[2]).find() &&
+ (parts.length < 4 || !parts[3].startsWith("flexion"))) {
+ doForeignPartOfSpeech(lang, headingName, wikiTokenizer.headingDepth(), wikiTokenizer);
+ }
} else if (partOfSpeechHeader.matcher(headingName).matches()) {
doForeignPartOfSpeech(lang, headingName, wikiTokenizer.headingDepth(), wikiTokenizer);
}
final String prefix = listSection.firstPrefix;
if (prefix.length() > 1) {
// Could just get looser and say that any prefix longer than first is a sublist.
- LOG.warning("Prefix too long: " + listSection);
+ LOG.warning("Prefix '" + prefix + "' too long: " + listSection);
incrementCount("WARNING: Prefix too long");
return;
}
String lastForeign = null;
for (int i = 0; i < listSection.nextPrefixes.size(); ++i) {
final String nextPrefix = listSection.nextPrefixes.get(i);
- final String nextLine = listSection.nextLines.get(i);
+ String nextLine = listSection.nextLines.get(i);
// TODO: This splitting is not sensitive to wiki code.
int dash = nextLine.indexOf("—");
pairEntry.pairs.add(pair);
}
lastForeign = null;
- } else if (nextPrefix.equals("#:") || nextPrefix.equals("##:")){
+ // TODO: make #* and #*: work
+ } else if (nextPrefix.equals("#:") || nextPrefix.equals("##:")/* || nextPrefix.equals("#*")*/){
final Pair pair = new Pair("--", formatAndIndexExampleString(nextLine, null, indexedEntry), swap);
lastForeign = nextLine;
if (pair.lang1 != "--" && pair.lang1 != "--") {
pairEntry.pairs.add(pair);
}
- } else if (nextPrefix.equals("#::") || nextPrefix.equals("#**")) {
+ } else if (nextPrefix.equals("#::") || nextPrefix.equals("#**")/* || nextPrefix.equals("#*:")*/) {
if (lastForeign != null && pairEntry.pairs.size() > 0) {
+ if (i + 1 < listSection.nextPrefixes.size()) {
+ // Chinese sometimes has multiple foreign lines
+ final String nextNextPrefix = listSection.nextPrefixes.get(i + 1);
+ if (nextNextPrefix.equals("#::") || nextNextPrefix.equals("#**")) {
+ ++i;
+ nextLine += "\n" + listSection.nextLines.get(i);
+ }
+ }
pairEntry.pairs.remove(pairEntry.pairs.size() - 1);
final Pair pair = new Pair(formatAndIndexExampleString(nextLine, enIndexBuilder, indexedEntry), formatAndIndexExampleString(lastForeign, foreignIndexBuilder, indexedEntry), swap);
if (pair.lang1 != "--" || pair.lang2 != "--") {