if (isIgnorableTitle(title)) {
return;
}
- final String lang = heading.replaceAll("=", "").trim();
+ final String lang = heading.replace("=", "").trim();
if (!langPattern.matcher(lang).find()){
return;
}
} else if (wikiTokenizer.isPlainText()) {
// Unindexed!
foreignBuilder.append(wikiTokenizer.token());
-
- } else if (wikiTokenizer.isMarkup() || wikiTokenizer.isNewline() || wikiTokenizer.isComment()) {
+ } else if (wikiTokenizer.isHtml()) {
+ if (!wikiTokenizer.token().startsWith("<ref>")) {
+ foreignBuilder.append(wikiTokenizer.token());
+ }
+ } else if (wikiTokenizer.isMarkup() ||
+ wikiTokenizer.isNewline() ||
+ wikiTokenizer.isComment()) {
// Do nothing.
} else {
LOG.warning("Unexpected token: " + wikiTokenizer.token());
final String prefix = listSection.firstPrefix;
if (prefix.length() > 1) {
// Could just get looser and say that any prefix longer than first is a sublist.
- LOG.warning("Prefix too long: " + listSection);
+ LOG.warning("Prefix '" + prefix + "' too long: " + listSection);
incrementCount("WARNING: Prefix too long");
return;
}
final PairEntry pairEntry = new PairEntry(entrySource);
final IndexedEntry indexedEntry = new IndexedEntry(pairEntry);
+ indexedEntry.isValid = true;
entryIsFormOfSomething = false;
final StringBuilder englishBuilder = new StringBuilder();
String lastForeign = null;
for (int i = 0; i < listSection.nextPrefixes.size(); ++i) {
final String nextPrefix = listSection.nextPrefixes.get(i);
- final String nextLine = listSection.nextLines.get(i);
+ String nextLine = listSection.nextLines.get(i);
// TODO: This splitting is not sensitive to wiki code.
int dash = nextLine.indexOf("—");
}
} else if (nextPrefix.equals("#::") || nextPrefix.equals("#**")) {
if (lastForeign != null && pairEntry.pairs.size() > 0) {
+ if (i + 1 < listSection.nextPrefixes.size()) {
+ // Chinese sometimes has multiple foreign lines
+ final String nextNextPrefix = listSection.nextPrefixes.get(i + 1);
+ if (nextNextPrefix.equals("#::") || nextNextPrefix.equals("#**")) {
+ ++i;
+ nextLine += "\n" + listSection.nextLines.get(i);
+ }
+ }
pairEntry.pairs.remove(pairEntry.pairs.size() - 1);
final Pair pair = new Pair(formatAndIndexExampleString(nextLine, enIndexBuilder, indexedEntry), formatAndIndexExampleString(lastForeign, foreignIndexBuilder, indexedEntry), swap);
if (pair.lang1 != "--" || pair.lang2 != "--") {
}
- private void itConjAre(List<String> args, Map<String, String> namedArgs) {
- final String base = args.get(0);
- final String aux = args.get(1);
-
- putIfMissing(namedArgs, "inf", base + "are");
- putIfMissing(namedArgs, "aux", aux);
- putIfMissing(namedArgs, "ger", base + "ando");
- putIfMissing(namedArgs, "presp", base + "ante");
- putIfMissing(namedArgs, "pastp", base + "ato");
- // Present
- putIfMissing(namedArgs, "pres1s", base + "o");
- putIfMissing(namedArgs, "pres2s", base + "i");
- putIfMissing(namedArgs, "pres3s", base + "a");
- putIfMissing(namedArgs, "pres1p", base + "iamo");
- putIfMissing(namedArgs, "pres2p", base + "ate");
- putIfMissing(namedArgs, "pres3p", base + "ano");
- // Imperfect
- putIfMissing(namedArgs, "imperf1s", base + "avo");
- putIfMissing(namedArgs, "imperf2s", base + "avi");
- putIfMissing(namedArgs, "imperf3s", base + "ava");
- putIfMissing(namedArgs, "imperf1p", base + "avamo");
- putIfMissing(namedArgs, "imperf2p", base + "avate");
- putIfMissing(namedArgs, "imperf3p", base + "avano");
- // Passato remoto
- putIfMissing(namedArgs, "prem1s", base + "ai");
- putIfMissing(namedArgs, "prem2s", base + "asti");
- putIfMissing(namedArgs, "prem3s", base + "ò");
- putIfMissing(namedArgs, "prem1p", base + "ammo");
- putIfMissing(namedArgs, "prem2p", base + "aste");
- putIfMissing(namedArgs, "prem3p", base + "arono");
- // Future
- putIfMissing(namedArgs, "fut1s", base + "erò");
- putIfMissing(namedArgs, "fut2s", base + "erai");
- putIfMissing(namedArgs, "fut3s", base + "erà");
- putIfMissing(namedArgs, "fut1p", base + "eremo");
- putIfMissing(namedArgs, "fut2p", base + "erete");
- putIfMissing(namedArgs, "fut3p", base + "eranno");
- // Conditional
- putIfMissing(namedArgs, "cond1s", base + "erei");
- putIfMissing(namedArgs, "cond2s", base + "eresti");
- putIfMissing(namedArgs, "cond3s", base + "erebbe");
- putIfMissing(namedArgs, "cond1p", base + "eremmo");
- putIfMissing(namedArgs, "cond2p", base + "ereste");
- putIfMissing(namedArgs, "cond3p", base + "erebbero");
- // Subjunctive / congiuntivo
- putIfMissing(namedArgs, "sub123s", base + "i");
- putIfMissing(namedArgs, "sub1p", base + "iamo");
- putIfMissing(namedArgs, "sub2p", base + "iate");
- putIfMissing(namedArgs, "sub3p", base + "ino");
- // Imperfect subjunctive
- putIfMissing(namedArgs, "impsub12s", base + "assi");
- putIfMissing(namedArgs, "impsub3s", base + "asse");
- putIfMissing(namedArgs, "impsub1p", base + "assimo");
- putIfMissing(namedArgs, "impsub2p", base + "aste");
- putIfMissing(namedArgs, "impsub3p", base + "assero");
- // Imperative
- putIfMissing(namedArgs, "imp2s", base + "a");
- putIfMissing(namedArgs, "imp3s", base + "i");
- putIfMissing(namedArgs, "imp1p", base + "iamo");
- putIfMissing(namedArgs, "imp2p", base + "ate");
- putIfMissing(namedArgs, "imp3p", base + "ino");
-
-
- itConj(args, namedArgs);
- }
-
-
- private void itConj(List<String> args, Map<String, String> namedArgs) {
- // TODO Auto-generated method stub
-
- }
-
-
- private static void putIfMissing(final Map<String, String> namedArgs, final String key,
- final String value) {
- final String oldValue = namedArgs.get(key);
- if (oldValue == null || oldValue.length() == 0) {
- namedArgs.put(key, value);
- }
- }
-
- // TODO: check how ='' and =| are manifested....
- // TODO: get this right in -are
- private static void putOrNullify(final Map<String, String> namedArgs, final String key,
- final String value) {
- final String oldValue = namedArgs.get(key);
- if (oldValue == null/* || oldValue.length() == 0*/) {
- namedArgs.put(key, value);
- } else {
- if (oldValue.equals("''")) {
- namedArgs.put(key, "");
- }
- }
- }
-
- } // ForeignParser
\ No newline at end of file
+ } // ForeignParser