parser.parse(new BufferedInputStream(in), this);
}
} catch (Exception e) {
- System.err.println("Exception during parse, lastPageTitle=" + lastPageTitle + ", titleBuilder=" + titleBuilder.toString());
+ System.err.println("Exception during parse, lastPageTitle=" + lastPageTitle + ", titleBuilder=" + titleBuilder.toString() + " of file " + pathToSelectorsEntry.getKey());
throw e;
}
title.startsWith("Template:") ||
title.startsWith("Summary:") ||
title.startsWith("Module:") ||
+ title.startsWith("Reconstruction:") ||
// DE
title.startsWith("Datei:") ||
title.startsWith("Verzeichnis:") ||
title.startsWith("Kategorie:") ||
title.startsWith("Hilfe:") ||
title.startsWith("Reim:") ||
+ title.startsWith("Modul:") ||
// FR:
title.startsWith("Annexe:") ||
title.startsWith("Catégori:") ||
title.startsWith("Aide:") ||
title.startsWith("Fichier:") ||
title.startsWith("Wiktionnaire:") ||
+ title.startsWith("Translations:Wiktionnaire:") ||
+ title.startsWith("Translations:Projet:") ||
title.startsWith("Catégorie:") ||
title.startsWith("Portail:") ||
title.startsWith("utiliusateur:") ||
title.startsWith("Kategorio:") ||
+ title.startsWith("Tutoriel:") ||
// IT
title.startsWith("Wikizionario:") ||
title.startsWith("Appendice:") ||
title.startsWith("Categoria:") ||
title.startsWith("Aiuto:") ||
title.startsWith("Portail:") ||
+ title.startsWith("Modulo:") ||
// ES
title.startsWith("Apéndice:") ||
title.startsWith("Archivo:") ||
title.startsWith("Plantilla:") ||
title.startsWith("Wikcionario:") ||
+ // PT
+ title.startsWith("Ajuda:") ||
+ title.startsWith("Apêndice:") ||
+ title.startsWith("Citações:") ||
+ title.startsWith("Portal:") ||
+ title.startsWith("Predefinição:") ||
+ title.startsWith("Vocabulário:") ||
+ title.startsWith("Wikcionário:") ||
+ title.startsWith("Módulo:") ||
+
// sentinel
false
) return;
- if (!title.startsWith("Sign gloss:")) {
+ // leave the Flexion: pages in for now and do not warn about them
+ if (!title.startsWith("Sign gloss:") && !title.startsWith("Flexion:")) {
System.err.println("title with colon: " + title);
}
}
String text = textBuilder.toString();
+ // Workaround for Spanish wiktionary {{ES}} and {{ES|word}} patterns
+ text = text.replaceAll("\\{\\{ES(\\|[^{}=]*)?}}", "== {{lengua|es}} ==");
String translingual = "";
int start = 0;
final Matcher startMatcher = headingStart.matcher(text);