import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
-import java.io.InputStream;
import java.io.IOException;
+import java.io.InputStream;
import java.io.OutputStream;
import java.util.ArrayList;
import java.util.LinkedHashMap;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.parsers.SAXParser;
+import javax.xml.parsers.SAXParserFactory;
-import org.apache.xerces.jaxp.SAXParserFactoryImpl;
import org.apache.commons.compress.compressors.CompressorStreamFactory;
import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
}
private void go() throws Exception {
- final SAXParser parser = SAXParserFactoryImpl.newInstance().newSAXParser();
+ final SAXParser parser = SAXParserFactory.newInstance().newSAXParser();
// Configure things.
for (final Map.Entry<String, List<Selector>> pathToSelectorsEntry : pathToSelectors.entrySet()) {
title.startsWith("Template:") ||
title.startsWith("Summary:") ||
title.startsWith("Module:") ||
+ title.startsWith("Reconstruction:") ||
// DE
title.startsWith("Datei:") ||
title.startsWith("Verzeichnis:") ||
title.startsWith("Kategorie:") ||
title.startsWith("Hilfe:") ||
title.startsWith("Reim:") ||
+ title.startsWith("Modul:") ||
// FR:
title.startsWith("Annexe:") ||
title.startsWith("Catégori:") ||
title.startsWith("Aide:") ||
title.startsWith("Fichier:") ||
title.startsWith("Wiktionnaire:") ||
+ title.startsWith("Translations:Wiktionnaire:") ||
+ title.startsWith("Translations:Projet:") ||
title.startsWith("Catégorie:") ||
title.startsWith("Portail:") ||
title.startsWith("utiliusateur:") ||
title.startsWith("Kategorio:") ||
+ title.startsWith("Tutoriel:") ||
// IT
title.startsWith("Wikizionario:") ||
title.startsWith("Appendice:") ||
title.startsWith("Categoria:") ||
title.startsWith("Aiuto:") ||
title.startsWith("Portail:") ||
+ title.startsWith("Modulo:") ||
// ES and PT
title.startsWith("Apéndice:") ||
title.startsWith("Archivo:") ||
title.startsWith("Predefinição:") ||
title.startsWith("Vocabulário:") ||
title.startsWith("Wikcionário:") ||
+ title.startsWith("Módulo:") ||
// sentinel
false
) return;
- if (!title.startsWith("Sign gloss:")) {
+ // leave the Flexion: pages in for now and do not warn about them
+ if (!title.startsWith("Sign gloss:") && !title.startsWith("Flexion:")) {
System.err.println("title with colon: " + title);
}
}
String text = textBuilder.toString();
- // Workaround for Spanish wiktionary {{ES}} pattern
- text = text.replace("{{ES}}", "== {{lengua|es}} ==");
+ // Workaround for Spanish wiktionary {{ES}} and {{ES|word}} patterns
+ text = text.replaceAll("\\{\\{ES(\\|[^{}=]*)?}}", "== {{lengua|es}} ==");
String translingual = "";
int start = 0;
final Matcher startMatcher = headingStart.matcher(text);
/**
 * Parses the given XML file with a SAX parser, passing this object as the
 * handler that receives the parse events.
 *
 * @param file the XML file to parse
 * @throws ParserConfigurationException if a SAX parser cannot be created
 * @throws SAXException if the parser cannot be created or parsing fails
 * @throws IOException if reading the file fails
 */
public void parse(final File file) throws ParserConfigurationException,
SAXException, IOException {
- final SAXParser parser = SAXParserFactoryImpl.newInstance().newSAXParser();
// Obtain the parser through the JAXP SAXParserFactory instead of naming
// the Xerces SAXParserFactoryImpl class directly.
+ final SAXParser parser = SAXParserFactory.newInstance().newSAXParser();
parser.parse(file, this);
}