From b7b04d01f8d0ed763f0817d0531ecebf9ff50260 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Reimar=20D=C3=B6ffinger?= Date: Mon, 13 Apr 2020 15:42:17 +0200 Subject: [PATCH] Get rid of xerces dependency. Relying on the standard XML implementation is a lot slower, but the WiktionarySplitter run still only takes a few minutes. --- .classpath | 2 +- WiktionarySplitter.sh | 4 +--- compile.sh | 2 +- run.sh | 4 +--- .../android/dictionary/engine/WiktionarySplitter.java | 6 +++--- 5 files changed, 7 insertions(+), 11 deletions(-) diff --git a/.classpath b/.classpath index c9a63ed..96fa364 100755 --- a/.classpath +++ b/.classpath @@ -6,11 +6,11 @@ + - diff --git a/WiktionarySplitter.sh b/WiktionarySplitter.sh index fb1ea66..87593bb 100755 --- a/WiktionarySplitter.sh +++ b/WiktionarySplitter.sh @@ -2,9 +2,7 @@ # per-language data files from enwiktionary. ICU4J=/usr/share/java/icu4j-49.1.jar test -r "$ICU4J" || ICU4J=/usr/share/icu4j-55/lib/icu4j.jar -XERCES=/usr/share/java/xercesImpl.jar -test -r "$XERCES" || XERCES=/usr/share/xerces-2/lib/xercesImpl.jar COMMONS_COMPRESS=/usr/share/java/commons-compress.jar JAVA=/usr/lib/jvm/java-8-openjdk-amd64/jre/bin/java test -x "$JAVA" || JAVA=java -"$JAVA" -Xmx4096m -Xverify:none -classpath bin/:"$ICU4J":"$XERCES":"$COMMONS_COMPRESS" com.hughes.android.dictionary.engine.WiktionarySplitter "$@" +"$JAVA" -Xmx4096m -Xverify:none -classpath bin/:"$ICU4J":"$COMMONS_COMPRESS" com.hughes.android.dictionary.engine.WiktionarySplitter "$@" diff --git a/compile.sh b/compile.sh index 28a6b8e..7c1ad83 100755 --- a/compile.sh +++ b/compile.sh @@ -33,4 +33,4 @@ fi mkdir -p bin # -encoding is just a work around for user that still run systems # with non-UTF8 locales -javac -Xlint:all -encoding UTF-8 -g -d bin/ ../Dictionary/Util/src/com/hughes/util/*.java ../Dictionary/Util/src/com/hughes/util/raf/*.java ../Dictionary/src/com/hughes/android/dictionary/DictionaryInfo.java ../Dictionary/src/com/hughes/android/dictionary/engine/*.java ../Dictionary/src/com/hughes/android/dictionary/C.java src/com/hughes/util/*.java src/com/hughes/android/dictionary/*.java src/com/hughes/android/dictionary/*/*.java src/com/hughes/android/dictionary/*/*/*.java -classpath "$ICU4J:$JUNIT:$XERCES:$COMMONS:$COMMONS_COMPRESS" +javac --limit-modules java.xml,java.logging -Xlint:all -encoding UTF-8 -g -d bin/ ../Dictionary/Util/src/com/hughes/util/*.java ../Dictionary/Util/src/com/hughes/util/raf/*.java ../Dictionary/src/com/hughes/android/dictionary/DictionaryInfo.java ../Dictionary/src/com/hughes/android/dictionary/engine/*.java ../Dictionary/src/com/hughes/android/dictionary/C.java src/com/hughes/util/*.java src/com/hughes/android/dictionary/*.java src/com/hughes/android/dictionary/*/*.java src/com/hughes/android/dictionary/*/*/*.java -classpath "$ICU4J:$JUNIT:$COMMONS:$COMMONS_COMPRESS" diff --git a/run.sh b/run.sh index 99113e7..fe03ef5 100755 --- a/run.sh +++ b/run.sh @@ -1,10 +1,8 @@ # -agentlib:hprof=heap=sites,depth=20 ICU4J=/usr/share/java/icu4j-49.1.jar test -r "$ICU4J" || ICU4J=/usr/share/icu4j-55/lib/icu4j.jar -XERCES=/usr/share/java/xercesImpl.jar -test -r "$XERCES" || XERCES=/usr/share/xerces-2/lib/xercesImpl.jar COMMONS=/usr/share/java/commons-text.jar COMMONS_COMPRESS=/usr/share/java/commons-compress.jar JAVA=/usr/lib/jvm/java-8-openjdk-amd64/jre/bin/java test -x "$JAVA" || JAVA=java -"$JAVA" -Djava.util.logging.config.file="logging.properties" -Xmx4096m -classpath bin/:"$ICU4J":"$XERCES":"$COMMONS":"$COMMONS_COMPRESS" com.hughes.android.dictionary.engine.DictionaryBuilder "$@" +"$JAVA" -Djava.util.logging.config.file="logging.properties" -Xmx4096m -classpath bin/:"$ICU4J":"$COMMONS":"$COMMONS_COMPRESS" com.hughes.android.dictionary.engine.DictionaryBuilder "$@" diff --git a/src/com/hughes/android/dictionary/engine/WiktionarySplitter.java b/src/com/hughes/android/dictionary/engine/WiktionarySplitter.java index 12422fd..cc9a3b1 100644 --- a/src/com/hughes/android/dictionary/engine/WiktionarySplitter.java +++ b/src/com/hughes/android/dictionary/engine/WiktionarySplitter.java @@ -32,9 +32,9 @@ import java.util.regex.Pattern; import javax.xml.parsers.ParserConfigurationException; import javax.xml.parsers.SAXParser; +import javax.xml.parsers.SAXParserFactory; import org.apache.commons.compress.compressors.CompressorStreamFactory; -import org.apache.xerces.jaxp.SAXParserFactoryImpl; import org.xml.sax.Attributes; import org.xml.sax.SAXException; @@ -75,7 +75,7 @@ public class WiktionarySplitter extends org.xml.sax.helpers.DefaultHandler { } private void go() throws Exception { - final SAXParser parser = SAXParserFactoryImpl.newInstance().newSAXParser(); + final SAXParser parser = SAXParserFactory.newInstance().newSAXParser(); // Configure things. for (final Map.Entry> pathToSelectorsEntry : pathToSelectors.entrySet()) { @@ -354,7 +354,7 @@ public class WiktionarySplitter extends org.xml.sax.helpers.DefaultHandler { public void parse(final File file) throws ParserConfigurationException, SAXException, IOException { - final SAXParser parser = SAXParserFactoryImpl.newInstance().newSAXParser(); + final SAXParser parser = SAXParserFactory.newInstance().newSAXParser(); parser.parse(file, this); } -- 2.43.0