test -r "$ICU4J" || ICU4J=/usr/share/icu4j-55/lib/icu4j.jar
XERCES=/usr/share/java/xercesImpl.jar
test -r "$XERCES" || XERCES=/usr/share/xerces-2/lib/xercesImpl.jar
+COMMONS_COMPRESS=/usr/share/java/commons-compress-1.13.jar
JAVA=/usr/lib/jvm/java-8-openjdk-amd64/jre/bin/java
test -x "$JAVA" || JAVA=java
-"$JAVA" -classpath src:../Util/src/:../Dictionary/src/:"$ICU4J":"$XERCES" com.hughes.android.dictionary.engine.WiktionarySplitter "$@"
+"$JAVA" -classpath src:../Util/src/:../Dictionary/src/:"$ICU4J":"$XERCES":"$COMMONS_COMPRESS" com.hughes.android.dictionary.engine.WiktionarySplitter "$@"
test -r "$XERCES" || XERCES=/usr/share/xerces-2/lib/xercesImpl.jar
COMMONS=/usr/share/java/commons-lang3.jar
test -r "$COMMONS" || COMMONS=/usr/share/commons-lang-3.3/lib/commons-lang.jar
+COMMONS_COMPRESS=/usr/share/java/commons-compress-1.13.jar
if [ ! -x ../Dictionary ] ; then
echo "You need to clone the Dictionary repository (including subprojects) into .."
exit 1
echo "commons-lang needs to be installed"
exit 1;
fi
-javac -g ../Dictionary/Util/src/com/hughes/util/*.java ../Dictionary/Util/src/com/hughes/util/raf/*.java ../Dictionary/src/com/hughes/android/dictionary/DictionaryInfo.java ../Dictionary/src/com/hughes/android/dictionary/engine/*.java ../Dictionary/src/com/hughes/android/dictionary/C.java src/com/hughes/android/dictionary/*.java src/com/hughes/android/dictionary/*/*.java src/com/hughes/android/dictionary/*/*/*.java -classpath "$ICU4J:$JUNIT:$XERCES:$COMMONS"
+if [ ! -r "$COMMONS_COMPRESS" ] ; then
+ echo "commons-compress needs to be installed"
+ exit 1;
+fi
+javac -g ../Dictionary/Util/src/com/hughes/util/*.java ../Dictionary/Util/src/com/hughes/util/raf/*.java ../Dictionary/src/com/hughes/android/dictionary/DictionaryInfo.java ../Dictionary/src/com/hughes/android/dictionary/engine/*.java ../Dictionary/src/com/hughes/android/dictionary/C.java src/com/hughes/android/dictionary/*.java src/com/hughes/android/dictionary/*/*.java src/com/hughes/android/dictionary/*/*/*.java -classpath "$ICU4J:$JUNIT:$XERCES:$COMMONS:$COMMONS_COMPRESS"
echo "Downloading from: http://dumps.wikimedia.org/${L}wiktionary/"
WIKI=${L}wiktionary-latest-pages-articles.xml
curl -L --remote-name http://dumps.wikimedia.org/${L}wiktionary/latest/${WIKI}.bz2
-bunzip2 ${WIKI}.bz2
-mv ${WIKI} inputs/${L}wiktionary-pages-articles.xml
+mv ${WIKI}.bz2 inputs/${L}wiktionary-pages-articles.xml.bz2
echo "Downloading from: http://ftp.tu-chemnitz.de/pub/Local/urz/ding/de-en-devel/"
CHEMNITZ=de-en.txt
curl -L --remote-name http://ftp.tu-chemnitz.de/pub/Local/urz/ding/de-en-devel/${CHEMNITZ}.gz
-gunzip ${CHEMNITZ}.gz
-mv ${CHEMNITZ} inputs/de-en_chemnitz.txt
+mv ${CHEMNITZ}.gz inputs/de-en_chemnitz.txt.gz
L=fr
echo "Downloading from: http://dumps.wikimedia.org/${L}wiktionary/"
WIKI=${L}wiktionary-latest-pages-articles.xml
curl -L --remote-name http://dumps.wikimedia.org/${L}wiktionary/latest/${WIKI}.bz2
-bunzip2 --force ${WIKI}.bz2
-mv ${WIKI} inputs/${L}wiktionary-pages-articles.xml
+mv ${WIKI}.bz2 inputs/${L}wiktionary-pages-articles.xml.bz2
L=it
echo "Downloading from: http://dumps.wikimedia.org/${L}wiktionary/"
WIKI=${L}wiktionary-latest-pages-articles.xml
curl -L --remote-name http://dumps.wikimedia.org/${L}wiktionary/latest/${WIKI}.bz2
-bunzip2 ${WIKI}.bz2
-mv ${WIKI} inputs/${L}wiktionary-pages-articles.xml
+mv ${WIKI}.bz2 inputs/${L}wiktionary-pages-articles.xml.bz2
L=de
echo "Downloading from: http://dumps.wikimedia.org/${L}wiktionary/"
WIKI=${L}wiktionary-latest-pages-articles.xml
curl -L --remote-name http://dumps.wikimedia.org/${L}wiktionary/latest/${WIKI}.bz2
-bunzip2 ${WIKI}.bz2
-mv ${WIKI} inputs/${L}wiktionary-pages-articles.xml
+mv ${WIKI}.bz2 inputs/${L}wiktionary-pages-articles.xml.bz2
L=es
echo "Downloading from: http://dumps.wikimedia.org/${L}wiktionary/"
WIKI=${L}wiktionary-latest-pages-articles.xml
curl -L --remote-name http://dumps.wikimedia.org/${L}wiktionary/latest/${WIKI}.bz2
-bunzip2 ${WIKI}.bz2
-mv ${WIKI} inputs/${L}wiktionary-pages-articles.xml
+mv ${WIKI}.bz2 inputs/${L}wiktionary-pages-articles.xml.bz2
L=pt
echo "Downloading from: http://dumps.wikimedia.org/${L}wiktionary/"
WIKI=${L}wiktionary-latest-pages-articles.xml
curl -L --remote-name http://dumps.wikimedia.org/${L}wiktionary/latest/${WIKI}.bz2
-bunzip2 ${WIKI}.bz2
-mv ${WIKI} inputs/${L}wiktionary-pages-articles.xml
+mv ${WIKI}.bz2 inputs/${L}wiktionary-pages-articles.xml.bz2
echo "Done. Now run WiktionarySplitter to split apart enwiktionary."
test -r "$XERCES" || XERCES=/usr/share/xerces-2/lib/xercesImpl.jar
COMMONS=/usr/share/java/commons-lang3.jar
test -r "$COMMONS" || COMMONS=/usr/share/commons-lang-3.3/lib/commons-lang.jar
+COMMONS_COMPRESS=/usr/share/java/commons-compress-1.13.jar
JAVA=/usr/lib/jvm/java-8-openjdk-amd64/jre/bin/java
test -x "$JAVA" || JAVA=java
-"$JAVA" -Djava.util.logging.config.file="logging.properties" -Xmx4096m -classpath src:../Dictionary/Util/src/:../Dictionary/src/:"$ICU4J":"$XERCES":"$COMMONS" com.hughes.android.dictionary.engine.DictionaryBuilder "$@"
+"$JAVA" -Djava.util.logging.config.file="logging.properties" -Xmx4096m -classpath src:../Dictionary/Util/src/:../Dictionary/src/:"$ICU4J":"$XERCES":"$COMMONS":"$COMMONS_COMPRESS" com.hughes.android.dictionary.engine.DictionaryBuilder "$@"
package com.hughes.android.dictionary.engine;
+import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.DataOutputStream;
import java.io.File;
+import java.io.FileInputStream;
import java.io.FileOutputStream;
+import java.io.InputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.LinkedHashMap;
import javax.xml.parsers.SAXParser;
import org.apache.xerces.jaxp.SAXParserFactoryImpl;
+import org.apache.commons.compress.compressors.CompressorStreamFactory;
import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
// Do it.
try {
- parser.parse(new File(pathToSelectorsEntry.getKey()), this);
+ // Prefer a compressed dump next to the configured path: try .bz2, then .gz, then .xz.
+ File input = new File(pathToSelectorsEntry.getKey() + ".bz2");
+ if (!input.exists()) input = new File(pathToSelectorsEntry.getKey() + ".gz");
+ if (!input.exists()) input = new File(pathToSelectorsEntry.getKey() + ".xz");
+ if (!input.exists()) {
+     // Fallback to uncompressed file
+     parser.parse(new File(pathToSelectorsEntry.getKey()), this);
+ } else {
+     // try-with-resources guarantees the file handle and the decompressor are released
+     // even when format detection or parsing throws; the exception still propagates
+     // to the enclosing catch unchanged.
+     // The BufferedInputStream around the file supplies the mark/reset support that
+     // CompressorStreamFactory's signature-based format detection relies on.
+     try (InputStream compressedIn = new BufferedInputStream(new FileInputStream(input));
+          InputStream in = new BufferedInputStream(
+                  new CompressorStreamFactory().createCompressorInputStream(compressedIn))) {
+         parser.parse(in, this);
+     }
+ }
} catch (Exception e) {
System.err.println("Exception during parse, lastPageTitle=" + lastPageTitle + ", titleBuilder=" + titleBuilder.toString());
throw e;