gitweb.fperrin.net Git - DictionaryPC.git/blobdiff - data/downloadInputs.sh
WiktionarySplitter: Support compressed inputs.
[DictionaryPC.git] / data / downloadInputs.sh
index b5640b7778928fd1c1519fe42a65799f75fb89ad..0929f63687146349e7ae8a5511183e31eeba9acb 100755 (executable)
@@ -10,49 +10,42 @@ L=en
 echo "Downloading from: http://dumps.wikimedia.org/${L}wiktionary/"
 WIKI=${L}wiktionary-latest-pages-articles.xml
 curl -L --remote-name http://dumps.wikimedia.org/${L}wiktionary/latest/${WIKI}.bz2
-bunzip2 ${WIKI}.bz2
-mv ${WIKI} inputs/${L}wiktionary-pages-articles.xml
+mv ${WIKI}.bz2 inputs/${L}wiktionary-pages-articles.xml.bz2
 
 echo "Downloading from: http://ftp.tu-chemnitz.de/pub/Local/urz/ding/de-en-devel/"
 CHEMNITZ=de-en.txt
 curl -L --remote-name http://ftp.tu-chemnitz.de/pub/Local/urz/ding/de-en-devel/${CHEMNITZ}.gz
-gunzip ${CHEMNITZ}.gz
-mv ${CHEMNITZ} inputs/de-en_chemnitz.txt
+mv ${CHEMNITZ}.gz inputs/de-en_chemnitz.txt.gz
 
 L=fr
 echo "Downloading from: http://dumps.wikimedia.org/${L}wiktionary/"
 WIKI=${L}wiktionary-latest-pages-articles.xml
 curl -L --remote-name http://dumps.wikimedia.org/${L}wiktionary/latest/${WIKI}.bz2
-bunzip2 --force ${WIKI}.bz2
-mv ${WIKI} inputs/${L}wiktionary-pages-articles.xml
+mv ${WIKI}.bz2 inputs/${L}wiktionary-pages-articles.xml.bz2
 
 L=it
 echo "Downloading from: http://dumps.wikimedia.org/${L}wiktionary/"
 WIKI=${L}wiktionary-latest-pages-articles.xml
 curl -L --remote-name http://dumps.wikimedia.org/${L}wiktionary/latest/${WIKI}.bz2
-bunzip2 ${WIKI}.bz2
-mv ${WIKI} inputs/${L}wiktionary-pages-articles.xml
+mv ${WIKI}.bz2 inputs/${L}wiktionary-pages-articles.xml.bz2
 
 L=de
 echo "Downloading from: http://dumps.wikimedia.org/${L}wiktionary/"
 WIKI=${L}wiktionary-latest-pages-articles.xml
 curl -L --remote-name http://dumps.wikimedia.org/${L}wiktionary/latest/${WIKI}.bz2
-bunzip2 ${WIKI}.bz2
-mv ${WIKI} inputs/${L}wiktionary-pages-articles.xml
+mv ${WIKI}.bz2 inputs/${L}wiktionary-pages-articles.xml.bz2
 
 L=es
 echo "Downloading from: http://dumps.wikimedia.org/${L}wiktionary/"
 WIKI=${L}wiktionary-latest-pages-articles.xml
 curl -L --remote-name http://dumps.wikimedia.org/${L}wiktionary/latest/${WIKI}.bz2
-bunzip2 ${WIKI}.bz2
-mv ${WIKI} inputs/${L}wiktionary-pages-articles.xml
+mv ${WIKI}.bz2 inputs/${L}wiktionary-pages-articles.xml.bz2
 
 L=pt
 echo "Downloading from: http://dumps.wikimedia.org/${L}wiktionary/"
 WIKI=${L}wiktionary-latest-pages-articles.xml
 curl -L --remote-name http://dumps.wikimedia.org/${L}wiktionary/latest/${WIKI}.bz2
-bunzip2 ${WIKI}.bz2
-mv ${WIKI} inputs/${L}wiktionary-pages-articles.xml
+mv ${WIKI}.bz2 inputs/${L}wiktionary-pages-articles.xml.bz2
 
 echo "Done.  Now run WiktionarySplitter to split apart enwiktionary."