X-Git-Url: http://gitweb.fperrin.net/?a=blobdiff_plain;f=data%2FdownloadInputs.sh;h=3135eb04bbd6f35ed20e1a6b9ebc548fd147e7a8;hb=f73bc969da1b56b315b7de9660a1f5e0ee5fddcc;hp=9db64a09c010bc997f19d14b48c21ee86ee7f684;hpb=07793b9c94c9fcf75f6f1797b9999da0b876dbf2;p=DictionaryPC.git diff --git a/data/downloadInputs.sh b/data/downloadInputs.sh index 9db64a0..3135eb0 100755 --- a/data/downloadInputs.sh +++ b/data/downloadInputs.sh @@ -8,45 +8,45 @@ echo "Note that unzipping is slow." L=en echo "Downloading from: http://dumps.wikimedia.org/${L}wiktionary/" -WIKI=${L}wiktionary-20121208-pages-articles.xml -#curl --remote-name http://dumps.wikimedia.org/${L}wiktionary/20121208/${WIKI}.bz2 -#bunzip2 ${WIKI}.bz2 -#mv ${WIKI} inputs/${L}wiktionary-pages-articles.xml +WIKI=${L}wiktionary-latest-pages-articles.xml +curl --remote-name http://dumps.wikimedia.org/${L}wiktionary/latest/${WIKI}.bz2 +bunzip2 ${WIKI}.bz2 +mv ${WIKI} inputs/${L}wiktionary-pages-articles.xml echo "Downloading from: http://ftp.tu-chemnitz.de/pub/Local/urz/ding/de-en-devel/" CHEMNITZ=de-en.txt -#curl --remote-name http://ftp.tu-chemnitz.de/pub/Local/urz/ding/de-en-devel/${CHEMNITZ}.gz -#gunzip ${CHEMNITZ}.gz -#mv ${CHEMNITZ} inputs/ +curl --remote-name http://ftp.tu-chemnitz.de/pub/Local/urz/ding/de-en-devel/${CHEMNITZ}.gz +gunzip ${CHEMNITZ}.gz +mv ${CHEMNITZ} inputs/de-en_chemnitz.txt L=fr echo "Downloading from: http://dumps.wikimedia.org/${L}wiktionary/" -WIKI=${L}wiktionary-20121204-pages-articles.xml -#curl --remote-name http://dumps.wikimedia.org/${L}wiktionary/20121204/${WIKI}.bz2 -#bunzip2 ${WIKI}.bz2 -#mv ${WIKI} inputs/${L}wiktionary-pages-articles.xml +WIKI=${L}wiktionary-latest-pages-articles.xml +curl --remote-name http://dumps.wikimedia.org/${L}wiktionary/latest/${WIKI}.bz2 +bunzip2 --force ${WIKI}.bz2 +mv ${WIKI} inputs/${L}wiktionary-pages-articles.xml L=it echo "Downloading from: http://dumps.wikimedia.org/${L}wiktionary/" -WIKI=${L}wiktionary-20121213-pages-articles.xml -curl --remote-name http://dumps.wikimedia.org/${L}wiktionary/20121213/${WIKI}.bz2 +WIKI=${L}wiktionary-latest-pages-articles.xml +curl --remote-name http://dumps.wikimedia.org/${L}wiktionary/latest/${WIKI}.bz2 bunzip2 ${WIKI}.bz2 mv ${WIKI} inputs/${L}wiktionary-pages-articles.xml L=de echo "Downloading from: http://dumps.wikimedia.org/${L}wiktionary/" -WIKI=${L}wiktionary-20121206-pages-articles.xml -curl --remote-name http://dumps.wikimedia.org/${L}wiktionary/20121206/${WIKI}.bz2 +WIKI=${L}wiktionary-latest-pages-articles.xml +curl --remote-name http://dumps.wikimedia.org/${L}wiktionary/latest/${WIKI}.bz2 bunzip2 ${WIKI}.bz2 mv ${WIKI} inputs/${L}wiktionary-pages-articles.xml L=es echo "Downloading from: http://dumps.wikimedia.org/${L}wiktionary/" -WIKI=${L}wiktionary-20121210-pages-articles.xml -curl --remote-name http://dumps.wikimedia.org/${L}wiktionary/20121210/${WIKI}.bz2 +WIKI=${L}wiktionary-latest-pages-articles.xml +curl --remote-name http://dumps.wikimedia.org/${L}wiktionary/latest/${WIKI}.bz2 bunzip2 ${WIKI}.bz2 mv ${WIKI} inputs/${L}wiktionary-pages-articles.xml -echo "Done. Now run WiktionarySplitter to spit apart enwiktionary." +echo "Done. Now run WiktionarySplitter to split apart enwiktionary." cd $OLD_DIR