X-Git-Url: http://gitweb.fperrin.net/?a=blobdiff_plain;f=data%2FdownloadInputs.sh;h=b5640b7778928fd1c1519fe42a65799f75fb89ad;hb=507089818bd8b7007f43fd118270fa9596e674fd;hp=89e584a6f52612ef4270a08b31412af93643146f;hpb=18ebad605e64286706fabb0b842b086a29c0e6ed;p=DictionaryPC.git diff --git a/data/downloadInputs.sh b/data/downloadInputs.sh old mode 100644 new mode 100755 index 89e584a..b5640b7 --- a/data/downloadInputs.sh +++ b/data/downloadInputs.sh @@ -2,22 +2,58 @@ OLD_DIR=`pwd` DIR=`dirname $0` - cd $DIR -echo "Downloading from: http://dumps.wikimedia.org/enwiktionary/" -WIKI=enwiktionary-20111224-pages-articles.xml -curl --remote-name http://dumps.wikimedia.org/enwiktionary/20111224/${WIKI}.bz2 -echo "Unzipping... this is slow." +echo "Note that unzipping is slow." + +L=en +echo "Downloading from: http://dumps.wikimedia.org/${L}wiktionary/" +WIKI=${L}wiktionary-latest-pages-articles.xml +curl -L --remote-name http://dumps.wikimedia.org/${L}wiktionary/latest/${WIKI}.bz2 bunzip2 ${WIKI}.bz2 -mv ${WIKI} inputs/ +mv ${WIKI} inputs/${L}wiktionary-pages-articles.xml echo "Downloading from: http://ftp.tu-chemnitz.de/pub/Local/urz/ding/de-en-devel/" CHEMNITZ=de-en.txt -curl --remote-name http://ftp.tu-chemnitz.de/pub/Local/urz/ding/de-en-devel/${CHEMNITZ}.gz +curl -L --remote-name http://ftp.tu-chemnitz.de/pub/Local/urz/ding/de-en-devel/${CHEMNITZ}.gz gunzip ${CHEMNITZ}.gz -mv ${CHEMNITZ} inputs/ +mv ${CHEMNITZ} inputs/de-en_chemnitz.txt + +L=fr +echo "Downloading from: http://dumps.wikimedia.org/${L}wiktionary/" +WIKI=${L}wiktionary-latest-pages-articles.xml +curl -L --remote-name http://dumps.wikimedia.org/${L}wiktionary/latest/${WIKI}.bz2 +bunzip2 --force ${WIKI}.bz2 +mv ${WIKI} inputs/${L}wiktionary-pages-articles.xml + +L=it +echo "Downloading from: http://dumps.wikimedia.org/${L}wiktionary/" +WIKI=${L}wiktionary-latest-pages-articles.xml +curl -L --remote-name http://dumps.wikimedia.org/${L}wiktionary/latest/${WIKI}.bz2 +bunzip2 ${WIKI}.bz2 +mv ${WIKI} inputs/${L}wiktionary-pages-articles.xml + +L=de +echo "Downloading from: http://dumps.wikimedia.org/${L}wiktionary/" +WIKI=${L}wiktionary-latest-pages-articles.xml +curl -L --remote-name http://dumps.wikimedia.org/${L}wiktionary/latest/${WIKI}.bz2 +bunzip2 ${WIKI}.bz2 +mv ${WIKI} inputs/${L}wiktionary-pages-articles.xml + +L=es +echo "Downloading from: http://dumps.wikimedia.org/${L}wiktionary/" +WIKI=${L}wiktionary-latest-pages-articles.xml +curl -L --remote-name http://dumps.wikimedia.org/${L}wiktionary/latest/${WIKI}.bz2 +bunzip2 ${WIKI}.bz2 +mv ${WIKI} inputs/${L}wiktionary-pages-articles.xml + +L=pt +echo "Downloading from: http://dumps.wikimedia.org/${L}wiktionary/" +WIKI=${L}wiktionary-latest-pages-articles.xml +curl -L --remote-name http://dumps.wikimedia.org/${L}wiktionary/latest/${WIKI}.bz2 +bunzip2 ${WIKI}.bz2 +mv ${WIKI} inputs/${L}wiktionary-pages-articles.xml -echo "Done. Now run WiktionarySplitter to spit apart enwiktionary." +echo "Done. Now run WiktionarySplitter to split apart enwiktionary." cd $OLD_DIR