X-Git-Url: http://gitweb.fperrin.net/?a=blobdiff_plain;f=data%2FdownloadInputs.sh;h=b5640b7778928fd1c1519fe42a65799f75fb89ad;hb=17ea18fedb5e5e4c2b8b5b4a9a0bb745c25ecd23;hp=e5a5400126709f9d7c0b9f9a43018202a5e2cdf0;hpb=7aa8dbe5efcc7abcc4f78cdf878dce80a5ea83f7;p=DictionaryPC.git diff --git a/data/downloadInputs.sh b/data/downloadInputs.sh index e5a5400..b5640b7 100755 --- a/data/downloadInputs.sh +++ b/data/downloadInputs.sh @@ -8,47 +8,52 @@ echo "Note that unzipping is slow." L=en echo "Downloading from: http://dumps.wikimedia.org/${L}wiktionary/" -WIKI=${L}wiktionary-20120505-pages-articles.xml -curl --remote-name http://dumps.wikimedia.org/${L}wiktionary/20120505/${WIKI}.bz2 +WIKI=${L}wiktionary-latest-pages-articles.xml +curl -L --remote-name http://dumps.wikimedia.org/${L}wiktionary/latest/${WIKI}.bz2 bunzip2 ${WIKI}.bz2 mv ${WIKI} inputs/${L}wiktionary-pages-articles.xml -exit - echo "Downloading from: http://ftp.tu-chemnitz.de/pub/Local/urz/ding/de-en-devel/" CHEMNITZ=de-en.txt -curl --remote-name http://ftp.tu-chemnitz.de/pub/Local/urz/ding/de-en-devel/${CHEMNITZ}.gz +curl -L --remote-name http://ftp.tu-chemnitz.de/pub/Local/urz/ding/de-en-devel/${CHEMNITZ}.gz gunzip ${CHEMNITZ}.gz -mv ${CHEMNITZ} inputs/ +mv ${CHEMNITZ} inputs/de-en_chemnitz.txt L=fr echo "Downloading from: http://dumps.wikimedia.org/${L}wiktionary/" -WIKI=${L}wiktionary-20120106-pages-articles.xml -curl --remote-name http://dumps.wikimedia.org/${L}wiktionary/20120106/${WIKI}.bz2 -bunzip2 ${WIKI}.bz2 +WIKI=${L}wiktionary-latest-pages-articles.xml +curl -L --remote-name http://dumps.wikimedia.org/${L}wiktionary/latest/${WIKI}.bz2 +bunzip2 --force ${WIKI}.bz2 mv ${WIKI} inputs/${L}wiktionary-pages-articles.xml L=it echo "Downloading from: http://dumps.wikimedia.org/${L}wiktionary/" -WIKI=${L}wiktionary-20120110-pages-articles.xml -curl --remote-name http://dumps.wikimedia.org/${L}wiktionary/20120110/${WIKI}.bz2 +WIKI=${L}wiktionary-latest-pages-articles.xml +curl -L --remote-name http://dumps.wikimedia.org/${L}wiktionary/latest/${WIKI}.bz2 bunzip2 ${WIKI}.bz2 mv ${WIKI} inputs/${L}wiktionary-pages-articles.xml L=de echo "Downloading from: http://dumps.wikimedia.org/${L}wiktionary/" -WIKI=${L}wiktionary-20120111-pages-articles.xml -curl --remote-name http://dumps.wikimedia.org/${L}wiktionary/20120111/${WIKI}.bz2 +WIKI=${L}wiktionary-latest-pages-articles.xml +curl -L --remote-name http://dumps.wikimedia.org/${L}wiktionary/latest/${WIKI}.bz2 bunzip2 ${WIKI}.bz2 mv ${WIKI} inputs/${L}wiktionary-pages-articles.xml L=es echo "Downloading from: http://dumps.wikimedia.org/${L}wiktionary/" -WIKI=${L}wiktionary-20120108-pages-articles.xml -curl --remote-name http://dumps.wikimedia.org/${L}wiktionary/20120108/${WIKI}.bz2 +WIKI=${L}wiktionary-latest-pages-articles.xml +curl -L --remote-name http://dumps.wikimedia.org/${L}wiktionary/latest/${WIKI}.bz2 +bunzip2 ${WIKI}.bz2 +mv ${WIKI} inputs/${L}wiktionary-pages-articles.xml + +L=pt +echo "Downloading from: http://dumps.wikimedia.org/${L}wiktionary/" +WIKI=${L}wiktionary-latest-pages-articles.xml +curl -L --remote-name http://dumps.wikimedia.org/${L}wiktionary/latest/${WIKI}.bz2 bunzip2 ${WIKI}.bz2 mv ${WIKI} inputs/${L}wiktionary-pages-articles.xml -echo "Done. Now run WiktionarySplitter to spit apart enwiktionary." +echo "Done. Now run WiktionarySplitter to split apart enwiktionary." cd $OLD_DIR