X-Git-Url: http://gitweb.fperrin.net/?p=DictionaryPC.git;a=blobdiff_plain;f=data%2FdownloadInputs.sh;h=0929f63687146349e7ae8a5511183e31eeba9acb;hp=c78c127c3d39aea91ea5283d0fd8e8c41760c56f;hb=HEAD;hpb=ebed9a0fa85aae350c4d2af0b48dda5fa7b23db9 diff --git a/data/downloadInputs.sh b/data/downloadInputs.sh index c78c127..0929f63 100755 --- a/data/downloadInputs.sh +++ b/data/downloadInputs.sh @@ -4,51 +4,49 @@ OLD_DIR=`pwd` DIR=`dirname $0` cd $DIR -echo "Downloading from: http://ftp.tu-chemnitz.de/pub/Local/urz/ding/de-en-devel/" -CHEMNITZ=de-en.txt -curl --remote-name http://ftp.tu-chemnitz.de/pub/Local/urz/ding/de-en-devel/${CHEMNITZ}.gz -gunzip ${CHEMNITZ}.gz -mv ${CHEMNITZ} inputs/ - echo "Note that unzipping is slow." L=en echo "Downloading from: http://dumps.wikimedia.org/${L}wiktionary/" -WIKI=${L}wiktionary-20120220-pages-articles.xml -curl --remote-name http://dumps.wikimedia.org/${L}wiktionary/20120220/${WIKI}.bz2 -bunzip2 ${WIKI}.bz2 -mv ${WIKI} inputs/${L}wiktionary-pages-articles.xml +WIKI=${L}wiktionary-latest-pages-articles.xml +curl -L --remote-name http://dumps.wikimedia.org/${L}wiktionary/latest/${WIKI}.bz2 +mv ${WIKI}.bz2 inputs/${L}wiktionary-pages-articles.xml.bz2 -exit +echo "Downloading from: http://ftp.tu-chemnitz.de/pub/Local/urz/ding/de-en-devel/" +CHEMNITZ=de-en.txt +curl -L --remote-name http://ftp.tu-chemnitz.de/pub/Local/urz/ding/de-en-devel/${CHEMNITZ}.gz +mv ${CHEMNITZ}.gz inputs/de-en_chemnitz.txt.gz L=fr echo "Downloading from: http://dumps.wikimedia.org/${L}wiktionary/" -WIKI=${L}wiktionary-20120106-pages-articles.xml -curl --remote-name http://dumps.wikimedia.org/${L}wiktionary/20120106/${WIKI}.bz2 -bunzip2 ${WIKI}.bz2 -mv ${WIKI} inputs/${L}wiktionary-pages-articles.xml +WIKI=${L}wiktionary-latest-pages-articles.xml +curl -L --remote-name http://dumps.wikimedia.org/${L}wiktionary/latest/${WIKI}.bz2 +mv ${WIKI}.bz2 inputs/${L}wiktionary-pages-articles.xml.bz2 L=it echo "Downloading from: http://dumps.wikimedia.org/${L}wiktionary/" -WIKI=${L}wiktionary-20120110-pages-articles.xml -curl --remote-name http://dumps.wikimedia.org/${L}wiktionary/20120110/${WIKI}.bz2 -bunzip2 ${WIKI}.bz2 -mv ${WIKI} inputs/${L}wiktionary-pages-articles.xml +WIKI=${L}wiktionary-latest-pages-articles.xml +curl -L --remote-name http://dumps.wikimedia.org/${L}wiktionary/latest/${WIKI}.bz2 +mv ${WIKI}.bz2 inputs/${L}wiktionary-pages-articles.xml.bz2 L=de echo "Downloading from: http://dumps.wikimedia.org/${L}wiktionary/" -WIKI=${L}wiktionary-20120111-pages-articles.xml -curl --remote-name http://dumps.wikimedia.org/${L}wiktionary/20120111/${WIKI}.bz2 -bunzip2 ${WIKI}.bz2 -mv ${WIKI} inputs/${L}wiktionary-pages-articles.xml +WIKI=${L}wiktionary-latest-pages-articles.xml +curl -L --remote-name http://dumps.wikimedia.org/${L}wiktionary/latest/${WIKI}.bz2 +mv ${WIKI}.bz2 inputs/${L}wiktionary-pages-articles.xml.bz2 L=es echo "Downloading from: http://dumps.wikimedia.org/${L}wiktionary/" -WIKI=${L}wiktionary-20120108-pages-articles.xml -curl --remote-name http://dumps.wikimedia.org/${L}wiktionary/20120108/${WIKI}.bz2 -bunzip2 ${WIKI}.bz2 -mv ${WIKI} inputs/${L}wiktionary-pages-articles.xml +WIKI=${L}wiktionary-latest-pages-articles.xml +curl -L --remote-name http://dumps.wikimedia.org/${L}wiktionary/latest/${WIKI}.bz2 +mv ${WIKI}.bz2 inputs/${L}wiktionary-pages-articles.xml.bz2 + +L=pt +echo "Downloading from: http://dumps.wikimedia.org/${L}wiktionary/" +WIKI=${L}wiktionary-latest-pages-articles.xml +curl -L --remote-name http://dumps.wikimedia.org/${L}wiktionary/latest/${WIKI}.bz2 +mv ${WIKI}.bz2 inputs/${L}wiktionary-pages-articles.xml.bz2 -echo "Done. Now run WiktionarySplitter to spit apart enwiktionary." +echo "Done. Now run WiktionarySplitter to split apart enwiktionary." cd $OLD_DIR