X-Git-Url: http://gitweb.fperrin.net/?p=DictionaryPC.git;a=blobdiff_plain;f=data%2FdownloadInputs.sh;fp=data%2FdownloadInputs.sh;h=412ab18f4413f3b63f00517e9fe1df0dc7bd5003;hp=89e584a6f52612ef4270a08b31412af93643146f;hb=1aa4de25c859304d21acfadd18cb546d1c21415b;hpb=297e7670b0c1487cdddb82dd2259f902d4ed80ae diff --git a/data/downloadInputs.sh b/data/downloadInputs.sh index 89e584a..412ab18 100755 --- a/data/downloadInputs.sh +++ b/data/downloadInputs.sh @@ -5,18 +5,48 @@ DIR=`dirname $0` cd $DIR -echo "Downloading from: http://dumps.wikimedia.org/enwiktionary/" -WIKI=enwiktionary-20111224-pages-articles.xml -curl --remote-name http://dumps.wikimedia.org/enwiktionary/20111224/${WIKI}.bz2 -echo "Unzipping... this is slow." -bunzip2 ${WIKI}.bz2 -mv ${WIKI} inputs/ - echo "Downloading from: http://ftp.tu-chemnitz.de/pub/Local/urz/ding/de-en-devel/" CHEMNITZ=de-en.txt -curl --remote-name http://ftp.tu-chemnitz.de/pub/Local/urz/ding/de-en-devel/${CHEMNITZ}.gz -gunzip ${CHEMNITZ}.gz -mv ${CHEMNITZ} inputs/ +#curl --remote-name http://ftp.tu-chemnitz.de/pub/Local/urz/ding/de-en-devel/${CHEMNITZ}.gz +#gunzip ${CHEMNITZ}.gz +#mv ${CHEMNITZ} inputs/ + +echo "Note that unzipping is slow." + +L=en +echo "Downloading from: http://dumps.wikimedia.org/${L}wiktionary/" +WIKI=${L}wiktionary-20120109-pages-articles.xml +curl --remote-name http://dumps.wikimedia.org/${L}wiktionary/20120109/${WIKI}.bz2 +bunzip2 ${WIKI}.bz2 +mv ${WIKI} inputs/${L}wiktionary-pages-articles.xml + +L=fr +echo "Downloading from: http://dumps.wikimedia.org/${L}wiktionary/" +WIKI=${L}wiktionary-20120106-pages-articles.xml +curl --remote-name http://dumps.wikimedia.org/${L}wiktionary/20120106/${WIKI}.bz2 +bunzip2 ${WIKI}.bz2 +mv ${WIKI} inputs/${L}wiktionary-pages-articles.xml + +L=it +echo "Downloading from: http://dumps.wikimedia.org/${L}wiktionary/" +WIKI=${L}wiktionary-20120110-pages-articles.xml +curl --remote-name http://dumps.wikimedia.org/${L}wiktionary/20120110/${WIKI}.bz2 +bunzip2 ${WIKI}.bz2 +mv ${WIKI} inputs/${L}wiktionary-pages-articles.xml + +L=de +echo "Downloading from: http://dumps.wikimedia.org/${L}wiktionary/" +WIKI=${L}wiktionary-20120111-pages-articles.xml +curl --remote-name http://dumps.wikimedia.org/${L}wiktionary/20120111/${WIKI}.bz2 +bunzip2 ${WIKI}.bz2 +mv ${WIKI} inputs/${L}wiktionary-pages-articles.xml + +L=es +echo "Downloading from: http://dumps.wikimedia.org/${L}wiktionary/" +WIKI=${L}wiktionary-20120108-pages-articles.xml +curl --remote-name http://dumps.wikimedia.org/${L}wiktionary/20120108/${WIKI}.bz2 +bunzip2 ${WIKI}.bz2 +mv ${WIKI} inputs/${L}wiktionary-pages-articles.xml echo "Done. Now run WiktionarySplitter to spit apart enwiktionary."