gitweb.fperrin.net Git - DictionaryPC.git/blobdiff - data/downloadInputs.sh
Added WholeSection entries and parser.
[DictionaryPC.git] / data / downloadInputs.sh
index 9a3492f03059351a19c447ec9374f39ee18e5cc4..c14d4604b57591c4675fbffe980285bea90a496f 100755 (executable)
@@ -1,10 +1,17 @@
 #!/bin/bash -e
 
-echo "Downloading from: http://dumps.wikimedia.org/enwiktionary/"
-WIKI=enwiktionary-20111224-pages-articles.xml
-curl --remote-name http://dumps.wikimedia.org/enwiktionary/20111224/${WIKI}.bz2
+OLD_DIR=`pwd`
+DIR=`dirname $0`
+cd $DIR
+
+echo "Note that unzipping is slow."
+
+L=en
+echo "Downloading from: http://dumps.wikimedia.org/${L}wiktionary/"
+WIKI=${L}wiktionary-20120714-pages-articles.xml
+curl --remote-name http://dumps.wikimedia.org/${L}wiktionary/20120714/${WIKI}.bz2
 bunzip2 ${WIKI}.bz2
-mv ${WIKI} inputs/
+mv ${WIKI} inputs/${L}wiktionary-pages-articles.xml
 
 echo "Downloading from: http://ftp.tu-chemnitz.de/pub/Local/urz/ding/de-en-devel/"
 CHEMNITZ=de-en.txt
@@ -12,4 +19,34 @@ curl --remote-name http://ftp.tu-chemnitz.de/pub/Local/urz/ding/de-en-devel/${CH
 gunzip ${CHEMNITZ}.gz
 mv ${CHEMNITZ} inputs/
 
+L=fr
+echo "Downloading from: http://dumps.wikimedia.org/${L}wiktionary/"
+WIKI=${L}wiktionary-20120719-pages-articles.xml
+curl --remote-name http://dumps.wikimedia.org/${L}wiktionary/20120719/${WIKI}.bz2
+bunzip2 ${WIKI}.bz2
+mv ${WIKI} inputs/${L}wiktionary-pages-articles.xml
+
+L=it
+echo "Downloading from: http://dumps.wikimedia.org/${L}wiktionary/"
+WIKI=${L}wiktionary-20120720-pages-articles.xml
+curl --remote-name http://dumps.wikimedia.org/${L}wiktionary/20120720/${WIKI}.bz2
+bunzip2 ${WIKI}.bz2
+mv ${WIKI} inputs/${L}wiktionary-pages-articles.xml
+
+L=de
+echo "Downloading from: http://dumps.wikimedia.org/${L}wiktionary/"
+WIKI=${L}wiktionary-20120714-pages-articles.xml
+curl --remote-name http://dumps.wikimedia.org/${L}wiktionary/20120714/${WIKI}.bz2
+bunzip2 ${WIKI}.bz2
+mv ${WIKI} inputs/${L}wiktionary-pages-articles.xml
+
+L=es
+echo "Downloading from: http://dumps.wikimedia.org/${L}wiktionary/"
+WIKI=${L}wiktionary-20120718-pages-articles.xml
+curl --remote-name http://dumps.wikimedia.org/${L}wiktionary/20120718/${WIKI}.bz2
+bunzip2 ${WIKI}.bz2
+mv ${WIKI} inputs/${L}wiktionary-pages-articles.xml
+
 echo "Done.  Now run WiktionarySplitter to spit apart enwiktionary."
+
+cd $OLD_DIR