gitweb.fperrin.net Git - DictionaryPC.git/commitdiff
more downloads
author Thad Hughes <thadh@google.com>
Wed, 11 Jan 2012 22:14:29 +0000 (14:14 -0800)
committer Thad Hughes <thadh@google.com>
Wed, 11 Jan 2012 22:14:29 +0000 (14:14 -0800)
data/downloadInputs.sh

index 89e584a6f52612ef4270a08b31412af93643146f..c0cc8ee11ea90887daa271042459b0e1bd269db8 100755 (executable)
@@ -5,18 +5,48 @@ DIR=`dirname $0`
 
 cd $DIR
 
-echo "Downloading from: http://dumps.wikimedia.org/enwiktionary/"
-WIKI=enwiktionary-20111224-pages-articles.xml
-curl --remote-name http://dumps.wikimedia.org/enwiktionary/20111224/${WIKI}.bz2
-echo "Unzipping... this is slow."
-bunzip2 ${WIKI}.bz2
-mv ${WIKI} inputs/
-
 echo "Downloading from: http://ftp.tu-chemnitz.de/pub/Local/urz/ding/de-en-devel/"
 CHEMNITZ=de-en.txt
-curl --remote-name http://ftp.tu-chemnitz.de/pub/Local/urz/ding/de-en-devel/${CHEMNITZ}.gz
-gunzip ${CHEMNITZ}.gz
-mv ${CHEMNITZ} inputs/
+#curl --remote-name http://ftp.tu-chemnitz.de/pub/Local/urz/ding/de-en-devel/${CHEMNITZ}.gz
+#gunzip ${CHEMNITZ}.gz
+#mv ${CHEMNITZ} inputs/
+
+echo "Note that unzipping is slow."
+
+L=en
+echo "Downloading from: http://dumps.wikimedia.org/${L}wiktionary/"
+WIKI=${L}wiktionary-20111224-pages-articles.xml
+#curl --remote-name http://dumps.wikimedia.org/${L}wiktionary/20111224/${WIKI}.bz2
+#bunzip2 ${WIKI}.bz2
+#mv ${WIKI} inputs/${L}wiktionary-pages-articles.xml
+
+L=fr
+echo "Downloading from: http://dumps.wikimedia.org/${L}wiktionary/"
+WIKI=${L}wiktionary-20120106-pages-articles.xml
+curl --remote-name http://dumps.wikimedia.org/${L}wiktionary/20120106/${WIKI}.bz2
+bunzip2 ${WIKI}.bz2
+mv ${WIKI} inputs/${L}wiktionary-pages-articles.xml
+
+L=it
+echo "Downloading from: http://dumps.wikimedia.org/${L}wiktionary/"
+WIKI=${L}wiktionary-20120110-pages-articles.xml
+curl --remote-name http://dumps.wikimedia.org/${L}wiktionary/20120110/${WIKI}.bz2
+bunzip2 ${WIKI}.bz2
+mv ${WIKI} inputs/${L}wiktionary-pages-articles.xml
+
+L=de
+echo "Downloading from: http://dumps.wikimedia.org/${L}wiktionary/"
+WIKI=${L}wiktionary-20120111-pages-articles.xml
+curl --remote-name http://dumps.wikimedia.org/${L}wiktionary/20120111/${WIKI}.bz2
+bunzip2 ${WIKI}.bz2
+mv ${WIKI} inputs/${L}wiktionary-pages-articles.xml
+
+L=es
+echo "Downloading from: http://dumps.wikimedia.org/${L}wiktionary/"
+WIKI=${L}wiktionary-20120108-pages-articles.xml
+curl --remote-name http://dumps.wikimedia.org/${L}wiktionary/20120108/${WIKI}.bz2
+bunzip2 ${WIKI}.bz2
+mv ${WIKI} inputs/${L}wiktionary-pages-articles.xml
 
 echo "Done.  Now run WiktionarySplitter to spit apart enwiktionary."