gitweb.fperrin.net Git - DictionaryPC.git/blobdiff - data/downloadInputs.sh
Bug-fixes to WikiTokenizer (handle weird line-feed), update to newest Wiktionary dump (20120220)
[DictionaryPC.git] / data / downloadInputs.sh
index f0863bfecb6188136cc716cd859b8056710a45f8..c78c127c3d39aea91ea5283d0fd8e8c41760c56f 100755 (executable)
@@ -6,18 +6,20 @@ cd $DIR
 
 echo "Downloading from: http://ftp.tu-chemnitz.de/pub/Local/urz/ding/de-en-devel/"
 CHEMNITZ=de-en.txt
-#curl --remote-name http://ftp.tu-chemnitz.de/pub/Local/urz/ding/de-en-devel/${CHEMNITZ}.gz
-#gunzip ${CHEMNITZ}.gz
-#mv ${CHEMNITZ} inputs/
+curl --remote-name http://ftp.tu-chemnitz.de/pub/Local/urz/ding/de-en-devel/${CHEMNITZ}.gz
+gunzip ${CHEMNITZ}.gz
+mv ${CHEMNITZ} inputs/
 
 echo "Note that unzipping is slow."
 
 L=en
 echo "Downloading from: http://dumps.wikimedia.org/${L}wiktionary/"
-WIKI=${L}wiktionary-20120109-pages-articles.xml
-#curl --remote-name http://dumps.wikimedia.org/${L}wiktionary/20120109/${WIKI}.bz2
-#bunzip2 ${WIKI}.bz2
-#mv ${WIKI} inputs/${L}wiktionary-pages-articles.xml
+WIKI=${L}wiktionary-20120220-pages-articles.xml
+curl --remote-name http://dumps.wikimedia.org/${L}wiktionary/20120220/${WIKI}.bz2
+bunzip2 ${WIKI}.bz2
+mv ${WIKI} inputs/${L}wiktionary-pages-articles.xml
+
+exit
 
 L=fr
 echo "Downloading from: http://dumps.wikimedia.org/${L}wiktionary/"