gitweb.fperrin.net Git - DictionaryPC.git/blobdiff - data/downloadInputs.sh
Script additions and improvements for dictionary generation.
[DictionaryPC.git] / data / downloadInputs.sh
index 5e7990fbe39bb68ad4d01023e045b5b78c90c4c9..3135eb04bbd6f35ed20e1a6b9ebc548fd147e7a8 100755 (executable)
@@ -8,8 +8,8 @@ echo "Note that unzipping is slow."
 
 L=en
 echo "Downloading from: http://dumps.wikimedia.org/${L}wiktionary/"
-WIKI=${L}wiktionary-20120930-pages-articles.xml
-curl --remote-name http://dumps.wikimedia.org/${L}wiktionary/20120930/${WIKI}.bz2
+WIKI=${L}wiktionary-latest-pages-articles.xml
+curl --remote-name http://dumps.wikimedia.org/${L}wiktionary/latest/${WIKI}.bz2
 bunzip2 ${WIKI}.bz2
 mv ${WIKI} inputs/${L}wiktionary-pages-articles.xml
 
@@ -17,36 +17,36 @@ echo "Downloading from: http://ftp.tu-chemnitz.de/pub/Local/urz/ding/de-en-devel
 CHEMNITZ=de-en.txt
 curl --remote-name http://ftp.tu-chemnitz.de/pub/Local/urz/ding/de-en-devel/${CHEMNITZ}.gz
 gunzip ${CHEMNITZ}.gz
-mv ${CHEMNITZ} inputs/
+mv ${CHEMNITZ} inputs/de-en_chemnitz.txt
 
 L=fr
 echo "Downloading from: http://dumps.wikimedia.org/${L}wiktionary/"
-WIKI=${L}wiktionary-20120926-pages-articles.xml
-curl --remote-name http://dumps.wikimedia.org/${L}wiktionary/20120926/${WIKI}.bz2
-bunzip2 ${WIKI}.bz2
+WIKI=${L}wiktionary-latest-pages-articles.xml
+curl --remote-name http://dumps.wikimedia.org/${L}wiktionary/latest/${WIKI}.bz2
+bunzip2 --force ${WIKI}.bz2
 mv ${WIKI} inputs/${L}wiktionary-pages-articles.xml
 
 L=it
 echo "Downloading from: http://dumps.wikimedia.org/${L}wiktionary/"
-WIKI=${L}wiktionary-20120926-pages-articles.xml
-curl --remote-name http://dumps.wikimedia.org/${L}wiktionary/20120926/${WIKI}.bz2
+WIKI=${L}wiktionary-latest-pages-articles.xml
+curl --remote-name http://dumps.wikimedia.org/${L}wiktionary/latest/${WIKI}.bz2
 bunzip2 ${WIKI}.bz2
 mv ${WIKI} inputs/${L}wiktionary-pages-articles.xml
 
 L=de
 echo "Downloading from: http://dumps.wikimedia.org/${L}wiktionary/"
-WIKI=${L}wiktionary-20120928-pages-articles.xml
-curl --remote-name http://dumps.wikimedia.org/${L}wiktionary/20120928/${WIKI}.bz2
+WIKI=${L}wiktionary-latest-pages-articles.xml
+curl --remote-name http://dumps.wikimedia.org/${L}wiktionary/latest/${WIKI}.bz2
 bunzip2 ${WIKI}.bz2
 mv ${WIKI} inputs/${L}wiktionary-pages-articles.xml
 
 L=es
 echo "Downloading from: http://dumps.wikimedia.org/${L}wiktionary/"
-WIKI=${L}wiktionary-20120924-pages-articles.xml
-curl --remote-name http://dumps.wikimedia.org/${L}wiktionary/20120924/${WIKI}.bz2
+WIKI=${L}wiktionary-latest-pages-articles.xml
+curl --remote-name http://dumps.wikimedia.org/${L}wiktionary/latest/${WIKI}.bz2
 bunzip2 ${WIKI}.bz2
 mv ${WIKI} inputs/${L}wiktionary-pages-articles.xml
 
-echo "Done.  Now run WiktionarySplitter to spit apart enwiktionary."
+echo "Done.  Now run WiktionarySplitter to split apart enwiktionary."
 
 cd $OLD_DIR