]> gitweb.fperrin.net Git - DictionaryPC.git/blob - data/downloadInputs.sh
more downloads
[DictionaryPC.git] / data / downloadInputs.sh
#!/bin/bash -e
#
# Downloads the raw dictionary inputs and places them under inputs/, relative
# to the directory this script lives in.
#
# For each enabled language the Wiktionary "pages-articles" dump is fetched
# with curl, decompressed with bunzip2, and moved to
#   inputs/<lang>wiktionary-pages-articles.xml
#
# The Chemnitz de-en download and the English Wiktionary download are
# currently disabled (commented out); only their banner lines are printed.
#
# NOTE(review): nothing here creates inputs/ -- it is presumably expected to
# exist already; confirm before running on a fresh checkout.

# Repeat -e here: the flag on the shebang line is dropped when the script is
# started as `bash downloadInputs.sh`.
set -e

OLD_DIR=$(pwd)
DIR=$(dirname -- "$0")

# Quoted so that a checkout path containing spaces or glob characters works.
cd -- "$DIR"

#######################################
# Fetch one Wiktionary dump, unpack it, and move it into inputs/.
# Arguments:
#   $1 - language code used as the wiki prefix (e.g. fr)
#   $2 - dump date as it appears in the dump URL (e.g. 20120106)
# Outputs:
#   Prints a "Downloading from" banner to stdout.
# Returns:
#   Non-zero if curl, bunzip2 or mv fails (the script then stops under -e).
#######################################
download_wiktionary() {
  local lang=$1
  local dump_date=$2
  local dump="${lang}wiktionary-${dump_date}-pages-articles.xml"

  echo "Downloading from: http://dumps.wikimedia.org/${lang}wiktionary/"
  # --fail: stop on an HTTP error instead of saving the server's error page
  # as the .bz2 file and only failing later inside bunzip2.
  curl --fail --remote-name \
    "http://dumps.wikimedia.org/${lang}wiktionary/${dump_date}/${dump}.bz2"
  bunzip2 "${dump}.bz2"
  mv "${dump}" "inputs/${lang}wiktionary-pages-articles.xml"
}

echo "Downloading from: http://ftp.tu-chemnitz.de/pub/Local/urz/ding/de-en-devel/"
CHEMNITZ=de-en.txt
# Chemnitz download is disabled; uncomment to re-enable.
#curl --fail --remote-name "http://ftp.tu-chemnitz.de/pub/Local/urz/ding/de-en-devel/${CHEMNITZ}.gz"
#gunzip "${CHEMNITZ}.gz"
#mv "${CHEMNITZ}" inputs/

echo "Note that unzipping is slow."

# English download is disabled; only the banner is printed. Uncomment the
# download_wiktionary call (and drop the echo) to re-enable it.
echo "Downloading from: http://dumps.wikimedia.org/enwiktionary/"
#download_wiktionary en 20111224

download_wiktionary fr 20120106
download_wiktionary it 20120110
download_wiktionary de 20120111
download_wiktionary es 20120108

echo "Done.  Now run WiktionarySplitter to spit apart enwiktionary."

cd -- "$OLD_DIR"