# gitweb.fperrin.net Git - DictionaryPC.git/blob - data/downloadInputs.sh
# Commit message: Updated to latest enwiktionary.
# [DictionaryPC.git] / data / downloadInputs.sh
#!/bin/bash
#
# Downloads the raw dictionary inputs into the inputs/ directory that sits
# next to this script.
#
# Usage: downloadInputs.sh [TARGET...]
#   TARGET is one of:
#     en fr it de es   the Wiktionary pages-articles dump for that language
#     chemnitz         the TU Chemnitz de-en word list
#     all              everything above, in the order en chemnitz fr it de es
#
# With no TARGET only the English Wiktionary dump is fetched. That is what
# the script effectively did before: an unconditional `exit` placed right
# after the English download made every later step unreachable. The other
# downloads are kept, but must now be requested explicitly.
#
# Requires: curl, bunzip2, gunzip.

# Strict mode lives here rather than in the shebang so that it still applies
# when the script is started as `bash downloadInputs.sh`.
set -euo pipefail

readonly WIKIMEDIA_DUMPS=http://dumps.wikimedia.org
readonly CHEMNITZ_URL=http://ftp.tu-chemnitz.de/pub/Local/urz/ding/de-en-devel/

# Prints the usage summary to stderr and exits with status 2.
usage() {
  printf 'Usage: %s [en|fr|it|de|es|chemnitz|all]...\n' "${0##*/}" >&2
  exit 2
}

# Prints the pinned dump date (YYYYMMDD) for the Wiktionary language code $1.
# Returns non-zero for a language that has no pinned dump.
wiktionary_dump_date() {
  case "$1" in
    en) printf '%s\n' 20120505 ;;
    fr) printf '%s\n' 20120106 ;;
    it) printf '%s\n' 20120110 ;;
    de) printf '%s\n' 20120111 ;;
    es) printf '%s\n' 20120108 ;;
    *) return 1 ;;
  esac
}

# Downloads and unpacks the pinned Wiktionary dump for language code $1, then
# moves it to inputs/<lang>wiktionary-pages-articles.xml.
download_wiktionary() {
  local lang=$1
  local dump_date
  dump_date=$(wiktionary_dump_date "${lang}")
  local wiki="${lang}wiktionary-${dump_date}-pages-articles.xml"

  echo "Downloading from: ${WIKIMEDIA_DUMPS}/${lang}wiktionary/"
  # --fail: exit non-zero on an HTTP error instead of saving the error page
  # under the .bz2 name. --location: follow redirects.
  curl --fail --location --remote-name \
    "${WIKIMEDIA_DUMPS}/${lang}wiktionary/${dump_date}/${wiki}.bz2"
  bunzip2 "${wiki}.bz2"
  mv "${wiki}" "inputs/${lang}wiktionary-pages-articles.xml"
}

# Downloads and unpacks the TU Chemnitz de-en word list into inputs/.
download_chemnitz() {
  local chemnitz=de-en.txt

  echo "Downloading from: ${CHEMNITZ_URL}"
  curl --fail --location --remote-name "${CHEMNITZ_URL}${chemnitz}.gz"
  gunzip "${chemnitz}.gz"
  mv "${chemnitz}" inputs/
}

# Fetches a single TARGET ($1); `all` expands to every known target in the
# order the original script listed them.
fetch_target() {
  local each
  case "$1" in
    all)
      for each in en chemnitz fr it de es; do
        fetch_target "${each}"
      done
      ;;
    chemnitz) download_chemnitz ;;
    *) download_wiktionary "$1" ;;
  esac
}

main() {
  # Default: English Wiktionary only (see the header comment).
  if (( $# == 0 )); then
    set -- en
  fi

  # Reject unknown targets before anything is downloaded.
  local target
  for target in "$@"; do
    case "${target}" in
      all | chemnitz | en | fr | it | de | es) ;;
      *) usage ;;
    esac
  done

  # Work next to the script so that inputs/ resolves no matter where the
  # script was started from. The caller's working directory is not restored
  # afterwards: a cd inside an executed script never affects the caller.
  cd -- "$(dirname -- "$0")"
  mkdir -p inputs

  echo "Note that unzipping is slow."

  for target in "$@"; do
    fetch_target "${target}"
  done

  echo "Done.  Now run WiktionarySplitter to spit apart enwiktionary."
}

main "$@"