gitweb.fperrin.net Git - DictionaryPC.git/blob - data/downloadInputs.sh
b5640b7778928fd1c1519fe42a65799f75fb89ad
[DictionaryPC.git] / data / downloadInputs.sh
#!/bin/bash
#
# Downloads the raw dictionary inputs into the inputs/ directory that sits
# next to this script:
#   - the latest Wiktionary pages-articles dump for en, fr, it, de, es and pt
#     (from dumps.wikimedia.org), and
#   - the Chemnitz de-en word list (from ftp.tu-chemnitz.de).
#
# Each archive is downloaded into the script's directory, decompressed in
# place, and the result is moved to inputs/ under its final name.
#
# Usage: downloadInputs.sh   (takes no arguments; needs curl, bunzip2, gunzip)

# Abort on the first failing command.  This is set here rather than in the
# shebang so that it still applies when the script is run as
# `bash downloadInputs.sh`.
set -e

OLD_DIR=$(pwd)
DIR=$(dirname -- "$0")
cd -- "$DIR"

# Make sure the destination exists before starting, so a missing directory
# cannot make the final mv fail after a long download and decompression.
mkdir -p inputs

echo "Note that unzipping is slow."

#######################################
# Downloads and unpacks the latest Wiktionary dump for one language.
# Arguments: $1 - Wiktionary language code (e.g. en, fr)
# Outputs:   progress message on stdout;
#            writes inputs/<code>wiktionary-pages-articles.xml
# Returns:   non-zero (aborting the script under set -e) if any step fails
#######################################
download_wiktionary() {
  local lang=$1
  local wiki="${lang}wiktionary-latest-pages-articles.xml"

  echo "Downloading from: http://dumps.wikimedia.org/${lang}wiktionary/"
  # --fail: treat an HTTP error as a failure instead of saving the server's
  # error page as the archive and only noticing when decompression breaks.
  curl --fail -L --remote-name \
    "http://dumps.wikimedia.org/${lang}wiktionary/latest/${wiki}.bz2"
  # --force: overwrite a stale decompressed file left behind by an earlier
  # interrupted run.  Previously only the fr download did this.
  bunzip2 --force "${wiki}.bz2"
  mv -- "${wiki}" "inputs/${lang}wiktionary-pages-articles.xml"
}

#######################################
# Downloads and unpacks the Chemnitz de-en word list.
# Arguments: none
# Outputs:   progress message on stdout; writes inputs/de-en_chemnitz.txt
# Returns:   non-zero (aborting the script under set -e) if any step fails
#######################################
download_chemnitz() {
  local chemnitz=de-en.txt

  echo "Downloading from: http://ftp.tu-chemnitz.de/pub/Local/urz/ding/de-en-devel/"
  curl --fail -L --remote-name \
    "http://ftp.tu-chemnitz.de/pub/Local/urz/ding/de-en-devel/${chemnitz}.gz"
  # --force for the same reason as in download_wiktionary: tolerate a
  # leftover de-en.txt from a previous run.
  gunzip --force "${chemnitz}.gz"
  mv -- "${chemnitz}" inputs/de-en_chemnitz.txt
}

# Same download order as before: en, Chemnitz, then the remaining languages.
download_wiktionary en
download_chemnitz
for lang in fr it de es pt; do
  download_wiktionary "${lang}"
done

echo "Done.  Now run WiktionarySplitter to split apart enwiktionary."

cd -- "$OLD_DIR"