From b30d4ffe9312646c025c3cd2edca2a19933c9661 Mon Sep 17 00:00:00 2001
From: =?utf8?q?Reimar=20D=C3=B6ffinger?=
Date: Mon, 14 Sep 2015 21:34:42 +0200
Subject: [PATCH] Add FR-* and IT-* generation support to script.

---
Review note (text in this position is ignored when the patch is applied):
the line structure of this patch was restored from a copy whose newlines had
been collapsed into spaces; the hunk layout was rebuilt from the @@ line
counts and the diffstat. Leading indentation of script lines, if there was
any, could not be recovered - confirm against the target file. In
FR-foreign-dictlist.txt "es espangol" was corrected to "es espagnol" (the
name is passed to run.sh as --input0LangPattern), so that file's content no
longer corresponds to the blob id on its index line.

 FR-foreign-dictlist.txt  | 12 ++++++++++++
 IT-foreign-dictlist.txt  |  9 +++++++++
 generate_dictionaries.sh | 41 +++++++++++++++++++++++++++++++++++++++-
 3 files changed, 61 insertions(+), 1 deletion(-)
 create mode 100644 FR-foreign-dictlist.txt
 create mode 100644 IT-foreign-dictlist.txt

diff --git a/FR-foreign-dictlist.txt b/FR-foreign-dictlist.txt
new file mode 100644
index 0000000..bf3f694
--- /dev/null
+++ b/FR-foreign-dictlist.txt
@@ -0,0 +1,12 @@
+bg bulgare
+cs tchèque
+eo espéranto
+es espagnol
+is islandais
+it italien
+ja japonais
+la latin
+nl néerlandais
+ru russe
+uk ukrainien
+zh chinois
diff --git a/IT-foreign-dictlist.txt b/IT-foreign-dictlist.txt
new file mode 100644
index 0000000..b5acad1
--- /dev/null
+++ b/IT-foreign-dictlist.txt
@@ -0,0 +1,9 @@
+el greco
+es spagnolo
+hu ungherese
+ja giapponese
+la latino
+lv lettone
+nl nederlandese
+pl polacco
+sv svedese
diff --git a/generate_dictionaries.sh b/generate_dictionaries.sh
index d60b59c..057cea5 100755
--- a/generate_dictionaries.sh
+++ b/generate_dictionaries.sh
@@ -3,6 +3,10 @@ DE_DICTS=true
 #DE_DICTS=false
 EN_DICTS=true
 #EN_DICTS=false
+FR_DICTS=true
+#FR_DICTS=false
+IT_DICTS=true
+#IT_DICTS=false
 # Spanish is unfortunately not yet working
 SINGLE_DICTS="en de fr it"
 #SINGLE_DICTS=""
@@ -54,9 +58,44 @@ fi
 stoplist=""
 test -e data/inputs/stoplists/${langcode}.txt && stoplist="--lang2Stoplist=data/inputs/stoplists/${langcode}.txt"
 ./run.sh --lang1=DE --lang2=$lang --lang1Stoplist=data/inputs/stoplists/de.txt $stoplist --dictOut=data/outputs/DE-${lang}.quickdic --dictInfo="(DE)Wiktionary-based DE-$lang dictionary." --input0=data/inputs/wikiSplit/de/${lang}.data --input0Name=dewikitionary --input0Format=enwiktionary --input0LangPattern=${langname} --input0LangCodePattern=${langcode} --input0EnIndex=1 --input0WiktionaryType=EnForeign --input1=data/inputs/wikiSplit/en/EN.data --input1Name=enwikitionary --input1Format=EnTranslationToTranslation --input1LangPattern1=de --input1LangPattern2=${langcode} --input2=data/inputs/wikiSplit/de/${lang}.data --input2Format=WholeSectionToHtmlParser --input2Name=dewikitionary --input2WiktionaryLang=DE --input2TitleIndex=2 --input2WebUrlTemplate=http://de.wiktionary.org/wiki/%s $reverse_dicts
-#./run.sh --lang1=EN --lang2=$lang --lang1Stoplist=data/inputs/stoplists/en.txt $stoplist --dictOut=data/outputs/EN-${lang}.quickdic --dictInfo="(EN)Wiktionary-based EN-$lang dictionary." --input0=data/inputs/wikiSplit/en/${lang}.data --input0Name=enwikitionary --input0Format=enwiktionary --input0LangPattern=${langname} --input0LangCodePattern=${langcode} --input0EnIndex=1 --input0WiktionaryType=EnForeign --input1=data/inputs/wikiSplit/en/EN.data --input1Name=enwikitionary --input1Format=enwiktionary --input1LangPattern=${langname} --input1LangCodePattern=${langcode} --input1EnIndex=1 --input1WiktionaryType=EnToTranslation --input2=data/inputs/wikiSplit/en/${lang}.data --input2Format=WholeSectionToHtmlParser --input2Name=enwikitionary --input2WiktionaryLang=EN --input2TitleIndex=2 --input2WebUrlTemplate=http://en.wiktionary.org/wiki/%s $reverse_dicts
 rm -f data/outputs/DE-${lang}.quickdic.v006.zip
 7z a -mx=9 data/outputs/DE-${lang}.quickdic.v006.zip ./data/outputs/DE-${lang}.quickdic
 
 done < DE-foreign-dictlist.txt
 fi
+
+if $FR_DICTS; then
+while read langcode langname ; do
+lang=$(echo $langcode | tr '[a-z]' '[A-Z]')
+
+reverse_dicts=""
+if test "$lang" = "DE" -o "$lang" = "IT" ; then
+reverse_dicts="--input3=data/inputs/wikiSplit/$langcode/FR.data --input3Format=WholeSectionToHtmlParser --input3Name=${langcode}wikitionary --input3WiktionaryLang=$lang --input3TitleIndex=1 --input3WebUrlTemplate=http://${langcode}.wiktionary.org/wiki/%s"
+fi
+
+stoplist=""
+test -e data/inputs/stoplists/${langcode}.txt && stoplist="--lang2Stoplist=data/inputs/stoplists/${langcode}.txt"
+./run.sh --lang1=FR --lang2=$lang --lang1Stoplist=data/inputs/stoplists/fr.txt $stoplist --dictOut=data/outputs/FR-${lang}.quickdic --dictInfo="(FR)Wiktionary-based FR-$lang dictionary." --input0=data/inputs/wikiSplit/fr/${lang}.data --input0Name=frwikitionary --input0Format=enwiktionary --input0LangPattern=${langname} --input0LangCodePattern=${langcode} --input0EnIndex=1 --input0WiktionaryType=EnForeign --input1=data/inputs/wikiSplit/en/EN.data --input1Name=enwikitionary --input1Format=EnTranslationToTranslation --input1LangPattern1=fr --input1LangPattern2=${langcode} --input2=data/inputs/wikiSplit/fr/${lang}.data --input2Format=WholeSectionToHtmlParser --input2Name=frwikitionary --input2WiktionaryLang=FR --input2TitleIndex=2 --input2WebUrlTemplate=http://fr.wiktionary.org/wiki/%s $reverse_dicts
+rm -f data/outputs/FR-${lang}.quickdic.v006.zip
+7z a -mx=9 data/outputs/FR-${lang}.quickdic.v006.zip ./data/outputs/FR-${lang}.quickdic
+
+done < FR-foreign-dictlist.txt
+fi
+
+if $IT_DICTS; then
+while read langcode langname ; do
+lang=$(echo $langcode | tr '[a-z]' '[A-Z]')
+
+reverse_dicts=""
+if test "$lang" = "FR" -o "$lang" = "DE" ; then
+reverse_dicts="--input3=data/inputs/wikiSplit/$langcode/IT.data --input3Format=WholeSectionToHtmlParser --input3Name=${langcode}wikitionary --input3WiktionaryLang=$lang --input3TitleIndex=1 --input3WebUrlTemplate=http://${langcode}.wiktionary.org/wiki/%s"
+fi
+
+stoplist=""
+test -e data/inputs/stoplists/${langcode}.txt && stoplist="--lang2Stoplist=data/inputs/stoplists/${langcode}.txt"
+./run.sh --lang1=IT --lang2=$lang --lang1Stoplist=data/inputs/stoplists/it.txt $stoplist --dictOut=data/outputs/IT-${lang}.quickdic --dictInfo="(IT)Wiktionary-based IT-$lang dictionary." --input0=data/inputs/wikiSplit/it/${lang}.data --input0Name=itwikitionary --input0Format=enwiktionary --input0LangPattern=${langname} --input0LangCodePattern=${langcode} --input0EnIndex=1 --input0WiktionaryType=EnForeign --input1=data/inputs/wikiSplit/en/EN.data --input1Name=enwikitionary --input1Format=EnTranslationToTranslation --input1LangPattern1=it --input1LangPattern2=${langcode} --input2=data/inputs/wikiSplit/it/${lang}.data --input2Format=WholeSectionToHtmlParser --input2Name=itwikitionary --input2WiktionaryLang=IT --input2TitleIndex=2 --input2WebUrlTemplate=http://it.wiktionary.org/wiki/%s $reverse_dicts
+rm -f data/outputs/IT-${lang}.quickdic.v006.zip
+7z a -mx=9 data/outputs/IT-${lang}.quickdic.v006.zip ./data/outputs/IT-${lang}.quickdic
+
+done < IT-foreign-dictlist.txt
+fi
-- 
2.43.0