X-Git-Url: http://gitweb.fperrin.net/?p=DictionaryPC.git;a=blobdiff_plain;f=generate_dictionaries.sh;h=50a0e2099755552bb7d73ee5fc72e1f438513abc;hp=f689718f7a798aa2750bbede4f5e0e8bea829a6c;hb=HEAD;hpb=c94d6df39d8c2a8cfd790127ce47b36e52b33676 diff --git a/generate_dictionaries.sh b/generate_dictionaries.sh index f689718..50a0e20 100755 --- a/generate_dictionaries.sh +++ b/generate_dictionaries.sh @@ -9,8 +9,7 @@ IT_DICTS=true #IT_DICTS=false EN_TRANS_DICTS=true #EN_TRANS_DICTS=false -# Spanish is unfortunately not yet working -SINGLE_DICTS="en de fr it" +SINGLE_DICTS="en de fr it es pt" #SINGLE_DICTS="" VERSION=v007 @@ -19,17 +18,23 @@ VERSION=v007 if $EN_DICTS; then # Note: using input1 seems to hang for ZH currently! -while read langcode langname ; do +while read langcode langname enlangname ; do lang=$(echo $langcode | tr '[a-z]' '[A-Z]') test "$lang" = "CY" && lang=CI test "$lang" = "CMN" && lang=cmn test "$lang" = "GRC" && lang=grc test "$lang" = "HAW" && lang=haw +test "$lang" = "SCN" && lang=scn test "$lang" = "YUE" && lang=yue +test "$lang" = "PDC" && lang=pdc +test "$lang" = "NDS" && lang=nds +test "$lang" = "CU" && lang=cu +test "$lang" = "ROM" && lang=rom reverse_dicts="" if test "$lang" = "DE" -o "$lang" = "FR" -o "$lang" = "IT" ; then reverse_dicts="--input3=data/inputs/wikiSplit/$langcode/EN.data --input3Format=WholeSectionToHtmlParser --input3Name=${langcode}wikitionary --input3WiktionaryLang=$lang --input3TitleIndex=1 --input3WebUrlTemplate=http://${langcode}.wiktionary.org/wiki/%s" +#reverse_dicts="$reverse_dicts --input4=data/inputs/wikiSplit/$langcode/EN.data --input4Name=${langcode}wikitionary --input4Format=enwiktionary --input4LangPattern=${enlangname} --input4LangCodePattern=en --input4EnIndex=1 --input4WiktionaryType=EnForeign" fi stoplist="" @@ -57,6 +62,11 @@ done if $DE_DICTS; then while read langcode langname ; do lang=$(echo $langcode | tr '[a-z]' '[A-Z]') +test "$lang" = "CY" && lang=CI +test "$lang" = "CMN" && lang=cmn +test "$lang" = "GRC" && lang=grc +test "$lang" = "HAW" && lang=haw +test "$lang" = "YUE" && lang=yue reverse_dicts="" if test "$lang" = "FR" -o "$lang" = "IT" ; then @@ -79,6 +89,11 @@ fi if $FR_DICTS; then while read langcode langname ; do lang=$(echo $langcode | tr '[a-z]' '[A-Z]') +test "$lang" = "CY" && lang=CI +test "$lang" = "CMN" && lang=cmn +test "$lang" = "GRC" && lang=grc +test "$lang" = "HAW" && lang=haw +test "$lang" = "YUE" && lang=yue reverse_dicts="" if test "$lang" = "DE" -o "$lang" = "IT" ; then @@ -97,6 +112,11 @@ fi if $IT_DICTS; then while read langcode langname ; do lang=$(echo $langcode | tr '[a-z]' '[A-Z]') +test "$lang" = "CY" && lang=CI +test "$lang" = "CMN" && lang=cmn +test "$lang" = "GRC" && lang=grc +test "$lang" = "HAW" && lang=haw +test "$lang" = "YUE" && lang=yue reverse_dicts="" if test "$lang" = "FR" -o "$lang" = "DE" ; then @@ -116,6 +136,16 @@ if $EN_TRANS_DICTS; then while read langcode1 langname1 langcode2 langname2 ; do lang1=$(echo $langcode1 | tr '[a-z]' '[A-Z]') lang2=$(echo $langcode2 | tr '[a-z]' '[A-Z]') +test "$lang1" = "CY" && lang1=CI +test "$lang1" = "CMN" && lang1=cmn +test "$lang1" = "GRC" && lang1=grc +test "$lang1" = "HAW" && lang1=haw +test "$lang1" = "YUE" && lang1=yue +test "$lang2" = "CY" && lang2=CI +test "$lang2" = "CMN" && lang2=cmn +test "$lang2" = "GRC" && lang2=grc +test "$lang2" = "HAW" && lang2=haw +test "$lang2" = "YUE" && lang2=yue stoplist1="" stoplist2="" test -e data/inputs/stoplists/${langcode1}.txt && stoplist1="--lang1Stoplist=data/inputs/stoplists/${langcode1}.txt"