]> gitweb.fperrin.net Git - DictionaryPC.git/commitdiff
Small updates to dictionary generation.
author Reimar Döffinger <Reimar.Doeffinger@gmx.de>
Fri, 28 Aug 2015 04:44:02 +0000 (06:44 +0200)
committer Reimar Döffinger <Reimar.Doeffinger@gmx.de>
Fri, 28 Aug 2015 04:44:02 +0000 (06:44 +0200)
generate_dictionaries.sh
src/com/hughes/android/dictionary/parser/wiktionary/WholeSectionToHtmlParser.java

index d1d3d88561a6f1a4a31ce36d9d9504fbf44a929f..6c180da9839f0c1298d6444279eeb0c275134eb5 100755 (executable)
@@ -3,9 +3,13 @@
 while read langcode langname ; do
 lang=$(echo $langcode | tr '[a-z]' '[A-Z]')
 test "$lang" = "CY" && lang=CI
+reverse_dicts=""
+if test "$lang" = "DE" -o "$lang" = "FR" -o "$lang" = "IT" ; then
+reverse_dicts="--input3=data/inputs/wikiSplit/$langcode/EN.data --input3Format=WholeSectionToHtmlParser --input3Name=${langcode}wikitionary --input3WiktionaryLang=$lang --input3TitleIndex=1 --input3WebUrlTemplate=http://${langcode}.wiktionary.org/wiki/%s"
+fi
 stoplist=""
 test -e data/inputs/stoplists/${langcode}.txt && stoplist="--lang2Stoplist=data/inputs/stoplists/${langcode}.txt"
-./run.sh --lang1=EN --lang2=$lang --lang1Stoplist=data/inputs/stoplists/en.txt $stoplist --dictOut=data/outputs/EN-${lang}.quickdic --dictInfo="(EN)Wiktionary-based EN-$lang dictionary." --input0=data/inputs/wikiSplit/en/${lang}.data  --input0Name=enwikitionary --input0Format=enwiktionary --input0LangPattern=${langname} --input0LangCodePattern=${langcode} --input0EnIndex=1 --input0WiktionaryType=EnForeign --input1=data/inputs/wikiSplit/en/EN.data --input1Name=enwikitionary --input1Format=enwiktionary --input1LangPattern=${langname} --input1LangCodePattern=${langcode} --input1EnIndex=1 --input1WiktionaryType=EnToTranslation
+./run.sh --lang1=EN --lang2=$lang --lang1Stoplist=data/inputs/stoplists/en.txt $stoplist --dictOut=data/outputs/EN-${lang}.quickdic --dictInfo="(EN)Wiktionary-based EN-$lang dictionary." --input0=data/inputs/wikiSplit/en/${lang}.data  --input0Name=enwikitionary --input0Format=enwiktionary --input0LangPattern=${langname} --input0LangCodePattern=${langcode} --input0EnIndex=1 --input0WiktionaryType=EnForeign --input1=data/inputs/wikiSplit/en/EN.data --input1Name=enwikitionary --input1Format=enwiktionary --input1LangPattern=${langname} --input1LangCodePattern=${langcode} --input1EnIndex=1 --input1WiktionaryType=EnToTranslation --input1Name=enwikitionary --input2=data/inputs/wikiSplit/en/${lang}.data --input2Format=WholeSectionToHtmlParser --input2Name=enwikitionary --input2WiktionaryLang=EN --input2TitleIndex=2 --input2WebUrlTemplate=http://en.wiktionary.org/wiki/%s $reverse_dicts
 rm data/outputs/EN-${lang}.quickdic.v006.zip
 7z a -mx=9 data/outputs/EN-${lang}.quickdic.v006.zip ./data/outputs/EN-${lang}.quickdic
 done < EN-foreign-dictlist.txt
index c5dca8098fb782519905fc8a8af79ae13e9a3f13..046d9984cc3804a3042a8de3aa24031b0d7f9b77 100644 (file)
@@ -261,7 +261,11 @@ public class WholeSectionToHtmlParser extends AbstractWiktionaryParser {
 
         if (webUrlTemplate != null) {
             final String webUrl = String.format(webUrlTemplate, title);
+           // URI.create can raise an exception e.g. if webUrl contains %, just ignore those cases.
+           try {
             callback.builder.append(String.format("<p> <a href=\"%s\">%s</a>", URI.create(webUrl).toString(), escapeHtmlLiteral(webUrl)));
+           } catch (Exception e)
+           {}
         }
         htmlEntry.html = callback.builder.toString();
         indexedEntry.isValid = true;