From 270a90a01008604e9c883c54e8b001533401d6a5 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Reimar=20D=C3=B6ffinger?= Date: Fri, 28 Aug 2015 06:44:02 +0200 Subject: [PATCH] Small updates to dictionary generation. --- generate_dictionaries.sh | 6 +++++- .../parser/wiktionary/WholeSectionToHtmlParser.java | 4 ++++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/generate_dictionaries.sh b/generate_dictionaries.sh index d1d3d88..6c180da 100755 --- a/generate_dictionaries.sh +++ b/generate_dictionaries.sh @@ -3,9 +3,13 @@ while read langcode langname ; do lang=$(echo $langcode | tr '[a-z]' '[A-Z]') test "$lang" = "CY" && lang=CI +reverse_dicts="" +if test "$lang" = "DE" -o "$lang" = "FR" -o "$lang" = "IT" ; then +reverse_dicts="--input3=data/inputs/wikiSplit/$langcode/EN.data --input3Format=WholeSectionToHtmlParser --input3Name=${langcode}wikitionary --input3WiktionaryLang=$lang --input3TitleIndex=1 --input3WebUrlTemplate=http://${langcode}.wiktionary.org/wiki/%s" +fi stoplist="" test -e data/inputs/stoplists/${langcode}.txt && stoplist="--lang2Stoplist=data/inputs/stoplists/${langcode}.txt" -./run.sh --lang1=EN --lang2=$lang --lang1Stoplist=data/inputs/stoplists/en.txt $stoplist --dictOut=data/outputs/EN-${lang}.quickdic --dictInfo="(EN)Wiktionary-based EN-$lang dictionary." --input0=data/inputs/wikiSplit/en/${lang}.data --input0Name=enwikitionary --input0Format=enwiktionary --input0LangPattern=${langname} --input0LangCodePattern=${langcode} --input0EnIndex=1 --input0WiktionaryType=EnForeign --input1=data/inputs/wikiSplit/en/EN.data --input1Name=enwikitionary --input1Format=enwiktionary --input1LangPattern=${langname} --input1LangCodePattern=${langcode} --input1EnIndex=1 --input1WiktionaryType=EnToTranslation +./run.sh --lang1=EN --lang2=$lang --lang1Stoplist=data/inputs/stoplists/en.txt $stoplist --dictOut=data/outputs/EN-${lang}.quickdic --dictInfo="(EN)Wiktionary-based EN-$lang dictionary." 
--input0=data/inputs/wikiSplit/en/${lang}.data --input0Name=enwikitionary --input0Format=enwiktionary --input0LangPattern=${langname} --input0LangCodePattern=${langcode} --input0EnIndex=1 --input0WiktionaryType=EnForeign --input1=data/inputs/wikiSplit/en/EN.data --input1Name=enwikitionary --input1Format=enwiktionary --input1LangPattern=${langname} --input1LangCodePattern=${langcode} --input1EnIndex=1 --input1WiktionaryType=EnToTranslation --input1Name=enwikitionary --input2=data/inputs/wikiSplit/en/${lang}.data --input2Format=WholeSectionToHtmlParser --input2Name=enwikitionary --input2WiktionaryLang=EN --input2TitleIndex=2 --input2WebUrlTemplate=http://en.wiktionary.org/wiki/%s $reverse_dicts rm data/outputs/EN-${lang}.quickdic.v006.zip 7z a -mx=9 data/outputs/EN-${lang}.quickdic.v006.zip ./data/outputs/EN-${lang}.quickdic done < EN-foreign-dictlist.txt diff --git a/src/com/hughes/android/dictionary/parser/wiktionary/WholeSectionToHtmlParser.java b/src/com/hughes/android/dictionary/parser/wiktionary/WholeSectionToHtmlParser.java index c5dca80..046d998 100644 --- a/src/com/hughes/android/dictionary/parser/wiktionary/WholeSectionToHtmlParser.java +++ b/src/com/hughes/android/dictionary/parser/wiktionary/WholeSectionToHtmlParser.java @@ -261,7 +261,11 @@ public class WholeSectionToHtmlParser extends AbstractWiktionaryParser { if (webUrlTemplate != null) { final String webUrl = String.format(webUrlTemplate, title); + // URI.create can raise an exception e.g. if webUrl contains %, just ignore those cases. + try { callback.builder.append(String.format("<p> <a href=\"%s\">%s</a>", URI.create(webUrl).toString(), escapeHtmlLiteral(webUrl))); + } catch (Exception e) + {} } htmlEntry.html = callback.builder.toString(); indexedEntry.isValid = true; -- 2.43.0