From 1b515f031d39e758e8e6339c03e124f1548579cc Mon Sep 17 00:00:00 2001 From: thadh Date: Mon, 1 Oct 2012 10:41:33 -0700 Subject: [PATCH] Format links properly. --- .../dictionary/engine/DictionaryBuilder.java | 2 +- .../engine/DictionaryBuilderTest.java | 2 +- .../wiktionary/WholeSectionToHtmlParser.java | 35 +- .../parser/wiktionary/WiktionaryLangs.java | 2 +- .../wiktionary.WholeSection.DE.quickdic.text | 546 +- .../wiktionary.WholeSection.EN.quickdic.text | 6362 ++++++++--------- .../wiktionary.WholeSection.IT.quickdic.text | 4784 ++++++------- testdata/outputs/testItConj.html | 1906 ++--- 8 files changed, 6825 insertions(+), 6814 deletions(-) diff --git a/src/com/hughes/android/dictionary/engine/DictionaryBuilder.java b/src/com/hughes/android/dictionary/engine/DictionaryBuilder.java index 65a46fe..d010cdb 100644 --- a/src/com/hughes/android/dictionary/engine/DictionaryBuilder.java +++ b/src/com/hughes/android/dictionary/engine/DictionaryBuilder.java @@ -181,7 +181,7 @@ public class DictionaryBuilder { final String wiktionaryLang = keyValueArgs.remove(prefix + "WiktionaryLang"); String skipLang = keyValueArgs.remove(prefix + "SkipLang"); if (skipLang == null) skipLang = ""; - new WholeSectionToHtmlParser(dictionaryBuilder.indexBuilders.get(titleIndex), wiktionaryLang, skipLang).parse(file, entrySource, pageLimit); + new WholeSectionToHtmlParser(dictionaryBuilder.indexBuilders.get(titleIndex), null, wiktionaryLang, skipLang).parse(file, entrySource, pageLimit); } else { fatalError("Invalid or missing input format: " + inputFormat); } diff --git a/src/com/hughes/android/dictionary/engine/DictionaryBuilderTest.java b/src/com/hughes/android/dictionary/engine/DictionaryBuilderTest.java index 39f5920..c33b626 100644 --- a/src/com/hughes/android/dictionary/engine/DictionaryBuilderTest.java +++ b/src/com/hughes/android/dictionary/engine/DictionaryBuilderTest.java @@ -63,7 +63,7 @@ public class DictionaryBuilderTest extends TestCase { "{{it-conj-cirsi|cuc|essere}}\n" ; final DictionaryBuilder db = new DictionaryBuilder("", Language.en, Language.it, "", "", Collections.singleton("X"), Collections.singleton("X")); - WholeSectionToHtmlParser parser = new WholeSectionToHtmlParser(db.indexBuilders.get(0), "EN", "IT"); + WholeSectionToHtmlParser parser = new WholeSectionToHtmlParser(db.indexBuilders.get(0), null, "EN", "IT"); parser.title = "dummyTitle"; parser.entrySource = new EntrySource(0, "dummySource", 0); parser.parseSection("dummyHeading", toParse); diff --git a/src/com/hughes/android/dictionary/parser/wiktionary/WholeSectionToHtmlParser.java b/src/com/hughes/android/dictionary/parser/wiktionary/WholeSectionToHtmlParser.java index 0b4bc0d..0f7ae2d 100644 --- a/src/com/hughes/android/dictionary/parser/wiktionary/WholeSectionToHtmlParser.java +++ b/src/com/hughes/android/dictionary/parser/wiktionary/WholeSectionToHtmlParser.java @@ -1,6 +1,7 @@ package com.hughes.android.dictionary.parser.wiktionary; +import com.hughes.android.dictionary.HtmlDisplayActivity; import com.hughes.android.dictionary.engine.EntryTypeName; import com.hughes.android.dictionary.engine.HtmlEntry; import com.hughes.android.dictionary.engine.IndexBuilder; @@ -25,7 +26,7 @@ public class WholeSectionToHtmlParser extends AbstractWiktionaryParser { boolean skipSection(final String name); EntryTypeName sectionNameToEntryType(String sectionName); boolean skipWikiLink(final WikiTokenizer wikiTokenizer); - String adjustWikiLink(String wikiLinkDest); + String adjustWikiLink(String wikiLinkDest, final String wikiLinkText); void addFunctionCallbacks( Map> functionCallbacks); } @@ -55,7 +56,7 @@ public class WholeSectionToHtmlParser extends AbstractWiktionaryParser { } return null; } - + @Override public boolean skipWikiLink(WikiTokenizer wikiTokenizer) { final String wikiText = wikiTokenizer.wikiLinkText(); @@ -65,10 +66,17 @@ public class WholeSectionToHtmlParser extends AbstractWiktionaryParser { return false; } @Override - public String adjustWikiLink(String wikiLinkDest) { + public String adjustWikiLink(String wikiLinkDest, String wikiLinkText) { if (wikiLinkDest.startsWith("w:") || wikiLinkDest.startsWith("Image:")) { return null; } + final int hashPos = wikiLinkDest.indexOf("#"); + if (hashPos != -1) { + wikiLinkDest = wikiLinkDest.substring(0, hashPos); + if (wikiLinkDest.isEmpty()) { + wikiLinkDest = wikiLinkText; + } + } return wikiLinkDest; } @@ -76,7 +84,8 @@ public class WholeSectionToHtmlParser extends AbstractWiktionaryParser { public void addFunctionCallbacks( Map> functionCallbacks) { EnFunctionCallbacks.addGenericCallbacks(functionCallbacks); - }}); + } + }); final LangConfig basicLangConfig = new LangConfig() { @Override @@ -96,7 +105,7 @@ public class WholeSectionToHtmlParser extends AbstractWiktionaryParser { return false; } @Override - public String adjustWikiLink(String wikiLinkDest) { + public String adjustWikiLink(String wikiLinkDest, final String wikiLinkText) { return wikiLinkDest; } @@ -111,11 +120,14 @@ public class WholeSectionToHtmlParser extends AbstractWiktionaryParser { } final IndexBuilder titleIndexBuilder; + final IndexBuilder defIndexBuilder; final String skipLangIso; final LangConfig langConfig; + - public WholeSectionToHtmlParser(final IndexBuilder titleIndexBuilder, final String wiktionaryIso, final String skipLangIso) { + public WholeSectionToHtmlParser(final IndexBuilder titleIndexBuilder, final IndexBuilder defIndexBuilder, final String wiktionaryIso, final String skipLangIso) { this.titleIndexBuilder = titleIndexBuilder; + this.defIndexBuilder = defIndexBuilder; assert isoToLangConfig.containsKey(wiktionaryIso): wiktionaryIso; this.langConfig = isoToLangConfig.get(wiktionaryIso); this.skipLangIso = skipLangIso; @@ -126,7 +138,7 @@ public class WholeSectionToHtmlParser extends AbstractWiktionaryParser { @Override public void parseSection(String heading, String text) { assert entrySource != null; - final HtmlEntry htmlEntry = new HtmlEntry(entrySource, StringEscapeUtils.escapeHtml3(title)); + final HtmlEntry htmlEntry = new HtmlEntry(entrySource, title); indexedEntry = new IndexedEntry(htmlEntry); final AppendAndIndexWikiCallback callback = new AppendCallback( @@ -161,8 +173,6 @@ public class WholeSectionToHtmlParser extends AbstractWiktionaryParser { - static final Pattern ALL_ASCII = Pattern.compile("[\\p{ASCII}]*"); - class AppendCallback extends AppendAndIndexWikiCallback { public AppendCallback(WholeSectionToHtmlParser parser) { super(parser); @@ -171,7 +181,7 @@ public class WholeSectionToHtmlParser extends AbstractWiktionaryParser { @Override public void onPlainText(String plainText) { final String htmlEscaped = StringEscapeUtils.escapeHtml3(plainText); - if (ALL_ASCII.matcher(htmlEscaped).matches()) { + if (StringUtil.isAscii(htmlEscaped)) { super.onPlainText(htmlEscaped); } else { super.onPlainText(StringUtil.escapeToPureHtmlUnicode(plainText)); @@ -189,7 +199,7 @@ public class WholeSectionToHtmlParser extends AbstractWiktionaryParser { } String linkDest; if (wikiTokenizer.wikiLinkDest() != null) { - linkDest = langConfig.adjustWikiLink(wikiTokenizer.wikiLinkDest()); + linkDest = langConfig.adjustWikiLink(wikiTokenizer.wikiLinkDest(), wikiTokenizer.wikiLinkText()); } else { linkDest = wikiTokenizer.wikiLinkText(); } @@ -198,7 +208,7 @@ public class WholeSectionToHtmlParser extends AbstractWiktionaryParser { titleIndexBuilder.addEntryWithString(indexedEntry, wikiTokenizer.wikiLinkText(), sectionEntryTypeName); } if (linkDest != null) { - builder.append(String.format("", linkDest)); + builder.append(String.format("", HtmlEntry.formatQuickdicUrl("", linkDest))); super.onWikiLink(wikiTokenizer); builder.append(String.format("")); } else { @@ -225,6 +235,7 @@ public class WholeSectionToHtmlParser extends AbstractWiktionaryParser { } EntryTypeName sectionEntryTypeName; + IndexBuilder currentIndexBuilder; @Override public void onHeading(WikiTokenizer wikiTokenizer) { diff --git a/src/com/hughes/android/dictionary/parser/wiktionary/WiktionaryLangs.java b/src/com/hughes/android/dictionary/parser/wiktionary/WiktionaryLangs.java index e67685c..c66e650 100644 --- a/src/com/hughes/android/dictionary/parser/wiktionary/WiktionaryLangs.java +++ b/src/com/hughes/android/dictionary/parser/wiktionary/WiktionaryLangs.java @@ -108,7 +108,7 @@ public class WiktionaryLangs { isoCodeToEnWikiName.put("LB", "Luxembourgish"); isoCodeToEnWikiName.put("MK", "Macedonian"); - assert isoCodeToEnWikiName.keySet().equals(Language.isoCodeToResources.keySet()); + assert Language.isoCodeToResources.keySet().containsAll(isoCodeToEnWikiName.keySet()); } public static final Map> wikiCodeToIsoCodeToWikiName = new LinkedHashMap>(); diff --git a/testdata/goldens/wiktionary.WholeSection.DE.quickdic.text b/testdata/goldens/wiktionary.WholeSection.DE.quickdic.text index 04016f4..b41bdd7 100644 --- a/testdata/goldens/wiktionary.WholeSection.DE.quickdic.text +++ b/testdata/goldens/wiktionary.WholeSection.DE.quickdic.text @@ -36,13 +36,13 @@ From {{etyl|goh|de}} {{term|ab|lang=goh}}, from {{proto|Germanic|ab}}.

Preposition

{{head|de|preposition}} -
  1. Beginning at that time or location; from.
  2. +
    1. Beginning at that time or location; from.
      • ab heute verfügbar (available from today on)

    Derived terms

    -