From 0cde0a508bc65074c94b408e9f74b01aca9b8b29 Mon Sep 17 00:00:00 2001 From: Thad Hughes Date: Mon, 3 Dec 2012 13:47:50 -0800 Subject: [PATCH] go --- .../engine/DictionaryBuilderMain.java | 13 +-- .../dictionary/engine/LanguageTest.java | 6 +- .../dictionary/parser/WikiTokenizer.java | 6 +- .../wiktionary/ItFunctionCallbacks.java | 108 ++++++++++-------- .../parser/wiktionary/WiktionaryLangs.java | 5 +- todo.txt | 14 +++ 6 files changed, 91 insertions(+), 61 deletions(-) diff --git a/src/com/hughes/android/dictionary/engine/DictionaryBuilderMain.java b/src/com/hughes/android/dictionary/engine/DictionaryBuilderMain.java index 2677bb4..b298040 100644 --- a/src/com/hughes/android/dictionary/engine/DictionaryBuilderMain.java +++ b/src/com/hughes/android/dictionary/engine/DictionaryBuilderMain.java @@ -37,6 +37,7 @@ public class DictionaryBuilderMain extends TestCase { // Build the non EN ones. static final String[][] nonEnPairs = new String[][] { + /* {"EN"}, {"DE"}, {"IT"}, @@ -47,7 +48,6 @@ public class DictionaryBuilderMain extends TestCase { {"DE", "EN" }, {"DE", "IT" }, - {"AR", "DE" }, {"AR", "ES" }, {"AR", "FR" }, @@ -91,6 +91,7 @@ public class DictionaryBuilderMain extends TestCase { {"FR", "RU" }, {"FR", "TR" }, // Turkish {"FR", "ZH" }, + {"FR", "EL" }, {"IT", "DE" }, {"IT", "EL" }, // Greek @@ -132,6 +133,9 @@ public class DictionaryBuilderMain extends TestCase { {"FA", "HY" }, // Persian, Armenian, by request. {"FA", "SV" }, // Persian, Swedish, by request. + {"NL", "PL" }, // Dutch, Polish, by request. + + */ }; @@ -312,15 +316,10 @@ public class DictionaryBuilderMain extends TestCase { final List allPairs = new ArrayList(); allPairs.addAll(Arrays.asList(nonEnPairs)); - // Add all the EN-XX pairs. for (final String isoCode : WiktionaryLangs.isoCodeToEnWikiName.keySet()) { - if (isoCode.equals("EN") || isoCode.equals("DE")) { - continue; - } allPairs.add(new String[] {"EN", isoCode}); } - allPairs.add(new String[] {"EN", "DE"}); final Set> done = new LinkedHashSet>(); @@ -332,7 +331,7 @@ public class DictionaryBuilderMain extends TestCase { } done.add(pairList); - if (!pairList.contains("IT") || !pairList.contains("EN")) { + if (!pairList.contains("EN") && !pairList.contains("EL")) { //continue; } diff --git a/src/com/hughes/android/dictionary/engine/LanguageTest.java b/src/com/hughes/android/dictionary/engine/LanguageTest.java index 4f90962..078afaf 100644 --- a/src/com/hughes/android/dictionary/engine/LanguageTest.java +++ b/src/com/hughes/android/dictionary/engine/LanguageTest.java @@ -184,10 +184,14 @@ public class LanguageTest extends TestCase { public void testEnWiktionaryNames() { final Set enLangs = new LinkedHashSet(WiktionaryLangs.isoCodeToEnWikiName.keySet()); + final List names = new ArrayList(); for (final String code : WiktionaryLangs.isoCodeToEnWikiName.keySet()) { + names.add(WiktionaryLangs.isoCodeToEnWikiName.get(code)); enLangs.add(code.toLowerCase()); } - assertEquals(enLangs.toString(), Language.isoCodeToResources.keySet().toString()); + Collections.sort(names); + System.out.println(names); + //assertEquals(enLangs, Language.isoCodeToResources.keySet()); assertEquals(enLangs, Language.isoCodeToResources.keySet()); } diff --git a/src/com/hughes/android/dictionary/parser/WikiTokenizer.java b/src/com/hughes/android/dictionary/parser/WikiTokenizer.java index cdf2f04..4a28cee 100644 --- a/src/com/hughes/android/dictionary/parser/WikiTokenizer.java +++ b/src/com/hughes/android/dictionary/parser/WikiTokenizer.java @@ -121,8 +121,10 @@ public final class WikiTokenizer { this(wikiText, true); } - public WikiTokenizer(final String wikiText, final boolean isNewline) { - this.wikiText = wikiText.replaceAll("\u2028", "\n"); + public WikiTokenizer(String wikiText, final boolean isNewline) { + wikiText = wikiText.replaceAll("\u2028", "\n"); + wikiText = wikiText.replaceAll("\u0085", "\n"); + this.wikiText = wikiText; this.matcher = wikiTokenEvent.matcher(wikiText); justReturnedNewline = isNewline; } diff --git a/src/com/hughes/android/dictionary/parser/wiktionary/ItFunctionCallbacks.java b/src/com/hughes/android/dictionary/parser/wiktionary/ItFunctionCallbacks.java index 275aa29..bd975b2 100644 --- a/src/com/hughes/android/dictionary/parser/wiktionary/ItFunctionCallbacks.java +++ b/src/com/hughes/android/dictionary/parser/wiktionary/ItFunctionCallbacks.java @@ -22,64 +22,72 @@ import java.util.List; import java.util.Map; class ItFunctionCallbacks { - - static void addGenericCallbacks(Map> callbacks) { - callbacks.put("-hyph-", new Redispatch("\n==== Sillabazione ====\n")); - callbacks.put("-pron-", new Redispatch("\n==== Pronuncia ====\n")); - callbacks.put("-etim-", new Redispatch("\n==== Etimologia / Derivazione ====\n")); - callbacks.put("-syn-", new Redispatch("\n==== Sinonimi ====\n")); - callbacks.put("-ant-", new Redispatch("\n==== Antonimi/Contrari ====\n")); - callbacks.put("-drv-", new Redispatch("\n==== Parole derivate ====\n")); - callbacks.put("-prov-", new Redispatch("\n==== Proverbi e modi di dire ====\n")); - callbacks.put("-rel-", new Redispatch("\n==== Termini correlati ====\n")); - callbacks.put("-ref-", new Redispatch("\n==== Note / Riferimenti ====\n")); - callbacks.put("-trans1-", new SkipSection()); - callbacks.put("-trans2-", new SkipSection()); + static void addGenericCallbacks( + Map> callbacks) { + callbacks.put("-hyph-", new Redispatch("\n==== Sillabazione ====\n")); + callbacks.put("-pron-", new Redispatch("\n==== Pronuncia ====\n")); + callbacks.put("-etim-", new Redispatch("\n==== Etimologia / Derivazione ====\n")); + callbacks.put("-syn-", new Redispatch("\n==== Sinonimi ====\n")); + callbacks.put("-ant-", new Redispatch("\n==== Antonimi/Contrari ====\n")); + callbacks.put("-drv-", new Redispatch("\n==== Parole derivate ====\n")); + callbacks.put("-prov-", new Redispatch("\n==== Proverbi e modi di dire ====\n")); + callbacks.put("-rel-", new Redispatch("\n==== Termini correlati ====\n")); + callbacks.put("-ref-", new Redispatch("\n==== Note / Riferimenti ====\n")); + callbacks.put("-var-", new Redispatch("\n==== Varianti ====\n")); - } + callbacks.put("-trans1-", new SkipSection()); + callbacks.put("-trans2-", new SkipSection()); - - static final NameAndArgs NAME_AND_ARGS = new NameAndArgs(); - - - static final class Redispatch implements FunctionCallback { - final String newText; - public Redispatch(String newText) { - this.newText = newText; } - @Override - public boolean onWikiFunction(final WikiTokenizer wikiTokenizer, final String name, final List args, - final Map namedArgs, - final T parser, - final AppendAndIndexWikiCallback appendAndIndexWikiCallback) { - if (!namedArgs.isEmpty() || args.size() != 0) { - return false; + static final NameAndArgs NAME_AND_ARGS = new NameAndArgs(); + + static final class Redispatch implements + FunctionCallback { + final String newText; + + public Redispatch(String newText) { + this.newText = newText; } - appendAndIndexWikiCallback.dispatch(newText, null); - return true; - } - } + @Override + public boolean onWikiFunction(final WikiTokenizer wikiTokenizer, final String name, + final List args, + final Map namedArgs, + final T parser, + final AppendAndIndexWikiCallback appendAndIndexWikiCallback) { + if (!namedArgs.isEmpty() || args.size() != 0) { + return false; + } + appendAndIndexWikiCallback.dispatch(newText, null); + return true; + } + } - static final class SkipSection implements FunctionCallback { - public SkipSection() { - } + static final class SkipSection implements + FunctionCallback { + public SkipSection() { + } - @Override - public boolean onWikiFunction(final WikiTokenizer wikiTokenizer, final String name, final List args, - final Map namedArgs, - final T parser, - final AppendAndIndexWikiCallback appendAndIndexWikiCallback) { - while (wikiTokenizer.nextToken() != null) { - if (wikiTokenizer.isFunction() && wikiTokenizer.functionName().startsWith("-") && wikiTokenizer.functionName().endsWith("-")) { - wikiTokenizer.returnToLineStart(); - return true; - } - } - return true; + @Override + public boolean onWikiFunction(final WikiTokenizer wikiTokenizer, final String name, + final List args, + final Map namedArgs, + final T parser, + final AppendAndIndexWikiCallback appendAndIndexWikiCallback) { + while (wikiTokenizer.nextToken() != null) { + if (wikiTokenizer.isFunction() + && wikiTokenizer.functionName().startsWith("-") + && wikiTokenizer.functionName().endsWith("-") + // Hack to prevent infinite-looping, would be better to check that this func was at the start of the line. + && !wikiTokenizer.functionName().contains("trans")) { + wikiTokenizer.returnToLineStart(); + return true; + } + } + return true; } - } + } -} \ No newline at end of file +} diff --git a/src/com/hughes/android/dictionary/parser/wiktionary/WiktionaryLangs.java b/src/com/hughes/android/dictionary/parser/wiktionary/WiktionaryLangs.java index b764291..4acdef7 100644 --- a/src/com/hughes/android/dictionary/parser/wiktionary/WiktionaryLangs.java +++ b/src/com/hughes/android/dictionary/parser/wiktionary/WiktionaryLangs.java @@ -14,7 +14,9 @@ package com.hughes.android.dictionary.parser.wiktionary; +import com.hughes.android.dictionary.R; import com.hughes.android.dictionary.engine.Language; +import com.hughes.android.dictionary.engine.Language.LanguageResources; import java.util.LinkedHashMap; import java.util.Map; @@ -107,7 +109,8 @@ public class WiktionaryLangs { isoCodeToEnWikiName.put("HT", "Haitian Creole"); isoCodeToEnWikiName.put("LB", "Luxembourgish"); isoCodeToEnWikiName.put("MK", "Macedonian"); - + + assert Language.isoCodeToResources.keySet().containsAll(isoCodeToEnWikiName.keySet()); } diff --git a/todo.txt b/todo.txt index 5ed7c5f..bd11c25 100644 --- a/todo.txt +++ b/todo.txt @@ -1,3 +1,17 @@ +URLs with special chars +encode properly. +Link span +de-conj +-var- +why does presso not show up? +Multi search results include titles +more distinct BG color for tokens +no BG for see also. +Afferrare in it, italics don't end. +{{L +start new intent for web link. + + {{term {{etyl {{l -- 2.43.0