X-Git-Url: http://gitweb.fperrin.net/?a=blobdiff_plain;f=src%2Fcom%2Fhughes%2Fandroid%2Fdictionary%2Fengine%2FDictionaryBuilderTest.java;h=59de98fa13b071db22e386385ab6b616691cb145;hb=d46f529d02bf4306a922c521d032f7620020b1e8;hp=8059a1ef5e78e3b8c6cba9cb06d33b9119e5c1dd;hpb=a7ae2524281869de5aa756ae35524b21bab3e08a;p=DictionaryPC.git diff --git a/src/com/hughes/android/dictionary/engine/DictionaryBuilderTest.java b/src/com/hughes/android/dictionary/engine/DictionaryBuilderTest.java index 8059a1e..59de98f 100644 --- a/src/com/hughes/android/dictionary/engine/DictionaryBuilderTest.java +++ b/src/com/hughes/android/dictionary/engine/DictionaryBuilderTest.java @@ -20,6 +20,8 @@ import java.io.IOException; import java.io.PrintStream; import java.io.RandomAccessFile; +import com.hughes.android.dictionary.parser.wiktionary.EnTranslationToTranslationParser; +import com.hughes.android.dictionary.parser.wiktionary.WholeSectionToHtmlParser; import com.hughes.util.FileUtil; import junit.framework.TestCase; @@ -27,30 +29,34 @@ import junit.framework.TestCase; public class DictionaryBuilderTest extends TestCase { public static final String TEST_INPUTS = "testdata/inputs/"; - public static final String WIKISPLIT = "../DictionaryData/inputs/enWikiSplit/"; - public static final String STOPLISTS = "../DictionaryData/inputs/stoplists/"; + public static final String WIKISPLIT = "data/inputs/wikiSplit/en/"; + public static final String STOPLISTS = "data/inputs/stoplists/"; public static final String GOLDENS = "testdata/goldens/"; public static final String TEST_OUTPUTS = "testdata/outputs/"; - public void testWiktionaryItalianFromItalian() throws Exception { - final String name = "wiktionary.it_it.quickdic"; + + public void testWiktionary_en_de2fr() throws Exception { + wiktionaryTestWithEnTrans2Trans("wiktionary.de_fr.quickdic", "DE", "FR"); + } + + public void wiktionaryTestWithEnTrans2Trans(final String name, final String lang1, + final String lang2) throws Exception { final File result = new File(TEST_OUTPUTS + name); System.out.println("Writing to: " + result); DictionaryBuilder.main(new String[] { "--dictOut=" + result.getAbsolutePath(), - "--lang1=IT", - "--lang2=EN", - "--lang1Stoplist=" + STOPLISTS + "it.txt", - "--lang2Stoplist=" + STOPLISTS + "en.txt", - "--dictInfo=SomeWikiData", - - "--input4=" + WIKISPLIT + "IT.data", - "--input4Name=enwiktionary.italian", - "--input4Format=enwiktionary", - "--input4LangPattern=Italian", - "--input4LangCodePattern=it", - "--input4EnIndex=2", + "--lang1=" + lang1, + "--lang2=" + lang2, + "--lang1Stoplist=" + STOPLISTS + "empty.txt", + "--lang2Stoplist=" + STOPLISTS + "empty.txt", + "--dictInfo=SomeWikiDataTrans2Trans", + + "--input4=" + WIKISPLIT + "EN.data", + "--input4Name=" + name, + "--input4Format=" + EnTranslationToTranslationParser.NAME, + "--input4LangPattern1=" + lang1, + "--input4LangPattern2=" + lang2, "--input4PageLimit=1000", "--print=" + result.getPath() + ".text", @@ -59,25 +65,117 @@ public class DictionaryBuilderTest extends TestCase { checkGolden(name, result); } - public void testWiktionaryItalianFromEnglish() throws Exception { - final String name = "wiktionary.it_en.quickdic"; + public void testWiktionary_WholeSection_DE() throws Exception { + wiktionaryTestWithWholeSectionToHtml("wiktionary.WholeSection.DE.quickdic", "DE"); + } + + public void testWiktionary_WholeSection_EN() throws Exception { + wiktionaryTestWithWholeSectionToHtml("wiktionary.WholeSection.EN.quickdic", "EN"); + } + + public void testWiktionary_WholeSection_IT() throws Exception { + wiktionaryTestWithWholeSectionToHtml("wiktionary.WholeSection.IT.quickdic", "IT"); + } + + public void wiktionaryTestWithWholeSectionToHtml(final String name, final String langCode) throws Exception { final File result = new File(TEST_OUTPUTS + name); System.out.println("Writing to: " + result); DictionaryBuilder.main(new String[] { "--dictOut=" + result.getAbsolutePath(), - "--lang1=IT", + "--lang1=" + langCode, + "--lang2=" + "EN", + "--lang1Stoplist=" + STOPLISTS + "empty.txt", + "--lang2Stoplist=" + STOPLISTS + "empty.txt", + "--dictInfo=SomeWikiDataWholeSection", + + "--input4=" + WIKISPLIT + langCode + ".data", + "--input4Name=" + name, + "--input4Format=" + WholeSectionToHtmlParser.NAME, + "--input4TitleIndex=" + "1", + "--input4PageLimit=100", + + "--print=" + result.getPath() + ".text", + }); + checkGolden(name, result); + } + + + public void testWiktionary_IT_EN() throws Exception { + wiktionaryTestWithLangToEn("wiktionary.it_en.quickdic", "IT", "it.txt", + "EN.data", "enwiktionary.english", "Italian", "it"); + } + + public void testWiktionary_ZH_EN() throws Exception { + wiktionaryTestWithLangToEn("wiktionary.zh_en.quickdic", "ZH", "empty.txt", + // These missing "e" prevents a complete match, forcing the name to be printed + "EN.data", "enwiktionary.english", "Chinese|Mandarin|Cantones", "zh"); + } + + public void testWiktionary_DE_EN() throws Exception { + wiktionaryTestWithLangToEn("wiktionary.de_en.quickdic", "DE", "de.txt", + "EN.data", "enwiktionary.english", "German", "it"); + } + + public void testWiktionary_IT_IT() throws Exception { + wiktionaryTestWithLangToEn("wiktionary.it_it.quickdic", "IT", "it.txt", + "IT.data", "enwiktionary.italian", "Italian", "it"); + } + + // French + public void testWiktionary_FR_FR() throws Exception { + wiktionaryTestWithLangToEn("wiktionary.fr_fr.quickdic", "FR", "fr.txt", + "FR.data", "enwiktionary.french", "French", "fr"); + } + + + // Arabic + public void testWiktionary_AR_AR() throws Exception { + wiktionaryTestWithLangToEn("wiktionary.ar_ar.quickdic", "AR", "empty.txt", + "AR.data", "enwiktionary.arabic", "Arabic", "ar"); + } + + // Chinese + public void testWiktionary_ZH_ZH() throws Exception { + wiktionaryTestWithLangToEn("wiktionary.zh_zh.quickdic", "ZH", "empty.txt", + // These missing "e" prevents a complete match, forcing the name to be printed. + "ZH.data", "enwiktionary.chinese", "Chinese|Mandarin|Cantones", "zh"); + } + + // German + public void testWiktionary_DE_DE() throws Exception { + wiktionaryTestWithLangToEn("wiktionary.de_de.quickdic", "DE", "de.txt", + "DE.data", "enwiktionary.german", "German", "it"); + } + + // Thai + public void testWiktionary_TH_TH() throws Exception { + wiktionaryTestWithLangToEn("wiktionary.th_th.quickdic", "TH", "empty.txt", + // These missing "e" prevents a complete match, forcing the name to be printed. + "TH.data", "enwiktionary.thai", "Thai", "th"); + } + + public void wiktionaryTestWithLangToEn(final String name, final String lang1, + final String stoplist, final String data, final String dictName, + final String langPattern, final String langCode) throws Exception { + final File result = new File(TEST_OUTPUTS + name); + System.out.println("Writing to: " + result); + final String type = data.equals("EN.data") ? "EnToTranslation" : "EnForeign"; + DictionaryBuilder.main(new String[] { + "--dictOut=" + result.getAbsolutePath(), + "--lang1=" + lang1, "--lang2=EN", - "--lang1Stoplist=" + STOPLISTS + "it.txt", + "--lang1Stoplist=" + STOPLISTS + stoplist, "--lang2Stoplist=" + STOPLISTS + "en.txt", "--dictInfo=SomeWikiData", - "--input3=" + WIKISPLIT + "EN.data", - "--input3Name=enwiktionary.english", - "--input3Format=enwiktionary", - "--input3LangPattern=Italian", - "--input3LangCodePattern=it", - "--input3EnIndex=2", - "--input3PageLimit=1000", + "--input4=" + WIKISPLIT + data, + "--input4Name=" + dictName, + "--input4Format=enwiktionary", + "--input4WiktionaryType=" + type, + "--input4LangPattern=" + langPattern, + "--input4LangCodePattern=" + langCode, + "--input4EnIndex=2", + "--input4PageLimit=1000", "--print=" + result.getPath() + ".text", }); @@ -85,7 +183,6 @@ public class DictionaryBuilderTest extends TestCase { checkGolden(name, result); } - public void testGermanCombined() throws Exception { final String name = "de-en.quickdic"; final File result = new File(TEST_OUTPUTS + name); @@ -104,7 +201,7 @@ public class DictionaryBuilderTest extends TestCase { "--input2=" + TEST_INPUTS + "de-en_dictcc_simulated", "--input2Name=dictcc", "--input2Charset=UTF8", - "--input2Format=dictcc", + "--input2Format=tab_separated", "--print=" + result.getPath() + ".text", });