From: thadh Date: Tue, 27 Oct 2009 15:01:07 +0000 (-0700) Subject: a X-Git-Url: http://gitweb.fperrin.net/?a=commitdiff_plain;h=3822413ba169ee1c4f910cba7db1d363eaa78cd8;p=DictionaryPC.git a --- diff --git a/src/com/hughes/android/dictionary/DictionaryBuilder.java b/src/com/hughes/android/dictionary/DictionaryBuilder.java index bb1301e..408908e 100755 --- a/src/com/hughes/android/dictionary/DictionaryBuilder.java +++ b/src/com/hughes/android/dictionary/DictionaryBuilder.java @@ -8,6 +8,7 @@ import java.io.InputStreamReader; import java.io.RandomAccessFile; import java.nio.charset.Charset; import java.util.ArrayList; +import java.util.Arrays; import java.util.Collections; import java.util.Comparator; import java.util.HashMap; @@ -19,25 +20,37 @@ import com.hughes.android.dictionary.Dictionary.IndexEntry; import com.hughes.android.dictionary.Dictionary.Row; public class DictionaryBuilder { + + static final List inputFiles = Arrays.asList( + new InputFile("c:\\thad\\de-en-chemnitz.txt", Charset.forName("UTF8"), true), + // Thad's extra sauce: + new InputFile("c:\\thad\\de-en-dictcc.txt", Charset.forName("Cp1252"), false) + ); + static final String dictOutFilename = "c:\\thad\\de-en.dict"; + + static class InputFile { + final String file; + final Charset charset; + final boolean hasMultipleSubentries; + public InputFile(String file, Charset charset, boolean hasMultipleSubentries) { + this.file = file; + this.charset = charset; + this.hasMultipleSubentries = hasMultipleSubentries; + } + } public static void main(String[] args) throws IOException, ClassNotFoundException { - if (args.length != 1) { - System.err.println("outputfile"); - return; - } - final String dictOutFilename = args[0]; final Dictionary dict = new Dictionary("de-en.txt - a German-English dictionary\n" + "Version: devel, 2009-04-17\n" + "Source: http://dict.tu-chemnitz.de/\n" + "Thanks to Frank Richter.", Language.DE, Language.EN); System.out.println(Charset.forName("Cp1252")); - 
processInputFile("c:\\de-en-chemnitz.txt", dict, true, Charset.forName("UTF8")); + for (final InputFile inputFile : inputFiles) { + processInputFile(dict, inputFile); + } - // Thad's extra sauce: -// processInputFile("c:\\de-en-dictcc.txt", dict, false, Charset.forName("Cp1252")); - createIndex(dict, Entry.LANG1); createIndex(dict, Entry.LANG2); @@ -48,9 +61,8 @@ public class DictionaryBuilder { dictOut.close(); } - private static void processInputFile(final String filename, - final Dictionary dict, final boolean hasMultipleSubentries, final Charset charset) throws FileNotFoundException, IOException { - final BufferedReader dictionaryIn = new BufferedReader(new InputStreamReader(new FileInputStream(filename), charset)); + private static void processInputFile(final Dictionary dict, final InputFile inputFile) throws FileNotFoundException, IOException { + final BufferedReader dictionaryIn = new BufferedReader(new InputStreamReader(new FileInputStream(inputFile.file), inputFile.charset)); String line; int lineCount = 0; while ((line = dictionaryIn.readLine()) != null) { @@ -60,7 +72,7 @@ public class DictionaryBuilder { continue; } - final Entry entry = Entry.parseFromLine(line, hasMultipleSubentries); + final Entry entry = Entry.parseFromLine(line, inputFile.hasMultipleSubentries); if (entry == null) { System.err.println("Invalid entry: " + line); continue; diff --git a/src/com/hughes/android/dictionary/DictionaryTest.java b/src/com/hughes/android/dictionary/DictionaryTest.java index 84bdd5e..0ac87ce 100755 --- a/src/com/hughes/android/dictionary/DictionaryTest.java +++ b/src/com/hughes/android/dictionary/DictionaryTest.java @@ -52,6 +52,18 @@ public class DictionaryTest extends TestCase { assertEquals("der", dict.languageDatas[0].sortedIndex.get(0).word); assertEquals("die", dict.languageDatas[0].sortedIndex.get(1).word); + assertEquals(0, dict.languageDatas[0].getPrevTokenRow(0)); + assertEquals(0, dict.languageDatas[0].getPrevTokenRow(2)); + assertEquals(0, 
dict.languageDatas[0].getPrevTokenRow(1)); + assertEquals(4, dict.languageDatas[0].getPrevTokenRow(6)); + + assertEquals(2, dict.languageDatas[0].getNextTokenRow(0)); + assertEquals(2, dict.languageDatas[0].getNextTokenRow(1)); + assertEquals(4, dict.languageDatas[0].getNextTokenRow(2)); + assertEquals(8, dict.languageDatas[0].getNextTokenRow(6)); + assertEquals(dict.languageDatas[0].rows.size() - 1, dict.languageDatas[0].getNextTokenRow(dict.languageDatas[0].rows.size() - 2)); + assertEquals(dict.languageDatas[0].rows.size() - 1, dict.languageDatas[0].getNextTokenRow(dict.languageDatas[0].rows.size() - 1)); + for (final IndexEntry indexEntry : dict.languageDatas[0].sortedIndex) { System.out.println(indexEntry); } @@ -94,13 +106,14 @@ public class DictionaryTest extends TestCase { Entry.parseFromLine("(akuter) Myokardinfarkt {m} :: (acute) myocardial infarction ", true), Entry.parseFromLine("(reine) Vermutung {f} :: guesswork", true), Entry.parseFromLine("(mit) 6:1 vorne liegen :: to be 6-1 up [football]", true), - Entry.parseFromLine("(auf) den Knopf drücken [auch fig.: auslösen] :: to push the button [also fig.: initiate]", false), + Entry.parseFromLine("(auf) den Knopf drücken [auch fig.: auslösen] :: to push the button [also fig.: initiate]", false), Entry.parseFromLine("Adjektiv {n} /Adj./; Eigenschaftswort {n} [gramm.] | Adjektive {pl}; Eigenschaftswoerter {pl} :: adjective /adj./ | adjectives", true), - Entry.parseFromLine("Älteste {m,f}; Ältester :: oldest; eldest", true), + Entry.parseFromLine("Älteste {m,f}; Ältester :: oldest; eldest", true), Entry.parseFromLine("\"...\", schloss er an. 
:: '...,' he added.", true), Entry.parseFromLine("besonderer | besondere | besonderes :: extra", false), Entry.parseFromLine("| zu Pferde; zu Pferd | reiten :: horseback | on horseback | go on horseback", true), - Entry.parseFromLine("Hauptaugenmerk {m} | sein Hauptaugenmerk richten auf :: | to focus (one's) attention on", true) + Entry.parseFromLine("Hauptaugenmerk {m} | sein Hauptaugenmerk richten auf :: | to focus (one's) attention on", true), + Entry.parseFromLine("σ-Algebra {f} :: σ-field", true) ); assertFalse(entries.contains(null)); @@ -124,7 +137,11 @@ public class DictionaryTest extends TestCase { if (lang == 0) { assertTrue(words.contains("CHRISTOS")); assertTrue(words.contains("akuter")); + assertTrue(words.contains("σ-Algebra")); + + assertFalse(words.contains("-Algebra")); } else { + assertTrue(words.contains("σ-field")); assertTrue(words.contains("6-1")); } } @@ -132,11 +149,11 @@ public class DictionaryTest extends TestCase { } public void testGermanSort() { - assertEquals("aüÄ", Language.DE.textNorm("aueAe")); + assertEquals("aüÄ", Language.DE.textNorm("aueAe")); final List words = Arrays.asList( + "er-ben", "erben", "Erben", - "er-ben", "Erbse", "Erbsen", "essen", @@ -144,25 +161,25 @@ public class DictionaryTest extends TestCase { "Grosformat", "Grosformats", "Grossformat", - "Großformat", + "Großformat", "Grossformats", - "Großformats", - "Großpoo", - "Großpoos", + "Großformats", + "Großpoo", + "Großpoos", "hulle", "Hulle", - "hülle", + "hülle", "huelle", - "Hülle", + "Hülle", "Huelle", "Hum" ); - assertEquals(0, Language.DE.sortComparator.compare("hülle", "huelle")); - assertEquals(0, Language.DE.sortComparator.compare("huelle", "hülle")); + assertEquals(0, Language.DE.sortComparator.compare("hülle", "huelle")); + assertEquals(0, Language.DE.sortComparator.compare("huelle", "hülle")); - assertEquals(-1, Language.DE.sortComparator.compare("hülle", "Hülle")); - assertEquals(0, Language.DE.findComparator.compare("hülle", "Hülle")); - 
assertEquals(-1, Language.DE.findComparator.compare("hulle", "Hülle")); + assertEquals(-1, Language.DE.sortComparator.compare("hülle", "Hülle")); + assertEquals(0, Language.DE.findComparator.compare("hülle", "Hülle")); + assertEquals(-1, Language.DE.findComparator.compare("hulle", "Hülle")); for (final String s : words) { @@ -181,9 +198,9 @@ public class DictionaryTest extends TestCase { public void testEnglishSort() { final List words = Arrays.asList( + "pre-print", "preppie", "preppy", - "pre-print", "preprocess"); final List sorted = new ArrayList(words); @@ -196,7 +213,7 @@ public class DictionaryTest extends TestCase { assertEquals(words.get(i), sorted.get(i)); } - assertTrue(Language.EN.sortCollator.compare("preppy", "pre-print") < 0); + assertTrue(Language.EN.sortCollator.compare("pre-print", "preppy") < 0); }