gitweb.fperrin.net Git - DictionaryPC.git/commitdiff
a
author thadh <thadh@THADH-LAPTOP.ad.corp.google.com>
Tue, 27 Oct 2009 15:01:07 +0000 (08:01 -0700)
committer thadh <thadh@THADH-LAPTOP.ad.corp.google.com>
Tue, 27 Oct 2009 15:01:07 +0000 (08:01 -0700)
src/com/hughes/android/dictionary/DictionaryBuilder.java
src/com/hughes/android/dictionary/DictionaryTest.java

index bb1301ee1a9789b02a920d59286ea24f90b4a607..408908e01c61a0ee3d1d7de1033fa9bc82cce84d 100755 (executable)
@@ -8,6 +8,7 @@ import java.io.InputStreamReader;
 import java.io.RandomAccessFile;\r
 import java.nio.charset.Charset;\r
 import java.util.ArrayList;\r
+import java.util.Arrays;\r
 import java.util.Collections;\r
 import java.util.Comparator;\r
 import java.util.HashMap;\r
@@ -19,25 +20,37 @@ import com.hughes.android.dictionary.Dictionary.IndexEntry;
 import com.hughes.android.dictionary.Dictionary.Row;\r
 \r
 public class DictionaryBuilder {\r
+  \r
+  static final List<InputFile> inputFiles = Arrays.asList(\r
+      new InputFile("c:\\thad\\de-en-chemnitz.txt", Charset.forName("UTF8"), true),\r
+      // Thad's extra sauce: \r
+      new InputFile("c:\\thad\\de-en-dictcc.txt", Charset.forName("Cp1252"), false)\r
+      );\r
+  static final String dictOutFilename = "c:\\thad\\de-en.dict";\r
+  \r
+  static class InputFile {\r
+    final String file;\r
+    final Charset charset;\r
+    final boolean hasMultipleSubentries;\r
+    public InputFile(String file, Charset charset, boolean hasMultipleSubentries) {\r
+      this.file = file;\r
+      this.charset = charset;\r
+      this.hasMultipleSubentries = hasMultipleSubentries;\r
+    }\r
+  }\r
 \r
   public static void main(String[] args) throws IOException,\r
       ClassNotFoundException {\r
-    if (args.length != 1) {\r
-      System.err.println("outputfile");\r
-      return;\r
-    }\r
-    final String dictOutFilename = args[0];\r
 \r
     final Dictionary dict = new Dictionary("de-en.txt - a German-English dictionary\n" +\r
                "Version: devel, 2009-04-17\n" +\r
                "Source: http://dict.tu-chemnitz.de/\n" +\r
                "Thanks to Frank Richter.", Language.DE, Language.EN);\r
     System.out.println(Charset.forName("Cp1252"));\r
-    processInputFile("c:\\de-en-chemnitz.txt", dict, true, Charset.forName("UTF8"));\r
+    for (final InputFile inputFile : inputFiles) {\r
+      processInputFile(dict, inputFile);\r
+    }\r
     \r
-    // Thad's extra sauce: \r
-//    processInputFile("c:\\de-en-dictcc.txt", dict, false, Charset.forName("Cp1252"));\r
-\r
     createIndex(dict, Entry.LANG1);\r
     createIndex(dict, Entry.LANG2);\r
 \r
@@ -48,9 +61,8 @@ public class DictionaryBuilder {
     dictOut.close();\r
   }\r
 \r
-  private static void processInputFile(final String filename,\r
-      final Dictionary dict, final boolean hasMultipleSubentries, final Charset charset) throws FileNotFoundException, IOException {\r
-    final BufferedReader dictionaryIn = new BufferedReader(new InputStreamReader(new FileInputStream(filename), charset));\r
+  private static void processInputFile(final Dictionary dict, final InputFile inputFile) throws FileNotFoundException, IOException {\r
+    final BufferedReader dictionaryIn = new BufferedReader(new InputStreamReader(new FileInputStream(inputFile.file), inputFile.charset));\r
     String line;\r
     int lineCount = 0;\r
     while ((line = dictionaryIn.readLine()) != null) {\r
@@ -60,7 +72,7 @@ public class DictionaryBuilder {
         continue;\r
       }\r
 \r
-      final Entry entry = Entry.parseFromLine(line, hasMultipleSubentries);\r
+      final Entry entry = Entry.parseFromLine(line, inputFile.hasMultipleSubentries);\r
       if (entry == null) {\r
         System.err.println("Invalid entry: " + line);\r
         continue;\r
index 84bdd5e1b3ec5612176b3e98f0e0a4f70cf61eb4..0ac87cedd51d5c296585657252ca0c071aa83393 100755 (executable)
@@ -52,6 +52,18 @@ public class DictionaryTest extends TestCase {
     assertEquals("der", dict.languageDatas[0].sortedIndex.get(0).word);\r
     assertEquals("die", dict.languageDatas[0].sortedIndex.get(1).word);\r
     \r
+    assertEquals(0, dict.languageDatas[0].getPrevTokenRow(0));\r
+    assertEquals(0, dict.languageDatas[0].getPrevTokenRow(2));\r
+    assertEquals(0, dict.languageDatas[0].getPrevTokenRow(1));\r
+    assertEquals(4, dict.languageDatas[0].getPrevTokenRow(6));\r
+\r
+    assertEquals(2, dict.languageDatas[0].getNextTokenRow(0));\r
+    assertEquals(2, dict.languageDatas[0].getNextTokenRow(1));\r
+    assertEquals(4, dict.languageDatas[0].getNextTokenRow(2));\r
+    assertEquals(8, dict.languageDatas[0].getNextTokenRow(6));\r
+    assertEquals(dict.languageDatas[0].rows.size() - 1, dict.languageDatas[0].getNextTokenRow(dict.languageDatas[0].rows.size() - 2));\r
+    assertEquals(dict.languageDatas[0].rows.size() - 1, dict.languageDatas[0].getNextTokenRow(dict.languageDatas[0].rows.size() - 1));\r
+\r
     for (final IndexEntry indexEntry : dict.languageDatas[0].sortedIndex) {\r
       System.out.println(indexEntry);\r
     }\r
@@ -94,13 +106,14 @@ public class DictionaryTest extends TestCase {
         Entry.parseFromLine("(akuter) Myokardinfarkt {m} <AMI / MI> :: (acute) myocardial infarction <AMI / MI>", true),\r
         Entry.parseFromLine("(reine) Vermutung {f} :: guesswork", true),\r
         Entry.parseFromLine("(mit) 6:1 vorne liegen :: to be 6-1 up [football]", true),\r
-        Entry.parseFromLine("(auf) den Knopf drücken [auch fig.: auslösen] :: to push the button [also fig.: initiate]", false),\r
+        Entry.parseFromLine("(auf) den Knopf drücken [auch fig.: auslösen] :: to push the button [also fig.: initiate]", false),\r
         Entry.parseFromLine("Adjektiv {n} /Adj./; Eigenschaftswort {n} [gramm.] | Adjektive {pl}; Eigenschaftswoerter {pl} :: adjective /adj./ | adjectives", true),\r
-        Entry.parseFromLine("Älteste {m,f}; Ältester :: oldest; eldest", true),\r
+        Entry.parseFromLine("Älteste {m,f}; Ältester :: oldest; eldest", true),\r
         Entry.parseFromLine("\"...\", schloss er an. :: '...,' he added.", true),\r
         Entry.parseFromLine("besonderer | besondere | besonderes :: extra", false),\r
         Entry.parseFromLine("| zu Pferde; zu Pferd | reiten :: horseback | on horseback | go on horseback", true),\r
-        Entry.parseFromLine("Hauptaugenmerk {m} | sein Hauptaugenmerk richten auf ::  | to focus (one's) attention on", true)\r
+        Entry.parseFromLine("Hauptaugenmerk {m} | sein Hauptaugenmerk richten auf ::  | to focus (one's) attention on", true),\r
+        Entry.parseFromLine("&#963;-Algebra {f} :: &#963;-field", true)\r
         );\r
 \r
     assertFalse(entries.contains(null));\r
@@ -124,7 +137,11 @@ public class DictionaryTest extends TestCase {
       if (lang == 0) {\r
         assertTrue(words.contains("CHRISTOS"));\r
         assertTrue(words.contains("akuter"));\r
+        assertTrue(words.contains("σ-Algebra"));\r
+\r
+        assertFalse(words.contains("-Algebra"));\r
       } else {\r
+        assertTrue(words.contains("σ-field"));\r
         assertTrue(words.contains("6-1"));\r
       }\r
     }\r
@@ -132,11 +149,11 @@ public class DictionaryTest extends TestCase {
   }\r
   \r
   public void testGermanSort() {\r
-    assertEquals("aüÄ", Language.DE.textNorm("aueAe"));\r
+    assertEquals("aüÄ", Language.DE.textNorm("aueAe"));\r
     final List<String> words = Arrays.asList(\r
+        "er-ben",\r
         "erben",\r
         "Erben",\r
-        "er-ben",\r
         "Erbse",\r
         "Erbsen",\r
         "essen",\r
@@ -144,25 +161,25 @@ public class DictionaryTest extends TestCase {
         "Grosformat",\r
         "Grosformats",\r
         "Grossformat",\r
-        "Großformat",\r
+        "Großformat",\r
         "Grossformats",\r
-        "Großformats",\r
-        "Großpoo",\r
-        "Großpoos",\r
+        "Großformats",\r
+        "Großpoo",\r
+        "Großpoos",\r
         "hulle",\r
         "Hulle",\r
-        "hülle",\r
+        "hülle",\r
         "huelle",\r
-        "Hülle",\r
+        "Hülle",\r
         "Huelle",\r
         "Hum"\r
         );\r
-    assertEquals(0, Language.DE.sortComparator.compare("hülle", "huelle"));\r
-    assertEquals(0, Language.DE.sortComparator.compare("huelle", "hülle"));\r
+    assertEquals(0, Language.DE.sortComparator.compare("hülle", "huelle"));\r
+    assertEquals(0, Language.DE.sortComparator.compare("huelle", "hülle"));\r
     \r
-    assertEquals(-1, Language.DE.sortComparator.compare("hülle", "Hülle"));\r
-    assertEquals(0, Language.DE.findComparator.compare("hülle", "Hülle"));\r
-    assertEquals(-1, Language.DE.findComparator.compare("hulle", "Hülle"));\r
+    assertEquals(-1, Language.DE.sortComparator.compare("hülle", "Hülle"));\r
+    assertEquals(0, Language.DE.findComparator.compare("hülle", "Hülle"));\r
+    assertEquals(-1, Language.DE.findComparator.compare("hulle", "Hülle"));\r
 \r
     \r
     for (final String s : words) {\r
@@ -181,9 +198,9 @@ public class DictionaryTest extends TestCase {
   public void testEnglishSort() {\r
 \r
     final List<String> words = Arrays.asList(\r
+        "pre-print", \r
         "preppie", \r
         "preppy",\r
-        "pre-print", \r
         "preprocess");\r
     \r
     final List<String> sorted = new ArrayList<String>(words);\r
@@ -196,7 +213,7 @@ public class DictionaryTest extends TestCase {
       assertEquals(words.get(i), sorted.get(i));\r
     }\r
     \r
-    assertTrue(Language.EN.sortCollator.compare("preppy", "pre-print") < 0);\r
+    assertTrue(Language.EN.sortCollator.compare("pre-print", "preppy") < 0);\r
 \r
   }\r
 \r