gitweb.fperrin.net Git - DictionaryPC.git/commitdiff
go
author: thadh <thadh@THADH-MTV.ad.corp.google.com>
Sat, 9 May 2009 16:35:00 +0000 (09:35 -0700)
committer: thadh <thadh@THADH-MTV.ad.corp.google.com>
Sat, 9 May 2009 16:35:00 +0000 (09:35 -0700)
src/com/hughes/android/dictionary/DictionaryBuilder.java
src/com/hughes/android/dictionary/DictionaryTest.java

index c141eff77807eed64c4eb19b5362014878356d5b..820298ebc4bd3a4fee09399293e79fd1ae179b60 100755 (executable)
@@ -28,9 +28,14 @@ public class DictionaryBuilder {
     }\r
     final String dictOutFilename = args[0];\r
 \r
-    final Dictionary dict = new Dictionary(Language.DE, Language.EN);\r
+    final Dictionary dict = new Dictionary("de-en.txt - a German-English dictionary\n" +\r
+               "Version: 1.6, 2009-04-16\n" +\r
+               "Source: http://dict.tu-chemnitz.de/\n" +\r
+               "Thanks to Frank Richter.", Language.DE, Language.EN);\r
     System.out.println(Charset.forName("Cp1252"));\r
     processInputFile("c:\\de-en-chemnitz.txt", dict, true, Charset.forName("UTF8"));\r
+    \r
+    // Thad's extra sauce: \r
     processInputFile("c:\\de-en-dictcc.txt", dict, false, Charset.forName("Cp1252"));\r
 \r
     createIndex(dict, Entry.LANG1);\r
index 60a9cbaefe99abdc021b7fa21b3814b2c1f67292..c09338a2b5425fe0a8035c81c7b4ed97ea9db9d2 100755 (executable)
@@ -3,8 +3,11 @@ package com.hughes.android.dictionary;
 import java.io.File;\r
 import java.io.IOException;\r
 import java.io.RandomAccessFile;\r
+import java.util.ArrayList;\r
 import java.util.Arrays;\r
+import java.util.Collections;\r
 import java.util.List;\r
+import java.util.Random;\r
 import java.util.concurrent.atomic.AtomicBoolean;\r
 \r
 import junit.framework.TestCase;\r
@@ -29,7 +32,7 @@ public class DictionaryTest extends TestCase {
         Entry.parseFromLine("rennen :: run", false));\r
 \r
     {\r
-      final Dictionary dict = new Dictionary(Language.DE, Language.EN);\r
+      final Dictionary dict = new Dictionary("test", Language.DE, Language.EN);\r
       dict.entries.addAll(entries);\r
       DictionaryBuilder.createIndex(dict, Entry.LANG1);\r
       DictionaryBuilder.createIndex(dict, Entry.LANG2);\r
@@ -46,7 +49,7 @@ public class DictionaryTest extends TestCase {
     assertEquals(entries, dict.entries);\r
     \r
     assertEquals("der", dict.languageDatas[0].sortedIndex.get(0).word);\r
-    assertEquals("Die", dict.languageDatas[0].sortedIndex.get(1).word);\r
+    assertEquals("die", dict.languageDatas[0].sortedIndex.get(1).word);\r
     \r
     for (final IndexEntry indexEntry : dict.languageDatas[0].sortedIndex) {\r
       System.out.println(indexEntry);\r
@@ -75,7 +78,8 @@ public class DictionaryTest extends TestCase {
       }\r
     }\r
     \r
-    assertEquals("Die", dict.languageDatas[0].sortedIndex.get(dict.languageDatas[0].lookup("die", new AtomicBoolean())).word);\r
+    assertEquals("die", dict.languageDatas[0].sortedIndex.get(dict.languageDatas[0].lookup("Die", new AtomicBoolean())).word);\r
+    assertEquals("die", dict.languageDatas[0].sortedIndex.get(dict.languageDatas[0].lookup("die", new AtomicBoolean())).word);\r
 \r
   }\r
   \r
@@ -100,7 +104,7 @@ public class DictionaryTest extends TestCase {
 \r
     // Hyphenated words get put both multiple listings.\r
 \r
-    final Dictionary dict = new Dictionary(Language.DE, Language.EN);\r
+    final Dictionary dict = new Dictionary("test", Language.DE, Language.EN);\r
     dict.entries.addAll(entries);\r
     DictionaryBuilder.createIndex(dict, Entry.LANG1);\r
     DictionaryBuilder.createIndex(dict, Entry.LANG2);\r
@@ -115,6 +119,44 @@ public class DictionaryTest extends TestCase {
     }\r
 \r
   }\r
+  \r
+  public void testGermanSort() { /* Verifies that Language.DE.tokenComparator sorts a shuffled copy of the word list below back into the listed order. */\r
+    assertEquals("grosformat", Language.DE.normalizeTokenForSort("Grosformat")); /* this input must normalize to its lower-cased form */\r
+    final List<String> words = Arrays.asList( /* written in the expected sorted order; the final loop compares against it position by position */\r
+        "er-ben",\r
+        "erben",\r
+        "Erben",\r
+        "Erbse",\r
+        "Erbsen",\r
+        "essen",\r
+        "Essen",\r
+        "Grosformat",\r
+        "Grosformats",\r
+        "Grossformat",\r
+        "Großformat",\r
+        "Grossformats",\r
+        "Großformats",\r
+        "Großpoo",\r
+        "Großpoos",\r
+        "hulle",\r
+        "Hulle",\r
+        "Hum",\r
+        "huelle",\r
+        "Huelle",\r
+        "hülle",\r
+        "Hülle"\r
+        );\r
+    for (final String s : words) { /* diagnostic output only: word, tab, result of normalizeTokenForSort */\r
+      System.out.println(s + "\t" + Language.DE.normalizeTokenForSort(s));\r
+    }\r
+    final List<String> shuffled = new ArrayList<String>(words); /* separate copy so the expected-order list is not reordered */\r
+    Collections.shuffle(shuffled, new Random(0)); /* fixed seed 0 makes the shuffle reproducible between runs */\r
+    Collections.sort(shuffled, Language.DE.tokenComparator); /* the comparator under test */\r
+    System.out.println(shuffled.toString()); /* diagnostic output of the sorted result */\r
+    for (int i = 0; i < words.size(); ++i) { /* element-wise comparison of the sorted result with the expected order */\r
+      assertEquals(words.get(i), shuffled.get(i));\r
+    }\r
+  }\r
 \r
 \r
 }\r