}\r
final String dictOutFilename = args[0];\r
\r
- final Dictionary dict = new Dictionary(Language.DE, Language.EN);\r
+ final Dictionary dict = new Dictionary("de-en.txt - a German-English dictionary\n" +\r
+ "Version: 1.6, 2009-04-16\n" +\r
+ "Source: http://dict.tu-chemnitz.de/\n" +\r
+ "Thanks to Frank Richter.", Language.DE, Language.EN);\r
System.out.println(Charset.forName("Cp1252"));\r
processInputFile("c:\\de-en-chemnitz.txt", dict, true, Charset.forName("UTF8"));\r
+ \r
+ // Thad's extra sauce: \r
processInputFile("c:\\de-en-dictcc.txt", dict, false, Charset.forName("Cp1252"));\r
\r
createIndex(dict, Entry.LANG1);\r
import java.io.File;\r
import java.io.IOException;\r
import java.io.RandomAccessFile;\r
+import java.util.ArrayList;\r
import java.util.Arrays;\r
+import java.util.Collections;\r
import java.util.List;\r
+import java.util.Random;\r
import java.util.concurrent.atomic.AtomicBoolean;\r
\r
import junit.framework.TestCase;\r
Entry.parseFromLine("rennen :: run", false));\r
\r
{\r
- final Dictionary dict = new Dictionary(Language.DE, Language.EN);\r
+ final Dictionary dict = new Dictionary("test", Language.DE, Language.EN);\r
dict.entries.addAll(entries);\r
DictionaryBuilder.createIndex(dict, Entry.LANG1);\r
DictionaryBuilder.createIndex(dict, Entry.LANG2);\r
assertEquals(entries, dict.entries);\r
\r
assertEquals("der", dict.languageDatas[0].sortedIndex.get(0).word);\r
- assertEquals("Die", dict.languageDatas[0].sortedIndex.get(1).word);\r
+ assertEquals("die", dict.languageDatas[0].sortedIndex.get(1).word);\r
\r
for (final IndexEntry indexEntry : dict.languageDatas[0].sortedIndex) {\r
System.out.println(indexEntry);\r
}\r
}\r
\r
- assertEquals("Die", dict.languageDatas[0].sortedIndex.get(dict.languageDatas[0].lookup("die", new AtomicBoolean())).word);\r
+ assertEquals("die", dict.languageDatas[0].sortedIndex.get(dict.languageDatas[0].lookup("Die", new AtomicBoolean())).word);\r
+ assertEquals("die", dict.languageDatas[0].sortedIndex.get(dict.languageDatas[0].lookup("die", new AtomicBoolean())).word);\r
\r
}\r
\r
\r
// Hyphenated words get put in multiple listings.\r
\r
- final Dictionary dict = new Dictionary(Language.DE, Language.EN);\r
+ final Dictionary dict = new Dictionary("test", Language.DE, Language.EN);\r
dict.entries.addAll(entries);\r
DictionaryBuilder.createIndex(dict, Entry.LANG1);\r
DictionaryBuilder.createIndex(dict, Entry.LANG2);\r
}\r
\r
}\r
+ \r
+ /**\r
+  * Verifies German sort-normalization of a single token, then checks that\r
+  * sorting a shuffled copy of a fixture word list with\r
+  * {@code Language.DE.tokenComparator} reproduces the fixture's order exactly.\r
+  */\r
+ public void testGermanSort() {\r
+   // For this input, the sort-normalized form is the lower-cased token.\r
+   assertEquals("grosformat", Language.DE.normalizeTokenForSort("Grosformat"));\r
+ \r
+   // The fixture is listed in the order the comparator is expected to produce.\r
+   final List<String> expectedOrder = Arrays.asList(\r
+       "er-ben", "erben", "Erben",\r
+       "Erbse", "Erbsen",\r
+       "essen", "Essen",\r
+       "Grosformat", "Grosformats",\r
+       "Grossformat", "Großformat",\r
+       "Grossformats", "Großformats",\r
+       "Großpoo", "Großpoos",\r
+       "hulle", "Hulle",\r
+       "Hum",\r
+       "huelle", "Huelle",\r
+       "hülle", "Hülle"\r
+   );\r
+ \r
+   // Print every word next to its normalized form to aid debugging.\r
+   for (int idx = 0; idx < expectedOrder.size(); ++idx) {\r
+     final String word = expectedOrder.get(idx);\r
+     final String normalized = Language.DE.normalizeTokenForSort(word);\r
+     System.out.println(word + "\t" + normalized);\r
+   }\r
+ \r
+   // Scramble a mutable copy with a fixed seed so the run is repeatable,\r
+   // then let the comparator put it back in order.\r
+   final List<String> actualOrder = new ArrayList<String>(expectedOrder.size());\r
+   actualOrder.addAll(expectedOrder);\r
+   final Random fixedSeed = new Random(0);\r
+   Collections.shuffle(actualOrder, fixedSeed);\r
+   Collections.sort(actualOrder, Language.DE.tokenComparator);\r
+   System.out.println(actualOrder);\r
+ \r
+   // Compare position by position so a failure names the first mismatch.\r
+   int position = 0;\r
+   for (final String actual : actualOrder) {\r
+     assertEquals(expectedOrder.get(position), actual);\r
+     ++position;\r
+   }\r
+ }\r
\r
\r
}\r