import java.io.RandomAccessFile;\r
import java.nio.charset.Charset;\r
import java.util.ArrayList;\r
+import java.util.Arrays;\r
import java.util.Collections;\r
import java.util.Comparator;\r
import java.util.HashMap;\r
import com.hughes.android.dictionary.Dictionary.Row;\r
\r
public class DictionaryBuilder {\r
+ \r
+ static final List<InputFile> inputFiles = Arrays.asList( /* Source files; main passes each one to processInputFile in this order. */\r
+ new InputFile("c:\\thad\\de-en-chemnitz.txt", Charset.forName("UTF8"), true),\r
+ // Thad's extra sauce: \r
+ new InputFile("c:\\thad\\de-en-dictcc.txt", Charset.forName("Cp1252"), false)\r
+ );\r
+ static final String dictOutFilename = "c:\\thad\\de-en.dict"; /* Hard-coded value; supersedes the removed local that was read from args[0]. */\r
+ \r
+ static class InputFile { /* Immutable description of one dictionary source file consumed by processInputFile. */\r
+ final String file; /* Path handed to FileInputStream when the file is opened. */\r
+ final Charset charset; /* Charset given to the InputStreamReader that decodes the file. */\r
+ final boolean hasMultipleSubentries; /* Forwarded as the second argument of Entry.parseFromLine for every line of this file. */\r
+ public InputFile(String file, Charset charset, boolean hasMultipleSubentries) { /* Stores the three arguments unchanged; no validation is done. */\r
+ this.file = file;\r
+ this.charset = charset;\r
+ this.hasMultipleSubentries = hasMultipleSubentries;\r
+ }\r
+ }\r
\r
public static void main(String[] args) throws IOException,\r
ClassNotFoundException {\r
- if (args.length != 1) {\r
- System.err.println("outputfile");\r
- return;\r
- }\r
- final String dictOutFilename = args[0];\r
\r
final Dictionary dict = new Dictionary("de-en.txt - a German-English dictionary\n" +\r
"Version: devel, 2009-04-17\n" +\r
"Source: http://dict.tu-chemnitz.de/\n" +\r
"Thanks to Frank Richter.", Language.DE, Language.EN);\r
System.out.println(Charset.forName("Cp1252"));\r
- processInputFile("c:\\de-en-chemnitz.txt", dict, true, Charset.forName("UTF8"));\r
+ for (final InputFile inputFile : inputFiles) {\r
+ processInputFile(dict, inputFile);\r
+ }\r
\r
- // Thad's extra sauce: \r
-// processInputFile("c:\\de-en-dictcc.txt", dict, false, Charset.forName("Cp1252"));\r
-\r
createIndex(dict, Entry.LANG1);\r
createIndex(dict, Entry.LANG2);\r
\r
dictOut.close();\r
}\r
\r
- private static void processInputFile(final String filename,\r
- final Dictionary dict, final boolean hasMultipleSubentries, final Charset charset) throws FileNotFoundException, IOException {\r
- final BufferedReader dictionaryIn = new BufferedReader(new InputStreamReader(new FileInputStream(filename), charset));\r
+ private static void processInputFile(final Dictionary dict, final InputFile inputFile) throws FileNotFoundException, IOException {\r
+ final BufferedReader dictionaryIn = new BufferedReader(new InputStreamReader(new FileInputStream(inputFile.file), inputFile.charset));\r
String line;\r
int lineCount = 0;\r
while ((line = dictionaryIn.readLine()) != null) {\r
continue;\r
}\r
\r
- final Entry entry = Entry.parseFromLine(line, hasMultipleSubentries);\r
+ final Entry entry = Entry.parseFromLine(line, inputFile.hasMultipleSubentries);\r
if (entry == null) {\r
System.err.println("Invalid entry: " + line);\r
continue;\r
assertEquals("der", dict.languageDatas[0].sortedIndex.get(0).word);\r
assertEquals("die", dict.languageDatas[0].sortedIndex.get(1).word);\r
\r
+ assertEquals(0, dict.languageDatas[0].getPrevTokenRow(0));\r
+ assertEquals(0, dict.languageDatas[0].getPrevTokenRow(2));\r
+ assertEquals(0, dict.languageDatas[0].getPrevTokenRow(1));\r
+ assertEquals(4, dict.languageDatas[0].getPrevTokenRow(6));\r
+\r
+ assertEquals(2, dict.languageDatas[0].getNextTokenRow(0));\r
+ assertEquals(2, dict.languageDatas[0].getNextTokenRow(1));\r
+ assertEquals(4, dict.languageDatas[0].getNextTokenRow(2));\r
+ assertEquals(8, dict.languageDatas[0].getNextTokenRow(6));\r
+ assertEquals(dict.languageDatas[0].rows.size() - 1, dict.languageDatas[0].getNextTokenRow(dict.languageDatas[0].rows.size() - 2));\r
+ assertEquals(dict.languageDatas[0].rows.size() - 1, dict.languageDatas[0].getNextTokenRow(dict.languageDatas[0].rows.size() - 1));\r
+\r
for (final IndexEntry indexEntry : dict.languageDatas[0].sortedIndex) {\r
System.out.println(indexEntry);\r
}\r
Entry.parseFromLine("(akuter) Myokardinfarkt {m} <AMI / MI> :: (acute) myocardial infarction <AMI / MI>", true),\r
Entry.parseFromLine("(reine) Vermutung {f} :: guesswork", true),\r
Entry.parseFromLine("(mit) 6:1 vorne liegen :: to be 6-1 up [football]", true),\r
- Entry.parseFromLine("(auf) den Knopf drücken [auch fig.: auslösen] :: to push the button [also fig.: initiate]", false),\r
+ Entry.parseFromLine("(auf) den Knopf drücken [auch fig.: auslösen] :: to push the button [also fig.: initiate]", false),\r
Entry.parseFromLine("Adjektiv {n} /Adj./; Eigenschaftswort {n} [gramm.] | Adjektive {pl}; Eigenschaftswoerter {pl} :: adjective /adj./ | adjectives", true),\r
- Entry.parseFromLine("Älteste {m,f}; Ältester :: oldest; eldest", true),\r
+ Entry.parseFromLine("Älteste {m,f}; Ältester :: oldest; eldest", true),\r
Entry.parseFromLine("\"...\", schloss er an. :: '...,' he added.", true),\r
Entry.parseFromLine("besonderer | besondere | besonderes :: extra", false),\r
Entry.parseFromLine("| zu Pferde; zu Pferd | reiten :: horseback | on horseback | go on horseback", true),\r
- Entry.parseFromLine("Hauptaugenmerk {m} | sein Hauptaugenmerk richten auf :: | to focus (one's) attention on", true)\r
+ Entry.parseFromLine("Hauptaugenmerk {m} | sein Hauptaugenmerk richten auf :: | to focus (one's) attention on", true),\r
+ Entry.parseFromLine("σ-Algebra {f} :: σ-field", true)\r
);\r
\r
assertFalse(entries.contains(null));\r
if (lang == 0) {\r
assertTrue(words.contains("CHRISTOS"));\r
assertTrue(words.contains("akuter"));\r
+ assertTrue(words.contains("σ-Algebra"));\r
+\r
+ assertFalse(words.contains("-Algebra"));\r
} else {\r
+ assertTrue(words.contains("σ-field"));\r
assertTrue(words.contains("6-1"));\r
}\r
}\r
}\r
\r
public void testGermanSort() {\r
- assertEquals("aüÄ", Language.DE.textNorm("aueAe"));\r
+ assertEquals("aüÄ", Language.DE.textNorm("aueAe"));\r
final List<String> words = Arrays.asList(\r
+ "er-ben",\r
"erben",\r
"Erben",\r
- "er-ben",\r
"Erbse",\r
"Erbsen",\r
"essen",\r
"Grosformat",\r
"Grosformats",\r
"Grossformat",\r
- "Großformat",\r
+ "Großformat",\r
"Grossformats",\r
- "Großformats",\r
- "Großpoo",\r
- "Großpoos",\r
+ "Großformats",\r
+ "Großpoo",\r
+ "Großpoos",\r
"hulle",\r
"Hulle",\r
- "hülle",\r
+ "hülle",\r
"huelle",\r
- "Hülle",\r
+ "Hülle",\r
"Huelle",\r
"Hum"\r
);\r
- assertEquals(0, Language.DE.sortComparator.compare("hülle", "huelle"));\r
- assertEquals(0, Language.DE.sortComparator.compare("huelle", "hülle"));\r
+ assertEquals(0, Language.DE.sortComparator.compare("hülle", "huelle"));\r
+ assertEquals(0, Language.DE.sortComparator.compare("huelle", "hülle"));\r
\r
- assertEquals(-1, Language.DE.sortComparator.compare("hülle", "Hülle"));\r
- assertEquals(0, Language.DE.findComparator.compare("hülle", "Hülle"));\r
- assertEquals(-1, Language.DE.findComparator.compare("hulle", "Hülle"));\r
+ assertEquals(-1, Language.DE.sortComparator.compare("hülle", "Hülle"));\r
+ assertEquals(0, Language.DE.findComparator.compare("hülle", "Hülle"));\r
+ assertEquals(-1, Language.DE.findComparator.compare("hulle", "Hülle"));\r
\r
\r
for (final String s : words) {\r
public void testEnglishSort() {\r
\r
final List<String> words = Arrays.asList(\r
+ "pre-print", \r
"preppie", \r
"preppy",\r
- "pre-print", \r
"preprocess");\r
\r
final List<String> sorted = new ArrayList<String>(words);\r
assertEquals(words.get(i), sorted.get(i));\r
}\r
\r
- assertTrue(Language.EN.sortCollator.compare("preppy", "pre-print") < 0);\r
+ assertTrue(Language.EN.sortCollator.compare("pre-print", "preppy") < 0);\r
\r
}\r
\r