package com.hughes.android.dictionary;\r
\r
+import java.io.BufferedReader;\r
+import java.io.FileInputStream;\r
+import java.io.FileNotFoundException;\r
import java.io.IOException;\r
+import java.io.InputStreamReader;\r
import java.io.RandomAccessFile;\r
+import java.nio.charset.Charset;\r
import java.util.ArrayList;\r
-import java.util.Arrays;\r
import java.util.Collections;\r
import java.util.Comparator;\r
import java.util.HashMap;\r
-import java.util.LinkedHashSet;\r
import java.util.List;\r
import java.util.Map;\r
import java.util.Set;\r
-import java.util.regex.Pattern;\r
\r
import com.hughes.android.dictionary.Dictionary.IndexEntry;\r
import com.hughes.android.dictionary.Dictionary.Row;\r
\r
public class DictionaryBuilder {\r
\r
- static final Pattern WHITESPACE = Pattern.compile("\\s+");\r
-\r
+ /**\r
+ * Builds the DE/EN dictionary and writes it to a file.\r
+ *\r
+ * Reads the two hard-coded input files (chemnitz as UTF8 with multiple\r
+ * subentries, dictcc as Cp1252 without), builds the index for both\r
+ * languages, then truncates and writes the output file.\r
+ *\r
+ * @param args exactly one element: the output dictionary filename; otherwise\r
+ * a usage message is printed to stderr and nothing is written\r
+ * @throws IOException if reading an input file or writing the output fails\r
+ */\r
public static void main(String[] args) throws IOException,\r
ClassNotFoundException {\r
- if (args.length != 2) {\r
- System.err.println("inputfile outputfile");\r
+ if (args.length != 1) {\r
+ System.err.println("outputfile");\r
return;\r
}\r
+ final String dictOutFilename = args[0];\r
+\r
+ final Dictionary dict = new Dictionary(Language.DE, Language.EN);\r
+ // Debug output: shows how the JVM resolves the Cp1252 charset name.\r
+ System.out.println(Charset.forName("Cp1252"));\r
+ processInputFile("c:\\de-en-chemnitz.txt", dict, true, Charset.forName("UTF8"));\r
+ processInputFile("c:\\de-en-dictcc.txt", dict, false, Charset.forName("Cp1252"));\r
\r
- final Dictionary dict = new Dictionary("de", "en");\r
- final RandomAccessFile dictionaryFile = new RandomAccessFile(args[0], "r");\r
+ createIndex(dict, Entry.LANG1);\r
+ createIndex(dict, Entry.LANG2);\r
+\r
+ System.out.println("Writing dictionary.");\r
+ final RandomAccessFile dictOut = new RandomAccessFile(dictOutFilename, "rw");\r
+ try {\r
+ // Truncate first so a shorter dictionary does not leave stale trailing bytes.\r
+ dictOut.setLength(0);\r
+ dict.write(dictOut);\r
+ } finally {\r
+ // Release the file handle even if truncating or writing fails.\r
+ dictOut.close();\r
+ }\r
+ }\r
+\r
+ private static void processInputFile(final String filename,\r
+ final Dictionary dict, final boolean hasMultipleSubentries, final Charset charset) throws FileNotFoundException, IOException {\r
+ final BufferedReader dictionaryIn = new BufferedReader(new InputStreamReader(new FileInputStream(filename), charset));\r
String line;\r
int lineCount = 0;\r
- long fileLocation = 0;\r
- while ((line = dictionaryFile.readLine()) != null) {\r
- assert ((int) fileLocation) == fileLocation;\r
+ while ((line = dictionaryIn.readLine()) != null) {\r
+// System.out.println(line);\r
line = line.trim();\r
if (line.isEmpty() || line.startsWith("#")) {\r
continue;\r
}\r
\r
- final Entry entry = Entry.parseFromLine(line);\r
+ final Entry entry = Entry.parseFromLine(line, hasMultipleSubentries);\r
if (entry == null) {\r
System.err.println("Invalid entry: " + line);\r
continue;\r
System.out.println("IndexBuilder: " + "lineCount=" + lineCount);\r
}\r
lineCount++;\r
- fileLocation = dictionaryFile.getFilePointer();\r
}\r
- dictionaryFile.close();\r
-\r
- createIndex(dict, Entry.LANG1);\r
- createIndex(dict, Entry.LANG2);\r
-\r
- System.out.println("Writing dictionary.");\r
- final RandomAccessFile dictOut = new RandomAccessFile(args[1], "rw");\r
- dictOut.setLength(0);\r
- dict.write(dictOut);\r
- dictOut.close();\r
+ dictionaryIn.close();\r
}\r
\r
public static void createIndex(final Dictionary dict, final byte lang) {\r
\r
for (int e = 0; e < dict.entries.size(); ++e) {\r
final Entry entry = dict.entries.get(e);\r
- final String text = entry.getIndexableText(lang);\r
- final Set<String> tokens = new LinkedHashSet<String>(Arrays\r
- .asList(WHITESPACE.split(text.trim())));\r
+ final Set<String> tokens = entry.getIndexableTokens(lang);\r
entryDatas[e] = new EntryData(tokens.size());\r
for (final String token : tokens) {\r
TokenData tokenData = tokenDatas.get(token);\r
\r
// Sort it.\r
\r
+ System.out.println("Sorting TokenData...");\r
final List<TokenData> sortedIndex = new ArrayList<TokenData>(tokenDatas\r
.values());\r
- Collections.sort(sortedIndex);\r
+ Collections.sort(sortedIndex, new Comparator<TokenData>() {\r
+ @Override\r
+ public int compare(TokenData tokenData0, TokenData tokenData1) {\r
+ return dict.languageDatas[lang].language.tokenComparator.compare(tokenData0.token, tokenData1.token);\r
+ }});\r
\r
+ System.out.println("Sorting entries within each TokenData...");\r
final Comparator<Integer> entryComparator = new Comparator<Integer>() {\r
@Override\r
public int compare(Integer o1, Integer o2) {\r
+ // TODO: improve this ordering; candidate criteria:\r
+ // Relevant (first token match) chemnitz entries first\r
+ // first token position in entry\r
+ // entry length in chars\r
return entryDatas[o1].numTokens < entryDatas[o2].numTokens ? -1\r
: entryDatas[o1].numTokens == entryDatas[o2].numTokens ? 0 : 1;\r
}\r
};\r
-\r
for (final TokenData tokenData : tokenDatas.values()) {\r
Collections.sort(tokenData.entries, entryComparator);\r
}\r
\r
// Put it all together.\r
-\r
- final List<Row> rows = dict.languages[lang].rows;\r
- final List<IndexEntry> indexEntries = dict.languages[lang].sortedIndex;\r
-\r
+ System.out.println("Assembling final data structures...");\r
+ final List<Row> rows = dict.languageDatas[lang].rows;\r
+ final List<IndexEntry> indexEntries = dict.languageDatas[lang].sortedIndex;\r
for (int t = 0; t < sortedIndex.size(); ++t) {\r
final TokenData tokenData = sortedIndex.get(t);\r
final int startRow = rows.size();\r
}\r
}\r
\r
- static final class TokenData implements Comparable<TokenData> {\r
+ /**\r
+ * Per-token bookkeeping used while building an index: the token text, a list\r
+ * of entry indices, and a start row.\r
+ *\r
+ * This class no longer defines its own ordering; createIndex sorts TokenData\r
+ * instances with the language's tokenComparator.\r
+ */\r
+ static final class TokenData {\r
final String token;\r
+ // Indices into createIndex's entryDatas array; createIndex sorts this list\r
+ // by each entry's token count.\r
final List<Integer> entries = new ArrayList<Integer>();\r
\r
+ // NOTE(review): not assigned in the visible code; presumably the first row\r
+ // belonging to this token in the assembled row list -- confirm.\r
int startRow;\r
\r
- public TokenData(String token) {\r
+ /** Creates an empty record for the given token. */\r
+ public TokenData(final String token) {\r
this.token = token;\r
}\r
-\r
- @Override\r
- public int compareTo(TokenData that) {\r
- return EntryFactory.entryFactory.getEntryComparator().compare(this.token,\r
- that.token);\r
- }\r
}\r
\r
}\r
import junit.framework.TestCase;\r
\r
import com.hughes.android.dictionary.Dictionary.IndexEntry;\r
-import com.hughes.android.dictionary.Dictionary.Language;\r
+import com.hughes.android.dictionary.Dictionary.LanguageData;\r
import com.hughes.android.dictionary.Dictionary.Row;\r
\r
public class DictionaryTest extends TestCase {\r
\r
// final Dictionary goldenDict;\r
final List<Entry> entries = Arrays.asList(\r
- new Entry("der Hund", "the dog"),\r
- new Entry("Die grosse Katze", "The big cat"), \r
- new Entry("die Katze", "the cat"),\r
- new Entry("gross", "big"),\r
- new Entry("Dieb", "thief"),\r
- new Entry("rennen", "run"));\r
+ Entry.parseFromLine("der Hund :: the dog", false),\r
+ Entry.parseFromLine("Die grosse Katze :: The big cat", false), \r
+ Entry.parseFromLine("die Katze :: the cat", false),\r
+ Entry.parseFromLine("gross :: big", false),\r
+ Entry.parseFromLine("Dieb :: thief", false),\r
+ Entry.parseFromLine("rennen :: run", false));\r
\r
{\r
- final Dictionary dict = new Dictionary("de", "en");\r
- for (final Entry entry : entries) {\r
- dict.entries.add(entry);\r
- }\r
+ final Dictionary dict = new Dictionary(Language.DE, Language.EN);\r
+ dict.entries.addAll(entries);\r
DictionaryBuilder.createIndex(dict, Entry.LANG1);\r
DictionaryBuilder.createIndex(dict, Entry.LANG2);\r
final RandomAccessFile raf = new RandomAccessFile(file, "rw");\r
\r
assertEquals(entries, dict.entries);\r
\r
- assertEquals("der", dict.languages[0].sortedIndex.get(0).word);\r
- assertEquals("Die", dict.languages[0].sortedIndex.get(1).word);\r
+ assertEquals("der", dict.languageDatas[0].sortedIndex.get(0).word);\r
+ assertEquals("Die", dict.languageDatas[0].sortedIndex.get(1).word);\r
\r
- for (final IndexEntry indexEntry : dict.languages[0].sortedIndex) {\r
+ for (final IndexEntry indexEntry : dict.languageDatas[0].sortedIndex) {\r
System.out.println(indexEntry);\r
}\r
\r
int rowCount = 0;\r
- for (final Row row : dict.languages[0].rows) {\r
+ for (final Row row : dict.languageDatas[0].rows) {\r
if (row.index >= 0) {\r
System.out.println(" " + rowCount + ":" + dict.entries.get(row.index));\r
} else {\r
- System.out.println(rowCount + ":" + dict.languages[0].sortedIndex.get(-row.index - 1));\r
+ System.out.println(rowCount + ":" + dict.languageDatas[0].sortedIndex.get(-row.index - 1));\r
}\r
++rowCount;\r
}\r
\r
for (int l = 0; l <= 1; l++) {\r
- final Language lang = dict.languages[l];\r
- for (int i = 0; i < lang.sortedIndex.size(); i++) {\r
- final IndexEntry indexEntry = lang.sortedIndex.get(i);\r
+ final LanguageData languageData = dict.languageDatas[l];\r
+ for (int i = 0; i < languageData.sortedIndex.size(); i++) {\r
+ final IndexEntry indexEntry = languageData.sortedIndex.get(i);\r
if (indexEntry.word.toLowerCase().equals("dieb"))\r
System.out.println();\r
- final IndexEntry lookedUpEntry = lang.sortedIndex.get(lang.lookup(indexEntry.word, new AtomicBoolean(false)));\r
+ final IndexEntry lookedUpEntry = languageData.sortedIndex.get(languageData.lookup(indexEntry.word, new AtomicBoolean(false)));\r
if (!indexEntry.word.toLowerCase().equals(lookedUpEntry.word.toLowerCase()))\r
System.out.println();\r
assertEquals(indexEntry.word.toLowerCase(), lookedUpEntry.word.toLowerCase());\r
}\r
}\r
\r
- assertEquals("Die", dict.languages[0].sortedIndex.get(dict.languages[0].lookup("die", new AtomicBoolean())).word);\r
+ assertEquals("Die", dict.languageDatas[0].sortedIndex.get(dict.languageDatas[0].lookup("die", new AtomicBoolean())).word);\r
\r
}\r
\r
+ /**\r
+ * Parses a set of sample DE/EN lines, checks that none of them failed to\r
+ * parse, builds the index for both languages and prints every index entry.\r
+ * The printed indices are not asserted on; they are for manual inspection.\r
+ */\r
public void testTextNorm() throws IOException {\r
-// final File file = File.createTempFile("asdf", "asdf");\r
-// file.deleteOnExit();\r
-\r
-// final Dictionary goldenDict;\r
final List<Entry> entries = Arrays.asList(\r
- new Entry("der Hund", "the dog"),\r
- new Entry("Die grosse Katze", "The big cat"), \r
- new Entry("die Katze", "the cat"),\r
- new Entry("gross", "big"),\r
- new Entry("Dieb", "thief"),\r
- new Entry("rennen", "run"));\r
+ Entry.parseFromLine("Hund {m} :: dog", true),\r
+ Entry.parseFromLine("\"Pick-up\"-Presse {f} :: baler", true),\r
+ Entry.parseFromLine("(Ach was), echt? [auch ironisch] :: No shit! [also ironic]", true),\r
+ Entry.parseFromLine("(akuter) Myokardinfarkt {m} <AMI / MI> :: (acute) myocardial infarction <AMI / MI>", true),\r
+ Entry.parseFromLine("(reine) Vermutung {f} :: guesswork", true),\r
+ Entry.parseFromLine("(mit) 6:1 vorne liegen :: to be 6-1 up [football]", true),\r
+ Entry.parseFromLine("(auf) den Knopf drücken [auch fig.: auslösen] :: to push the button [also fig.: initiate]", false),\r
+ Entry.parseFromLine("Adjektiv {n} /Adj./; Eigenschaftswort {n} [gramm.] | Adjektive {pl}; Eigenschaftswörter {pl} :: adjective /adj./ | adjectives", true),\r
+ Entry.parseFromLine("Älteste {m,f}; Ältester :: oldest; eldest", true),\r
+ Entry.parseFromLine("\"...\", schloss er an. :: '...,' he added.", true),\r
+ Entry.parseFromLine("besonderer | besondere | besonderes :: extra", false),\r
+ Entry.parseFromLine("| zu Pferde; zu Pferd | reiten :: horseback | on horseback | go on horseback", true),\r
+ Entry.parseFromLine("Hauptaugenmerk {m} | sein Hauptaugenmerk richten auf :: | to focus (one's) attention on", true)\r
+ );\r
+\r
+ // A null element would mean one of the sample lines failed to parse.\r
+ assertFalse(entries.contains(null));\r
+\r
+ // Hyphenated words get put into multiple listings.\r
+\r
+ final Dictionary dict = new Dictionary(Language.DE, Language.EN);\r
+ dict.entries.addAll(entries);\r
+ DictionaryBuilder.createIndex(dict, Entry.LANG1);\r
+ DictionaryBuilder.createIndex(dict, Entry.LANG2);\r
+ \r
+ // Dump the sorted index of each language.\r
+ for (int l = 0; l <= 1; l++) {\r
+ final LanguageData languageData = dict.languageDatas[l];\r
+ System.out.println("\n" + languageData.language);\r
+ for (int i = 0; i < languageData.sortedIndex.size(); i++) {\r
+ final IndexEntry indexEntry = languageData.sortedIndex.get(i);\r
+ System.out.println(indexEntry);\r
+ }\r
+ }\r
\r
}\r
\r
+\r
}\r
+++ /dev/null
-package com.hughes.android.dictionary;\r
-\r
-import java.io.FileNotFoundException;\r
-import java.io.IOException;\r
-import java.io.RandomAccessFile;\r
-import java.io.Serializable;\r
-import java.util.ArrayList;\r
-import java.util.Collections;\r
-import java.util.LinkedHashMap;\r
-import java.util.List;\r
-import java.util.Map;\r
-import java.util.TreeMap;\r
-import java.util.regex.Pattern;\r
-\r
-import com.hughes.util.FileUtil;\r
-\r
-public class IndexBuilder {\r
-\r
- static final Pattern WHITESPACE = Pattern.compile("\\s+");\r
- static final Pattern NONALPHA = Pattern.compile("[^A-Za-z]+");\r
-\r
- public static void main(String[] args) throws IOException,\r
- ClassNotFoundException {\r
- if (args.length != 1) {\r
- System.err.println("No input file.");\r
- return;\r
- }\r
- final String dictionaryFileName = args[0];\r
- createIndex(dictionaryFileName, Entry.LANG1);\r
- createIndex(dictionaryFileName, Entry.LANG2);\r
- }\r
-\r
- private static void createIndex(final String dictionaryFileName,\r
- final byte lang) throws IOException, FileNotFoundException,\r
- ClassNotFoundException {\r
- Node rootBuilder;\r
- rootBuilder = processDictionaryLines(dictionaryFileName, lang);\r
- FileUtil.write(rootBuilder, String.format("%s_builder_%d.serialized", dictionaryFileName, lang));\r
- rootBuilder = (Node) FileUtil.read(String.format("%s_builder_%d.serialized", dictionaryFileName, lang));\r
-\r
- rootBuilder.forEachNode(new Function<Node>() {\r
- @Override\r
- public void invoke(final Node node) {\r
- for (final List<EntryDescriptor> entryDescriptors : node.entryDescriptorsMap.values()) {\r
- Collections.sort(entryDescriptors);\r
- }\r
- }});\r
- \r
- // Dump twice to get accurate file locations.\r
- for (int i = 0; i < 2; ++i) {\r
- final RandomAccessFile raf = new RandomAccessFile(String.format(Dictionary.INDEX_FORMAT, dictionaryFileName, lang), "rw"); \r
- rootBuilder.dump(raf);\r
- raf.close();\r
- }\r
- }\r
-\r
- // ----------------------------------------------------------------\r
- \r
- static final class EntryDescriptor implements Comparable<EntryDescriptor>, Serializable {\r
- final int offset;\r
- final int numTokens;\r
- public EntryDescriptor(int offset, int numTokens) {\r
- this.offset = offset;\r
- this.numTokens = numTokens;\r
- }\r
- @Override\r
- public boolean equals(Object obj) {\r
- final EntryDescriptor that = (EntryDescriptor) obj;\r
- return this.offset == that.offset;\r
- }\r
- @Override\r
- public int hashCode() {\r
- return offset;\r
- }\r
- @Override\r
- public int compareTo(EntryDescriptor o) {\r
- return this.numTokens < o.numTokens ? -1 : this.numTokens == o.numTokens ? 0 : 1;\r
- }\r
- }\r
-\r
-\r
- // ----------------------------------------------------------------\r
-\r
- static Node processDictionaryLines(final String dictionaryFileName, final byte lang) throws IOException {\r
- final Node root = new Node("");\r
- final RandomAccessFile dictionaryFile = new RandomAccessFile(dictionaryFileName, "r");\r
- String line;\r
- final Entry entry = new Entry();\r
- int lineCount = 0;\r
- long fileLocation = 0;\r
- while ((line = dictionaryFile.readLine()) != null) {\r
- assert ((int) fileLocation) == fileLocation;\r
-\r
- line = line.trim();\r
- if (line.isEmpty() || line.startsWith("#") || !entry.parseFromLine(line)) {\r
- continue;\r
- }\r
- final String text = entry.getIndexableText(Entry.LANG1);\r
- final String[] tokens = WHITESPACE.split(text);\r
- final Map<String,String> tokenToNormalizedMap = new LinkedHashMap<String,String>();\r
- for (String token : tokens) {\r
- if (token.length() <= 1 || !Character.isLetter(token.charAt(0))) {\r
- continue;\r
- }\r
- tokenToNormalizedMap.put(token, EntryFactory.entryFactory.normalizeToken(token));\r
- }\r
- for (final Map.Entry<String, String> tokenToNormalized : tokenToNormalizedMap.entrySet()) {\r
- final String normalizedToken = tokenToNormalized.getValue();\r
- final Node node = root.getNode(normalizedToken, 0, true);\r
- node.addToken(tokenToNormalized.getKey(), new EntryDescriptor((int) fileLocation, tokens.length));\r
- assert node == root.getNode(normalizedToken, 0, false);\r
- assert normalizedToken\r
- .equals(root.getNode(normalizedToken, 0, false).normalizedToken);\r
- }\r
-\r
- if (lineCount % 10000 == 0) {\r
- System.out.println("IndexBuilder: " + "lineCount=" + lineCount);\r
- }\r
- \r
- lineCount++;\r
- fileLocation = dictionaryFile.getFilePointer();\r
- }\r
- dictionaryFile.close();\r
- \r
- root.recursiveSetDescendantCounts();\r
- \r
- return root;\r
- }\r
-\r
-}\r
+++ /dev/null
-package com.hughes.android.dictionary;\r
-\r
-import java.io.IOException;\r
-import java.io.RandomAccessFile;\r
-import java.util.LinkedHashSet;\r
-import java.util.Set;\r
-\r
-import junit.framework.TestCase;\r
-\r
-import com.hughes.android.dictionary.Index.Node;\r
-import com.hughes.util.FileUtil;\r
-\r
-public class IndexTest extends TestCase {\r
-\r
- static final String file = "c:\\dict-de-en.txt";\r
- static final String file_index = file + "_index_0";\r
- \r
- public void testRoot() throws IOException {\r
- System.out.println(" testRoot");\r
- final Index index = new Index(file_index);\r
- final Node node = index.lookup("");\r
- assertNotNull(node);\r
- \r
- assertEquals(312220, node.descendantTokenCount);\r
- assertEquals(1087063, node.descendantEntryCount);\r
- \r
- for (final String token : node.tokenToOffsets.keySet()) {\r
- System.out.println(token);\r
- assertTrue(token.toLowerCase().contains("handhubwagen"));\r
- }\r
- }\r
- \r
- public void testLookup() throws IOException {\r
- System.out.println(" testLookup");\r
- final Index index = new Index(file_index);\r
- final Node node = index.lookup("handhubwagen");\r
- assertNotNull(node);\r
- \r
- assertEquals(1, node.descendantTokenCount);\r
- assertEquals(2, node.descendantEntryCount);\r
- \r
- for (final String token : node.tokenToOffsets.keySet()) {\r
- System.out.println(token);\r
- assertTrue(token.toLowerCase().contains("handhubwagen"));\r
- }\r
- }\r
-\r
- public void testGetDescendantOffsets() throws IOException {\r
- System.out.println(" testGetDescendantOffsets");\r
- final Index index = new Index(file_index);\r
- \r
- final Node node = index.lookup("handhebe");\r
- assertNotNull(node);\r
- assertEquals("handhebel", node.nodeHandle.normalizedToken);\r
- final Set<Integer> offsets = new LinkedHashSet<Integer>();\r
- node.getDescendantEntryOffsets(offsets, 10);\r
- final RandomAccessFile raf = new RandomAccessFile(file, "r");\r
- for (final Integer offset : offsets) {\r
- final String entry = FileUtil.readLine(raf, offset);\r
- System.out.println(entry);\r
- assertTrue(entry.toLowerCase().contains(node.nodeHandle.normalizedToken));\r
- }\r
- }\r
-\r
- public void testGetDescendants() throws IOException {\r
- System.out.println(" testGetDescendant");\r
- final Index index = new Index(file_index);\r
- final RandomAccessFile raf = new RandomAccessFile(file, "r");\r
- for (int i = 1000000; i < 1000050; ++i) {\r
- final Object o = index.root.getDescendant(i);\r
- if (o instanceof Integer) {\r
- System.out.println(" " + FileUtil.readLine(raf, (Integer)o));\r
- } else {\r
- System.out.println(o);\r
- }\r
- }\r
- raf.close();\r
- }\r
-\r
-}\r
--- /dev/null
+package com.hughes.android.dictionary;\r
+\r
+\r
+/**\r
+ * Placeholder class with no active members: the whole body is commented out.\r
+ * The commented-out code is the former IndexBuilder implementation, kept here\r
+ * for reference only.\r
+ */\r
+public class ZIndexBuilder {\r
+\r
+// static final Pattern WHITESPACE = Pattern.compile("\\s+");\r
+// static final Pattern NONALPHA = Pattern.compile("[^A-Za-z]+");\r
+//\r
+// public static void main(String[] args) throws IOException,\r
+// ClassNotFoundException {\r
+// if (args.length != 1) {\r
+// System.err.println("No input file.");\r
+// return;\r
+// }\r
+// final String dictionaryFileName = args[0];\r
+// createIndex(dictionaryFileName, Entry.LANG1);\r
+// createIndex(dictionaryFileName, Entry.LANG2);\r
+// }\r
+//\r
+// private static void createIndex(final String dictionaryFileName,\r
+// final byte lang) throws IOException, FileNotFoundException,\r
+// ClassNotFoundException {\r
+// Node rootBuilder;\r
+// rootBuilder = processDictionaryLines(dictionaryFileName, lang);\r
+// FileUtil.write(rootBuilder, String.format("%s_builder_%d.serialized", dictionaryFileName, lang));\r
+// rootBuilder = (Node) FileUtil.read(String.format("%s_builder_%d.serialized", dictionaryFileName, lang));\r
+//\r
+// rootBuilder.forEachNode(new Function<Node>() {\r
+// @Override\r
+// public void invoke(final Node node) {\r
+// for (final List<EntryDescriptor> entryDescriptors : node.entryDescriptorsMap.values()) {\r
+// Collections.sort(entryDescriptors);\r
+// }\r
+// }});\r
+// \r
+// // Dump twice to get accurate file locations.\r
+// for (int i = 0; i < 2; ++i) {\r
+// final RandomAccessFile raf = new RandomAccessFile(String.format(Dictionary.INDEX_FORMAT, dictionaryFileName, lang), "rw"); \r
+// rootBuilder.dump(raf);\r
+// raf.close();\r
+// }\r
+// }\r
+//\r
+// // ----------------------------------------------------------------\r
+// \r
+// static final class EntryDescriptor implements Comparable<EntryDescriptor>, Serializable {\r
+// final int offset;\r
+// final int numTokens;\r
+// public EntryDescriptor(int offset, int numTokens) {\r
+// this.offset = offset;\r
+// this.numTokens = numTokens;\r
+// }\r
+// @Override\r
+// public boolean equals(Object obj) {\r
+// final EntryDescriptor that = (EntryDescriptor) obj;\r
+// return this.offset == that.offset;\r
+// }\r
+// @Override\r
+// public int hashCode() {\r
+// return offset;\r
+// }\r
+// @Override\r
+// public int compareTo(EntryDescriptor o) {\r
+// return this.numTokens < o.numTokens ? -1 : this.numTokens == o.numTokens ? 0 : 1;\r
+// }\r
+// }\r
+//\r
+//\r
+// // ----------------------------------------------------------------\r
+//\r
+// static Node processDictionaryLines(final String dictionaryFileName, final byte lang) throws IOException {\r
+// final Node root = new Node("");\r
+// final RandomAccessFile dictionaryFile = new RandomAccessFile(dictionaryFileName, "r");\r
+// String line;\r
+// final Entry entry = new Entry();\r
+// int lineCount = 0;\r
+// long fileLocation = 0;\r
+// while ((line = dictionaryFile.readLine()) != null) {\r
+// assert ((int) fileLocation) == fileLocation;\r
+//\r
+// line = line.trim();\r
+// if (line.isEmpty() || line.startsWith("#") || !entry.parseFromLine(line)) {\r
+// continue;\r
+// }\r
+// final String text = entry.getIndexableText(Entry.LANG1);\r
+// final String[] tokens = WHITESPACE.split(text);\r
+// final Map<String,String> tokenToNormalizedMap = new LinkedHashMap<String,String>();\r
+// for (String token : tokens) {\r
+// if (token.length() <= 1 || !Character.isLetter(token.charAt(0))) {\r
+// continue;\r
+// }\r
+// tokenToNormalizedMap.put(token, EntryFactory.entryFactory.normalizeToken(token));\r
+// }\r
+// for (final Map.Entry<String, String> tokenToNormalized : tokenToNormalizedMap.entrySet()) {\r
+// final String normalizedToken = tokenToNormalized.getValue();\r
+// final Node node = root.getNode(normalizedToken, 0, true);\r
+// node.addToken(tokenToNormalized.getKey(), new EntryDescriptor((int) fileLocation, tokens.length));\r
+// assert node == root.getNode(normalizedToken, 0, false);\r
+// assert normalizedToken\r
+// .equals(root.getNode(normalizedToken, 0, false).normalizedToken);\r
+// }\r
+//\r
+// if (lineCount % 10000 == 0) {\r
+// System.out.println("IndexBuilder: " + "lineCount=" + lineCount);\r
+// }\r
+// \r
+// lineCount++;\r
+// fileLocation = dictionaryFile.getFilePointer();\r
+// }\r
+// dictionaryFile.close();\r
+// \r
+// root.recursiveSetDescendantCounts();\r
+// \r
+// return root;\r
+// }\r
+\r
+}\r
--- /dev/null
+package com.hughes.android.dictionary;\r
+//package com.hughes.android.dictionary;\r
+//\r
+//import java.io.IOException;\r
+//import java.io.RandomAccessFile;\r
+//import java.util.LinkedHashSet;\r
+//import java.util.Set;\r
+//\r
+//import junit.framework.TestCase;\r
+//\r
+//import com.hughes.android.dictionary.Index.Node;\r
+//import com.hughes.util.FileUtil;\r
+//\r
+//public class IndexTest extends TestCase {\r
+//\r
+// static final String file = "c:\\dict-de-en.txt";\r
+// static final String file_index = file + "_index_0";\r
+// \r
+// public void testRoot() throws IOException {\r
+// System.out.println(" testRoot");\r
+// final Index index = new Index(file_index);\r
+// final Node node = index.lookup("");\r
+// assertNotNull(node);\r
+// \r
+// assertEquals(312220, node.descendantTokenCount);\r
+// assertEquals(1087063, node.descendantEntryCount);\r
+// \r
+// for (final String token : node.tokenToOffsets.keySet()) {\r
+// System.out.println(token);\r
+// assertTrue(token.toLowerCase().contains("handhubwagen"));\r
+// }\r
+// }\r
+// \r
+// public void testLookup() throws IOException {\r
+// System.out.println(" testLookup");\r
+// final Index index = new Index(file_index);\r
+// final Node node = index.lookup("handhubwagen");\r
+// assertNotNull(node);\r
+// \r
+// assertEquals(1, node.descendantTokenCount);\r
+// assertEquals(2, node.descendantEntryCount);\r
+// \r
+// for (final String token : node.tokenToOffsets.keySet()) {\r
+// System.out.println(token);\r
+// assertTrue(token.toLowerCase().contains("handhubwagen"));\r
+// }\r
+// }\r
+//\r
+// public void testGetDescendantOffsets() throws IOException {\r
+// System.out.println(" testGetDescendantOffsets");\r
+// final Index index = new Index(file_index);\r
+// \r
+// final Node node = index.lookup("handhebe");\r
+// assertNotNull(node);\r
+// assertEquals("handhebel", node.nodeHandle.normalizedToken);\r
+// final Set<Integer> offsets = new LinkedHashSet<Integer>();\r
+// node.getDescendantEntryOffsets(offsets, 10);\r
+// final RandomAccessFile raf = new RandomAccessFile(file, "r");\r
+// for (final Integer offset : offsets) {\r
+// final String entry = FileUtil.readLine(raf, offset);\r
+// System.out.println(entry);\r
+// assertTrue(entry.toLowerCase().contains(node.nodeHandle.normalizedToken));\r
+// }\r
+// }\r
+//\r
+// public void testGetDescendants() throws IOException {\r
+// System.out.println(" testGetDescendant");\r
+// final Index index = new Index(file_index);\r
+// final RandomAccessFile raf = new RandomAccessFile(file, "r");\r
+// for (int i = 1000000; i < 1000050; ++i) {\r
+// final Object o = index.root.getDescendant(i);\r
+// if (o instanceof Integer) {\r
+// System.out.println(" " + FileUtil.readLine(raf, (Integer)o));\r
+// } else {\r
+// System.out.println(o);\r
+// }\r
+// }\r
+// raf.close();\r
+// }\r
+//\r
+//}\r