From: thadh Date: Thu, 9 Apr 2009 00:39:12 +0000 (-0700) Subject: go X-Git-Url: http://gitweb.fperrin.net/?a=commitdiff_plain;h=d31ded214d7015250d37ddbe8995c324239ecb1f;p=DictionaryPC.git go --- diff --git a/src/com/hughes/android/dictionary/DictionaryBuilder.java b/src/com/hughes/android/dictionary/DictionaryBuilder.java index ae6fece..c141eff 100755 --- a/src/com/hughes/android/dictionary/DictionaryBuilder.java +++ b/src/com/hughes/android/dictionary/DictionaryBuilder.java @@ -1,45 +1,61 @@ package com.hughes.android.dictionary; +import java.io.BufferedReader; +import java.io.FileInputStream; +import java.io.FileNotFoundException; import java.io.IOException; +import java.io.InputStreamReader; import java.io.RandomAccessFile; +import java.nio.charset.Charset; import java.util.ArrayList; -import java.util.Arrays; import java.util.Collections; import java.util.Comparator; import java.util.HashMap; -import java.util.LinkedHashSet; import java.util.List; import java.util.Map; import java.util.Set; -import java.util.regex.Pattern; import com.hughes.android.dictionary.Dictionary.IndexEntry; import com.hughes.android.dictionary.Dictionary.Row; public class DictionaryBuilder { - static final Pattern WHITESPACE = Pattern.compile("\\s+"); - public static void main(String[] args) throws IOException, ClassNotFoundException { - if (args.length != 2) { - System.err.println("inputfile outputfile"); + if (args.length != 1) { + System.err.println("outputfile"); return; } + final String dictOutFilename = args[0]; + + final Dictionary dict = new Dictionary(Language.DE, Language.EN); + System.out.println(Charset.forName("Cp1252")); + processInputFile("c:\\de-en-chemnitz.txt", dict, true, Charset.forName("UTF8")); + processInputFile("c:\\de-en-dictcc.txt", dict, false, Charset.forName("Cp1252")); - final Dictionary dict = new Dictionary("de", "en"); - final RandomAccessFile dictionaryFile = new RandomAccessFile(args[0], "r"); + createIndex(dict, Entry.LANG1); + createIndex(dict, Entry.LANG2); + + System.out.println("Writing dictionary."); + final RandomAccessFile dictOut = new RandomAccessFile(dictOutFilename, "rw"); + dictOut.setLength(0); + dict.write(dictOut); + dictOut.close(); + } + + private static void processInputFile(final String filename, + final Dictionary dict, final boolean hasMultipleSubentries, final Charset charset) throws FileNotFoundException, IOException { + final BufferedReader dictionaryIn = new BufferedReader(new InputStreamReader(new FileInputStream(filename), charset)); String line; int lineCount = 0; - long fileLocation = 0; - while ((line = dictionaryFile.readLine()) != null) { - assert ((int) fileLocation) == fileLocation; + while ((line = dictionaryIn.readLine()) != null) { +// System.out.println(line); line = line.trim(); if (line.isEmpty() || line.startsWith("#")) { continue; } - final Entry entry = Entry.parseFromLine(line); + final Entry entry = Entry.parseFromLine(line, hasMultipleSubentries); if (entry == null) { System.err.println("Invalid entry: " + line); continue; @@ -51,18 +67,8 @@ public class DictionaryBuilder { System.out.println("IndexBuilder: " + "lineCount=" + lineCount); } lineCount++; - fileLocation = dictionaryFile.getFilePointer(); } - dictionaryFile.close(); - - createIndex(dict, Entry.LANG1); - createIndex(dict, Entry.LANG2); - - System.out.println("Writing dictionary."); - final RandomAccessFile dictOut = new RandomAccessFile(args[1], "rw"); - dictOut.setLength(0); - dict.write(dictOut); - dictOut.close(); + dictionaryIn.close(); } public static void createIndex(final Dictionary dict, final byte lang) { @@ -73,9 +79,7 @@ public class DictionaryBuilder { for (int e = 0; e < dict.entries.size(); ++e) { final Entry entry = dict.entries.get(e); - final String text = entry.getIndexableText(lang); - final Set tokens = new LinkedHashSet(Arrays - .asList(WHITESPACE.split(text.trim()))); + final Set tokens = entry.getIndexableTokens(lang); entryDatas[e] = new EntryData(tokens.size()); for (final String token : tokens) { TokenData tokenData = tokenDatas.get(token); @@ -93,27 +97,35 @@ public class DictionaryBuilder { // Sort it. + System.out.println("Sorting TokenData..."); final List sortedIndex = new ArrayList(tokenDatas .values()); - Collections.sort(sortedIndex); + Collections.sort(sortedIndex, new Comparator() { + @Override + public int compare(TokenData tokenData0, TokenData tokenData1) { + return dict.languageDatas[lang].language.tokenComparator.compare(tokenData0.token, tokenData1.token); + }}); + System.out.println("Sorting entries within each TokenData..."); final Comparator entryComparator = new Comparator() { @Override public int compare(Integer o1, Integer o2) { + // TODO: better this + // Relevant (first token match) chemnitz entries first + // first token position in entry + // entry length in chars return entryDatas[o1].numTokens < entryDatas[o2].numTokens ? -1 : entryDatas[o1].numTokens == entryDatas[o2].numTokens ? 0 : 1; } }; - for (final TokenData tokenData : tokenDatas.values()) { Collections.sort(tokenData.entries, entryComparator); } // Put it all together. - - final List rows = dict.languages[lang].rows; - final List indexEntries = dict.languages[lang].sortedIndex; - + System.out.println("Assembling final data structures..."); + final List rows = dict.languageDatas[lang].rows; + final List indexEntries = dict.languageDatas[lang].sortedIndex; for (int t = 0; t < sortedIndex.size(); ++t) { final TokenData tokenData = sortedIndex.get(t); final int startRow = rows.size(); @@ -139,21 +151,15 @@ public class DictionaryBuilder { } } - static final class TokenData implements Comparable { + static final class TokenData { final String token; final List entries = new ArrayList(); int startRow; - public TokenData(String token) { + public TokenData(final String token) { this.token = token; } - - @Override - public int compareTo(TokenData that) { - return EntryFactory.entryFactory.getEntryComparator().compare(this.token, - that.token); - } } } diff --git a/src/com/hughes/android/dictionary/DictionaryTest.java b/src/com/hughes/android/dictionary/DictionaryTest.java index 806e836..60a9cba 100755 --- a/src/com/hughes/android/dictionary/DictionaryTest.java +++ b/src/com/hughes/android/dictionary/DictionaryTest.java @@ -10,7 +10,7 @@ import java.util.concurrent.atomic.AtomicBoolean; import junit.framework.TestCase; import com.hughes.android.dictionary.Dictionary.IndexEntry; -import com.hughes.android.dictionary.Dictionary.Language; +import com.hughes.android.dictionary.Dictionary.LanguageData; import com.hughes.android.dictionary.Dictionary.Row; public class DictionaryTest extends TestCase { @@ -21,18 +21,16 @@ public class DictionaryTest extends TestCase { // final Dictionary goldenDict; final List entries = Arrays.asList( - new Entry("der Hund", "the dog"), - new Entry("Die grosse Katze", "The big cat"), - new Entry("die Katze", "the cat"), - new Entry("gross", "big"), - new Entry("Dieb", "thief"), - new Entry("rennen", "run")); + Entry.parseFromLine("der Hund :: the dog", false), + Entry.parseFromLine("Die grosse Katze :: The big cat", false), + Entry.parseFromLine("die Katze :: the cat", false), + Entry.parseFromLine("gross :: big", false), + Entry.parseFromLine("Dieb :: thief", false), + Entry.parseFromLine("rennen :: run", false)); { - final Dictionary dict = new Dictionary("de", "en"); - for (final Entry entry : entries) { - dict.entries.add(entry); - } + final Dictionary dict = new Dictionary(Language.DE, Language.EN); + dict.entries.addAll(entries); DictionaryBuilder.createIndex(dict, Entry.LANG1); DictionaryBuilder.createIndex(dict, Entry.LANG2); final RandomAccessFile raf = new RandomAccessFile(file, "rw"); @@ -47,53 +45,76 @@ public class DictionaryTest extends TestCase { assertEquals(entries, dict.entries); - assertEquals("der", dict.languages[0].sortedIndex.get(0).word); - assertEquals("Die", dict.languages[0].sortedIndex.get(1).word); + assertEquals("der", dict.languageDatas[0].sortedIndex.get(0).word); + assertEquals("Die", dict.languageDatas[0].sortedIndex.get(1).word); - for (final IndexEntry indexEntry : dict.languages[0].sortedIndex) { + for (final IndexEntry indexEntry : dict.languageDatas[0].sortedIndex) { System.out.println(indexEntry); } int rowCount = 0; - for (final Row row : dict.languages[0].rows) { + for (final Row row : dict.languageDatas[0].rows) { if (row.index >= 0) { System.out.println(" " + rowCount + ":" + dict.entries.get(row.index)); } else { - System.out.println(rowCount + ":" + dict.languages[0].sortedIndex.get(-row.index - 1)); + System.out.println(rowCount + ":" + dict.languageDatas[0].sortedIndex.get(-row.index - 1)); } ++rowCount; } for (int l = 0; l <= 1; l++) { - final Language lang = dict.languages[l]; - for (int i = 0; i < lang.sortedIndex.size(); i++) { - final IndexEntry indexEntry = lang.sortedIndex.get(i); + final LanguageData languageData = dict.languageDatas[l]; + for (int i = 0; i < languageData.sortedIndex.size(); i++) { + final IndexEntry indexEntry = languageData.sortedIndex.get(i); if (indexEntry.word.toLowerCase().equals("dieb")) System.out.println(); - final IndexEntry lookedUpEntry = lang.sortedIndex.get(lang.lookup(indexEntry.word, new AtomicBoolean(false))); + final IndexEntry lookedUpEntry = languageData.sortedIndex.get(languageData.lookup(indexEntry.word, new AtomicBoolean(false))); if (!indexEntry.word.toLowerCase().equals(lookedUpEntry.word.toLowerCase())) System.out.println(); assertEquals(indexEntry.word.toLowerCase(), lookedUpEntry.word.toLowerCase()); } } - assertEquals("Die", dict.languages[0].sortedIndex.get(dict.languages[0].lookup("die", new AtomicBoolean())).word); + assertEquals("Die", dict.languageDatas[0].sortedIndex.get(dict.languageDatas[0].lookup("die", new AtomicBoolean())).word); } public void testTextNorm() throws IOException { -// final File file = File.createTempFile("asdf", "asdf"); -// file.deleteOnExit(); - -// final Dictionary goldenDict; final List entries = Arrays.asList( - new Entry("der Hund", "the dog"), - new Entry("Die grosse Katze", "The big cat"), - new Entry("die Katze", "the cat"), - new Entry("gross", "big"), - new Entry("Dieb", "thief"), - new Entry("rennen", "run")); + Entry.parseFromLine("Hund {m} :: dog", true), + Entry.parseFromLine("\"Pick-up\"-Presse {f} :: baler", true), + Entry.parseFromLine("(Ach was), echt? [auch ironisch] :: No shit! [also ironic]", true), + Entry.parseFromLine("(akuter) Myokardinfarkt {m} :: (acute) myocardial infarction ", true), + Entry.parseFromLine("(reine) Vermutung {f} :: guesswork", true), + Entry.parseFromLine("(mit) 6:1 vorne liegen :: to be 6-1 up [football]", true), + Entry.parseFromLine("(auf) den Knopf drücken [auch fig.: auslösen] :: to push the button [also fig.: initiate]", false), + Entry.parseFromLine("Adjektiv {n} /Adj./; Eigenschaftswort {n} [gramm.] | Adjektive {pl}; Eigenschaftswörter {pl} :: adjective /adj./ | adjectives", true), + Entry.parseFromLine("Älteste {m,f}; Ältester :: oldest; eldest", true), + Entry.parseFromLine("\"...\", schloss er an. :: '...,' he added.", true), + Entry.parseFromLine("besonderer | besondere | besonderes :: extra", false), + Entry.parseFromLine("| zu Pferde; zu Pferd | reiten :: horseback | on horseback | go on horseback", true), + Entry.parseFromLine("Hauptaugenmerk {m} | sein Hauptaugenmerk richten auf :: | to focus (one's) attention on", true) + ); + + assertFalse(entries.contains(null)); + + // Hyphenated words get put both multiple listings. + + final Dictionary dict = new Dictionary(Language.DE, Language.EN); + dict.entries.addAll(entries); + DictionaryBuilder.createIndex(dict, Entry.LANG1); + DictionaryBuilder.createIndex(dict, Entry.LANG2); + + for (int l = 0; l <= 1; l++) { + final LanguageData languageData = dict.languageDatas[l]; + System.out.println("\n" + languageData.language); + for (int i = 0; i < languageData.sortedIndex.size(); i++) { + final IndexEntry indexEntry = languageData.sortedIndex.get(i); + System.out.println(indexEntry); + } + } } + } diff --git a/src/com/hughes/android/dictionary/IndexBuilder.java b/src/com/hughes/android/dictionary/IndexBuilder.java deleted file mode 100755 index 49d393d..0000000 --- a/src/com/hughes/android/dictionary/IndexBuilder.java +++ /dev/null @@ -1,130 +0,0 @@ -package com.hughes.android.dictionary; - -import java.io.FileNotFoundException; -import java.io.IOException; -import java.io.RandomAccessFile; -import java.io.Serializable; -import java.util.ArrayList; -import java.util.Collections; -import java.util.LinkedHashMap; -import java.util.List; -import java.util.Map; -import java.util.TreeMap; -import java.util.regex.Pattern; - -import com.hughes.util.FileUtil; - -public class IndexBuilder { - - static final Pattern WHITESPACE = Pattern.compile("\\s+"); - static final Pattern NONALPHA = Pattern.compile("[^A-Za-z]+"); - - public static void main(String[] args) throws IOException, - ClassNotFoundException { - if (args.length != 1) { - System.err.println("No input file."); - return; - } - final String dictionaryFileName = args[0]; - createIndex(dictionaryFileName, Entry.LANG1); - createIndex(dictionaryFileName, Entry.LANG2); - } - - private static void createIndex(final String dictionaryFileName, - final byte lang) throws IOException, FileNotFoundException, - ClassNotFoundException { - Node rootBuilder; - rootBuilder = processDictionaryLines(dictionaryFileName, lang); - FileUtil.write(rootBuilder, String.format("%s_builder_%d.serialized", dictionaryFileName, lang)); - rootBuilder = (Node) FileUtil.read(String.format("%s_builder_%d.serialized", dictionaryFileName, lang)); - - rootBuilder.forEachNode(new Function() { - @Override - public void invoke(final Node node) { - for (final List entryDescriptors : node.entryDescriptorsMap.values()) { - Collections.sort(entryDescriptors); - } - }}); - - // Dump twice to get accurate file locations. - for (int i = 0; i < 2; ++i) { - final RandomAccessFile raf = new RandomAccessFile(String.format(Dictionary.INDEX_FORMAT, dictionaryFileName, lang), "rw"); - rootBuilder.dump(raf); - raf.close(); - } - } - - // ---------------------------------------------------------------- - - static final class EntryDescriptor implements Comparable, Serializable { - final int offset; - final int numTokens; - public EntryDescriptor(int offset, int numTokens) { - this.offset = offset; - this.numTokens = numTokens; - } - @Override - public boolean equals(Object obj) { - final EntryDescriptor that = (EntryDescriptor) obj; - return this.offset == that.offset; - } - @Override - public int hashCode() { - return offset; - } - @Override - public int compareTo(EntryDescriptor o) { - return this.numTokens < o.numTokens ? -1 : this.numTokens == o.numTokens ? 0 : 1; - } - } - - - // ---------------------------------------------------------------- - - static Node processDictionaryLines(final String dictionaryFileName, final byte lang) throws IOException { - final Node root = new Node(""); - final RandomAccessFile dictionaryFile = new RandomAccessFile(dictionaryFileName, "r"); - String line; - final Entry entry = new Entry(); - int lineCount = 0; - long fileLocation = 0; - while ((line = dictionaryFile.readLine()) != null) { - assert ((int) fileLocation) == fileLocation; - - line = line.trim(); - if (line.isEmpty() || line.startsWith("#") || !entry.parseFromLine(line)) { - continue; - } - final String text = entry.getIndexableText(Entry.LANG1); - final String[] tokens = WHITESPACE.split(text); - final Map tokenToNormalizedMap = new LinkedHashMap(); - for (String token : tokens) { - if (token.length() <= 1 || !Character.isLetter(token.charAt(0))) { - continue; - } - tokenToNormalizedMap.put(token, EntryFactory.entryFactory.normalizeToken(token)); - } - for (final Map.Entry tokenToNormalized : tokenToNormalizedMap.entrySet()) { - final String normalizedToken = tokenToNormalized.getValue(); - final Node node = root.getNode(normalizedToken, 0, true); - node.addToken(tokenToNormalized.getKey(), new EntryDescriptor((int) fileLocation, tokens.length)); - assert node == root.getNode(normalizedToken, 0, false); - assert normalizedToken - .equals(root.getNode(normalizedToken, 0, false).normalizedToken); - } - - if (lineCount % 10000 == 0) { - System.out.println("IndexBuilder: " + "lineCount=" + lineCount); - } - - lineCount++; - fileLocation = dictionaryFile.getFilePointer(); - } - dictionaryFile.close(); - - root.recursiveSetDescendantCounts(); - - return root; - } - -} diff --git a/src/com/hughes/android/dictionary/IndexTest.java b/src/com/hughes/android/dictionary/IndexTest.java deleted file mode 100755 index 2622e30..0000000 --- a/src/com/hughes/android/dictionary/IndexTest.java +++ /dev/null @@ -1,80 +0,0 @@ -package com.hughes.android.dictionary; - -import java.io.IOException; -import java.io.RandomAccessFile; -import java.util.LinkedHashSet; -import java.util.Set; - -import junit.framework.TestCase; - -import com.hughes.android.dictionary.Index.Node; -import com.hughes.util.FileUtil; - -public class IndexTest extends TestCase { - - static final String file = "c:\\dict-de-en.txt"; - static final String file_index = file + "_index_0"; - - public void testRoot() throws IOException { - System.out.println(" testRoot"); - final Index index = new Index(file_index); - final Node node = index.lookup(""); - assertNotNull(node); - - assertEquals(312220, node.descendantTokenCount); - assertEquals(1087063, node.descendantEntryCount); - - for (final String token : node.tokenToOffsets.keySet()) { - System.out.println(token); - assertTrue(token.toLowerCase().contains("handhubwagen")); - } - } - - public void testLookup() throws IOException { - System.out.println(" testLookup"); - final Index index = new Index(file_index); - final Node node = index.lookup("handhubwagen"); - assertNotNull(node); - - assertEquals(1, node.descendantTokenCount); - assertEquals(2, node.descendantEntryCount); - - for (final String token : node.tokenToOffsets.keySet()) { - System.out.println(token); - assertTrue(token.toLowerCase().contains("handhubwagen")); - } - } - - public void testGetDescendantOffsets() throws IOException { - System.out.println(" testGetDescendantOffsets"); - final Index index = new Index(file_index); - - final Node node = index.lookup("handhebe"); - assertNotNull(node); - assertEquals("handhebel", node.nodeHandle.normalizedToken); - final Set offsets = new LinkedHashSet(); - node.getDescendantEntryOffsets(offsets, 10); - final RandomAccessFile raf = new RandomAccessFile(file, "r"); - for (final Integer offset : offsets) { - final String entry = FileUtil.readLine(raf, offset); - System.out.println(entry); - assertTrue(entry.toLowerCase().contains(node.nodeHandle.normalizedToken)); - } - } - - public void testGetDescendants() throws IOException { - System.out.println(" testGetDescendant"); - final Index index = new Index(file_index); - final RandomAccessFile raf = new RandomAccessFile(file, "r"); - for (int i = 1000000; i < 1000050; ++i) { - final Object o = index.root.getDescendant(i); - if (o instanceof Integer) { - System.out.println(" " + FileUtil.readLine(raf, (Integer)o)); - } else { - System.out.println(o); - } - } - raf.close(); - } - -} diff --git a/src/com/hughes/android/dictionary/ZIndexBuilder.java b/src/com/hughes/android/dictionary/ZIndexBuilder.java new file mode 100755 index 0000000..5d4b26a --- /dev/null +++ b/src/com/hughes/android/dictionary/ZIndexBuilder.java @@ -0,0 +1,117 @@ +package com.hughes.android.dictionary; + + +public class ZIndexBuilder { + +// static final Pattern WHITESPACE = Pattern.compile("\\s+"); +// static final Pattern NONALPHA = Pattern.compile("[^A-Za-z]+"); +// +// public static void main(String[] args) throws IOException, +// ClassNotFoundException { +// if (args.length != 1) { +// System.err.println("No input file."); +// return; +// } +// final String dictionaryFileName = args[0]; +// createIndex(dictionaryFileName, Entry.LANG1); +// createIndex(dictionaryFileName, Entry.LANG2); +// } +// +// private static void createIndex(final String dictionaryFileName, +// final byte lang) throws IOException, FileNotFoundException, +// ClassNotFoundException { +// Node rootBuilder; +// rootBuilder = processDictionaryLines(dictionaryFileName, lang); +// FileUtil.write(rootBuilder, String.format("%s_builder_%d.serialized", dictionaryFileName, lang)); +// rootBuilder = (Node) FileUtil.read(String.format("%s_builder_%d.serialized", dictionaryFileName, lang)); +// +// rootBuilder.forEachNode(new Function() { +// @Override +// public void invoke(final Node node) { +// for (final List entryDescriptors : node.entryDescriptorsMap.values()) { +// Collections.sort(entryDescriptors); +// } +// }}); +// +// // Dump twice to get accurate file locations. +// for (int i = 0; i < 2; ++i) { +// final RandomAccessFile raf = new RandomAccessFile(String.format(Dictionary.INDEX_FORMAT, dictionaryFileName, lang), "rw"); +// rootBuilder.dump(raf); +// raf.close(); +// } +// } +// +// // ---------------------------------------------------------------- +// +// static final class EntryDescriptor implements Comparable, Serializable { +// final int offset; +// final int numTokens; +// public EntryDescriptor(int offset, int numTokens) { +// this.offset = offset; +// this.numTokens = numTokens; +// } +// @Override +// public boolean equals(Object obj) { +// final EntryDescriptor that = (EntryDescriptor) obj; +// return this.offset == that.offset; +// } +// @Override +// public int hashCode() { +// return offset; +// } +// @Override +// public int compareTo(EntryDescriptor o) { +// return this.numTokens < o.numTokens ? -1 : this.numTokens == o.numTokens ? 0 : 1; +// } +// } +// +// +// // ---------------------------------------------------------------- +// +// static Node processDictionaryLines(final String dictionaryFileName, final byte lang) throws IOException { +// final Node root = new Node(""); +// final RandomAccessFile dictionaryFile = new RandomAccessFile(dictionaryFileName, "r"); +// String line; +// final Entry entry = new Entry(); +// int lineCount = 0; +// long fileLocation = 0; +// while ((line = dictionaryFile.readLine()) != null) { +// assert ((int) fileLocation) == fileLocation; +// +// line = line.trim(); +// if (line.isEmpty() || line.startsWith("#") || !entry.parseFromLine(line)) { +// continue; +// } +// final String text = entry.getIndexableText(Entry.LANG1); +// final String[] tokens = WHITESPACE.split(text); +// final Map tokenToNormalizedMap = new LinkedHashMap(); +// for (String token : tokens) { +// if (token.length() <= 1 || !Character.isLetter(token.charAt(0))) { +// continue; +// } +// tokenToNormalizedMap.put(token, EntryFactory.entryFactory.normalizeToken(token)); +// } +// for (final Map.Entry tokenToNormalized : tokenToNormalizedMap.entrySet()) { +// final String normalizedToken = tokenToNormalized.getValue(); +// final Node node = root.getNode(normalizedToken, 0, true); +// node.addToken(tokenToNormalized.getKey(), new EntryDescriptor((int) fileLocation, tokens.length)); +// assert node == root.getNode(normalizedToken, 0, false); +// assert normalizedToken +// .equals(root.getNode(normalizedToken, 0, false).normalizedToken); +// } +// +// if (lineCount % 10000 == 0) { +// System.out.println("IndexBuilder: " + "lineCount=" + lineCount); +// } +// +// lineCount++; +// fileLocation = dictionaryFile.getFilePointer(); +// } +// dictionaryFile.close(); +// +// root.recursiveSetDescendantCounts(); +// +// return root; +// } + +} diff --git a/src/com/hughes/android/dictionary/ZIndexTest.java b/src/com/hughes/android/dictionary/ZIndexTest.java new file mode 100755 index 0000000..3196093 --- /dev/null +++ b/src/com/hughes/android/dictionary/ZIndexTest.java @@ -0,0 +1,81 @@ +package com.hughes.android.dictionary; +//package com.hughes.android.dictionary; +// +//import java.io.IOException; +//import java.io.RandomAccessFile; +//import java.util.LinkedHashSet; +//import java.util.Set; +// +//import junit.framework.TestCase; +// +//import com.hughes.android.dictionary.Index.Node; +//import com.hughes.util.FileUtil; +// +//public class IndexTest extends TestCase { +// +// static final String file = "c:\\dict-de-en.txt"; +// static final String file_index = file + "_index_0"; +// +// public void testRoot() throws IOException { +// System.out.println(" testRoot"); +// final Index index = new Index(file_index); +// final Node node = index.lookup(""); +// assertNotNull(node); +// +// assertEquals(312220, node.descendantTokenCount); +// assertEquals(1087063, node.descendantEntryCount); +// +// for (final String token : node.tokenToOffsets.keySet()) { +// System.out.println(token); +// assertTrue(token.toLowerCase().contains("handhubwagen")); +// } +// } +// +// public void testLookup() throws IOException { +// System.out.println(" testLookup"); +// final Index index = new Index(file_index); +// final Node node = index.lookup("handhubwagen"); +// assertNotNull(node); +// +// assertEquals(1, node.descendantTokenCount); +// assertEquals(2, node.descendantEntryCount); +// +// for (final String token : node.tokenToOffsets.keySet()) { +// System.out.println(token); +// assertTrue(token.toLowerCase().contains("handhubwagen")); +// } +// } +// +// public void testGetDescendantOffsets() throws IOException { +// System.out.println(" testGetDescendantOffsets"); +// final Index index = new Index(file_index); +// +// final Node node = index.lookup("handhebe"); +// assertNotNull(node); +// assertEquals("handhebel", node.nodeHandle.normalizedToken); +// final Set offsets = new LinkedHashSet(); +// node.getDescendantEntryOffsets(offsets, 10); +// final RandomAccessFile raf = new RandomAccessFile(file, "r"); +// for (final Integer offset : offsets) { +// final String entry = FileUtil.readLine(raf, offset); +// System.out.println(entry); +// assertTrue(entry.toLowerCase().contains(node.nodeHandle.normalizedToken)); +// } +// } +// +// public void testGetDescendants() throws IOException { +// System.out.println(" testGetDescendant"); +// final Index index = new Index(file_index); +// final RandomAccessFile raf = new RandomAccessFile(file, "r"); +// for (int i = 1000000; i < 1000050; ++i) { +// final Object o = index.root.getDescendant(i); +// if (o instanceof Integer) { +// System.out.println(" " + FileUtil.readLine(raf, (Integer)o)); +// } else { +// System.out.println(o); +// } +// } +// raf.close(); +// } +// +//}