From: thadh Date: Thu, 30 Jul 2009 22:00:41 +0000 (-0700) Subject: go X-Git-Url: http://gitweb.fperrin.net/?a=commitdiff_plain;h=a04c68cfe8978118debb157c5f8c56e7b1937856;p=DictionaryPC.git go --- diff --git a/src/com/hughes/android/dictionary/DictionaryBuilder.java b/src/com/hughes/android/dictionary/DictionaryBuilder.java index 8ff741e..bb1301e 100755 --- a/src/com/hughes/android/dictionary/DictionaryBuilder.java +++ b/src/com/hughes/android/dictionary/DictionaryBuilder.java @@ -36,7 +36,7 @@ public class DictionaryBuilder { processInputFile("c:\\de-en-chemnitz.txt", dict, true, Charset.forName("UTF8")); // Thad's extra sauce: - processInputFile("c:\\de-en-dictcc.txt", dict, false, Charset.forName("Cp1252")); +// processInputFile("c:\\de-en-dictcc.txt", dict, false, Charset.forName("Cp1252")); createIndex(dict, Entry.LANG1); createIndex(dict, Entry.LANG2); diff --git a/src/com/hughes/android/dictionary/DictionaryTest.java b/src/com/hughes/android/dictionary/DictionaryTest.java index 1578fd2..84bdd5e 100755 --- a/src/com/hughes/android/dictionary/DictionaryTest.java +++ b/src/com/hughes/android/dictionary/DictionaryTest.java @@ -178,4 +178,26 @@ public class DictionaryTest extends TestCase { } } + public void testEnglishSort() { + + final List words = Arrays.asList( + "preppie", + "preppy", + "pre-print", + "preprocess"); + + final List sorted = new ArrayList(words); + Collections.sort(sorted, Language.EN.sortComparator); + for (int i = 0; i < words.size(); ++i) { + if (i > 0) { + assertTrue(Language.EN.sortComparator.compare(words.get(i-1), words.get(i)) < 0); + } + System.out.println(words.get(i) + "\t" + sorted.get(i)); + assertEquals(words.get(i), sorted.get(i)); + } + + assertTrue(Language.EN.sortCollator.compare("preppy", "pre-print") < 0); + + } + } diff --git a/src/com/hughes/android/dictionary/ZIndexBuilder.java b/src/com/hughes/android/dictionary/ZIndexBuilder.java deleted file mode 100755 index 5d4b26a..0000000 --- a/src/com/hughes/android/dictionary/ZIndexBuilder.java +++ /dev/null @@ -1,117 +0,0 @@ -package com.hughes.android.dictionary; - - -public class ZIndexBuilder { - -// static final Pattern WHITESPACE = Pattern.compile("\\s+"); -// static final Pattern NONALPHA = Pattern.compile("[^A-Za-z]+"); -// -// public static void main(String[] args) throws IOException, -// ClassNotFoundException { -// if (args.length != 1) { -// System.err.println("No input file."); -// return; -// } -// final String dictionaryFileName = args[0]; -// createIndex(dictionaryFileName, Entry.LANG1); -// createIndex(dictionaryFileName, Entry.LANG2); -// } -// -// private static void createIndex(final String dictionaryFileName, -// final byte lang) throws IOException, FileNotFoundException, -// ClassNotFoundException { -// Node rootBuilder; -// rootBuilder = processDictionaryLines(dictionaryFileName, lang); -// FileUtil.write(rootBuilder, String.format("%s_builder_%d.serialized", dictionaryFileName, lang)); -// rootBuilder = (Node) FileUtil.read(String.format("%s_builder_%d.serialized", dictionaryFileName, lang)); -// -// rootBuilder.forEachNode(new Function() { -// @Override -// public void invoke(final Node node) { -// for (final List entryDescriptors : node.entryDescriptorsMap.values()) { -// Collections.sort(entryDescriptors); -// } -// }}); -// -// // Dump twice to get accurate file locations. -// for (int i = 0; i < 2; ++i) { -// final RandomAccessFile raf = new RandomAccessFile(String.format(Dictionary.INDEX_FORMAT, dictionaryFileName, lang), "rw"); -// rootBuilder.dump(raf); -// raf.close(); -// } -// } -// -// // ---------------------------------------------------------------- -// -// static final class EntryDescriptor implements Comparable, Serializable { -// final int offset; -// final int numTokens; -// public EntryDescriptor(int offset, int numTokens) { -// this.offset = offset; -// this.numTokens = numTokens; -// } -// @Override -// public boolean equals(Object obj) { -// final EntryDescriptor that = (EntryDescriptor) obj; -// return this.offset == that.offset; -// } -// @Override -// public int hashCode() { -// return offset; -// } -// @Override -// public int compareTo(EntryDescriptor o) { -// return this.numTokens < o.numTokens ? -1 : this.numTokens == o.numTokens ? 0 : 1; -// } -// } -// -// -// // ---------------------------------------------------------------- -// -// static Node processDictionaryLines(final String dictionaryFileName, final byte lang) throws IOException { -// final Node root = new Node(""); -// final RandomAccessFile dictionaryFile = new RandomAccessFile(dictionaryFileName, "r"); -// String line; -// final Entry entry = new Entry(); -// int lineCount = 0; -// long fileLocation = 0; -// while ((line = dictionaryFile.readLine()) != null) { -// assert ((int) fileLocation) == fileLocation; -// -// line = line.trim(); -// if (line.isEmpty() || line.startsWith("#") || !entry.parseFromLine(line)) { -// continue; -// } -// final String text = entry.getIndexableText(Entry.LANG1); -// final String[] tokens = WHITESPACE.split(text); -// final Map tokenToNormalizedMap = new LinkedHashMap(); -// for (String token : tokens) { -// if (token.length() <= 1 || !Character.isLetter(token.charAt(0))) { -// continue; -// } -// tokenToNormalizedMap.put(token, EntryFactory.entryFactory.normalizeToken(token)); -// } -// for (final Map.Entry tokenToNormalized : tokenToNormalizedMap.entrySet()) { -// final String normalizedToken = tokenToNormalized.getValue(); -// final Node node = root.getNode(normalizedToken, 0, true); -// node.addToken(tokenToNormalized.getKey(), new EntryDescriptor((int) fileLocation, tokens.length)); -// assert node == root.getNode(normalizedToken, 0, false); -// assert normalizedToken -// .equals(root.getNode(normalizedToken, 0, false).normalizedToken); -// } -// -// if (lineCount % 10000 == 0) { -// System.out.println("IndexBuilder: " + "lineCount=" + lineCount); -// } -// -// lineCount++; -// fileLocation = dictionaryFile.getFilePointer(); -// } -// dictionaryFile.close(); -// -// root.recursiveSetDescendantCounts(); -// -// return root; -// } - -} diff --git a/src/com/hughes/android/dictionary/ZIndexTest.java b/src/com/hughes/android/dictionary/ZIndexTest.java deleted file mode 100755 index 3196093..0000000 --- a/src/com/hughes/android/dictionary/ZIndexTest.java +++ /dev/null @@ -1,81 +0,0 @@ -package com.hughes.android.dictionary; -//package com.hughes.android.dictionary; -// -//import java.io.IOException; -//import java.io.RandomAccessFile; -//import java.util.LinkedHashSet; -//import java.util.Set; -// -//import junit.framework.TestCase; -// -//import com.hughes.android.dictionary.Index.Node; -//import com.hughes.util.FileUtil; -// -//public class IndexTest extends TestCase { -// -// static final String file = "c:\\dict-de-en.txt"; -// static final String file_index = file + "_index_0"; -// -// public void testRoot() throws IOException { -// System.out.println(" testRoot"); -// final Index index = new Index(file_index); -// final Node node = index.lookup(""); -// assertNotNull(node); -// -// assertEquals(312220, node.descendantTokenCount); -// assertEquals(1087063, node.descendantEntryCount); -// -// for (final String token : node.tokenToOffsets.keySet()) { -// System.out.println(token); -// assertTrue(token.toLowerCase().contains("handhubwagen")); -// } -// } -// -// public void testLookup() throws IOException { -// System.out.println(" testLookup"); -// final Index index = new Index(file_index); -// final Node node = index.lookup("handhubwagen"); -// assertNotNull(node); -// -// assertEquals(1, node.descendantTokenCount); -// assertEquals(2, node.descendantEntryCount); -// -// for (final String token : node.tokenToOffsets.keySet()) { -// System.out.println(token); -// assertTrue(token.toLowerCase().contains("handhubwagen")); -// } -// } -// -// public void testGetDescendantOffsets() throws IOException { -// System.out.println(" testGetDescendantOffsets"); -// final Index index = new Index(file_index); -// -// final Node node = index.lookup("handhebe"); -// assertNotNull(node); -// assertEquals("handhebel", node.nodeHandle.normalizedToken); -// final Set offsets = new LinkedHashSet(); -// node.getDescendantEntryOffsets(offsets, 10); -// final RandomAccessFile raf = new RandomAccessFile(file, "r"); -// for (final Integer offset : offsets) { -// final String entry = FileUtil.readLine(raf, offset); -// System.out.println(entry); -// assertTrue(entry.toLowerCase().contains(node.nodeHandle.normalizedToken)); -// } -// } -// -// public void testGetDescendants() throws IOException { -// System.out.println(" testGetDescendant"); -// final Index index = new Index(file_index); -// final RandomAccessFile raf = new RandomAccessFile(file, "r"); -// for (int i = 1000000; i < 1000050; ++i) { -// final Object o = index.root.getDescendant(i); -// if (o instanceof Integer) { -// System.out.println(" " + FileUtil.readLine(raf, (Integer)o)); -// } else { -// System.out.println(o); -// } -// } -// raf.close(); -// } -// -//}