From: Thad Hughes Date: Tue, 12 Oct 2010 23:54:17 +0000 (-0700) Subject: go X-Git-Url: http://gitweb.fperrin.net/?p=DictionaryPC.git;a=commitdiff_plain;h=05fefbf6fab4ad3ba6a5d2899bc709a9815d490c go --- diff --git a/src/com/hughes/android/dictionary/DictionaryBuilder.java b/src/com/hughes/android/dictionary/DictionaryBuilder.java deleted file mode 100755 index ba60c4c..0000000 --- a/src/com/hughes/android/dictionary/DictionaryBuilder.java +++ /dev/null @@ -1,271 +0,0 @@ -package com.hughes.android.dictionary; - -import java.io.BufferedReader; -import java.io.File; -import java.io.FileInputStream; -import java.io.FileNotFoundException; -import java.io.IOException; -import java.io.InputStreamReader; -import java.io.RandomAccessFile; -import java.nio.charset.Charset; -import java.util.ArrayList; -import java.util.Collections; -import java.util.List; -import java.util.Map; -import java.util.Random; -import java.util.Set; -import java.util.TreeMap; - -import javax.xml.parsers.ParserConfigurationException; - -import org.xml.sax.SAXException; - -import com.hughes.android.dictionary.Dictionary.IndexEntry; -import com.hughes.android.dictionary.Dictionary.LanguageData; -import com.hughes.android.dictionary.Dictionary.Row; -import com.hughes.util.Args; -import com.hughes.util.FileUtil; - -public class DictionaryBuilder { - - public static void main(String[] args) throws IOException, - ClassNotFoundException, ParserConfigurationException, SAXException { - - final Map keyValueArgs = Args.keyValueArgs(args); - - final Language lang1 = Language.lookup(keyValueArgs.remove("lang1")); - final Language lang2 = Language.lookup(keyValueArgs.remove("lang2")); - if (lang1 == null || lang2 == null) { - fatalError("--lang1= and --lang2= must both be specified."); - } - - final String dictOutFilename = keyValueArgs.remove("dictOut"); - if (dictOutFilename == null) { - fatalError("--dictOut= must be specified."); - } - - String summaryText = keyValueArgs.remove("summaryText"); - if (summaryText == null) { - fatalError("--summaryText= must be specified."); - } - if (summaryText.startsWith("@")) { - summaryText = FileUtil.readToString(new File(summaryText.substring(1))); - } - - final String maxEntriesString = keyValueArgs.remove("maxEntries"); - final int maxEntries = maxEntriesString == null ? Integer.MAX_VALUE : Integer.parseInt(maxEntriesString); - - System.out.println("lang1=" + lang1); - System.out.println("lang2=" + lang2); - System.out.println("summaryText=" + summaryText); - System.out.println("dictOut=" + dictOutFilename); - - final Dictionary dict = new Dictionary(summaryText, lang1, lang2); - - for (int i = 0; i < 100; ++i) { - final String prefix = "input" + i; - if (keyValueArgs.containsKey(prefix)) { - final File file = new File(keyValueArgs.remove(prefix)); - System.out.println("Processing: " + file); - String charsetName = keyValueArgs.remove(prefix + "Charset"); - if (charsetName == null) { - charsetName = "UTF8"; - } - final Charset charset = Charset.forName(charsetName); - String inputName = keyValueArgs.remove(prefix + "Name"); - if (inputName == null) { - fatalError("Must specify human readable name for: " + prefix + "Name"); - } - - String inputFormat = keyValueArgs.remove(prefix + "Format"); - if ("dictcc".equals(inputFormat)) { - processLinedInputFile(dict, file, charset, false, maxEntries); - } else if ("chemnitz".equals(inputFormat)) { - processLinedInputFile(dict, file, charset, true, maxEntries); - } else if ("wiktionary".equals(inputFormat)) { - new WiktionaryXmlParser(dict).parse(file); - } else { - fatalError("Invalid or missing input format: " + inputFormat); - } - - dict.sources.add(inputName); - System.out.println("Done: " + file + "\n\n"); - } - } - - if (!keyValueArgs.isEmpty()) { - System.err.println("WARNING: couldn't parse arguments: " + keyValueArgs); - } - - createIndex(dict, SimpleEntry.LANG1); - createIndex(dict, SimpleEntry.LANG2); - - System.out.println("Writing dictionary."); - final RandomAccessFile dictOut = new RandomAccessFile(dictOutFilename, "rw"); - dictOut.setLength(0); - dict.write(dictOut); - dictOut.close(); - - final Random random = new Random(0); - for (byte lang = 0; lang < 2; ++lang) { - final LanguageData languageData = dict.languageDatas[lang]; - System.out.println("\nRandom words for: " + languageData.language.getSymbol()); - for (int i = 0; i < 20; ++i) { - final int w = random.nextInt(languageData.sortedIndex.size()); - final IndexEntry entry = languageData.sortedIndex.get(w); - final List rows = languageData.rows; - int r = entry.startRow; - System.out.println(languageData.rowToString(rows.get(r), false)); - ++r; - while (r < rows.size() && !rows.get(r).isToken()) { - System.out.println(" " + languageData.rowToString(rows.get(r), false)); - ++r; - } - } - } - } - - private static void fatalError(String string) { - System.err.println(string); - System.exit(1); - } - - private static void processLinedInputFile(final Dictionary dict, final File file, - final Charset charset, final boolean hasMultipleSubentries, - final int maxEntries) throws FileNotFoundException, IOException { - final BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(file), charset)); - String line; - int lineCount = 0; - while ((line = reader.readLine()) != null && lineCount < maxEntries) { - if (maxEntries < 200) { - System.out.println(line); - } - line = line.trim(); - if (line.equals("") || line.startsWith("#")) { - continue; - } - - final SimpleEntry entry = SimpleEntry.parseFromLine(line, hasMultipleSubentries); - if (entry == null) { - System.err.println("Invalid entry: " + line); - continue; - } - - dict.entries.add(entry); - - if (lineCount % 10000 == 0) { - System.out.println("IndexBuilder: " + "lineCount=" + lineCount); - } - lineCount++; - } - reader.close(); - } - - public static void createIndex(final Dictionary dict, final byte lang) { - System.out.println("Creating index: " + lang); - - final Map tokenToData = new TreeMap(dict.languageDatas[lang].language.sortComparator); - - for (int e = 0; e < dict.entries.size(); ++e) { - final SimpleEntry entry = null; //dict.entries.get(e); - final Set tokens = entry.getIndexableTokens(lang); - for (final String token : tokens) { - TokenData tokenData = tokenToData.get(token); - if (tokenData == null) { - tokenData = new TokenData(token); - tokenToData.put(token, tokenData); - } - tokenData.entries.add(new TokenEntryData(lang, token, entry, e)); - } - - if (e % 10000 == 0) { - System.out.println("createIndex: " + "e=" + e); - } - } - - // Sort it. - - System.out.println("Sorting TokenData..."); - final List sortedTokenData = new ArrayList(tokenToData - .values()); - - System.out.println("Sorting entries within each TokenData..."); - for (final TokenData tokenData : sortedTokenData) { - Collections.sort(tokenData.entries); - } - - // Put it all together. - System.out.println("Assembling final data structures..."); - final List rows = dict.languageDatas[lang].rows; - final List indexEntries = dict.languageDatas[lang].sortedIndex; - for (int t = 0; t < sortedTokenData.size(); ++t) { - final TokenData tokenData = sortedTokenData.get(t); - final int startRow = rows.size(); - final IndexEntry indexEntry = new IndexEntry(tokenData.token, startRow); - indexEntries.add(indexEntry); - - final Row tokenRow = new Row(-(t + 1)); - rows.add(tokenRow); - - for (final TokenEntryData entryData : tokenData.entries) { - final Row entryRow = new Row(entryData.entryIndex); - rows.add(entryRow); - } - } - - } - - static final class TokenEntryData implements Comparable { - final String token; - final SimpleEntry entry; - final int entryIndex; - - private static final int bigNoOverflow = 100000; - - int minSubEntryIndexOf = bigNoOverflow; - int minSubEntryLength = bigNoOverflow; - int minSubEntry = bigNoOverflow; - - public TokenEntryData(final byte lang, final String token, final SimpleEntry entry, final int entryIndex) { - this.token = token; - this.entry = entry; - this.entryIndex = entryIndex; - - final String[] subentries = entry.getAllText(lang); - for (int s = 0; s < subentries.length; ++s) { - final String subentry = subentries[s]; - int indexOf = subentry.indexOf(token); - if (indexOf != -1) { - minSubEntryIndexOf = Math.min(minSubEntryIndexOf, indexOf); - minSubEntryLength = Math.min(minSubEntryLength, subentry.length()); - minSubEntry = Math.min(minSubEntry, s); - } - } - } - - @Override - public int compareTo(final TokenEntryData that) { - assert this.token.equals(that.token); - - if (this.minSubEntryIndexOf != that.minSubEntryIndexOf) { - return this.minSubEntryIndexOf - that.minSubEntryIndexOf; - } - if (this.minSubEntryLength != that.minSubEntryLength) { - return this.minSubEntryLength - that.minSubEntryLength; - } - return this.minSubEntry - that.minSubEntry; - } - } - - static final class TokenData { - final String token; - final List entries = new ArrayList(); - - int startRow; - - public TokenData(final String token) { - this.token = token; - } - } - -} diff --git a/src/com/hughes/android/dictionary/DictionaryTest.java b/src/com/hughes/android/dictionary/DictionaryTest.java deleted file mode 100755 index af770e1..0000000 --- a/src/com/hughes/android/dictionary/DictionaryTest.java +++ /dev/null @@ -1,227 +0,0 @@ -package com.hughes.android.dictionary; - -import java.io.File; -import java.io.IOException; -import java.io.RandomAccessFile; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collections; -import java.util.LinkedHashSet; -import java.util.List; -import java.util.Set; -import java.util.concurrent.atomic.AtomicBoolean; - -import junit.framework.TestCase; - -import com.hughes.android.dictionary.Dictionary.IndexEntry; -import com.hughes.android.dictionary.Dictionary.LanguageData; -import com.hughes.android.dictionary.Dictionary.Row; - -public class DictionaryTest extends TestCase { - - public void testDictionary() throws IOException { - final File file = File.createTempFile("asdf", "asdf"); - file.deleteOnExit(); - -// final Dictionary goldenDict; - final List entries = Arrays.asList( - SimpleEntry.parseFromLine("der Hund :: the dog", false), - SimpleEntry.parseFromLine("Die grosse Katze :: The big cat", false), - SimpleEntry.parseFromLine("die Katze :: the cat", false), - SimpleEntry.parseFromLine("gross :: big", false), - SimpleEntry.parseFromLine("Dieb :: thief", false), - SimpleEntry.parseFromLine("rennen :: run", false)); - - { - final Dictionary dict = new Dictionary("test", Language.de, Language.en); - dict.entries.addAll(entries); - DictionaryBuilder.createIndex(dict, SimpleEntry.LANG1); - DictionaryBuilder.createIndex(dict, SimpleEntry.LANG2); - final RandomAccessFile raf = new RandomAccessFile(file, "rw"); - dict.write(raf); - raf.close(); - -// goldenDict = dict; - } - - final RandomAccessFile raf = new RandomAccessFile(file, "r"); - final Dictionary dict = new Dictionary(raf); - - assertEquals(entries, dict.entries); - - assertEquals("der", dict.languageDatas[0].sortedIndex.get(0).word); - assertEquals("die", dict.languageDatas[0].sortedIndex.get(1).word); - - assertEquals(0, dict.languageDatas[0].getPrevTokenRow(0)); - assertEquals(0, dict.languageDatas[0].getPrevTokenRow(2)); - assertEquals(0, dict.languageDatas[0].getPrevTokenRow(1)); - assertEquals(4, dict.languageDatas[0].getPrevTokenRow(6)); - - assertEquals(2, dict.languageDatas[0].getNextTokenRow(0)); - assertEquals(2, dict.languageDatas[0].getNextTokenRow(1)); - assertEquals(4, dict.languageDatas[0].getNextTokenRow(2)); - assertEquals(8, dict.languageDatas[0].getNextTokenRow(6)); - assertEquals(dict.languageDatas[0].rows.size() - 1, dict.languageDatas[0].getNextTokenRow(dict.languageDatas[0].rows.size() - 2)); - assertEquals(dict.languageDatas[0].rows.size() - 1, dict.languageDatas[0].getNextTokenRow(dict.languageDatas[0].rows.size() - 1)); - - for (final IndexEntry indexEntry : dict.languageDatas[0].sortedIndex) { - System.out.println(indexEntry); - } - - int rowCount = 0; - for (final Row row : dict.languageDatas[0].rows) { - if (row.index >= 0) { - System.out.println(" " + rowCount + ":" + dict.entries.get(row.index)); - } else { - System.out.println(rowCount + ":" + dict.languageDatas[0].sortedIndex.get(-row.index - 1)); - } - ++rowCount; - } - - for (int l = 0; l <= 1; l++) { - final LanguageData languageData = dict.languageDatas[l]; - for (int i = 0; i < languageData.sortedIndex.size(); i++) { - final IndexEntry indexEntry = languageData.sortedIndex.get(i); - if (indexEntry.word.toLowerCase().equals("dieb")) - System.out.println(); - final IndexEntry lookedUpEntry = languageData.sortedIndex.get(languageData.lookup(indexEntry.word, new AtomicBoolean(false))); - if (!indexEntry.word.toLowerCase().equals(lookedUpEntry.word.toLowerCase())) - System.out.println(); - assertEquals(indexEntry.word.toLowerCase(), lookedUpEntry.word.toLowerCase()); - } - } - - assertEquals("die", dict.languageDatas[0].sortedIndex.get(dict.languageDatas[0].lookup("Die", new AtomicBoolean())).word); - assertEquals("die", dict.languageDatas[0].sortedIndex.get(dict.languageDatas[0].lookup("die", new AtomicBoolean())).word); - - } - - public void testTextNorm() throws IOException { - System.out.println("\n\ntestTextNorm"); - final List entries = Arrays.asList( - SimpleEntry.parseFromLine("Hund {m} :: dog", true), - SimpleEntry.parseFromLine("'CHRISTOS' :: doh", true), - SimpleEntry.parseFromLine("\"Pick-up\"-Presse {f} :: baler", true), - SimpleEntry.parseFromLine("(Ach was), echt? [auch ironisch] :: No shit! [also ironic]", true), - SimpleEntry.parseFromLine("(akuter) Myokardinfarkt {m} :: (acute) myocardial infarction ", true), - SimpleEntry.parseFromLine("(reine) Vermutung {f} :: guesswork", true), - SimpleEntry.parseFromLine("(mit) 6:1 vorne liegen :: to be 6-1 up [football]", true), - SimpleEntry.parseFromLine("(auf) den Knopf drücken [auch fig.: auslösen] :: to push the button [also fig.: initiate]", false), - SimpleEntry.parseFromLine("Adjektiv {n} /Adj./; Eigenschaftswort {n} [gramm.] | Adjektive {pl}; Eigenschaftswoerter {pl} :: adjective /adj./ | adjectives", true), - SimpleEntry.parseFromLine("Älteste {m,f}; Ältester :: oldest; eldest", true), - SimpleEntry.parseFromLine("\"...\", schloss er an. :: '...,' he added.", true), - SimpleEntry.parseFromLine("besonderer | besondere | besonderes :: extra", false), - SimpleEntry.parseFromLine("| zu Pferde; zu Pferd | reiten :: horseback | on horseback | go on horseback", true), - SimpleEntry.parseFromLine("Hauptaugenmerk {m} | sein Hauptaugenmerk richten auf :: | to focus (one's) attention on", true), - SimpleEntry.parseFromLine("σ-Algebra {f} :: σ-field", true) - ); - - assertFalse(entries.contains(null)); - - // Hyphenated words get put both multiple listings. - - final Dictionary dict = new Dictionary("test", Language.de, Language.en); - dict.entries.addAll(entries); - DictionaryBuilder.createIndex(dict, SimpleEntry.LANG1); - DictionaryBuilder.createIndex(dict, SimpleEntry.LANG2); - - for (int lang = 0; lang <= 1; lang++) { - final LanguageData languageData = dict.languageDatas[lang]; - System.out.println("\n" + languageData.language); - final Set words = new LinkedHashSet(); - for (int i = 0; i < languageData.sortedIndex.size(); i++) { - final IndexEntry indexEntry = languageData.sortedIndex.get(i); - System.out.println(indexEntry); - words.add(indexEntry.word); - } - if (lang == 0) { - assertTrue(words.contains("CHRISTOS")); - assertTrue(words.contains("akuter")); - assertTrue(words.contains("σ-Algebra")); - - assertFalse(words.contains("-Algebra")); - } else { - assertTrue(words.contains("σ-field")); - assertTrue(words.contains("6-1")); - } - } - - } - - public void testGermanSort() { - assertEquals("aüÄ", Language.de.textNorm("aueAe")); - final List words = Arrays.asList( - "er-ben", - "erben", - "Erben", - "Erbse", - "Erbsen", - "essen", - "Essen", - "Grosformat", - "Grosformats", - "Grossformat", - "Großformat", - "Grossformats", - "Großformats", - "Großpoo", - "Großpoos", - "hulle", - "Hulle", - "hülle", - "huelle", - "Hülle", - "Huelle", - "Hum" - ); - assertEquals(0, Language.de.sortComparator.compare("hülle", "huelle")); - assertEquals(0, Language.de.sortComparator.compare("huelle", "hülle")); - - assertEquals(-1, Language.de.sortComparator.compare("hülle", "Hülle")); - assertEquals(0, Language.de.findComparator.compare("hülle", "Hülle")); - assertEquals(-1, Language.de.findComparator.compare("hulle", "Hülle")); - - - for (final String s : words) { - System.out.println(s + "\t" + Language.de.textNorm(s)); - } - final List sorted = new ArrayList(words); -// Collections.shuffle(shuffled, new Random(0)); - Collections.sort(sorted, Language.de.sortComparator); - System.out.println(sorted.toString()); - for (int i = 0; i < words.size(); ++i) { - System.out.println(words.get(i) + "\t" + sorted.get(i)); - assertEquals(words.get(i), sorted.get(i)); - } - } - - public void testEnglishSort() { - - final List words = Arrays.asList( - "pre-print", - "preppie", - "preppy", - "preprocess"); - - final List sorted = new ArrayList(words); - Collections.sort(sorted, Language.en.sortComparator); - for (int i = 0; i < words.size(); ++i) { - if (i > 0) { - assertTrue(Language.en.sortComparator.compare(words.get(i-1), words.get(i)) < 0); - } - System.out.println(words.get(i) + "\t" + sorted.get(i)); - assertEquals(words.get(i), sorted.get(i)); - } - - assertTrue(Language.en.sortCollator.compare("pre-print", "preppy") < 0); - - } - - public void testLanguage() { - System.out.println("languages=" + Language.symbolToLangauge.values()); - assertEquals(Language.de, Language.lookup("de")); - assertEquals(Language.en, Language.lookup("en")); - assertEquals("es", Language.lookup("es").symbol); - } - -} diff --git a/src/com/hughes/android/dictionary/InputParser.java b/src/com/hughes/android/dictionary/InputParser.java deleted file mode 100644 index 5ea1374..0000000 --- a/src/com/hughes/android/dictionary/InputParser.java +++ /dev/null @@ -1,15 +0,0 @@ -package com.hughes.android.dictionary; - -import java.io.File; - -public interface InputParser { - - void parse(final File file, final Dictionary dest); - - class LineParser implements InputParser { - @Override - public void parse(File file, Dictionary dest) { - } - } - -} diff --git a/src/com/hughes/android/dictionary/WiktionaryXmlParser.java b/src/com/hughes/android/dictionary/WiktionaryXmlParser.java index 3ed4617..31d8c92 100644 --- a/src/com/hughes/android/dictionary/WiktionaryXmlParser.java +++ b/src/com/hughes/android/dictionary/WiktionaryXmlParser.java @@ -18,6 +18,7 @@ import javax.xml.parsers.SAXParserFactory; import org.xml.sax.Attributes; import org.xml.sax.SAXException; +import com.hughes.android.dictionary.engine.Dictionary; import com.hughes.util.MapUtil; import com.hughes.util.StringUtil; diff --git a/src/com/hughes/android/dictionary/engine/DictFileParser.java b/src/com/hughes/android/dictionary/engine/DictFileParser.java index 55512be..2119a10 100644 --- a/src/com/hughes/android/dictionary/engine/DictFileParser.java +++ b/src/com/hughes/android/dictionary/engine/DictFileParser.java @@ -11,7 +11,6 @@ import java.util.logging.Logger; import java.util.regex.Matcher; import java.util.regex.Pattern; -import com.hughes.android.dictionary.Language; import com.hughes.android.dictionary.engine.PairEntry.Pair; public class DictFileParser { diff --git a/src/com/hughes/android/dictionary/engine/DictionaryBuilder.java b/src/com/hughes/android/dictionary/engine/DictionaryBuilder.java index bff164b..6aea577 100644 --- a/src/com/hughes/android/dictionary/engine/DictionaryBuilder.java +++ b/src/com/hughes/android/dictionary/engine/DictionaryBuilder.java @@ -9,7 +9,6 @@ import java.util.ArrayList; import java.util.List; import java.util.Map; -import com.hughes.android.dictionary.Language; import com.hughes.util.Args; import com.hughes.util.FileUtil; diff --git a/src/com/hughes/android/dictionary/engine/DictionaryTest.java b/src/com/hughes/android/dictionary/engine/DictionaryTest.java index 59bb031..d4ca69f 100644 --- a/src/com/hughes/android/dictionary/engine/DictionaryTest.java +++ b/src/com/hughes/android/dictionary/engine/DictionaryTest.java @@ -2,10 +2,15 @@ package com.hughes.android.dictionary.engine; import java.io.IOException; import java.io.RandomAccessFile; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; import java.util.concurrent.atomic.AtomicBoolean; import junit.framework.TestCase; + public class DictionaryTest extends TestCase { RandomAccessFile raf; @@ -47,10 +52,23 @@ public class DictionaryTest extends TestCase { assertEquals(indexEntry.token.toLowerCase(), row.getToken().toLowerCase()); } + // TODO: maybe if user types capitalization, use it. assertEquals("aaac", deIndex.find("AAAC", new AtomicBoolean(false)).getToken()); assertEquals("aaac", deIndex.find("aaac", new AtomicBoolean(false)).getToken()); assertEquals("aaac", deIndex.find("AAAc", new AtomicBoolean(false)).getToken()); assertEquals("aaac", deIndex.find("aaac", new AtomicBoolean(false)).getToken()); + + // Before the beginning. + assertEquals("40", deIndex.find("__", new AtomicBoolean(false)).getToken()); + + // After the end. + assertEquals("Zweckorientiertheit", deIndex.find("ZZZZZ", new AtomicBoolean(false)).getToken()); + + assertEquals("aaac", deIndex.find("aaaca", new AtomicBoolean(false)).getToken()); + + assertEquals("überprüfe", deIndex.find("ueberprüfe", new AtomicBoolean(false)).getToken()); + assertEquals("überprüfe", deIndex.find("ueberpruefe", new AtomicBoolean(false)).getToken()); + } public void testGermanTokenRows() { @@ -76,7 +94,83 @@ public class DictionaryTest extends TestCase { // This will break if the Row cache isn't big enough. assertEquals(lastTokenRow, row.getTokenRow(false)); } + } + + @SuppressWarnings("unchecked") + public void testGermanSort() { + assertEquals("aüÄÄ", Language.de.textNorm("aueAeAE", false)); + final List words = Arrays.asList( + "er-ben", + "erben", + "Erben", + "Erbse", + "Erbsen", + "essen", + "Essen", + "Grosformat", + "Grosformats", + "Grossformat", + "Großformat", + "Grossformats", + "Großformats", + "Großpoo", + "Großpoos", + "hulle", + "Hulle", + "hülle", + "huelle", + "Hülle", + "Huelle", + "Hum" + ); + assertEquals(0, Language.de.sortComparator.compare("hülle", "huelle")); + assertEquals(0, Language.de.sortComparator.compare("huelle", "hülle")); + + assertEquals(-1, Language.de.sortComparator.compare("hülle", "Hülle")); + assertEquals(0, Language.de.findComparator.compare("hülle", "Hülle")); + assertEquals(-1, Language.de.findComparator.compare("hulle", "Hülle")); + + + for (final String s : words) { + System.out.println(s + "\t" + Language.de.textNorm(s, false)); + } + final List sorted = new ArrayList(words); +// Collections.shuffle(shuffled, new Random(0)); + Collections.sort(sorted, Language.de.sortComparator); + System.out.println(sorted.toString()); + for (int i = 0; i < words.size(); ++i) { + System.out.println(words.get(i) + "\t" + sorted.get(i)); + assertEquals(words.get(i), sorted.get(i)); + } + } + @SuppressWarnings("unchecked") + public void testEnglishSort() { + + final List words = Arrays.asList( + "pre-print", + "preppie", + "preppy", + "preprocess"); + + final List sorted = new ArrayList(words); + Collections.sort(sorted, Language.en.getSortCollator()); + for (int i = 0; i < words.size(); ++i) { + if (i > 0) { + assertTrue(Language.en.getSortCollator().compare(words.get(i-1), words.get(i)) < 0); + } + System.out.println(words.get(i) + "\t" + sorted.get(i)); + assertEquals(words.get(i), sorted.get(i)); + } + + assertTrue(Language.en.getSortCollator().compare("pre-print", "preppy") < 0); + + } + + public void testLanguage() { + assertEquals(Language.de, Language.lookup("de")); + assertEquals(Language.en, Language.lookup("en")); + assertEquals("es", Language.lookup("es").getSymbol()); } diff --git a/src/com/hughes/android/dictionary/engine/IndexBuilder.java b/src/com/hughes/android/dictionary/engine/IndexBuilder.java index 0f35b50..aa09421 100644 --- a/src/com/hughes/android/dictionary/engine/IndexBuilder.java +++ b/src/com/hughes/android/dictionary/engine/IndexBuilder.java @@ -9,7 +9,6 @@ import java.util.Set; import java.util.SortedMap; import java.util.TreeMap; -import com.hughes.android.dictionary.Language; public class IndexBuilder {