]> gitweb.fperrin.net Git - DictionaryPC.git/commitdiff
go
authorThad Hughes <thad.hughes@gmail.com>
Tue, 12 Oct 2010 23:54:17 +0000 (16:54 -0700)
committerThad Hughes <thad.hughes@gmail.com>
Tue, 13 Dec 2011 01:29:28 +0000 (17:29 -0800)
src/com/hughes/android/dictionary/DictionaryBuilder.java [deleted file]
src/com/hughes/android/dictionary/DictionaryTest.java [deleted file]
src/com/hughes/android/dictionary/InputParser.java [deleted file]
src/com/hughes/android/dictionary/WiktionaryXmlParser.java
src/com/hughes/android/dictionary/engine/DictFileParser.java
src/com/hughes/android/dictionary/engine/DictionaryBuilder.java
src/com/hughes/android/dictionary/engine/DictionaryTest.java
src/com/hughes/android/dictionary/engine/IndexBuilder.java

diff --git a/src/com/hughes/android/dictionary/DictionaryBuilder.java b/src/com/hughes/android/dictionary/DictionaryBuilder.java
deleted file mode 100755 (executable)
index ba60c4c..0000000
+++ /dev/null
@@ -1,271 +0,0 @@
-package com.hughes.android.dictionary;\r
-\r
-import java.io.BufferedReader;\r
-import java.io.File;\r
-import java.io.FileInputStream;\r
-import java.io.FileNotFoundException;\r
-import java.io.IOException;\r
-import java.io.InputStreamReader;\r
-import java.io.RandomAccessFile;\r
-import java.nio.charset.Charset;\r
-import java.util.ArrayList;\r
-import java.util.Collections;\r
-import java.util.List;\r
-import java.util.Map;\r
-import java.util.Random;\r
-import java.util.Set;\r
-import java.util.TreeMap;\r
-\r
-import javax.xml.parsers.ParserConfigurationException;\r
-\r
-import org.xml.sax.SAXException;\r
-\r
-import com.hughes.android.dictionary.Dictionary.IndexEntry;\r
-import com.hughes.android.dictionary.Dictionary.LanguageData;\r
-import com.hughes.android.dictionary.Dictionary.Row;\r
-import com.hughes.util.Args;\r
-import com.hughes.util.FileUtil;\r
-\r
-public class DictionaryBuilder {\r
-  \r
-  public static void main(String[] args) throws IOException,\r
-      ClassNotFoundException, ParserConfigurationException, SAXException {\r
-    \r
-    final Map<String,String> keyValueArgs = Args.keyValueArgs(args);\r
-    \r
-    final Language lang1 = Language.lookup(keyValueArgs.remove("lang1"));\r
-    final Language lang2 = Language.lookup(keyValueArgs.remove("lang2"));\r
-    if (lang1 == null || lang2 == null) {\r
-      fatalError("--lang1= and --lang2= must both be specified.");\r
-    }\r
-    \r
-    final String dictOutFilename = keyValueArgs.remove("dictOut");\r
-    if (dictOutFilename == null) {\r
-      fatalError("--dictOut= must be specified.");\r
-    }\r
-    \r
-    String summaryText = keyValueArgs.remove("summaryText");\r
-    if (summaryText == null) {\r
-      fatalError("--summaryText= must be specified.");\r
-    }\r
-    if (summaryText.startsWith("@")) {\r
-      summaryText = FileUtil.readToString(new File(summaryText.substring(1)));\r
-    }\r
-    \r
-    final String maxEntriesString = keyValueArgs.remove("maxEntries");\r
-    final int maxEntries = maxEntriesString == null ? Integer.MAX_VALUE : Integer.parseInt(maxEntriesString);\r
-    \r
-    System.out.println("lang1=" + lang1);\r
-    System.out.println("lang2=" + lang2);\r
-    System.out.println("summaryText=" + summaryText);\r
-    System.out.println("dictOut=" + dictOutFilename);\r
-\r
-    final Dictionary dict = new Dictionary(summaryText, lang1, lang2);\r
-\r
-    for (int i = 0; i < 100; ++i) {\r
-      final String prefix = "input" + i;\r
-      if (keyValueArgs.containsKey(prefix)) {\r
-        final File file = new File(keyValueArgs.remove(prefix));\r
-        System.out.println("Processing: " + file);\r
-        String charsetName = keyValueArgs.remove(prefix + "Charset");\r
-        if (charsetName == null) {\r
-          charsetName = "UTF8";\r
-        }\r
-        final Charset charset = Charset.forName(charsetName);\r
-        String inputName = keyValueArgs.remove(prefix + "Name");\r
-        if (inputName == null) {\r
-          fatalError("Must specify human readable name for: " + prefix + "Name");\r
-        }\r
-\r
-        String inputFormat = keyValueArgs.remove(prefix + "Format");\r
-        if ("dictcc".equals(inputFormat)) {\r
-          processLinedInputFile(dict, file, charset, false, maxEntries);\r
-        } else if ("chemnitz".equals(inputFormat)) {\r
-          processLinedInputFile(dict, file, charset, true, maxEntries);\r
-        } else if ("wiktionary".equals(inputFormat)) {\r
-          new WiktionaryXmlParser(dict).parse(file);\r
-        } else {\r
-          fatalError("Invalid or missing input format: " + inputFormat);\r
-        }\r
-        \r
-        dict.sources.add(inputName);\r
-        System.out.println("Done: " + file + "\n\n");\r
-      }\r
-    }\r
-    \r
-    if (!keyValueArgs.isEmpty()) {\r
-      System.err.println("WARNING: couldn't parse arguments: " + keyValueArgs);\r
-    }\r
-    \r
-    createIndex(dict, SimpleEntry.LANG1);\r
-    createIndex(dict, SimpleEntry.LANG2);\r
-\r
-    System.out.println("Writing dictionary.");\r
-    final RandomAccessFile dictOut = new RandomAccessFile(dictOutFilename, "rw");\r
-    dictOut.setLength(0);\r
-    dict.write(dictOut);\r
-    dictOut.close();\r
-    \r
-    final Random random = new Random(0);\r
-    for (byte lang = 0; lang < 2; ++lang) {\r
-      final LanguageData languageData = dict.languageDatas[lang];\r
-      System.out.println("\nRandom words for: " + languageData.language.getSymbol());\r
-      for (int i = 0; i < 20; ++i) {\r
-        final int w = random.nextInt(languageData.sortedIndex.size());\r
-        final IndexEntry entry = languageData.sortedIndex.get(w);\r
-        final List<Row> rows = languageData.rows;\r
-        int r = entry.startRow;\r
-        System.out.println(languageData.rowToString(rows.get(r), false));\r
-        ++r;\r
-        while (r < rows.size() && !rows.get(r).isToken()) {\r
-          System.out.println("  " + languageData.rowToString(rows.get(r), false));\r
-          ++r;\r
-        }\r
-      }\r
-    }\r
-  }\r
-\r
-  private static void fatalError(String string) {\r
-    System.err.println(string);\r
-    System.exit(1);\r
-  }\r
-\r
-  private static void processLinedInputFile(final Dictionary dict, final File file,\r
-      final Charset charset, final boolean hasMultipleSubentries,\r
-      final int maxEntries) throws FileNotFoundException, IOException {\r
-    final BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(file), charset));\r
-    String line;\r
-    int lineCount = 0;\r
-    while ((line = reader.readLine()) != null && lineCount < maxEntries) {\r
-      if (maxEntries < 200) { \r
-        System.out.println(line);\r
-      }\r
-      line = line.trim();\r
-      if (line.equals("") || line.startsWith("#")) {\r
-        continue;\r
-      }\r
-\r
-      final SimpleEntry entry = SimpleEntry.parseFromLine(line, hasMultipleSubentries);\r
-      if (entry == null) {\r
-        System.err.println("Invalid entry: " + line);\r
-        continue;\r
-      }\r
-\r
-      dict.entries.add(entry);\r
-\r
-      if (lineCount % 10000 == 0) {\r
-        System.out.println("IndexBuilder: " + "lineCount=" + lineCount);\r
-      }\r
-      lineCount++;\r
-    }\r
-    reader.close();\r
-  }\r
-\r
-  public static void createIndex(final Dictionary dict, final byte lang) {\r
-    System.out.println("Creating index: " + lang);\r
-\r
-    final Map<String, TokenData> tokenToData = new TreeMap<String, TokenData>(dict.languageDatas[lang].language.sortComparator);\r
-\r
-    for (int e = 0; e < dict.entries.size(); ++e) {\r
-      final SimpleEntry entry = null; //dict.entries.get(e);\r
-      final Set<String> tokens = entry.getIndexableTokens(lang);\r
-      for (final String token : tokens) {\r
-        TokenData tokenData = tokenToData.get(token);\r
-        if (tokenData == null) {\r
-          tokenData = new TokenData(token);\r
-          tokenToData.put(token, tokenData);\r
-        }\r
-        tokenData.entries.add(new TokenEntryData(lang, token, entry, e));\r
-      }\r
-\r
-      if (e % 10000 == 0) {\r
-        System.out.println("createIndex: " + "e=" + e);\r
-      }\r
-    }\r
-\r
-    // Sort it.\r
-\r
-    System.out.println("Sorting TokenData...");\r
-    final List<TokenData> sortedTokenData = new ArrayList<TokenData>(tokenToData\r
-        .values());\r
-\r
-    System.out.println("Sorting entries within each TokenData...");\r
-    for (final TokenData tokenData : sortedTokenData) {\r
-      Collections.sort(tokenData.entries);\r
-    }\r
-\r
-    // Put it all together.\r
-    System.out.println("Assembling final data structures...");\r
-    final List<Row> rows = dict.languageDatas[lang].rows;\r
-    final List<IndexEntry> indexEntries = dict.languageDatas[lang].sortedIndex;\r
-    for (int t = 0; t < sortedTokenData.size(); ++t) {\r
-      final TokenData tokenData = sortedTokenData.get(t);\r
-      final int startRow = rows.size();\r
-      final IndexEntry indexEntry = new IndexEntry(tokenData.token, startRow);\r
-      indexEntries.add(indexEntry);\r
-\r
-      final Row tokenRow = new Row(-(t + 1));\r
-      rows.add(tokenRow);\r
-\r
-      for (final TokenEntryData entryData : tokenData.entries) {\r
-        final Row entryRow = new Row(entryData.entryIndex);\r
-        rows.add(entryRow);\r
-      }\r
-    }\r
-\r
-  }\r
-\r
-  static final class TokenEntryData implements Comparable<TokenEntryData> {\r
-    final String token;\r
-    final SimpleEntry entry;\r
-    final int entryIndex;\r
-    \r
-    private static final int bigNoOverflow = 100000;\r
-\r
-    int minSubEntryIndexOf = bigNoOverflow;\r
-    int minSubEntryLength = bigNoOverflow;\r
-    int minSubEntry = bigNoOverflow;\r
-\r
-    public TokenEntryData(final byte lang, final String token, final SimpleEntry entry, final int entryIndex) {\r
-      this.token = token;\r
-      this.entry = entry;\r
-      this.entryIndex = entryIndex;\r
-      \r
-      final String[] subentries = entry.getAllText(lang);\r
-      for (int s = 0; s < subentries.length; ++s) {\r
-        final String subentry = subentries[s];\r
-        int indexOf = subentry.indexOf(token);\r
-        if (indexOf != -1) {\r
-          minSubEntryIndexOf = Math.min(minSubEntryIndexOf, indexOf); \r
-          minSubEntryLength = Math.min(minSubEntryLength, subentry.length());\r
-          minSubEntry = Math.min(minSubEntry, s);\r
-        }\r
-      }\r
-    }\r
-\r
-    @Override\r
-    public int compareTo(final TokenEntryData that) {\r
-      assert this.token.equals(that.token);\r
-      \r
-      if (this.minSubEntryIndexOf != that.minSubEntryIndexOf) {\r
-        return this.minSubEntryIndexOf - that.minSubEntryIndexOf;\r
-      }\r
-      if (this.minSubEntryLength != that.minSubEntryLength) {\r
-        return this.minSubEntryLength - that.minSubEntryLength;\r
-      }\r
-      return this.minSubEntry - that.minSubEntry;\r
-    }\r
-  }\r
-\r
-  static final class TokenData {\r
-    final String token;\r
-    final List<TokenEntryData> entries = new ArrayList<TokenEntryData>();\r
-\r
-    int startRow;\r
-\r
-    public TokenData(final String token) {\r
-      this.token = token;\r
-    }\r
-  }\r
-\r
-}\r
diff --git a/src/com/hughes/android/dictionary/DictionaryTest.java b/src/com/hughes/android/dictionary/DictionaryTest.java
deleted file mode 100755 (executable)
index af770e1..0000000
+++ /dev/null
@@ -1,227 +0,0 @@
-package com.hughes.android.dictionary;\r
-\r
-import java.io.File;\r
-import java.io.IOException;\r
-import java.io.RandomAccessFile;\r
-import java.util.ArrayList;\r
-import java.util.Arrays;\r
-import java.util.Collections;\r
-import java.util.LinkedHashSet;\r
-import java.util.List;\r
-import java.util.Set;\r
-import java.util.concurrent.atomic.AtomicBoolean;\r
-\r
-import junit.framework.TestCase;\r
-\r
-import com.hughes.android.dictionary.Dictionary.IndexEntry;\r
-import com.hughes.android.dictionary.Dictionary.LanguageData;\r
-import com.hughes.android.dictionary.Dictionary.Row;\r
-\r
-public class DictionaryTest extends TestCase {\r
-\r
-  public void testDictionary() throws IOException {\r
-    final File file = File.createTempFile("asdf", "asdf");\r
-    file.deleteOnExit();\r
-\r
-//    final Dictionary goldenDict;\r
-    final List<SimpleEntry> entries = Arrays.asList(\r
-        SimpleEntry.parseFromLine("der Hund :: the dog", false),\r
-        SimpleEntry.parseFromLine("Die grosse Katze :: The big cat", false), \r
-        SimpleEntry.parseFromLine("die Katze :: the cat", false),\r
-        SimpleEntry.parseFromLine("gross :: big", false),\r
-        SimpleEntry.parseFromLine("Dieb :: thief", false),\r
-        SimpleEntry.parseFromLine("rennen :: run", false));\r
-\r
-    {\r
-      final Dictionary dict = new Dictionary("test", Language.de, Language.en);\r
-      dict.entries.addAll(entries);\r
-      DictionaryBuilder.createIndex(dict, SimpleEntry.LANG1);\r
-      DictionaryBuilder.createIndex(dict, SimpleEntry.LANG2);\r
-      final RandomAccessFile raf = new RandomAccessFile(file, "rw");\r
-      dict.write(raf);\r
-      raf.close();\r
-      \r
-//      goldenDict = dict;\r
-    }\r
-\r
-    final RandomAccessFile raf = new RandomAccessFile(file, "r");\r
-    final Dictionary dict = new Dictionary(raf);\r
-    \r
-    assertEquals(entries, dict.entries);\r
-    \r
-    assertEquals("der", dict.languageDatas[0].sortedIndex.get(0).word);\r
-    assertEquals("die", dict.languageDatas[0].sortedIndex.get(1).word);\r
-    \r
-    assertEquals(0, dict.languageDatas[0].getPrevTokenRow(0));\r
-    assertEquals(0, dict.languageDatas[0].getPrevTokenRow(2));\r
-    assertEquals(0, dict.languageDatas[0].getPrevTokenRow(1));\r
-    assertEquals(4, dict.languageDatas[0].getPrevTokenRow(6));\r
-\r
-    assertEquals(2, dict.languageDatas[0].getNextTokenRow(0));\r
-    assertEquals(2, dict.languageDatas[0].getNextTokenRow(1));\r
-    assertEquals(4, dict.languageDatas[0].getNextTokenRow(2));\r
-    assertEquals(8, dict.languageDatas[0].getNextTokenRow(6));\r
-    assertEquals(dict.languageDatas[0].rows.size() - 1, dict.languageDatas[0].getNextTokenRow(dict.languageDatas[0].rows.size() - 2));\r
-    assertEquals(dict.languageDatas[0].rows.size() - 1, dict.languageDatas[0].getNextTokenRow(dict.languageDatas[0].rows.size() - 1));\r
-\r
-    for (final IndexEntry indexEntry : dict.languageDatas[0].sortedIndex) {\r
-      System.out.println(indexEntry);\r
-    }\r
-\r
-    int rowCount = 0;\r
-    for (final Row row : dict.languageDatas[0].rows) {\r
-      if (row.index >= 0) {\r
-        System.out.println("  " + rowCount + ":" + dict.entries.get(row.index));\r
-      } else {\r
-        System.out.println(rowCount + ":" + dict.languageDatas[0].sortedIndex.get(-row.index - 1));\r
-      }\r
-      ++rowCount;\r
-    }\r
-\r
-    for (int l = 0; l <= 1; l++) {\r
-      final LanguageData languageData = dict.languageDatas[l];\r
-      for (int i = 0; i < languageData.sortedIndex.size(); i++) {\r
-        final IndexEntry indexEntry = languageData.sortedIndex.get(i);\r
-        if (indexEntry.word.toLowerCase().equals("dieb"))\r
-          System.out.println();\r
-        final IndexEntry lookedUpEntry = languageData.sortedIndex.get(languageData.lookup(indexEntry.word, new AtomicBoolean(false)));\r
-        if (!indexEntry.word.toLowerCase().equals(lookedUpEntry.word.toLowerCase()))\r
-          System.out.println();\r
-        assertEquals(indexEntry.word.toLowerCase(), lookedUpEntry.word.toLowerCase());\r
-      }\r
-    }\r
-    \r
-    assertEquals("die", dict.languageDatas[0].sortedIndex.get(dict.languageDatas[0].lookup("Die", new AtomicBoolean())).word);\r
-    assertEquals("die", dict.languageDatas[0].sortedIndex.get(dict.languageDatas[0].lookup("die", new AtomicBoolean())).word);\r
-\r
-  }\r
-  \r
-  public void testTextNorm() throws IOException {\r
-    System.out.println("\n\ntestTextNorm");\r
-    final List<SimpleEntry> entries = Arrays.asList(\r
-        SimpleEntry.parseFromLine("Hund {m} :: dog", true),\r
-        SimpleEntry.parseFromLine("'CHRISTOS' :: doh", true),\r
-        SimpleEntry.parseFromLine("\"Pick-up\"-Presse {f} :: baler", true),\r
-        SimpleEntry.parseFromLine("(Ach was), echt? [auch ironisch] :: No shit! [also ironic]", true),\r
-        SimpleEntry.parseFromLine("(akuter) Myokardinfarkt {m} <AMI / MI> :: (acute) myocardial infarction <AMI / MI>", true),\r
-        SimpleEntry.parseFromLine("(reine) Vermutung {f} :: guesswork", true),\r
-        SimpleEntry.parseFromLine("(mit) 6:1 vorne liegen :: to be 6-1 up [football]", true),\r
-        SimpleEntry.parseFromLine("(auf) den Knopf drücken [auch fig.: auslösen] :: to push the button [also fig.: initiate]", false),\r
-        SimpleEntry.parseFromLine("Adjektiv {n} /Adj./; Eigenschaftswort {n} [gramm.] | Adjektive {pl}; Eigenschaftswoerter {pl} :: adjective /adj./ | adjectives", true),\r
-        SimpleEntry.parseFromLine("Älteste {m,f}; Ältester :: oldest; eldest", true),\r
-        SimpleEntry.parseFromLine("\"...\", schloss er an. :: '...,' he added.", true),\r
-        SimpleEntry.parseFromLine("besonderer | besondere | besonderes :: extra", false),\r
-        SimpleEntry.parseFromLine("| zu Pferde; zu Pferd | reiten :: horseback | on horseback | go on horseback", true),\r
-        SimpleEntry.parseFromLine("Hauptaugenmerk {m} | sein Hauptaugenmerk richten auf ::  | to focus (one's) attention on", true),\r
-        SimpleEntry.parseFromLine("&#963;-Algebra {f} :: &#963;-field", true)\r
-        );\r
-\r
-    assertFalse(entries.contains(null));\r
-    \r
-    // Hyphenated words get put both multiple listings.\r
-\r
-    final Dictionary dict = new Dictionary("test", Language.de, Language.en);\r
-    dict.entries.addAll(entries);\r
-    DictionaryBuilder.createIndex(dict, SimpleEntry.LANG1);\r
-    DictionaryBuilder.createIndex(dict, SimpleEntry.LANG2);\r
-    \r
-    for (int lang = 0; lang <= 1; lang++) {\r
-      final LanguageData languageData = dict.languageDatas[lang];\r
-      System.out.println("\n" + languageData.language);\r
-      final Set<String> words = new LinkedHashSet<String>();\r
-      for (int i = 0; i < languageData.sortedIndex.size(); i++) {\r
-        final IndexEntry indexEntry = languageData.sortedIndex.get(i);\r
-        System.out.println(indexEntry);\r
-        words.add(indexEntry.word);\r
-      }\r
-      if (lang == 0) {\r
-        assertTrue(words.contains("CHRISTOS"));\r
-        assertTrue(words.contains("akuter"));\r
-        assertTrue(words.contains("σ-Algebra"));\r
-\r
-        assertFalse(words.contains("-Algebra"));\r
-      } else {\r
-        assertTrue(words.contains("σ-field"));\r
-        assertTrue(words.contains("6-1"));\r
-      }\r
-    }\r
-\r
-  }\r
-  \r
-  public void testGermanSort() {\r
-    assertEquals("aüÄ", Language.de.textNorm("aueAe"));\r
-    final List<String> words = Arrays.asList(\r
-        "er-ben",\r
-        "erben",\r
-        "Erben",\r
-        "Erbse",\r
-        "Erbsen",\r
-        "essen",\r
-        "Essen",\r
-        "Grosformat",\r
-        "Grosformats",\r
-        "Grossformat",\r
-        "Großformat",\r
-        "Grossformats",\r
-        "Großformats",\r
-        "Großpoo",\r
-        "Großpoos",\r
-        "hulle",\r
-        "Hulle",\r
-        "hülle",\r
-        "huelle",\r
-        "Hülle",\r
-        "Huelle",\r
-        "Hum"\r
-        );\r
-    assertEquals(0, Language.de.sortComparator.compare("hülle", "huelle"));\r
-    assertEquals(0, Language.de.sortComparator.compare("huelle", "hülle"));\r
-    \r
-    assertEquals(-1, Language.de.sortComparator.compare("hülle", "Hülle"));\r
-    assertEquals(0, Language.de.findComparator.compare("hülle", "Hülle"));\r
-    assertEquals(-1, Language.de.findComparator.compare("hulle", "Hülle"));\r
-\r
-    \r
-    for (final String s : words) {\r
-      System.out.println(s + "\t" + Language.de.textNorm(s));\r
-    }\r
-    final List<String> sorted = new ArrayList<String>(words);\r
-//    Collections.shuffle(shuffled, new Random(0));\r
-    Collections.sort(sorted, Language.de.sortComparator);\r
-    System.out.println(sorted.toString());\r
-    for (int i = 0; i < words.size(); ++i) {\r
-      System.out.println(words.get(i) + "\t" + sorted.get(i));\r
-      assertEquals(words.get(i), sorted.get(i));\r
-    }\r
-  }\r
-\r
-  public void testEnglishSort() {\r
-\r
-    final List<String> words = Arrays.asList(\r
-        "pre-print", \r
-        "preppie", \r
-        "preppy",\r
-        "preprocess");\r
-    \r
-    final List<String> sorted = new ArrayList<String>(words);\r
-    Collections.sort(sorted, Language.en.sortComparator);\r
-    for (int i = 0; i < words.size(); ++i) {\r
-      if (i > 0) {\r
-        assertTrue(Language.en.sortComparator.compare(words.get(i-1), words.get(i)) < 0);\r
-      }\r
-      System.out.println(words.get(i) + "\t" + sorted.get(i));\r
-      assertEquals(words.get(i), sorted.get(i));\r
-    }\r
-    \r
-    assertTrue(Language.en.sortCollator.compare("pre-print", "preppy") < 0);\r
-\r
-  }\r
-  \r
-  public void testLanguage() {\r
-    System.out.println("languages=" + Language.symbolToLangauge.values());\r
-    assertEquals(Language.de, Language.lookup("de"));\r
-    assertEquals(Language.en, Language.lookup("en"));\r
-    assertEquals("es", Language.lookup("es").symbol);\r
-  }\r
-\r
-}\r
diff --git a/src/com/hughes/android/dictionary/InputParser.java b/src/com/hughes/android/dictionary/InputParser.java
deleted file mode 100644 (file)
index 5ea1374..0000000
+++ /dev/null
@@ -1,15 +0,0 @@
-package com.hughes.android.dictionary;
-
-import java.io.File;
-
-public interface InputParser {
-  
-  void parse(final File file, final Dictionary dest);
-  
-  class LineParser implements InputParser {
-    @Override
-    public void parse(File file, Dictionary dest) {
-    }
-  }
-
-}
index 3ed461776af5b1a85101d1b26a0d9d59112c71dc..31d8c926ffc11dadf2d41b54080badc8b24f7745 100644 (file)
@@ -18,6 +18,7 @@ import javax.xml.parsers.SAXParserFactory;
 import org.xml.sax.Attributes;
 import org.xml.sax.SAXException;
 
+import com.hughes.android.dictionary.engine.Dictionary;
 import com.hughes.util.MapUtil;
 import com.hughes.util.StringUtil;
 
index 55512be76bfcd19a7fb01ac8bfa1e7b02bd40fbb..2119a10fb61aecf5abcd34cfe8f9f8c9b21be416 100644 (file)
@@ -11,7 +11,6 @@ import java.util.logging.Logger;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 
-import com.hughes.android.dictionary.Language;
 import com.hughes.android.dictionary.engine.PairEntry.Pair;
 
 public class DictFileParser {
index bff164b91f0e75e85826f3e35844dac4567540e0..6aea57709be1dd6f23f0e4c9c2ff5b5dab0ef96c 100644 (file)
@@ -9,7 +9,6 @@ import java.util.ArrayList;
 import java.util.List;
 import java.util.Map;
 
-import com.hughes.android.dictionary.Language;
 import com.hughes.util.Args;
 import com.hughes.util.FileUtil;
 
index 59bb031b4ec67a4c9113eca8f3c9dc946ef2cba7..d4ca69f00f6ddbf73ec5e93e76c04c6c1054b2d5 100644 (file)
@@ -2,10 +2,15 @@ package com.hughes.android.dictionary.engine;
 
 import java.io.IOException;
 import java.io.RandomAccessFile;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.List;
 import java.util.concurrent.atomic.AtomicBoolean;
 
 import junit.framework.TestCase;
 
+
 public class DictionaryTest extends TestCase {
   
   RandomAccessFile raf;
@@ -47,10 +52,23 @@ public class DictionaryTest extends TestCase {
       assertEquals(indexEntry.token.toLowerCase(), row.getToken().toLowerCase());
     }
 
+    // TODO: maybe if user types capitalization, use it.
     assertEquals("aaac", deIndex.find("AAAC", new AtomicBoolean(false)).getToken());
     assertEquals("aaac", deIndex.find("aaac", new AtomicBoolean(false)).getToken());
     assertEquals("aaac", deIndex.find("AAAc", new AtomicBoolean(false)).getToken());
     assertEquals("aaac", deIndex.find("aaac", new AtomicBoolean(false)).getToken());
+    
+    // Before the beginning.
+    assertEquals("40", deIndex.find("__", new AtomicBoolean(false)).getToken());
+    
+    // After the end.
+    assertEquals("Zweckorientiertheit", deIndex.find("ZZZZZ", new AtomicBoolean(false)).getToken());
+
+    assertEquals("aaac", deIndex.find("aaaca", new AtomicBoolean(false)).getToken());
+    
+    assertEquals("überprüfe", deIndex.find("ueberprüfe", new AtomicBoolean(false)).getToken());
+    assertEquals("überprüfe", deIndex.find("ueberpruefe", new AtomicBoolean(false)).getToken());
+
   }
   
   public void testGermanTokenRows() {
@@ -76,7 +94,83 @@ public class DictionaryTest extends TestCase {
       // This will break if the Row cache isn't big enough.
       assertEquals(lastTokenRow, row.getTokenRow(false));
     }
+  }
+  
+  @SuppressWarnings("unchecked")
+  public void testGermanSort() {
+    assertEquals("aüÄÄ", Language.de.textNorm("aueAeAE", false));
+    final List<String> words = Arrays.asList(
+        "er-ben",
+        "erben",
+        "Erben",
+        "Erbse",
+        "Erbsen",
+        "essen",
+        "Essen",
+        "Grosformat",
+        "Grosformats",
+        "Grossformat",
+        "Großformat",
+        "Grossformats",
+        "Großformats",
+        "Großpoo",
+        "Großpoos",
+        "hulle",
+        "Hulle",
+        "hülle",
+        "huelle",
+        "Hülle",
+        "Huelle",
+        "Hum"
+        );
+    assertEquals(0, Language.de.sortComparator.compare("hülle", "huelle"));
+    assertEquals(0, Language.de.sortComparator.compare("huelle", "hülle"));
+    
+    assertEquals(-1, Language.de.sortComparator.compare("hülle", "Hülle"));
+    assertEquals(0, Language.de.findComparator.compare("hülle", "Hülle"));
+    assertEquals(-1, Language.de.findComparator.compare("hulle", "Hülle"));
+
+    
+    for (final String s : words) {
+      System.out.println(s + "\t" + Language.de.textNorm(s, false));
+    }
+    final List<String> sorted = new ArrayList<String>(words);
+//    Collections.shuffle(shuffled, new Random(0));
+    Collections.sort(sorted, Language.de.sortComparator);
+    System.out.println(sorted.toString());
+    for (int i = 0; i < words.size(); ++i) {
+      System.out.println(words.get(i) + "\t" + sorted.get(i));
+      assertEquals(words.get(i), sorted.get(i));
+    }
+  }
 
+  @SuppressWarnings("unchecked")
+  public void testEnglishSort() {
+
+    final List<String> words = Arrays.asList(
+        "pre-print", 
+        "preppie", 
+        "preppy",
+        "preprocess");
+    
+    final List<String> sorted = new ArrayList<String>(words);
+    Collections.sort(sorted, Language.en.getSortCollator());
+    for (int i = 0; i < words.size(); ++i) {
+      if (i > 0) {
+        assertTrue(Language.en.getSortCollator().compare(words.get(i-1), words.get(i)) < 0);
+      }
+      System.out.println(words.get(i) + "\t" + sorted.get(i));
+      assertEquals(words.get(i), sorted.get(i));
+    }
+    
+    assertTrue(Language.en.getSortCollator().compare("pre-print", "preppy") < 0);
+
+  }
+  
+  public void testLanguage() {
+    assertEquals(Language.de, Language.lookup("de"));
+    assertEquals(Language.en, Language.lookup("en"));
+    assertEquals("es", Language.lookup("es").getSymbol());
   }
 
 
index 0f35b50710fada821fcb93277d5e5a3039ab3ede..aa09421df726d502849ccf009ca7b9ec9544a0bb 100644 (file)
@@ -9,7 +9,6 @@ import java.util.Set;
 import java.util.SortedMap;
 import java.util.TreeMap;
 
-import com.hughes.android.dictionary.Language;
 
 public class IndexBuilder {