+++ /dev/null
-package com.hughes.android.dictionary;\r
-\r
-import java.io.BufferedReader;\r
-import java.io.File;\r
-import java.io.FileInputStream;\r
-import java.io.FileNotFoundException;\r
-import java.io.IOException;\r
-import java.io.InputStreamReader;\r
-import java.io.RandomAccessFile;\r
-import java.nio.charset.Charset;\r
-import java.util.ArrayList;\r
-import java.util.Collections;\r
-import java.util.List;\r
-import java.util.Map;\r
-import java.util.Random;\r
-import java.util.Set;\r
-import java.util.TreeMap;\r
-\r
-import javax.xml.parsers.ParserConfigurationException;\r
-\r
-import org.xml.sax.SAXException;\r
-\r
-import com.hughes.android.dictionary.Dictionary.IndexEntry;\r
-import com.hughes.android.dictionary.Dictionary.LanguageData;\r
-import com.hughes.android.dictionary.Dictionary.Row;\r
-import com.hughes.util.Args;\r
-import com.hughes.util.FileUtil;\r
-\r
-public class DictionaryBuilder {\r
- \r
- /**\r
-  * Command-line entry point: loads every configured input into a Dictionary, builds the\r
-  * index for both languages and writes the result to the dictOut file.\r
-  *\r
-  * Required arguments: lang1, lang2, dictOut and summaryText (a leading '@' means the\r
-  * summary is read from the named file). Optional: maxEntries. Inputs are supplied as\r
-  * inputN for N in 0..99, each with a mandatory inputNName, an inputNFormat of dictcc,\r
-  * chemnitz or wiktionary, and an optional inputNCharset (defaults to UTF8).\r
-  * Arguments still unconsumed after parsing are reported as a warning on stderr.\r
-  */\r
- public static void main(String[] args) throws IOException,\r
-     ClassNotFoundException, ParserConfigurationException, SAXException {\r
-\r
-   final Map<String,String> keyValueArgs = Args.keyValueArgs(args);\r
-\r
-   final Language lang1 = Language.lookup(keyValueArgs.remove("lang1"));\r
-   final Language lang2 = Language.lookup(keyValueArgs.remove("lang2"));\r
-   if (lang1 == null || lang2 == null) {\r
-     fatalError("--lang1= and --lang2= must both be specified.");\r
-   }\r
-\r
-   final String dictOutFilename = keyValueArgs.remove("dictOut");\r
-   if (dictOutFilename == null) {\r
-     fatalError("--dictOut= must be specified.");\r
-   }\r
-\r
-   String summaryText = keyValueArgs.remove("summaryText");\r
-   if (summaryText == null) {\r
-     fatalError("--summaryText= must be specified.");\r
-   }\r
-   if (summaryText.startsWith("@")) {\r
-     summaryText = FileUtil.readToString(new File(summaryText.substring(1)));\r
-   }\r
-\r
-   // Report a malformed number as a usage error instead of an uncaught NumberFormatException.\r
-   final String maxEntriesString = keyValueArgs.remove("maxEntries");\r
-   int parsedMaxEntries = Integer.MAX_VALUE;\r
-   if (maxEntriesString != null) {\r
-     try {\r
-       parsedMaxEntries = Integer.parseInt(maxEntriesString);\r
-     } catch (NumberFormatException e) {\r
-       fatalError("--maxEntries= must be an integer, got: " + maxEntriesString);\r
-     }\r
-   }\r
-   final int maxEntries = parsedMaxEntries;\r
-\r
-   System.out.println("lang1=" + lang1);\r
-   System.out.println("lang2=" + lang2);\r
-   System.out.println("summaryText=" + summaryText);\r
-   System.out.println("dictOut=" + dictOutFilename);\r
-\r
-   final Dictionary dict = new Dictionary(summaryText, lang1, lang2);\r
-\r
-   for (int i = 0; i < 100; ++i) {\r
-     final String prefix = "input" + i;\r
-     if (!keyValueArgs.containsKey(prefix)) {\r
-       continue;\r
-     }\r
-     final File file = new File(keyValueArgs.remove(prefix));\r
-     System.out.println("Processing: " + file);\r
-     String charsetName = keyValueArgs.remove(prefix + "Charset");\r
-     if (charsetName == null) {\r
-       charsetName = "UTF8";\r
-     }\r
-     final Charset charset = Charset.forName(charsetName);\r
-     final String inputName = keyValueArgs.remove(prefix + "Name");\r
-     if (inputName == null) {\r
-       fatalError("Must specify human readable name for: " + prefix + "Name");\r
-     }\r
-\r
-     final String inputFormat = keyValueArgs.remove(prefix + "Format");\r
-     if ("dictcc".equals(inputFormat)) {\r
-       processLinedInputFile(dict, file, charset, false, maxEntries);\r
-     } else if ("chemnitz".equals(inputFormat)) {\r
-       processLinedInputFile(dict, file, charset, true, maxEntries);\r
-     } else if ("wiktionary".equals(inputFormat)) {\r
-       new WiktionaryXmlParser(dict).parse(file);\r
-     } else {\r
-       fatalError("Invalid or missing input format: " + inputFormat);\r
-     }\r
-\r
-     dict.sources.add(inputName);\r
-     System.out.println("Done: " + file + "\n\n");\r
-   }\r
-\r
-   if (!keyValueArgs.isEmpty()) {\r
-     System.err.println("WARNING: couldn't parse arguments: " + keyValueArgs);\r
-   }\r
-\r
-   createIndex(dict, SimpleEntry.LANG1);\r
-   createIndex(dict, SimpleEntry.LANG2);\r
-\r
-   System.out.println("Writing dictionary.");\r
-   final RandomAccessFile dictOut = new RandomAccessFile(dictOutFilename, "rw");\r
-   try {\r
-     dictOut.setLength(0);\r
-     dict.write(dictOut);\r
-   } finally {\r
-     // Release the file handle even when writing fails part-way.\r
-     dictOut.close();\r
-   }\r
-\r
-   // Print a fixed-seed sample of index entries with their rows as a quick visual check.\r
-   final Random random = new Random(0);\r
-   for (byte lang = 0; lang < 2; ++lang) {\r
-     final LanguageData languageData = dict.languageDatas[lang];\r
-     System.out.println("\nRandom words for: " + languageData.language.getSymbol());\r
-     final int indexSize = languageData.sortedIndex.size();\r
-     if (indexSize == 0) {\r
-       // Random.nextInt(0) would throw IllegalArgumentException; nothing to sample.\r
-       continue;\r
-     }\r
-     final List<Row> rows = languageData.rows;\r
-     for (int i = 0; i < 20; ++i) {\r
-       final IndexEntry entry = languageData.sortedIndex.get(random.nextInt(indexSize));\r
-       int r = entry.startRow;\r
-       System.out.println(languageData.rowToString(rows.get(r), false));\r
-       ++r;\r
-       // Rows following a token row belong to it until the next token row.\r
-       while (r < rows.size() && !rows.get(r).isToken()) {\r
-         System.out.println(" " + languageData.rowToString(rows.get(r), false));\r
-         ++r;\r
-       }\r
-     }\r
-   }\r
- }\r
-\r
- /** Prints the given message to stderr, then terminates the JVM with exit status 1. */\r
- private static void fatalError(final String string) {\r
-   System.err.println(string);\r
-   System.exit(1);\r
- }\r
-\r
- /**\r
-  * Reads a line-oriented dictionary file and appends each parsed line to dict.entries.\r
-  *\r
-  * Blank lines and lines starting with '#' are skipped; lines that SimpleEntry cannot\r
-  * parse are reported on stderr and skipped. Reading stops once maxEntries entries have\r
-  * been accepted. When maxEntries is below 200 every raw line is echoed to stdout.\r
-  *\r
-  * @param dict destination dictionary\r
-  * @param file input file\r
-  * @param charset encoding used to decode the file\r
-  * @param hasMultipleSubentries passed through to SimpleEntry.parseFromLine\r
-  * @param maxEntries upper bound on the number of entries taken from this file\r
-  * @throws FileNotFoundException if the file cannot be opened\r
-  * @throws IOException if reading fails\r
-  */\r
- private static void processLinedInputFile(final Dictionary dict, final File file,\r
-     final Charset charset, final boolean hasMultipleSubentries,\r
-     final int maxEntries) throws FileNotFoundException, IOException {\r
-   final BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(file), charset));\r
-   try {\r
-     String line;\r
-     int lineCount = 0;\r
-     while ((line = reader.readLine()) != null && lineCount < maxEntries) {\r
-       if (maxEntries < 200) {\r
-         System.out.println(line);\r
-       }\r
-       line = line.trim();\r
-       if (line.equals("") || line.startsWith("#")) {\r
-         continue;\r
-       }\r
-\r
-       final SimpleEntry entry = SimpleEntry.parseFromLine(line, hasMultipleSubentries);\r
-       if (entry == null) {\r
-         System.err.println("Invalid entry: " + line);\r
-         continue;\r
-       }\r
-\r
-       dict.entries.add(entry);\r
-\r
-       // lineCount counts accepted entries only, so progress is reported per 10000 entries.\r
-       if (lineCount % 10000 == 0) {\r
-         System.out.println("IndexBuilder: " + "lineCount=" + lineCount);\r
-       }\r
-       lineCount++;\r
-     }\r
-   } finally {\r
-     // Close the underlying stream even if reading or parsing throws.\r
-     reader.close();\r
-   }\r
- }\r
-\r
- /**\r
-  * Builds the token index and row list of one language from dict.entries.\r
-  *\r
-  * Every indexable token of every entry is collected into a map ordered by the language's\r
-  * sortComparator, the entries under each token are sorted by TokenEntryData's ordering,\r
-  * and the result is appended to the language's sortedIndex and rows. A token at index\r
-  * position t gets a row with index -(t + 1), followed by one row per entry holding the\r
-  * entry's position in dict.entries.\r
-  *\r
-  * @param dict dictionary whose entries are indexed and whose language data is filled in\r
-  * @param lang index into dict.languageDatas selecting the language\r
-  */\r
- public static void createIndex(final Dictionary dict, final byte lang) {\r
-   System.out.println("Creating index: " + lang);\r
-\r
-   final Map<String, TokenData> tokenToData = new TreeMap<String, TokenData>(dict.languageDatas[lang].language.sortComparator);\r
-\r
-   for (int e = 0; e < dict.entries.size(); ++e) {\r
-     // Must be the real entry: a null here makes the very next call throw NullPointerException.\r
-     final SimpleEntry entry = dict.entries.get(e);\r
-     final Set<String> tokens = entry.getIndexableTokens(lang);\r
-     for (final String token : tokens) {\r
-       TokenData tokenData = tokenToData.get(token);\r
-       if (tokenData == null) {\r
-         tokenData = new TokenData(token);\r
-         tokenToData.put(token, tokenData);\r
-       }\r
-       tokenData.entries.add(new TokenEntryData(lang, token, entry, e));\r
-     }\r
-\r
-     if (e % 10000 == 0) {\r
-       System.out.println("createIndex: " + "e=" + e);\r
-     }\r
-   }\r
-\r
-   // The TreeMap already iterates in sortComparator order, so copying its values is the sort.\r
-   System.out.println("Sorting TokenData...");\r
-   final List<TokenData> sortedTokenData = new ArrayList<TokenData>(tokenToData\r
-       .values());\r
-\r
-   System.out.println("Sorting entries within each TokenData...");\r
-   for (final TokenData tokenData : sortedTokenData) {\r
-     Collections.sort(tokenData.entries);\r
-   }\r
-\r
-   // Put it all together.\r
-   System.out.println("Assembling final data structures...");\r
-   final List<Row> rows = dict.languageDatas[lang].rows;\r
-   final List<IndexEntry> indexEntries = dict.languageDatas[lang].sortedIndex;\r
-   for (int t = 0; t < sortedTokenData.size(); ++t) {\r
-     final TokenData tokenData = sortedTokenData.get(t);\r
-     final int startRow = rows.size();\r
-     final IndexEntry indexEntry = new IndexEntry(tokenData.token, startRow);\r
-     indexEntries.add(indexEntry);\r
-\r
-     // Token rows carry a negative index so they can be told apart from entry rows.\r
-     final Row tokenRow = new Row(-(t + 1));\r
-     rows.add(tokenRow);\r
-\r
-     for (final TokenEntryData entryData : tokenData.entries) {\r
-       final Row entryRow = new Row(entryData.entryIndex);\r
-       rows.add(entryRow);\r
-     }\r
-   }\r
-\r
- }\r
-\r
- /**\r
-  * One (token, entry) pairing together with the keys that rank the entry among all\r
-  * entries listed under the same token: earliest position of the token inside a matching\r
-  * subentry, then shortest matching subentry, then lowest matching subentry index. The\r
-  * three minima are tracked independently and may come from different subentries.\r
-  */\r
- static final class TokenEntryData implements Comparable<TokenEntryData> {\r
-   // Starting value for every key; small enough that subtracting two keys cannot overflow.\r
-   private static final int bigNoOverflow = 100000;\r
-\r
-   final String token;\r
-   final SimpleEntry entry;\r
-   final int entryIndex;\r
-\r
-   int minSubEntryIndexOf = bigNoOverflow;\r
-   int minSubEntryLength = bigNoOverflow;\r
-   int minSubEntry = bigNoOverflow;\r
-\r
-   /**\r
-    * @param lang language whose text of the entry is scanned for the token\r
-    * @param token the indexed token\r
-    * @param entry the entry containing the token\r
-    * @param entryIndex position of the entry in the dictionary's entry list\r
-    */\r
-   public TokenEntryData(final byte lang, final String token, final SimpleEntry entry, final int entryIndex) {\r
-     this.token = token;\r
-     this.entry = entry;\r
-     this.entryIndex = entryIndex;\r
-\r
-     int position = 0;\r
-     for (final String text : entry.getAllText(lang)) {\r
-       final int found = text.indexOf(token);\r
-       if (found >= 0) {\r
-         minSubEntryIndexOf = Math.min(minSubEntryIndexOf, found);\r
-         minSubEntryLength = Math.min(minSubEntryLength, text.length());\r
-         minSubEntry = Math.min(minSubEntry, position);\r
-       }\r
-       ++position;\r
-     }\r
-   }\r
-\r
-   /** Orders by the three ranking keys in turn; only meaningful for equal tokens. */\r
-   @Override\r
-   public int compareTo(final TokenEntryData that) {\r
-     assert this.token.equals(that.token);\r
-\r
-     int difference = this.minSubEntryIndexOf - that.minSubEntryIndexOf;\r
-     if (difference == 0) {\r
-       difference = this.minSubEntryLength - that.minSubEntryLength;\r
-     }\r
-     if (difference == 0) {\r
-       difference = this.minSubEntry - that.minSubEntry;\r
-     }\r
-     return difference;\r
-   }\r
- }\r
-\r
- /** Accumulator used while indexing: a token and the entries found to contain it. */\r
- static final class TokenData {\r
-   final String token;\r
-   final List<TokenEntryData> entries;\r
-\r
-   // Not assigned anywhere in this class; left at its default unless a caller sets it.\r
-   int startRow;\r
-\r
-   public TokenData(final String token) {\r
-     this.token = token;\r
-     this.entries = new ArrayList<TokenEntryData>();\r
-   }\r
- }\r
-\r
-}\r
+++ /dev/null
-package com.hughes.android.dictionary;\r
-\r
-import java.io.File;\r
-import java.io.IOException;\r
-import java.io.RandomAccessFile;\r
-import java.util.ArrayList;\r
-import java.util.Arrays;\r
-import java.util.Collections;\r
-import java.util.LinkedHashSet;\r
-import java.util.List;\r
-import java.util.Set;\r
-import java.util.concurrent.atomic.AtomicBoolean;\r
-\r
-import junit.framework.TestCase;\r
-\r
-import com.hughes.android.dictionary.Dictionary.IndexEntry;\r
-import com.hughes.android.dictionary.Dictionary.LanguageData;\r
-import com.hughes.android.dictionary.Dictionary.Row;\r
-\r
-public class DictionaryTest extends TestCase {\r
-\r
- /**\r
-  * Round-trips a small German/English dictionary through a temp file and checks the\r
-  * entries, the first index words, token-row navigation and case-insensitive lookup.\r
-  */\r
- public void testDictionary() throws IOException {\r
-   final File file = File.createTempFile("asdf", "asdf");\r
-   file.deleteOnExit();\r
-\r
-   final List<SimpleEntry> entries = Arrays.asList(\r
-       SimpleEntry.parseFromLine("der Hund :: the dog", false),\r
-       SimpleEntry.parseFromLine("Die grosse Katze :: The big cat", false),\r
-       SimpleEntry.parseFromLine("die Katze :: the cat", false),\r
-       SimpleEntry.parseFromLine("gross :: big", false),\r
-       SimpleEntry.parseFromLine("Dieb :: thief", false),\r
-       SimpleEntry.parseFromLine("rennen :: run", false));\r
-\r
-   {\r
-     final Dictionary dict = new Dictionary("test", Language.de, Language.en);\r
-     dict.entries.addAll(entries);\r
-     DictionaryBuilder.createIndex(dict, SimpleEntry.LANG1);\r
-     DictionaryBuilder.createIndex(dict, SimpleEntry.LANG2);\r
-     final RandomAccessFile raf = new RandomAccessFile(file, "rw");\r
-     try {\r
-       dict.write(raf);\r
-     } finally {\r
-       raf.close();\r
-     }\r
-   }\r
-\r
-   final RandomAccessFile raf = new RandomAccessFile(file, "r");\r
-   try {\r
-     final Dictionary dict = new Dictionary(raf);\r
-     final LanguageData german = dict.languageDatas[0];\r
-\r
-     assertEquals(entries, dict.entries);\r
-\r
-     assertEquals("der", german.sortedIndex.get(0).word);\r
-     assertEquals("die", german.sortedIndex.get(1).word);\r
-\r
-     assertEquals(0, german.getPrevTokenRow(0));\r
-     assertEquals(0, german.getPrevTokenRow(2));\r
-     assertEquals(0, german.getPrevTokenRow(1));\r
-     assertEquals(4, german.getPrevTokenRow(6));\r
-\r
-     assertEquals(2, german.getNextTokenRow(0));\r
-     assertEquals(2, german.getNextTokenRow(1));\r
-     assertEquals(4, german.getNextTokenRow(2));\r
-     assertEquals(8, german.getNextTokenRow(6));\r
-     assertEquals(german.rows.size() - 1, german.getNextTokenRow(german.rows.size() - 2));\r
-     assertEquals(german.rows.size() - 1, german.getNextTokenRow(german.rows.size() - 1));\r
-\r
-     for (final IndexEntry indexEntry : german.sortedIndex) {\r
-       System.out.println(indexEntry);\r
-     }\r
-\r
-     // Dump the row list: non-negative rows are entries, negative rows are tokens.\r
-     int rowCount = 0;\r
-     for (final Row row : german.rows) {\r
-       if (row.index >= 0) {\r
-         System.out.println(" " + rowCount + ":" + dict.entries.get(row.index));\r
-       } else {\r
-         System.out.println(rowCount + ":" + german.sortedIndex.get(-row.index - 1));\r
-       }\r
-       ++rowCount;\r
-     }\r
-\r
-     // Every indexed word must be found again by lookup, ignoring case.\r
-     for (int l = 0; l <= 1; l++) {\r
-       final LanguageData languageData = dict.languageDatas[l];\r
-       for (int i = 0; i < languageData.sortedIndex.size(); i++) {\r
-         final IndexEntry indexEntry = languageData.sortedIndex.get(i);\r
-         final IndexEntry lookedUpEntry = languageData.sortedIndex.get(languageData.lookup(indexEntry.word, new AtomicBoolean(false)));\r
-         assertEquals(indexEntry.word.toLowerCase(), lookedUpEntry.word.toLowerCase());\r
-       }\r
-     }\r
-\r
-     assertEquals("die", german.sortedIndex.get(german.lookup("Die", new AtomicBoolean())).word);\r
-     assertEquals("die", german.sortedIndex.get(german.lookup("die", new AtomicBoolean())).word);\r
-   } finally {\r
-     // Release the read handle whether or not an assertion failed.\r
-     raf.close();\r
-   }\r
-\r
- }\r
- \r
- /**\r
-  * Indexes a set of entries containing quotes, brackets, hyphens and non-ASCII letters\r
-  * and checks which words end up in each language's index.\r
-  */\r
- public void testTextNorm() throws IOException {\r
-   System.out.println("\n\ntestTextNorm");\r
-   final List<SimpleEntry> entries = Arrays.asList(\r
-       SimpleEntry.parseFromLine("Hund {m} :: dog", true),\r
-       SimpleEntry.parseFromLine("'CHRISTOS' :: doh", true),\r
-       SimpleEntry.parseFromLine("\"Pick-up\"-Presse {f} :: baler", true),\r
-       SimpleEntry.parseFromLine("(Ach was), echt? [auch ironisch] :: No shit! [also ironic]", true),\r
-       SimpleEntry.parseFromLine("(akuter) Myokardinfarkt {m} <AMI / MI> :: (acute) myocardial infarction <AMI / MI>", true),\r
-       SimpleEntry.parseFromLine("(reine) Vermutung {f} :: guesswork", true),\r
-       SimpleEntry.parseFromLine("(mit) 6:1 vorne liegen :: to be 6-1 up [football]", true),\r
-       SimpleEntry.parseFromLine("(auf) den Knopf drücken [auch fig.: auslösen] :: to push the button [also fig.: initiate]", false),\r
-       SimpleEntry.parseFromLine("Adjektiv {n} /Adj./; Eigenschaftswort {n} [gramm.] | Adjektive {pl}; Eigenschaftswoerter {pl} :: adjective /adj./ | adjectives", true),\r
-       SimpleEntry.parseFromLine("Älteste {m,f}; Ältester :: oldest; eldest", true),\r
-       SimpleEntry.parseFromLine("\"...\", schloss er an. :: '...,' he added.", true),\r
-       SimpleEntry.parseFromLine("besonderer | besondere | besonderes :: extra", false),\r
-       SimpleEntry.parseFromLine("| zu Pferde; zu Pferd | reiten :: horseback | on horseback | go on horseback", true),\r
-       SimpleEntry.parseFromLine("Hauptaugenmerk {m} | sein Hauptaugenmerk richten auf :: | to focus (one's) attention on", true),\r
-       SimpleEntry.parseFromLine("σ-Algebra {f} :: σ-field", true)\r
-   );\r
-\r
-   // Every line above must have parsed.\r
-   assertFalse(entries.contains(null));\r
-\r
-   final Dictionary dict = new Dictionary("test", Language.de, Language.en);\r
-   dict.entries.addAll(entries);\r
-   DictionaryBuilder.createIndex(dict, SimpleEntry.LANG1);\r
-   DictionaryBuilder.createIndex(dict, SimpleEntry.LANG2);\r
-\r
-   for (int lang = 0; lang <= 1; lang++) {\r
-     final LanguageData languageData = dict.languageDatas[lang];\r
-     System.out.println("\n" + languageData.language);\r
-\r
-     final Set<String> words = new LinkedHashSet<String>();\r
-     for (final IndexEntry indexEntry : languageData.sortedIndex) {\r
-       System.out.println(indexEntry);\r
-       words.add(indexEntry.word);\r
-     }\r
-\r
-     if (lang != 0) {\r
-       assertTrue(words.contains("σ-field"));\r
-       assertTrue(words.contains("6-1"));\r
-     } else {\r
-       assertTrue(words.contains("CHRISTOS"));\r
-       assertTrue(words.contains("akuter"));\r
-       // A hyphenated word is indexed whole, not as its trailing fragment.\r
-       assertTrue(words.contains("σ-Algebra"));\r
-\r
-       assertFalse(words.contains("-Algebra"));\r
-     }\r
-   }\r
-\r
- }\r
- \r
- /**\r
-  * Checks German text normalization and that sorting an already correctly ordered word\r
-  * list with the German sort comparator leaves it unchanged.\r
-  */\r
- public void testGermanSort() {\r
-   assertEquals("aüÄ", Language.de.textNorm("aueAe"));\r
-   final List<String> words = Arrays.asList(\r
-       "er-ben",\r
-       "erben",\r
-       "Erben",\r
-       "Erbse",\r
-       "Erbsen",\r
-       "essen",\r
-       "Essen",\r
-       "Grosformat",\r
-       "Grosformats",\r
-       "Grossformat",\r
-       "Großformat",\r
-       "Grossformats",\r
-       "Großformats",\r
-       "Großpoo",\r
-       "Großpoos",\r
-       "hulle",\r
-       "Hulle",\r
-       "hülle",\r
-       "huelle",\r
-       "Hülle",\r
-       "Huelle",\r
-       "Hum"\r
-   );\r
-   assertEquals(0, Language.de.sortComparator.compare("hülle", "huelle"));\r
-   assertEquals(0, Language.de.sortComparator.compare("huelle", "hülle"));\r
-\r
-   // A comparator only promises the sign of its result, so check "< 0" rather than -1.\r
-   assertTrue(Language.de.sortComparator.compare("hülle", "Hülle") < 0);\r
-   assertEquals(0, Language.de.findComparator.compare("hülle", "Hülle"));\r
-   assertTrue(Language.de.findComparator.compare("hulle", "Hülle") < 0);\r
-\r
-   for (final String s : words) {\r
-     System.out.println(s + "\t" + Language.de.textNorm(s));\r
-   }\r
-\r
-   // The input is deliberately not shuffled: some words compare as equal (see above), and\r
-   // only the stability of Collections.sort keeps them in the listed order.\r
-   final List<String> sorted = new ArrayList<String>(words);\r
-   Collections.sort(sorted, Language.de.sortComparator);\r
-   System.out.println(sorted.toString());\r
-   for (int i = 0; i < words.size(); ++i) {\r
-     System.out.println(words.get(i) + "\t" + sorted.get(i));\r
-     assertEquals(words.get(i), sorted.get(i));\r
-   }\r
- }\r
-\r
- /**\r
-  * Checks that the English sort comparator keeps a hyphenated word in its expected place:\r
-  * the list is strictly increasing and sorting it changes nothing.\r
-  */\r
- public void testEnglishSort() {\r
-\r
-   final List<String> words = Arrays.asList(\r
-       "pre-print",\r
-       "preppie",\r
-       "preppy",\r
-       "preprocess");\r
-\r
-   final List<String> sorted = new ArrayList<String>(words);\r
-   Collections.sort(sorted, Language.en.sortComparator);\r
-\r
-   String previous = null;\r
-   for (int i = 0; i < words.size(); ++i) {\r
-     final String expected = words.get(i);\r
-     if (previous != null) {\r
-       // Each word must sort strictly after its predecessor.\r
-       assertTrue(Language.en.sortComparator.compare(previous, expected) < 0);\r
-     }\r
-     System.out.println(expected + "\t" + sorted.get(i));\r
-     assertEquals(expected, sorted.get(i));\r
-     previous = expected;\r
-   }\r
-\r
-   assertTrue(Language.en.sortCollator.compare("pre-print", "preppy") < 0);\r
-\r
- }\r
- \r
- /** Checks that language symbols resolve to the expected Language instances. */\r
- public void testLanguage() {\r
-   System.out.println("languages=" + Language.symbolToLangauge.values());\r
-\r
-   final Language german = Language.lookup("de");\r
-   assertEquals(Language.de, german);\r
-\r
-   final Language english = Language.lookup("en");\r
-   assertEquals(Language.en, english);\r
-\r
-   final Language spanish = Language.lookup("es");\r
-   assertEquals("es", spanish.symbol);\r
- }\r
-\r
-}\r
+++ /dev/null
-package com.hughes.android.dictionary;
-
-import java.io.File;
-
-/** Reads one input file into a destination Dictionary. */
-public interface InputParser {
-
-  /**
-   * Parses the given file into the given dictionary.
-   *
-   * @param file input file to read
-   * @param dest dictionary that receives the parsed content
-   */
-  void parse(File file, Dictionary dest);
-
-  /** Placeholder parser: its parse method currently does nothing. */
-  public static class LineParser implements InputParser {
-    @Override
-    public void parse(File file, Dictionary dest) {
-      // Intentionally empty.
-    }
-  }
-
-}
import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
+import com.hughes.android.dictionary.engine.Dictionary;
import com.hughes.util.MapUtil;
import com.hughes.util.StringUtil;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
-import com.hughes.android.dictionary.Language;
import com.hughes.android.dictionary.engine.PairEntry.Pair;
public class DictFileParser {
import java.util.List;
import java.util.Map;
-import com.hughes.android.dictionary.Language;
import com.hughes.util.Args;
import com.hughes.util.FileUtil;
import java.io.IOException;
import java.io.RandomAccessFile;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.List;
import java.util.concurrent.atomic.AtomicBoolean;
import junit.framework.TestCase;
+
public class DictionaryTest extends TestCase {
RandomAccessFile raf;
assertEquals(indexEntry.token.toLowerCase(), row.getToken().toLowerCase());
}
+ // TODO: maybe if user types capitalization, use it.
assertEquals("aaac", deIndex.find("AAAC", new AtomicBoolean(false)).getToken());
assertEquals("aaac", deIndex.find("aaac", new AtomicBoolean(false)).getToken());
assertEquals("aaac", deIndex.find("AAAc", new AtomicBoolean(false)).getToken());
assertEquals("aaac", deIndex.find("aaac", new AtomicBoolean(false)).getToken());
+
+ // Before the beginning.
+ assertEquals("40", deIndex.find("__", new AtomicBoolean(false)).getToken());
+
+ // After the end.
+ assertEquals("Zweckorientiertheit", deIndex.find("ZZZZZ", new AtomicBoolean(false)).getToken());
+
+ assertEquals("aaac", deIndex.find("aaaca", new AtomicBoolean(false)).getToken());
+
+ assertEquals("überprüfe", deIndex.find("ueberprüfe", new AtomicBoolean(false)).getToken());
+ assertEquals("überprüfe", deIndex.find("ueberpruefe", new AtomicBoolean(false)).getToken());
+
}
public void testGermanTokenRows() {
// This will break if the Row cache isn't big enough.
assertEquals(lastTokenRow, row.getTokenRow(false));
}
+ }
+
+ /**
+  * Checks German text normalization and that sorting an already correctly ordered word
+  * list with the German sort comparator leaves it unchanged.
+  */
+ @SuppressWarnings("unchecked")
+ public void testGermanSort() {
+   assertEquals("aüÄÄ", Language.de.textNorm("aueAeAE", false));
+   final List<String> words = Arrays.asList(
+       "er-ben",
+       "erben",
+       "Erben",
+       "Erbse",
+       "Erbsen",
+       "essen",
+       "Essen",
+       "Grosformat",
+       "Grosformats",
+       "Grossformat",
+       "Großformat",
+       "Grossformats",
+       "Großformats",
+       "Großpoo",
+       "Großpoos",
+       "hulle",
+       "Hulle",
+       "hülle",
+       "huelle",
+       "Hülle",
+       "Huelle",
+       "Hum"
+   );
+   assertEquals(0, Language.de.sortComparator.compare("hülle", "huelle"));
+   assertEquals(0, Language.de.sortComparator.compare("huelle", "hülle"));
+
+   // A comparator only promises the sign of its result, so check "< 0" rather than -1.
+   assertTrue(Language.de.sortComparator.compare("hülle", "Hülle") < 0);
+   assertEquals(0, Language.de.findComparator.compare("hülle", "Hülle"));
+   assertTrue(Language.de.findComparator.compare("hulle", "Hülle") < 0);
+
+   for (final String s : words) {
+     System.out.println(s + "\t" + Language.de.textNorm(s, false));
+   }
+
+   // The input is deliberately not shuffled: some words compare as equal (see above), and
+   // only the stability of Collections.sort keeps them in the listed order.
+   final List<String> sorted = new ArrayList<String>(words);
+   Collections.sort(sorted, Language.de.sortComparator);
+   System.out.println(sorted.toString());
+   for (int i = 0; i < words.size(); ++i) {
+     System.out.println(words.get(i) + "\t" + sorted.get(i));
+     assertEquals(words.get(i), sorted.get(i));
+   }
+ }
+ /**
+  * Checks that the English sort collator keeps a hyphenated word in its expected place:
+  * the list is strictly increasing and sorting it changes nothing.
+  */
+ @SuppressWarnings("unchecked")
+ public void testEnglishSort() {
+
+   final List<String> words = Arrays.asList(
+       "pre-print",
+       "preppie",
+       "preppy",
+       "preprocess");
+
+   final List<String> sorted = new ArrayList<String>(words);
+   Collections.sort(sorted, Language.en.getSortCollator());
+
+   String previous = null;
+   for (int i = 0; i < words.size(); ++i) {
+     final String expected = words.get(i);
+     if (previous != null) {
+       // Each word must sort strictly after its predecessor.
+       assertTrue(Language.en.getSortCollator().compare(previous, expected) < 0);
+     }
+     System.out.println(expected + "\t" + sorted.get(i));
+     assertEquals(expected, sorted.get(i));
+     previous = expected;
+   }
+
+   assertTrue(Language.en.getSortCollator().compare("pre-print", "preppy") < 0);
+
+ }
+
+ /** Checks that language symbols resolve to the expected Language instances. */
+ public void testLanguage() {
+   final Language german = Language.lookup("de");
+   assertEquals(Language.de, german);
+
+   final Language english = Language.lookup("en");
+   assertEquals(Language.en, english);
+
+   final Language spanish = Language.lookup("es");
+   assertEquals("es", spanish.getSymbol());
 }
import java.util.SortedMap;
import java.util.TreeMap;
-import com.hughes.android.dictionary.Language;
public class IndexBuilder {