]> gitweb.fperrin.net Git - DictionaryPC.git/blob - src/com/hughes/android/dictionary/engine/DictionaryTest.java
go
[DictionaryPC.git] / src / com / hughes / android / dictionary / engine / DictionaryTest.java
1 package com.hughes.android.dictionary.engine;
2
3 import java.io.IOException;
4 import java.io.RandomAccessFile;
5 import java.util.ArrayList;
6 import java.util.Arrays;
7 import java.util.Collections;
8 import java.util.List;
9 import java.util.concurrent.atomic.AtomicBoolean;
10
11 import junit.framework.TestCase;
12
13 import com.hughes.android.dictionary.engine.Index.IndexEntry;
14 import com.ibm.icu.text.Transliterator;
15
16
17 public class DictionaryTest extends TestCase {
18     
19   public void testGermanMetadata() throws IOException {
20     final RandomAccessFile raf = new RandomAccessFile("testdata/de-en.quickdic", "r");
21     final Dictionary dict = new Dictionary(raf);
22     final Index deIndex = dict.indices.get(0);
23     
24     assertEquals("de", deIndex.shortName);
25     assertEquals("de->en", deIndex.longName);
26     
27     raf.close();
28   }
29   
30   public void testGermanIndex() throws IOException {
31     final RandomAccessFile raf = new RandomAccessFile("testdata/de-en.quickdic", "r");
32     final Dictionary dict = new Dictionary(raf);
33     final Index deIndex = dict.indices.get(0);
34     
35     for (final Index.IndexEntry indexEntry : deIndex.sortedIndexEntries) {
36       System.out.println("testing: " + indexEntry.token);
37       final IndexEntry searchResult = deIndex.findInsertionPoint(indexEntry.token, new AtomicBoolean(
38           false));
39       assertEquals(indexEntry.token.toLowerCase(), searchResult.token.toLowerCase());
40     }
41
42     // TODO: maybe if user types capitalization, use it.
43     assertSearchResult("aaac", "aaac", deIndex.findInsertionPoint("aaac", new AtomicBoolean(false)));
44     assertSearchResult("aaac", "aaac", deIndex.findInsertionPoint("AAAC", new AtomicBoolean(false)));
45     assertSearchResult("aaac", "aaac", deIndex.findInsertionPoint("AAAc", new AtomicBoolean(false)));
46     assertSearchResult("aaac", "aaac", deIndex.findInsertionPoint("aAac", new AtomicBoolean(false)));
47
48     // Before the beginning.
49     assertSearchResult("40", "40" /* special case */, deIndex.findInsertionPoint("", new AtomicBoolean(false)));
50     assertSearchResult("40", "40" /* special case */, deIndex.findInsertionPoint("__", new AtomicBoolean(false)));
51     
52     // After the end.
53     assertSearchResult("Zweckorientiertheit", "zählen", deIndex.findInsertionPoint("ZZZZZ", new AtomicBoolean(false)));
54
55     assertSearchResult("ab", "aaac", deIndex.findInsertionPoint("aaaca", new AtomicBoolean(false)));
56     assertSearchResult("machen", "machen", deIndex.findInsertionPoint("m", new AtomicBoolean(false)));
57     assertSearchResult("machen", "machen", deIndex.findInsertionPoint("macdddd", new AtomicBoolean(false)));
58
59
60     assertSearchResult("überprüfe", "überprüfe", deIndex.findInsertionPoint("ueberprüfe", new AtomicBoolean(false)));
61     assertSearchResult("überprüfe", "überprüfe", deIndex.findInsertionPoint("ueberpruefe", new AtomicBoolean(false)));
62
63     assertSearchResult("überprüfe", "überprüfe", deIndex.findInsertionPoint("ueberpBLEH", new AtomicBoolean(false)));
64     assertSearchResult("überprüfe", "überprüfe", deIndex.findInsertionPoint("überprBLEH", new AtomicBoolean(false)));
65
66     assertSearchResult("überprüfen", "überprüfe", deIndex.findInsertionPoint("überprüfeBLEH", new AtomicBoolean(false)));
67
68     // Check that search in lowercase works.
69     assertSearchResult("Alibi", "Alibi", deIndex.findInsertionPoint("alib", new AtomicBoolean(false)));
70     System.out.println(deIndex.findInsertionPoint("alib", new AtomicBoolean(false)).toString());
71     
72     raf.close();
73   }
74   
75   private void assertSearchResult(final String insertionPoint, final String longestPrefix,
76       final IndexEntry actual) {
77     assertEquals(insertionPoint, actual.token);
78   }
79
80   public void testGermanTokenRows() throws IOException {
81     final RandomAccessFile raf = new RandomAccessFile("testdata/de-en.quickdic", "r");
82     final Dictionary dict = new Dictionary(raf);
83     final Index deIndex = dict.indices.get(0);
84     
85     // Pre-cache a few of these, just to make sure that's working.
86     for (int i = 0; i < deIndex.rows.size(); i += 7) {
87       deIndex.rows.get(i).getTokenRow(true);
88     }
89     
90     // Do the exhaustive searching.
91     TokenRow lastTokenRow = null;
92     for (final RowBase row : deIndex.rows) {
93       if (row instanceof TokenRow) {
94         lastTokenRow = (TokenRow) row;
95       }
96       assertEquals(lastTokenRow, row.getTokenRow(true));
97     }
98
99     // Now they're all cached, we shouldn't have to search.
100     for (final RowBase row : deIndex.rows) {
101       if (row instanceof TokenRow) {
102         lastTokenRow = (TokenRow) row;
103       }
104       // This will break if the Row cache isn't big enough.
105       assertEquals(lastTokenRow, row.getTokenRow(false));
106     }
107     
108     raf.close();
109   }
110   
111   public void testGermanSort() {
112     final Transliterator normalizer = Transliterator.createFromRules("", Language.de.getDefaultNormalizerRules(), Transliterator.FORWARD);
113     assertEquals("aüääss", normalizer.transform("aueAeAEß"));
114     final List<String> words = Arrays.asList(
115         "er-ben",
116         "erben",
117         "Erben",
118         "Erbse",
119         "Erbsen",
120         "essen",
121         "Essen",
122         "Grosformat",
123         "Grosformats",
124         "Grossformat",
125         "Großformat",
126         "Grossformats",
127         "Großformats",
128         "Großpoo",
129         "Großpoos",
130         "Hörvermögen",
131         "Hörweite",
132         "hos",
133         "Höschen",
134         "Hostel",
135         "hulle",
136         "Hulle",
137         "huelle",
138         "Huelle",
139         "hülle",
140         "Hülle",
141         "Huellen",
142         "Hüllen",
143         "Hum"
144         );
145     final NormalizeComparator comparator = new NormalizeComparator(normalizer, Language.de.getCollator());
146     assertEquals(1, comparator.compare("hülle", "huelle"));
147     assertEquals(-1, comparator.compare("huelle", "hülle"));
148     
149     assertEquals(-1, comparator.compare("hülle", "Hülle"));
150     
151     assertEquals("hülle", normalizer.transform("Hülle"));
152     assertEquals("hulle", normalizer.transform("Hulle"));
153
154     
155     final List<String> sorted = new ArrayList<String>(words);
156 //    Collections.shuffle(shuffled, new Random(0));
157     Collections.sort(sorted, comparator);
158     System.out.println(sorted.toString());
159     for (int i = 0; i < words.size(); ++i) {
160       System.out.println(words.get(i) + "\t" + sorted.get(i));
161       assertEquals(words.get(i), sorted.get(i));
162     }
163   }
164
165   public void testEnglishSort() {
166     final Transliterator normalizer = Transliterator.createFromRules("", Language.en.getDefaultNormalizerRules(), Transliterator.FORWARD);
167
168     final List<String> words = Arrays.asList(
169         "pre-print", 
170         "preppie", 
171         "preppy",
172         "preprocess");
173     
174     final List<String> sorted = new ArrayList<String>(words);
175     final NormalizeComparator comparator = new NormalizeComparator(normalizer, Language.en.getCollator());
176     Collections.sort(sorted, comparator);
177     for (int i = 0; i < words.size(); ++i) {
178       if (i > 0) {
179         assertTrue(comparator.compare(words.get(i-1), words.get(i)) < 0);
180       }
181       System.out.println(words.get(i) + "\t" + sorted.get(i));
182       assertEquals(words.get(i), sorted.get(i));
183     }
184     
185     assertTrue(comparator.compare("pre-print", "preppy") < 0);
186
187   }
188   
189   public void testLanguage() {
190     assertEquals(Language.de, Language.lookup("de"));
191     assertEquals(Language.en, Language.lookup("en"));
192     assertEquals("es", Language.lookup("es").getSymbol());
193   }
194
195   public void testTextNorm() {
196     //final Transliterator transliterator = Transliterator.getInstance("Any-Latin; Upper; Lower; 'oe' > 'o'; NFD; [:Nonspacing Mark:] Remove; NFC", Transliterator.FORWARD);
197     final Transliterator transliterator = Transliterator.createFromRules("", ":: Any-Latin; :: Upper; :: Lower; 'oe' > 'o'; :: NFD; :: [:Nonspacing Mark:] Remove; :: NFC ;", Transliterator.FORWARD);
198     assertEquals("hoschen", transliterator.transliterate("Höschen"));
199     assertEquals("hoschen", transliterator.transliterate("Hoeschen"));
200     assertEquals("grosspoo", transliterator.transliterate("Großpoo"));
201
202     assertEquals("kyanpasu", transliterator.transliterate("キャンパス"));
203     assertEquals("alphabetikos katalogos", transliterator.transliterate("Αλφαβητικός Κατάλογος"));
204     assertEquals("biologiceskom", transliterator.transliterate("биологическом"));
205   }
206
207   public void testChemnitz() throws IOException {
208     final RandomAccessFile raf = new RandomAccessFile("dictOutputs/de-en_chemnitz.quickdic", "r");
209     final Dictionary dict = new Dictionary(raf);
210     final Index deIndex = dict.indices.get(0);
211     
212     assertSearchResult("Höschen", "Hos", deIndex.findInsertionPoint("Hos", new AtomicBoolean(false)));
213     assertSearchResult("Höschen", "hos", deIndex.findInsertionPoint("hos", new AtomicBoolean(false)));
214
215     raf.close();
216   }
217
218 }