]> gitweb.fperrin.net Git - DictionaryPC.git/blob - src/com/hughes/android/dictionary/engine/DictionaryTest.java
Moved around testdata.
[DictionaryPC.git] / src / com / hughes / android / dictionary / engine / DictionaryTest.java
1 package com.hughes.android.dictionary.engine;
2
3 import java.io.IOException;
4 import java.io.RandomAccessFile;
5 import java.util.ArrayList;
6 import java.util.Arrays;
7 import java.util.Collections;
8 import java.util.List;
9 import java.util.concurrent.atomic.AtomicBoolean;
10
11 import junit.framework.TestCase;
12
13 import com.hughes.android.dictionary.engine.Index.SearchResult;
14
15
16 public class DictionaryTest extends TestCase {
17     
18   public void testGermanMetadata() throws IOException {
19     final RandomAccessFile raf = new RandomAccessFile("testdata/de-en.dict", "r");
20     final Dictionary dict = new Dictionary(raf);
21     final Index deIndex = dict.indices.get(0);
22     
23     assertEquals("de", deIndex.shortName);
24     assertEquals("de->en", deIndex.longName);
25     
26     raf.close();
27   }
28   
29   public void testGermanIndex() throws IOException {
30     final RandomAccessFile raf = new RandomAccessFile("testdata/de-en.dict", "r");
31     final Dictionary dict = new Dictionary(raf);
32     final Index deIndex = dict.indices.get(0);
33     
34     for (final Index.IndexEntry indexEntry : deIndex.sortedIndexEntries) {
35       System.out.println("testing: " + indexEntry.token);
36       final Index.SearchResult searchResult = deIndex.findLongestSubstring(indexEntry.token, new AtomicBoolean(
37           false));
38       assertEquals(indexEntry.token.toLowerCase(), searchResult.insertionPoint.token.toLowerCase());
39       assertEquals(indexEntry.token.toLowerCase(), searchResult.longestPrefix.token.toLowerCase());
40     }
41
42     // TODO: maybe if user types capitalization, use it.
43     assertSearchResult("aaac", "aaac", deIndex.findLongestSubstring("aaac", new AtomicBoolean(false)));
44     assertSearchResult("aaac", "aaac", deIndex.findLongestSubstring("AAAC", new AtomicBoolean(false)));
45     assertSearchResult("aaac", "aaac", deIndex.findLongestSubstring("AAAc", new AtomicBoolean(false)));
46     assertSearchResult("aaac", "aaac", deIndex.findLongestSubstring("aAac", new AtomicBoolean(false)));
47
48     // Before the beginning.
49     assertSearchResult("40", "40" /* special case */, deIndex.findLongestSubstring("", new AtomicBoolean(false)));
50     assertSearchResult("40", "40" /* special case */, deIndex.findLongestSubstring("__", new AtomicBoolean(false)));
51     
52     // After the end.
53     assertSearchResult("Zweckorientiertheit", "zählen", deIndex.findLongestSubstring("ZZZZZ", new AtomicBoolean(false)));
54
55     assertSearchResult("ab", "aaac", deIndex.findLongestSubstring("aaaca", new AtomicBoolean(false)));
56     assertSearchResult("machen", "machen", deIndex.findLongestSubstring("m", new AtomicBoolean(false)));
57
58     assertFalse(deIndex.findLongestSubstring("macdddd", new AtomicBoolean(false)).success);
59
60
61     assertSearchResult("überprüfe", "überprüfe", deIndex.findLongestSubstring("ueberprüfe", new AtomicBoolean(false)));
62     assertSearchResult("überprüfe", "überprüfe", deIndex.findLongestSubstring("ueberpruefe", new AtomicBoolean(false)));
63
64     assertSearchResult("überprüfe", "überprüfe", deIndex.findLongestSubstring("ueberpBLEH", new AtomicBoolean(false)));
65     assertSearchResult("überprüfe", "überprüfe", deIndex.findLongestSubstring("überprBLEH", new AtomicBoolean(false)));
66
67     assertSearchResult("überprüfen", "überprüfe", deIndex.findLongestSubstring("überprüfeBLEH", new AtomicBoolean(false)));
68
69     // Check that search in lowercase works.
70     assertSearchResult("Alibi", "Alibi", deIndex.findLongestSubstring("alib", new AtomicBoolean(false)));
71     assertTrue(deIndex.findLongestSubstring("alib", new AtomicBoolean(false)).success);
72     System.out.println(deIndex.findLongestSubstring("alib", new AtomicBoolean(false)).toString());
73     
74     raf.close();
75   }
76   
77   private void assertSearchResult(final String insertionPoint, final String longestPrefix,
78       final SearchResult actual) {
79     assertEquals(insertionPoint, actual.insertionPoint.token);
80     assertEquals(longestPrefix, actual.longestPrefix.token);
81   }
82
83   public void testGermanTokenRows() throws IOException {
84     final RandomAccessFile raf = new RandomAccessFile("testdata/de-en.dict", "r");
85     final Dictionary dict = new Dictionary(raf);
86     final Index deIndex = dict.indices.get(0);
87     
88     // Pre-cache a few of these, just to make sure that's working.
89     for (int i = 0; i < deIndex.rows.size(); i += 7) {
90       deIndex.rows.get(i).getTokenRow(true);
91     }
92     
93     // Do the exhaustive searching.
94     TokenRow lastTokenRow = null;
95     for (final RowBase row : deIndex.rows) {
96       if (row instanceof TokenRow) {
97         lastTokenRow = (TokenRow) row;
98       }
99       assertEquals(lastTokenRow, row.getTokenRow(true));
100     }
101
102     // Now they're all cached, we shouldn't have to search.
103     for (final RowBase row : deIndex.rows) {
104       if (row instanceof TokenRow) {
105         lastTokenRow = (TokenRow) row;
106       }
107       // This will break if the Row cache isn't big enough.
108       assertEquals(lastTokenRow, row.getTokenRow(false));
109     }
110     
111     raf.close();
112   }
113   
114   public void testGermanSort() {
115     assertEquals("aüÄÄ", Language.de.textNorm("aueAeAE", false));
116     final List<String> words = Arrays.asList(
117         "er-ben",
118         "erben",
119         "Erben",
120         "Erbse",
121         "Erbsen",
122         "essen",
123         "Essen",
124         "Grosformat",
125         "Grosformats",
126         "Grossformat",
127         "Großformat",
128         "Grossformats",
129         "Großformats",
130         "Großpoo",
131         "Großpoos",
132         "Hörweite",
133         "hos",
134         "Höschen",
135         "Hostel",
136         "hulle",
137         "Hulle",
138         "hülle",
139         "huelle",
140         "Hülle",
141         "Huelle",
142         "Hum"
143         );
144     assertEquals(0, Language.de.sortComparator.compare("hülle", "huelle"));
145     assertEquals(0, Language.de.sortComparator.compare("huelle", "hülle"));
146     
147     assertEquals(-1, Language.de.sortComparator.compare("hülle", "Hülle"));
148     assertEquals(0, Language.de.findComparator.compare("hülle", "Hülle"));
149     assertEquals(-1, Language.de.findComparator.compare("hulle", "Hülle"));
150
151     
152     for (final String s : words) {
153       System.out.println(s + "\t" + Language.de.textNorm(s, false));
154     }
155     final List<String> sorted = new ArrayList<String>(words);
156 //    Collections.shuffle(shuffled, new Random(0));
157     Collections.sort(sorted, Language.de.sortComparator);
158     System.out.println(sorted.toString());
159     for (int i = 0; i < words.size(); ++i) {
160       System.out.println(words.get(i) + "\t" + sorted.get(i));
161       assertEquals(words.get(i), sorted.get(i));
162     }
163   }
164
165   @SuppressWarnings("unchecked")
166   public void testEnglishSort() {
167
168     final List<String> words = Arrays.asList(
169         "pre-print", 
170         "preppie", 
171         "preppy",
172         "preprocess");
173     
174     final List<String> sorted = new ArrayList<String>(words);
175     Collections.sort(sorted, Language.en.getSortCollator());
176     for (int i = 0; i < words.size(); ++i) {
177       if (i > 0) {
178         assertTrue(Language.en.getSortCollator().compare(words.get(i-1), words.get(i)) < 0);
179       }
180       System.out.println(words.get(i) + "\t" + sorted.get(i));
181       assertEquals(words.get(i), sorted.get(i));
182     }
183     
184     assertTrue(Language.en.getSortCollator().compare("pre-print", "preppy") < 0);
185
186   }
187   
188   public void testLanguage() {
189     assertEquals(Language.de, Language.lookup("de"));
190     assertEquals(Language.en, Language.lookup("en"));
191     assertEquals("es", Language.lookup("es").getSymbol());
192   }
193
194   public void testTextNorm() {
195     assertEquals("hoschen", "Höschen".toLowerCase(Language.de.locale));
196   }
197
198   public void testChemnitz() throws IOException {
199     final RandomAccessFile raf = new RandomAccessFile("testdata/de-en_chemnitz.dict", "r");
200     final Dictionary dict = new Dictionary(raf);
201     final Index deIndex = dict.indices.get(0);
202     
203     //assertSearchResult("Höschen", "Hos", deIndex.findLongestSubstring("Hos", new AtomicBoolean(false)));
204     //assertSearchResult("Höschen", "hos", deIndex.findLongestSubstring("hos", new AtomicBoolean(false)));
205  
206
207     raf.close();
208   }
209
210 }