1 // Copyright 2011 Google Inc. All Rights Reserved.
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
7 // http://www.apache.org/licenses/LICENSE-2.0
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
15 package com.hughes.android.dictionary.engine;
17 import java.io.IOException;
18 import java.io.RandomAccessFile;
19 import java.util.Arrays;
20 import java.util.List;
21 import java.util.concurrent.atomic.AtomicBoolean;
23 import junit.framework.TestCase;
25 import com.hughes.android.dictionary.engine.Index.IndexEntry;
26 import com.hughes.util.CollectionUtil;
/**
 * JUnit tests (junit.framework.TestCase) that open ".quickdic" files through
 * RandomAccessFile, load them as Dictionary objects and check index lookups,
 * metadata and multi-word searches against expected values.
 */
20 public class DictionaryTest extends TestCase {
// Path prefix taken from DictionaryBuilderTest; the "de-en.quickdic" tests read from it.
31 static final String TEST_OUTPUTS = com.hughes.android.dictionary.engine.DictionaryBuilderTest.TEST_OUTPUTS;
// Relative path prefix for "EN-IT_enwiktionary.quickdic" and "DE-EN_chemnitz_enwiktionary.quickdic".
32 public static final String OUTPUTS = "data/outputs/";
// TestCase set-up hook: keeps looping until TransliteratorManager.init(null) returns true.
35 protected void setUp() {
36 while (!TransliteratorManager.init(null)) {
// Handles an InterruptedException raised inside the guarded region of the retry loop.
39 } catch (InterruptedException e) {
// Opens the EN-IT dictionary read-only and checks the raw text of one row of its first index.
45 public void testEnItWiktionary() throws IOException {
46 final RandomAccessFile raf = new RandomAccessFile(OUTPUTS + "EN-IT_enwiktionary.quickdic", "r");
47 final Dictionary dict = new Dictionary(raf);
// Index 0 is bound to the name enIndex (presumably the English side — confirm).
48 final Index enIndex = dict.indices.get(0);
// Row 4 of that index must render exactly as the tab-separated text below.
50 final RowBase row = enIndex.rows.get(4);
51 assertEquals("The numeral 00\tzeranta (noun) {m|f|inv}", row.getRawText(false));
// Checks index names, the list of sources and per-entry source attribution of de-en.quickdic.
56 public void testGermanMetadata() throws IOException {
57 final RandomAccessFile raf = new RandomAccessFile(TEST_OUTPUTS + "de-en.quickdic", "r");
58 final Dictionary dict = new Dictionary(raf);
59 final Index deIndex = dict.indices.get(0);
// Short and long names of the first index.
61 assertEquals("DE", deIndex.shortName);
62 assertEquals("DE->EN", deIndex.longName);
// Exactly two sources are expected, in this order.
64 assertEquals(2, dict.sources.size());
65 assertEquals("chemnitz", dict.sources.get(0).name);
66 assertEquals("dictcc", dict.sources.get(1).name);
// The first two pair entries are attributed to "dictcc" and "chemnitz" respectively.
68 assertEquals("dictcc", dict.pairEntries.get(0).entrySource.name);
69 assertEquals("chemnitz", dict.pairEntries.get(1).entrySource.name);
// Exercises Index.findInsertionPoint on the first index of de-en.quickdic:
// first for every token already in the index, then for a set of hand-picked queries.
74 public void testGermanIndex() throws IOException {
75 final RandomAccessFile raf = new RandomAccessFile(TEST_OUTPUTS + "de-en.quickdic", "r");
76 final Dictionary dict = new Dictionary(raf);
77 final Index deIndex = dict.indices.get(0);
// Every token stored in sortedIndexEntries must be found again; the comparison
// lower-cases both sides, so only a case-insensitive match is required.
79 for (final Index.IndexEntry indexEntry : deIndex.sortedIndexEntries) {
80 System.out.println("testing: " + indexEntry.token);
81 final IndexEntry searchResult = deIndex.findInsertionPoint(indexEntry.token, new AtomicBoolean(
83 assertEquals("Looked up: " + indexEntry.token, indexEntry.token.toLowerCase(), searchResult.token.toLowerCase());
86 // TODO: maybe if user types capitalization, use it.
// Differently-cased spellings of the same query all resolve to the entry "aaac".
87 assertSearchResult("aaac", "aaac", deIndex.findInsertionPoint("aaac", new AtomicBoolean(false)));
88 assertSearchResult("aaac", "aaac", deIndex.findInsertionPoint("AAAC", new AtomicBoolean(false)));
89 assertSearchResult("aaac", "aaac", deIndex.findInsertionPoint("AAAc", new AtomicBoolean(false)));
90 assertSearchResult("aaac", "aaac", deIndex.findInsertionPoint("aAac", new AtomicBoolean(false)));
92 // Before the beginning.
93 assertSearchResult("40", "40" /* special case */, deIndex.findInsertionPoint("", new AtomicBoolean(false)));
94 assertSearchResult("40", "40" /* special case */, deIndex.findInsertionPoint("__", new AtomicBoolean(false)));
// NOTE(review): "ZZZZZ" presumably sorts after every token in the index — confirm.
97 assertSearchResult("Zweckorientiertheit", "zählen", deIndex.findInsertionPoint("ZZZZZ", new AtomicBoolean(false)));
// Queries that are not tokens themselves; each is expected to land on the entry named first.
99 assertSearchResult("ab", "aaac", deIndex.findInsertionPoint("aaaca", new AtomicBoolean(false)));
100 assertSearchResult("machen", "machen", deIndex.findInsertionPoint("m", new AtomicBoolean(false)));
101 assertSearchResult("machen", "machen", deIndex.findInsertionPoint("macdddd", new AtomicBoolean(false)));
// Queries spelled with "ue" in place of "ü" are expected to reach the "ü" entry.
104 assertSearchResult("überprüfe", "überprüfe", deIndex.findInsertionPoint("ueberprüfe", new AtomicBoolean(false)));
105 assertSearchResult("überprüfe", "überprüfe", deIndex.findInsertionPoint("ueberpruefe", new AtomicBoolean(false)));
// Queries with a non-matching "BLEH" suffix after a partial or full prefix.
107 assertSearchResult("überprüfe", "überprüfe", deIndex.findInsertionPoint("ueberpBLEH", new AtomicBoolean(false)));
108 assertSearchResult("überprüfe", "überprüfe", deIndex.findInsertionPoint("überprBLEH", new AtomicBoolean(false)));
110 assertSearchResult("überprüfen", "überprüfe", deIndex.findInsertionPoint("überprüfeBLEH", new AtomicBoolean(false)));
112 // Check that search in lowercase works.
113 assertSearchResult("Alibi", "Alibi", deIndex.findInsertionPoint("alib", new AtomicBoolean(false)));
// Prints the same lookup result to stdout for manual inspection.
114 System.out.println(deIndex.findInsertionPoint("alib", new AtomicBoolean(false)).toString());
// Asserts that the token of the looked-up entry equals insertionPoint.
// longestPrefix is accepted but is not checked by the code visible here.
119 private void assertSearchResult(final String insertionPoint, final String longestPrefix,
120 final IndexEntry actual) {
121 assertEquals(insertionPoint, actual.token);
// Checks RowBase.getTokenRow over all rows of the first index of de-en.quickdic:
// the result is compared with the most recently encountered TokenRow in iteration order.
// NOTE(review): the boolean passed to getTokenRow presumably controls whether a
// search is allowed when nothing is cached — confirm against RowBase.
124 public void testGermanTokenRows() throws IOException {
125 final RandomAccessFile raf = new RandomAccessFile(TEST_OUTPUTS + "de-en.quickdic", "r");
126 final Dictionary dict = new Dictionary(raf);
127 final Index deIndex = dict.indices.get(0);
129 // Pre-cache a few of these, just to make sure that's working.
// Touches every 7th row.
130 for (int i = 0; i < deIndex.rows.size(); i += 7) {
131 deIndex.rows.get(i).getTokenRow(true);
134 // Do the exhaustive searching.
135 TokenRow lastTokenRow = null;
136 for (final RowBase row : deIndex.rows) {
// Remember the latest TokenRow seen so far.
137 if (row instanceof TokenRow) {
138 lastTokenRow = (TokenRow) row;
140 assertEquals(lastTokenRow, row.getTokenRow(true));
143 // Now they're all cached, we shouldn't have to search.
144 for (final RowBase row : deIndex.rows) {
145 if (row instanceof TokenRow) {
146 lastTokenRow = (TokenRow) row;
148 // This will break if the Row cache isn't big enough.
149 assertEquals(lastTokenRow, row.getTokenRow(false));
// Looks up "Hos" and "hos" in the first index of de-en.quickdic; both queries are
// expected to resolve to the entry "Höschen".
155 public void testChemnitz() throws IOException {
156 final RandomAccessFile raf = new RandomAccessFile(TEST_OUTPUTS + "de-en.quickdic", "r");
157 final Dictionary dict = new Dictionary(raf);
158 final Index deIndex = dict.indices.get(0);
160 assertSearchResult("Höschen", "Hos", deIndex.findInsertionPoint("Hos", new AtomicBoolean(false)));
161 assertSearchResult("Höschen", "hos", deIndex.findInsertionPoint("hos", new AtomicBoolean(false)));
// Runs a two-word multiWordSearch on the first index of de-en.quickdic and
// requires at least one resulting row.
166 public void testMultiSearch() throws IOException {
167 final RandomAccessFile raf = new RandomAccessFile(TEST_OUTPUTS + "de-en.quickdic", "r");
168 final Dictionary dict = new Dictionary(raf);
169 final Index deIndex = dict.indices.get(0);
172 final List<RowBase> rows = deIndex.multiWordSearch(Arrays.asList("aaa", "aaab"), new AtomicBoolean(false));
// Dump the rows to stdout; on failure the assertion message is rows.toString().
173 System.out.println(CollectionUtil.join(rows, "\n "));
174 assertTrue(rows.toString(), rows.size() > 0);
// Runs three multiWordSearch queries against the second index (indices.get(1)) of
// DE-EN_chemnitz_enwiktionary.quickdic and checks the result count and, where
// asserted, the text of the first row.
180 public void testMultiSearchBig() throws IOException {
181 final RandomAccessFile raf = new RandomAccessFile(OUTPUTS + "DE-EN_chemnitz_enwiktionary.quickdic", "r");
182 final Dictionary dict = new Dictionary(raf);
183 final Index enIndex = dict.indices.get(1);
// Query 1: "train station" — first hit must be the Bahnhof row.
186 final List<RowBase> rows = enIndex.multiWordSearch(Arrays.asList("train", "station"), new AtomicBoolean(false));
187 System.out.println(CollectionUtil.join(rows, "\n "));
188 assertTrue(rows.toString(), rows.size() > 0);
189 assertEquals("Bahnhof {{de-noun|g=m|genitive=Bahnhofs|genitive2=Bahnhofes|plural=Bahnhöfe}}\ttrain station", rows.get(0).toString());
// Query 2: "a train station" — first hit must be the Bahnhofsuhr row.
193 final List<RowBase> rows = enIndex.multiWordSearch(Arrays.asList("a", "train", "station"), new AtomicBoolean(false));
194 System.out.println(CollectionUtil.join(rows, "\n "));
195 assertTrue(rows.toString(), rows.size() > 0);
196 assertEquals("Bahnhofsuhr {{de-noun|g=f|plural=Bahnhofsuhren}}\tstation clock (at a train station)", rows.get(0).toString());
// Query 3: "a station" — the test currently asserts an empty result (see TODO below).
200 final List<RowBase> rows = enIndex.multiWordSearch(Arrays.asList("a", "station"), new AtomicBoolean(false));
201 // TODO: bug, "a" isn't in stoplist for now...
202 System.out.println(CollectionUtil.join(rows, "\n "));
203 assertTrue(rows.toString(), rows.size() == 0);
204 //assertEquals("Bahnhofsuhr {{de-noun|g=f|plural=Bahnhofsuhren}}\tstation clock (at a train station)", rows.get(0).toString());