import java.util.List;\r
import java.util.concurrent.atomic.AtomicBoolean;\r
\r
+import com.hughes.android.dictionary.engine.Language;\r
import com.hughes.util.CachingList;\r
import com.hughes.util.raf.RAFList;\r
import com.hughes.util.raf.RAFFactory;\r
import com.hughes.android.dictionary.Dictionary.IndexEntry;
import com.hughes.android.dictionary.Dictionary.LanguageData;
import com.hughes.android.dictionary.Dictionary.Row;
+import com.hughes.android.dictionary.engine.Language;
import com.ibm.icu.text.Collator;
public class DictionaryActivity extends ListActivity {
package com.hughes.android.dictionary;\r
\r
+import com.hughes.android.dictionary.engine.Language;\r
+\r
import android.test.ActivityInstrumentationTestCase2;\r
\r
public class DictionaryActivityTest extends ActivityInstrumentationTestCase2<DictionaryActivity> {\r
import android.widget.AdapterView;
import android.widget.BaseAdapter;
import android.widget.EditText;
-import android.widget.ListView;
import android.widget.TableLayout;
import android.widget.TextView;
import android.widget.AdapterView.AdapterContextMenuInfo;
public class Dictionary implements RAFSerializable<Dictionary> {
+ static final int CACHE_SIZE = 5000;
+
// persisted
final String dictInfo;
final List<PairEntry> pairEntries;
public Dictionary(final RandomAccessFile raf) throws IOException {
dictInfo = raf.readUTF();
-
- sources = RAFList.create(raf, EntrySource.SERIALIZER, raf.getFilePointer());
-
- // TODO: caching
- pairEntries = RAFList.create(raf, PairEntry.SERIALIZER, raf.getFilePointer());
-
- // TODO: caching
- textEntries = RAFList.create(raf, TextEntry.SERIALIZER, raf.getFilePointer());
-
- final List<Index> rawIndices = RAFList.create(raf, indexSerializer, raf.getFilePointer());
- indices = CachingList.create(rawIndices, rawIndices.size());
+ // Each list is created at the file pointer current when its create() call runs,
+ // in the order: sources, pair entries, text entries, indices.
+ // NOTE(review): presumably each create() leaves the pointer just past its own
+ // section, so this order has to match the order the file was written in - confirm.
+ // pairEntries and textEntries are wrapped with CACHE_SIZE (presumably a bound on
+ // the number of cached elements); sources and indices use createFullyCached.
+ sources = CachingList.createFullyCached(RAFList.create(raf, EntrySource.SERIALIZER, raf.getFilePointer()));
+ pairEntries = CachingList.create(RAFList.create(raf, PairEntry.SERIALIZER, raf.getFilePointer()), CACHE_SIZE);
+ textEntries = CachingList.create(RAFList.create(raf, TextEntry.SERIALIZER, raf.getFilePointer()), CACHE_SIZE);
+ indices = CachingList.createFullyCached(RAFList.create(raf, indexSerializer, raf.getFilePointer()));
}
public void print(final PrintStream out) {
public abstract class Entry {
- EntrySource entrySource;
-
}
this.name = name;
}
+ /** Returns the name of this source, unchanged. */
+ @Override
+ public String toString() {
+ return this.name;
+ }
+
+
public static RAFListSerializer<EntrySource> SERIALIZER = new RAFListSerializer<EntrySource>() {
@Override
@Override
public void write(RandomAccessFile raf, EntrySource t) throws IOException {
raf.writeUTF(t.name);
- }
-
+ }
};
}
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
+import java.util.concurrent.atomic.AtomicBoolean;
-import com.hughes.android.dictionary.Language;
+import com.hughes.util.CachingList;
import com.hughes.util.raf.RAFList;
import com.hughes.util.raf.RAFSerializable;
import com.hughes.util.raf.RAFSerializer;
import com.hughes.util.raf.UniformRAFList;
+import com.ibm.icu.text.Collator;
public final class Index implements RAFSerializable<Index> {
+
+ static final int CACHE_SIZE = 5000;
+
final Dictionary dict;
final String shortName;
if (sortLanguage == null) {
throw new IOException("Unsupported language: " + languageCode);
}
- // TODO: caching
- sortedIndexEntries = RAFList.create(raf, IndexEntry.SERIALIZER, raf.getFilePointer());
- rows = UniformRAFList.create(raf, new RowBase.Serializer(this), raf.getFilePointer());
+ sortedIndexEntries = CachingList.create(RAFList.create(raf, IndexEntry.SERIALIZER, raf.getFilePointer()), CACHE_SIZE);
+ rows = CachingList.create(UniformRAFList.create(raf, new RowBase.Serializer(this), raf.getFilePointer()), CACHE_SIZE);
}
public void print(final PrintStream out) {
public void write(RandomAccessFile raf) throws IOException {
raf.writeUTF(token);
- raf.write(startRow);
+ // writeInt stores all four bytes of startRow; RandomAccessFile.write(int) would
+ // store only its low-order byte.
+ raf.writeInt(startRow);
+ }
+
+ /** Renders this entry as its token, an {@code @}, and its start row. */
+ @Override
+ public String toString() {
+ return token + "@" + startRow;
+ }
+}
+
+
+ /**
+ * Maps a position in sortedIndexEntries to the row that entry starts at.
+ * The row found at the entry's startRow is cast to TokenRow without a check.
+ */
+ private TokenRow sortedIndexToToken(final int sortedIndex) {
+ final int firstRow = sortedIndexEntries.get(sortedIndex).startRow;
+ return (TokenRow) rows.get(firstRow);
+ }
+
+ /**
+ * Looks up the row for {@code token} in the sorted index entries.
+ * <p>
+ * The token is normalized with {@code sortLanguage.textNorm(token, true)} and then
+ * binary-searched with the sort collator. On an exact match the result is wound
+ * back to the first entry that compares equal. Without an exact match the search
+ * position is clamped to the last entry and wound back; if that entry still sorts
+ * after the token, the result moves to the first entry of the preceding token.
+ * <p>
+ * NOTE(review): with no index entries the clamped position is -1 and the final
+ * lookup is made with that index; presumably every index has at least one
+ * entry - confirm.
+ *
+ * @param token the text to look up
+ * @param interrupted polled during the search; once set, the search stops early
+ * and the row for the position reached so far is returned
+ * @return the TokenRow at the start row of the chosen index entry
+ */
+ public TokenRow find(String token, final AtomicBoolean interrupted) {
+ token = sortLanguage.textNorm(token, true);
+
+ int start = 0;
+ int end = sortedIndexEntries.size();
+
+ final Collator sortCollator = sortLanguage.getSortCollator();
+ while (start < end) {
+ // Unsigned shift keeps the midpoint correct even if start + end overflows int.
+ final int mid = (start + end) >>> 1;
+ if (interrupted.get()) {
+ return sortedIndexToToken(mid);
+ }
+ final IndexEntry midEntry = sortedIndexEntries.get(mid);
+
+ final int comp = sortCollator.compare(token, sortLanguage.textNorm(midEntry.token, true));
+ if (comp == 0) {
+ final int result = windBack(token, mid, sortCollator, interrupted);
+ return sortedIndexToToken(result);
+ } else if (comp < 0) {
+// Log.d("THAD", "Upper bound: " + midEntry);
+ end = mid;
+ } else {
+// Log.d("THAD", "Lower bound: " + midEntry);
+ start = mid + 1;
+ }
+ }
+ // No exact match: start is the insertion point, clamped to the last valid entry.
+ int result = Math.min(start, sortedIndexEntries.size() - 1);
+ result = windBack(token, result, sortCollator, interrupted);
+ // If the entry found sorts after the token, fall back to the first entry of the
+ // token that precedes it.
+ if (result > 0 && sortCollator.compare(sortLanguage.textNorm(sortedIndexEntries.get(result).token, true), token) > 0) {
+ result = windBack(sortLanguage.textNorm(sortedIndexEntries.get(result - 1).token, true), result, sortCollator, interrupted);
+ }
+ return sortedIndexToToken(result);
+ }
+
+ /**
+ * Steps {@code result} backwards while the entry just before it, normalized with
+ * textNorm(..., true), compares greater than or equal to {@code token} under
+ * {@code sortCollator}. Never goes below index 0.
+ * The interrupted flag is checked after each step; once it is set, the position
+ * reached so far is returned immediately.
+ */
+ private final int windBack(final String token, int result, final Collator sortCollator, final AtomicBoolean interrupted) {
+ while (result > 0 && sortCollator.compare(sortLanguage.textNorm(sortedIndexEntries.get(result - 1).token, true), token) >= 0) {
+ --result;
+ if (interrupted.get()) {
+ return result;
+ }
}
+ return result;
}
}
\ No newline at end of file
-package com.hughes.android.dictionary;\r
+package com.hughes.android.dictionary.engine;\r
\r
import java.util.Comparator;\r
import java.util.LinkedHashMap;\r
\r
this.sortComparator = new Comparator<String>() {\r
public int compare(final String s1, final String s2) {\r
- return getSortCollator().compare(textNorm(s1), textNorm(s2));\r
+ return getSortCollator().compare(textNorm(s1, false), textNorm(s2, false));\r
}\r
};\r
\r
this.findComparator = new Comparator<String>() {\r
public int compare(final String s1, final String s2) {\r
- return getFindCollator().compare(textNorm(s1), textNorm(s2));\r
+ return getFindCollator().compare(textNorm(s1, false), textNorm(s2, false));\r
}\r
};\r
\r
symbolToLangauge.put(symbol.toLowerCase(), this);\r
}\r
\r
- public String textNorm(final String s) {
- return s;
+ /**
+ * Normalizes a token before it is compared or looked up.
+ *
+ * @param s the token to normalize
+ * @param toLower whether to lower-case the token; when false it is returned as is
+ * @return the normalized token
+ */
+ public String textNorm(final String s, final boolean toLower) {
+ // Lower-case with a fixed locale: the no-arg toLowerCase() follows the default
+ // locale (Turkish, for example, maps "I" to a dotless i), so the same token
+ // could normalize differently from one device to another.
+ return toLower ? s.toLowerCase(Locale.US) : s;
}
\r
@Override\r
\r
public static final Language de = new Language(Locale.GERMAN) {\r
@Override\r
- public String textNorm(String token) {\r
+ public String textNorm(String token, final boolean toLower) {\r
+ if (toLower) {\r
+ token = token.toLowerCase();\r
+ }\r
boolean sub = false;\r
+ // This is meant to be fast: occurrences of ae, oe, ue are probably rare.\r
for (int ePos = token.indexOf('e', 1); ePos != -1; ePos = token.indexOf(\r
'e', ePos + 1)) {\r
final char pre = Character.toLowerCase(token.charAt(ePos - 1));\r
if (!sub) {\r
return token;\r
}\r
+ \r
token = token.replaceAll("ae", "ä");\r
token = token.replaceAll("oe", "ö");\r
token = token.replaceAll("ue", "ü");\r
token = token.replaceAll("Ae", "Ä");\r
token = token.replaceAll("Oe", "Ö");\r
token = token.replaceAll("Ue", "Ü");\r
+\r
+ token = token.replaceAll("AE", "Ä");\r
+ token = token.replaceAll("OE", "Ö");\r
+ token = token.replaceAll("UE", "Ü");\r
+ \r
return token; \r
}\r
};\r
this.lang2 = lang2;
}
+ /** Renders the pair as its two sides separated by " :: ". */
+ @Override
public String toString() {
- return lang1 + "\t" + lang2;
+ return lang1 + " :: " + lang2;
}
}
}
@Override
public void write(RandomAccessFile raf) throws IOException {
- // TODO: this couls be a short.
+ // TODO: this could be a short.
raf.writeInt(pairs.length);
for (int i = 0; i < pairs.length; ++i) {
raf.writeUTF(pairs[i].lang1);
public void print(PrintStream out) {
final PairEntry pairEntry = getEntry();
for (int i = 0; i < pairEntry.pairs.length; ++i) {
- out.println((i == 0 ? " " : " ") + pairEntry.pairs[i]);
+ // Each pair goes on its own line: the prefixed pair text is printed first,
+ // then the line terminator is written by a separate call.
+ out.print((i == 0 ? " " : " ") + pairEntry.pairs[i]);
+ out.println();
}
}
}
for (++r; r <= index(); ++r) {
index.rows.get(r).setTokenRow(candidate);
}
+ break;
}
+ --r;
}
assert tokenRow != null;
}
TokenRow(final int referenceIndex, final int thisRowIndex, final Index index) {
super(referenceIndex, thisRowIndex, index);
}
+
+ /** Renders this row as its token, an {@code @}, and its reference index. */
+ @Override
+ public String toString() {
+ return getToken() + "@" + referenceIndex;
+ }
@Override
public TokenRow getTokenRow(final boolean search) {