From b2be3ae02bbbf99129418738083cf92b1751c3b9 Mon Sep 17 00:00:00 2001 From: Thad Hughes Date: Tue, 12 Oct 2010 16:54:32 -0700 Subject: [PATCH] go --- .../hughes/android/dictionary/Dictionary.java | 1 + .../dictionary/DictionaryActivity.java | 1 + .../dictionary/DictionaryActivityTest.java | 2 + .../dictionary/DictionaryListActivity.java | 1 - .../android/dictionary/engine/Dictionary.java | 17 ++--- .../android/dictionary/engine/Entry.java | 2 - .../dictionary/engine/EntrySource.java | 9 ++- .../android/dictionary/engine/Index.java | 68 +++++++++++++++++-- .../dictionary/{ => engine}/Language.java | 22 ++++-- .../android/dictionary/engine/PairEntry.java | 7 +- .../android/dictionary/engine/RowBase.java | 2 + .../android/dictionary/engine/TokenRow.java | 4 ++ 12 files changed, 106 insertions(+), 30 deletions(-) rename src/com/hughes/android/dictionary/{ => engine}/Language.java (78%) diff --git a/src/com/hughes/android/dictionary/Dictionary.java b/src/com/hughes/android/dictionary/Dictionary.java index c0a3588..5de8d32 100755 --- a/src/com/hughes/android/dictionary/Dictionary.java +++ b/src/com/hughes/android/dictionary/Dictionary.java @@ -6,6 +6,7 @@ import java.util.ArrayList; import java.util.List; import java.util.concurrent.atomic.AtomicBoolean; +import com.hughes.android.dictionary.engine.Language; import com.hughes.util.CachingList; import com.hughes.util.raf.RAFList; import com.hughes.util.raf.RAFFactory; diff --git a/src/com/hughes/android/dictionary/DictionaryActivity.java b/src/com/hughes/android/dictionary/DictionaryActivity.java index d6a6ff1..5c272c5 100644 --- a/src/com/hughes/android/dictionary/DictionaryActivity.java +++ b/src/com/hughes/android/dictionary/DictionaryActivity.java @@ -50,6 +50,7 @@ import android.widget.Toast; import com.hughes.android.dictionary.Dictionary.IndexEntry; import com.hughes.android.dictionary.Dictionary.LanguageData; import com.hughes.android.dictionary.Dictionary.Row; +import com.hughes.android.dictionary.engine.Language; import com.ibm.icu.text.Collator; public class DictionaryActivity extends ListActivity { diff --git a/src/com/hughes/android/dictionary/DictionaryActivityTest.java b/src/com/hughes/android/dictionary/DictionaryActivityTest.java index 1ac9e8b..dc0325b 100755 --- a/src/com/hughes/android/dictionary/DictionaryActivityTest.java +++ b/src/com/hughes/android/dictionary/DictionaryActivityTest.java @@ -1,5 +1,7 @@ package com.hughes.android.dictionary; +import com.hughes.android.dictionary.engine.Language; + import android.test.ActivityInstrumentationTestCase2; public class DictionaryActivityTest extends ActivityInstrumentationTestCase2 { diff --git a/src/com/hughes/android/dictionary/DictionaryListActivity.java b/src/com/hughes/android/dictionary/DictionaryListActivity.java index faeb8fe..1f9936a 100644 --- a/src/com/hughes/android/dictionary/DictionaryListActivity.java +++ b/src/com/hughes/android/dictionary/DictionaryListActivity.java @@ -18,7 +18,6 @@ import android.view.View.OnFocusChangeListener; import android.widget.AdapterView; import android.widget.BaseAdapter; import android.widget.EditText; -import android.widget.ListView; import android.widget.TableLayout; import android.widget.TextView; import android.widget.AdapterView.AdapterContextMenuInfo; diff --git a/src/com/hughes/android/dictionary/engine/Dictionary.java b/src/com/hughes/android/dictionary/engine/Dictionary.java index 1472d31..26dae91 100644 --- a/src/com/hughes/android/dictionary/engine/Dictionary.java +++ b/src/com/hughes/android/dictionary/engine/Dictionary.java @@ -14,6 +14,8 @@ import com.hughes.util.raf.RAFSerializable; public class Dictionary implements RAFSerializable { + static final int CACHE_SIZE = 5000; + // persisted final String dictInfo; final List pairEntries; @@ -31,17 +33,10 @@ public class Dictionary implements RAFSerializable { public Dictionary(final RandomAccessFile raf) throws IOException { dictInfo = raf.readUTF(); - - sources = RAFList.create(raf, EntrySource.SERIALIZER, raf.getFilePointer()); - - // TODO: caching - pairEntries = RAFList.create(raf, PairEntry.SERIALIZER, raf.getFilePointer()); - - // TODO: caching - textEntries = RAFList.create(raf, TextEntry.SERIALIZER, raf.getFilePointer()); - - final List rawIndices = RAFList.create(raf, indexSerializer, raf.getFilePointer()); - indices = CachingList.create(rawIndices, rawIndices.size()); + sources = CachingList.createFullyCached(RAFList.create(raf, EntrySource.SERIALIZER, raf.getFilePointer())); + pairEntries = CachingList.create(RAFList.create(raf, PairEntry.SERIALIZER, raf.getFilePointer()), CACHE_SIZE); + textEntries = CachingList.create(RAFList.create(raf, TextEntry.SERIALIZER, raf.getFilePointer()), CACHE_SIZE); + indices = CachingList.createFullyCached(RAFList.create(raf, indexSerializer, raf.getFilePointer())); } public void print(final PrintStream out) { diff --git a/src/com/hughes/android/dictionary/engine/Entry.java b/src/com/hughes/android/dictionary/engine/Entry.java index 9efd1f5..8be07f3 100644 --- a/src/com/hughes/android/dictionary/engine/Entry.java +++ b/src/com/hughes/android/dictionary/engine/Entry.java @@ -3,6 +3,4 @@ package com.hughes.android.dictionary.engine; public abstract class Entry { - EntrySource entrySource; - } diff --git a/src/com/hughes/android/dictionary/engine/EntrySource.java b/src/com/hughes/android/dictionary/engine/EntrySource.java index 914cf8f..391e8ce 100644 --- a/src/com/hughes/android/dictionary/engine/EntrySource.java +++ b/src/com/hughes/android/dictionary/engine/EntrySource.java @@ -18,6 +18,12 @@ public class EntrySource extends IndexedObject implements Serializable { this.name = name; } + @Override + public String toString() { + return name; + } + + public static RAFListSerializer SERIALIZER = new RAFListSerializer() { @Override @@ -30,8 +36,7 @@ public class EntrySource extends IndexedObject implements Serializable { @Override public void write(RandomAccessFile raf, EntrySource t) throws IOException { raf.writeUTF(t.name); - } - + } }; } diff --git a/src/com/hughes/android/dictionary/engine/Index.java b/src/com/hughes/android/dictionary/engine/Index.java index 4953600..e2c2fee 100644 --- a/src/com/hughes/android/dictionary/engine/Index.java +++ b/src/com/hughes/android/dictionary/engine/Index.java @@ -9,14 +9,19 @@ import java.io.RandomAccessFile; import java.util.ArrayList; import java.util.Collection; import java.util.List; +import java.util.concurrent.atomic.AtomicBoolean; -import com.hughes.android.dictionary.Language; +import com.hughes.util.CachingList; import com.hughes.util.raf.RAFList; import com.hughes.util.raf.RAFSerializable; import com.hughes.util.raf.RAFSerializer; import com.hughes.util.raf.UniformRAFList; +import com.ibm.icu.text.Collator; public final class Index implements RAFSerializable { + + static final int CACHE_SIZE = 5000; + final Dictionary dict; final String shortName; @@ -53,9 +58,8 @@ public final class Index implements RAFSerializable { if (sortLanguage == null) { throw new IOException("Unsupported language: " + languageCode); } - // TODO: caching - sortedIndexEntries = RAFList.create(raf, IndexEntry.SERIALIZER, raf.getFilePointer()); - rows = UniformRAFList.create(raf, new RowBase.Serializer(this), raf.getFilePointer()); + sortedIndexEntries = CachingList.create(RAFList.create(raf, IndexEntry.SERIALIZER, raf.getFilePointer()), CACHE_SIZE); + rows = CachingList.create(UniformRAFList.create(raf, new RowBase.Serializer(this), raf.getFilePointer()), CACHE_SIZE); } public void print(final PrintStream out) { @@ -102,8 +106,62 @@ public final class Index implements RAFSerializable { public void write(RandomAccessFile raf) throws IOException { raf.writeUTF(token); - raf.write(startRow); + raf.writeInt(startRow); + } + + public String toString() { + return token + "@" + startRow; + } +} + + + private TokenRow sortedIndexToToken(final int sortedIndex) { + final IndexEntry indexEntry = sortedIndexEntries.get(sortedIndex); + return (TokenRow) rows.get(indexEntry.startRow); + } + + public TokenRow find(String token, final AtomicBoolean interrupted) { + token = sortLanguage.textNorm(token, true); + + int start = 0; + int end = sortedIndexEntries.size(); + + final Collator sortCollator = sortLanguage.getSortCollator(); + while (start < end) { + final int mid = (start + end) / 2; + if (interrupted.get()) { + return sortedIndexToToken(mid); + } + final IndexEntry midEntry = sortedIndexEntries.get(mid); + + final int comp = sortCollator.compare(token, sortLanguage.textNorm(midEntry.token, true)); + if (comp == 0) { + final int result = windBack(token, mid, sortCollator, interrupted); + return sortedIndexToToken(result); + } else if (comp < 0) { +// Log.d("THAD", "Upper bound: " + midEntry); + end = mid; + } else { +// Log.d("THAD", "Lower bound: " + midEntry); + start = mid + 1; + } + } + int result = Math.min(start, sortedIndexEntries.size() - 1); + result = windBack(token, result, sortCollator, interrupted); + if (result > 0 && sortCollator.compare(sortLanguage.textNorm(sortedIndexEntries.get(result).token, true), token) > 0) { + result = windBack(sortLanguage.textNorm(sortedIndexEntries.get(result - 1).token, true), result, sortCollator, interrupted); + } + return sortedIndexToToken(result); + } + + private final int windBack(final String token, int result, final Collator sortCollator, final AtomicBoolean interrupted) { + while (result > 0 && sortCollator.compare(sortLanguage.textNorm(sortedIndexEntries.get(result - 1).token, true), token) >= 0) { + --result; + if (interrupted.get()) { + return result; + } } + return result; } } \ No newline at end of file diff --git a/src/com/hughes/android/dictionary/Language.java b/src/com/hughes/android/dictionary/engine/Language.java similarity index 78% rename from src/com/hughes/android/dictionary/Language.java rename to src/com/hughes/android/dictionary/engine/Language.java index 9732efa..b4d8558 100755 --- a/src/com/hughes/android/dictionary/Language.java +++ b/src/com/hughes/android/dictionary/engine/Language.java @@ -1,4 +1,4 @@ -package com.hughes.android.dictionary; +package com.hughes.android.dictionary.engine; import java.util.Comparator; import java.util.LinkedHashMap; @@ -26,21 +26,21 @@ public class Language { this.sortComparator = new Comparator() { public int compare(final String s1, final String s2) { - return getSortCollator().compare(textNorm(s1), textNorm(s2)); + return getSortCollator().compare(textNorm(s1, false), textNorm(s2, false)); } }; this.findComparator = new Comparator() { public int compare(final String s1, final String s2) { - return getFindCollator().compare(textNorm(s1), textNorm(s2)); + return getFindCollator().compare(textNorm(s1, false), textNorm(s2, false)); } }; symbolToLangauge.put(symbol.toLowerCase(), this); } - public String textNorm(final String s) { - return s; + public String textNorm(final String s, final boolean toLower) { + return toLower ? s.toLowerCase() : s; } @Override @@ -78,8 +78,12 @@ public class Language { public static final Language de = new Language(Locale.GERMAN) { @Override - public String textNorm(String token) { + public String textNorm(String token, final boolean toLower) { + if (toLower) { + token = token.toLowerCase(); + } boolean sub = false; + // This is meant to be fast: occurrences of ae, oe, ue are probably rare. for (int ePos = token.indexOf('e', 1); ePos != -1; ePos = token.indexOf( 'e', ePos + 1)) { final char pre = Character.toLowerCase(token.charAt(ePos - 1)); @@ -91,6 +95,7 @@ public class Language { if (!sub) { return token; } + token = token.replaceAll("ae", "ä"); token = token.replaceAll("oe", "ö"); token = token.replaceAll("ue", "ü"); @@ -98,6 +103,11 @@ public class Language { token = token.replaceAll("Ae", "Ä"); token = token.replaceAll("Oe", "Ö"); token = token.replaceAll("Ue", "Ü"); + + token = token.replaceAll("AE", "Ä"); + token = token.replaceAll("OE", "Ö"); + token = token.replaceAll("UE", "Ü"); + return token; } }; diff --git a/src/com/hughes/android/dictionary/engine/PairEntry.java b/src/com/hughes/android/dictionary/engine/PairEntry.java index 55c32d0..1557c9b 100644 --- a/src/com/hughes/android/dictionary/engine/PairEntry.java +++ b/src/com/hughes/android/dictionary/engine/PairEntry.java @@ -17,7 +17,7 @@ public class PairEntry extends Entry implements RAFSerializable { this.lang2 = lang2; } public String toString() { - return lang1 + "\t" + lang2; + return lang1 + " :: " + lang2; } } @@ -35,7 +35,7 @@ public class PairEntry extends Entry implements RAFSerializable { } @Override public void write(RandomAccessFile raf) throws IOException { - // TODO: this couls be a short. + // TODO: this could be a short. raf.writeInt(pairs.length); for (int i = 0; i < pairs.length; ++i) { raf.writeUTF(pairs[i].lang1); @@ -82,7 +82,8 @@ public class PairEntry extends Entry implements RAFSerializable { public void print(PrintStream out) { final PairEntry pairEntry = getEntry(); for (int i = 0; i < pairEntry.pairs.length; ++i) { - out.println((i == 0 ? " " : " ") + pairEntry.pairs[i]); + out.print((i == 0 ? " " : " ") + pairEntry.pairs[i]); + out.println(); } } } diff --git a/src/com/hughes/android/dictionary/engine/RowBase.java b/src/com/hughes/android/dictionary/engine/RowBase.java index 61ff4e0..d62edf1 100644 --- a/src/com/hughes/android/dictionary/engine/RowBase.java +++ b/src/com/hughes/android/dictionary/engine/RowBase.java @@ -48,7 +48,9 @@ public abstract class RowBase extends IndexedObject { for (++r; r <= index(); ++r) { index.rows.get(r).setTokenRow(candidate); } + break; } + --r; } assert tokenRow != null; } diff --git a/src/com/hughes/android/dictionary/engine/TokenRow.java b/src/com/hughes/android/dictionary/engine/TokenRow.java index 06a1aa8..0488f0a 100644 --- a/src/com/hughes/android/dictionary/engine/TokenRow.java +++ b/src/com/hughes/android/dictionary/engine/TokenRow.java @@ -13,6 +13,10 @@ public class TokenRow extends RowBase { TokenRow(final int referenceIndex, final int thisRowIndex, final Index index) { super(referenceIndex, thisRowIndex, index); } + + public String toString() { + return getToken() + "@" + referenceIndex; + } @Override public TokenRow getTokenRow(final boolean search) { -- 2.43.0