X-Git-Url: http://gitweb.fperrin.net/?a=blobdiff_plain;f=src%2Fcom%2Fhughes%2Fandroid%2Fdictionary%2Fengine%2FIndex.java;h=ea0de2528d2aa76fe5d951e649b47ecb4db71916;hb=83d497f704ad1f8ba85190255d46a3fbe0e3c353;hp=9ac805a3872c96ca66187062c356817e281a2270;hpb=d0295ea6e7db5499efd8a71935a834069975d607;p=Dictionary.git diff --git a/src/com/hughes/android/dictionary/engine/Index.java b/src/com/hughes/android/dictionary/engine/Index.java index 9ac805a..ea0de25 100644 --- a/src/com/hughes/android/dictionary/engine/Index.java +++ b/src/com/hughes/android/dictionary/engine/Index.java @@ -29,14 +29,16 @@ import com.hughes.util.raf.RAFSerializable; import com.hughes.util.raf.RAFSerializer; import com.hughes.util.raf.SerializableSerializer; import com.hughes.util.raf.UniformRAFList; -import java.text.Collator; import com.ibm.icu.text.Transliterator; import java.io.DataInput; +import java.io.DataInputStream; import java.io.DataOutput; import java.io.IOException; import java.io.PrintStream; import java.io.RandomAccessFile; +import java.nio.channels.Channels; +import java.nio.channels.FileChannel; import java.util.AbstractList; import java.util.ArrayList; import java.util.Collection; @@ -118,9 +120,8 @@ public final class Index implements RAFSerializable { return new NormalizeComparator(normalizer(), sortLanguage.getCollator(), dict.dictFileVersion); } - public Index(final Dictionary dict, final DataInput inp) throws IOException { + public Index(final Dictionary dict, final FileChannel inp, final DataInput raf) throws IOException { this.dict = dict; - RandomAccessFile raf = (RandomAccessFile)inp; shortName = raf.readUTF(); longName = raf.readUTF(); final String languageCode = raf.readUTF(); @@ -134,8 +135,8 @@ public final class Index implements RAFSerializable { mainTokenCount = raf.readInt(); } sortedIndexEntries = CachingList.create( - RAFList.create(raf, indexEntrySerializer, raf.getFilePointer(), - dict.dictFileVersion, dict.dictInfo + " idx " + languageCode + ": "), CACHE_SIZE); + RAFList.create(inp, new IndexEntrySerializer(dict.dictFileVersion == 6 ? inp : null), inp.position(), + dict.dictFileVersion, dict.dictInfo + " idx " + languageCode + ": "), CACHE_SIZE, true); if (dict.dictFileVersion >= 7) { int count = StringUtil.readVarInt(raf); stoplist = new HashSet(count); @@ -148,8 +149,8 @@ public final class Index implements RAFSerializable { stoplist = Collections.emptySet(); } rows = CachingList.create( - UniformRAFList.create(raf, new RowBase.Serializer(this), raf.getFilePointer()), - CACHE_SIZE); + UniformRAFList.create(inp, new RowBase.Serializer(this), inp.position()), + CACHE_SIZE, true); } @Override @@ -163,7 +164,7 @@ public final class Index implements RAFSerializable { if (dict.dictFileVersion >= 2) { raf.writeInt(mainTokenCount); } - RAFList.write(raf, sortedIndexEntries, indexEntrySerializer, 32, true); + RAFList.write(raf, sortedIndexEntries, new IndexEntrySerializer(null), 32, true); StringUtil.writeVarInt(raf, stoplist.size()); for (String i : stoplist) { raf.writeUTF(i); @@ -177,17 +178,23 @@ public final class Index implements RAFSerializable { } } - private final RAFSerializer indexEntrySerializer = new RAFSerializer() { + private final class IndexEntrySerializer implements RAFSerializer { + private final FileChannel ch; + + public IndexEntrySerializer(FileChannel ch) { + this.ch = ch; + } + @Override public IndexEntry read(DataInput raf) throws IOException { - return new IndexEntry(Index.this, raf); + return new IndexEntry(Index.this, ch, raf); } @Override public void write(DataOutput raf, IndexEntry t) throws IOException { t.write(raf); } - }; + } public static final class IndexEntry implements RAFSerializable { public final String token; @@ -207,7 +214,7 @@ public final class Index implements RAFSerializable { this.htmlEntries = new ArrayList(); } - public IndexEntry(final Index index, final DataInput raf) throws IOException { + public IndexEntry(final Index index, final FileChannel ch, final DataInput raf) throws IOException { token = raf.readUTF(); if (index.dict.dictFileVersion >= 7) { startRow = StringUtil.readVarInt(raf); @@ -240,9 +247,9 @@ public final class Index implements RAFSerializable { } } else if (index.dict.dictFileVersion >= 6) { this.htmlEntries = CachingList.create( - RAFList.create((RandomAccessFile)raf, index.dict.htmlEntryIndexSerializer, - ((RandomAccessFile)raf).getFilePointer(), index.dict.dictFileVersion, - index.dict.dictInfo + " htmlEntries: "), 1); + RAFList.create(ch, index.dict.htmlEntryIndexSerializer, + ch.position(), index.dict.dictFileVersion, + index.dict.dictInfo + " htmlEntries: "), 1, false); } else { this.htmlEntries = Collections.emptyList(); } @@ -298,6 +305,20 @@ public final class Index implements RAFSerializable { return NormalizeComparator.compareWithoutDash(token, entry.normalizedToken(), sortCollator, dict.dictFileVersion); } + private int findMatchLen(final Comparator sortCollator, String a, String b) { + int start = 0; + int end = Math.min(a.length(), b.length()); + while (start < end) + { + int mid = (start + end + 1) / 2; + if (sortCollator.compare(a.substring(0, mid), b.substring(0, mid)) == 0) + start = mid; + else + end = mid - 1; + } + return start; + } + public int findInsertionPointIndex(String token, final AtomicBoolean interrupted) { token = normalizeToken(token); @@ -345,6 +366,15 @@ public final class Index implements RAFSerializable { } } + // if the word before is the better match, move + // our result to it + if (start > 0 && start < sortedIndexEntries.size()) { + String prev = sortedIndexEntries.get(start - 1).normalizedToken(); + String next = sortedIndexEntries.get(start).normalizedToken(); + if (findMatchLen(sortCollator, token, prev) >= findMatchLen(sortCollator, token, next)) + start--; + } + // If we search for a substring of a string that's in there, return // that. int result = Math.min(start, sortedIndexEntries.size() - 1);