X-Git-Url: http://gitweb.fperrin.net/?a=blobdiff_plain;f=src%2Fcom%2Fhughes%2Fandroid%2Fdictionary%2Fengine%2FIndex.java;h=b9300ba0462c9846398229ad96c561f45343fe36;hb=f8e4d0f62dc4f4fe3577c5bb03e3d8fa8a956e5b;hp=db44b41546569ee39e178511fdbf5f5bf92b4bea;hpb=35b7b7dc537441278934398a6b81009c1ec42bbf;p=Dictionary.git diff --git a/src/com/hughes/android/dictionary/engine/Index.java b/src/com/hughes/android/dictionary/engine/Index.java index db44b41..b9300ba 100644 --- a/src/com/hughes/android/dictionary/engine/Index.java +++ b/src/com/hughes/android/dictionary/engine/Index.java @@ -14,8 +14,11 @@ package com.hughes.android.dictionary.engine; +import java.io.BufferedOutputStream; import java.io.DataInput; import java.io.DataOutput; +import java.io.DataOutputStream; +import java.io.FileOutputStream; import java.io.IOException; import java.io.PrintStream; import java.io.RandomAccessFile; @@ -28,6 +31,7 @@ import java.util.EnumMap; import java.util.HashMap; import java.util.HashSet; import java.util.List; +import java.util.Locale; import java.util.Map; import java.util.Set; import java.util.concurrent.atomic.AtomicBoolean; @@ -41,13 +45,11 @@ import com.hughes.util.DataInputBuffer; import com.hughes.util.StringUtil; import com.hughes.util.TransformingList; import com.hughes.util.raf.RAFList; -import com.hughes.util.raf.RAFSerializable; import com.hughes.util.raf.RAFSerializer; -import com.hughes.util.raf.SerializableSerializer; import com.hughes.util.raf.UniformRAFList; import com.ibm.icu.text.Transliterator; -public final class Index implements RAFSerializable { +public final class Index { private static final int CACHE_SIZE = 5000; @@ -138,7 +140,15 @@ public final class Index implements RAFSerializable { stoplist.add(raf.readUTF()); } } else if (dict.dictFileVersion >= 4) { - stoplist = new SerializableSerializer>().read(raf); + stoplist = new HashSet<>(); + raf.readInt(); // length + raf.skipBytes(18); + byte b = raf.readByte(); + raf.skipBytes(b == 'L' ? 71 : 33); + while ((b = raf.readByte()) == 0x74) { + stoplist.add(raf.readUTF()); + } + if (b != 0x78) throw new IOException("Invalid data in dictionary stoplist!"); } else { stoplist = Collections.emptySet(); } @@ -147,7 +157,6 @@ public final class Index implements RAFSerializable { CACHE_SIZE, true); } - @Override public void write(final DataOutput out) throws IOException { RandomAccessFile raf = (RandomAccessFile)out; raf.writeUTF(shortName); @@ -161,7 +170,9 @@ public final class Index implements RAFSerializable { for (String i : stoplist) { raf.writeUTF(i); } - UniformRAFList.write(raf, rows, new RowBase.Serializer(this), 3 /* bytes per entry */); + DataOutputStream outb = new DataOutputStream(new BufferedOutputStream(new FileOutputStream(raf.getFD()))); + UniformRAFList.write(outb, rows, new RowBase.Serializer(this), 3 /* bytes per entry */); + outb.flush(); } public void print(final PrintStream out) { @@ -182,7 +193,7 @@ public final class Index implements RAFSerializable { } } - public static final class IndexEntry implements RAFSerializable { + public static final class IndexEntry { public final String token; private final String normalizedToken; public final int startRow; @@ -190,14 +201,14 @@ public final class Index implements RAFSerializable { public List htmlEntries; public IndexEntry(final Index index, final String token, final String normalizedToken, - final int startRow, final int numRows) { + final int startRow, final int numRows, final List htmlEntries) { assert token.equals(token.trim()); assert token.length() > 0; this.token = token; this.normalizedToken = normalizedToken; this.startRow = startRow; this.numRows = numRows; - this.htmlEntries = new ArrayList<>(); + this.htmlEntries = htmlEntries; } IndexEntry(final Index index, final DataInput raf) throws IOException { @@ -225,6 +236,7 @@ public final class Index implements RAFSerializable { public HtmlEntry get(int i) { return index.dict.htmlEntries.get(htmlEntryIndices[i]); } + @Override public int size() { return htmlEntryIndices.length; @@ -306,6 +318,7 @@ public final class Index implements RAFSerializable { } private int findInsertionPointIndex(String token, final AtomicBoolean interrupted) { + String orig_token = token; token = normalizeToken(token); int start = 0; @@ -323,7 +336,8 @@ public final class Index implements RAFSerializable { if (comp == 0) comp = sortCollator.compare(token, midEntry.normalizedToken()); if (comp == 0) { - return windBackCase(token, mid, interrupted); + start = end = mid; + break; } else if (comp < 0) { // System.out.println("Upper bound: " + midEntry + ", norm=" + // midEntry.normalizedToken() + ", mid=" + mid); @@ -360,6 +374,23 @@ public final class Index implements RAFSerializable { start--; } + // If the search term was normalized, try to find an exact match first + if (!orig_token.equalsIgnoreCase(token)) { + int matchLen = findMatchLen(sortCollator, token, sortedIndexEntries.get(start).normalizedToken()); + int scan = start; + while (scan >= 0 && scan < sortedIndexEntries.size()) { + IndexEntry e = sortedIndexEntries.get(scan); + if (e.token.equalsIgnoreCase(orig_token)) + { + return scan; + } + if (matchLen > findMatchLen(sortCollator, token, e.normalizedToken())) + break; + if (interrupted.get()) return start; + scan++; + } + } + // If we search for a substring of a string that's in there, return // that. int result = Math.min(start, sortedIndexEntries.size() - 1); @@ -367,7 +398,7 @@ public final class Index implements RAFSerializable { return result; } - private final int windBackCase(final String token, int result, final AtomicBoolean interrupted) { + private int windBackCase(final String token, int result, final AtomicBoolean interrupted) { while (result > 0 && sortedIndexEntries.get(result - 1).normalizedToken().equals(token)) { --result; if (interrupted.get()) { @@ -385,8 +416,8 @@ public final class Index implements RAFSerializable { private final Map prefixToNumRows = new HashMap<>(); - private synchronized final int getUpperBoundOnRowsStartingWith(final String normalizedPrefix, - final int maxRows, final AtomicBoolean interrupted) { + private synchronized int getUpperBoundOnRowsStartingWith(final String normalizedPrefix, + final int maxRows, final AtomicBoolean interrupted) { final Integer numRows = prefixToNumRows.get(normalizedPrefix); if (numRows != null) { return numRows; @@ -539,7 +570,7 @@ public final class Index implements RAFSerializable { return normalizer.transliterate(searchToken); } else { // Do our best since the Transliterators aren't up yet. - return searchToken.toLowerCase(); + return searchToken.toLowerCase(Locale.US); } }