package com.hughes.android.dictionary.engine;
+import java.io.BufferedOutputStream;
import java.io.DataInput;
import java.io.DataOutput;
+import java.io.DataOutputStream;
+import java.io.FileOutputStream;
import java.io.IOException;
import java.io.PrintStream;
import java.io.RandomAccessFile;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
+import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.atomic.AtomicBoolean;
import com.hughes.util.StringUtil;
import com.hughes.util.TransformingList;
import com.hughes.util.raf.RAFList;
-import com.hughes.util.raf.RAFSerializable;
import com.hughes.util.raf.RAFSerializer;
-import com.hughes.util.raf.SerializableSerializer;
import com.hughes.util.raf.UniformRAFList;
import com.ibm.icu.text.Transliterator;
-public final class Index implements RAFSerializable<Index> {
+public final class Index {
private static final int CACHE_SIZE = 5000;
stoplist.add(raf.readUTF());
}
} else if (dict.dictFileVersion >= 4) {
- stoplist = new SerializableSerializer<Set<String>>().read(raf);
+ stoplist = new HashSet<>();
+ raf.readInt(); // length
+ raf.skipBytes(18);
+ byte b = raf.readByte();
+ raf.skipBytes(b == 'L' ? 71 : 33);
+ while ((b = raf.readByte()) == 0x74) {
+ stoplist.add(raf.readUTF());
+ }
+ if (b != 0x78) throw new IOException("Invalid data in dictionary stoplist!");
} else {
stoplist = Collections.emptySet();
}
CACHE_SIZE, true);
}
/**
 * Writes this index to {@code out}.
 *
 * <p>{@code out} is cast to {@link RandomAccessFile} without a type check, so passing any
 * other {@link DataOutput} implementation fails with a {@link ClassCastException}.
 *
 * @param out destination; must actually be a {@link RandomAccessFile}
 * @throws IOException if one of the underlying writes fails
 */
- @Override
public void write(final DataOutput out) throws IOException {
RandomAccessFile raf = (RandomAccessFile)out;
raf.writeUTF(shortName);
// Each stoplist entry is written as its own UTF string.
for (String i : stoplist) {
raf.writeUTF(i);
}
- UniformRAFList.write(raf, rows, new RowBase.Serializer(this), 3 /* bytes per entry */);
// The rows go through a buffered stream layered on raf's own file descriptor
// rather than through raf directly.
+ DataOutputStream outb = new DataOutputStream(new BufferedOutputStream(new FileOutputStream(raf.getFD())));
+ UniformRAFList.write(outb, rows, new RowBase.Serializer(this), 3 /* bytes per entry */);
// Flushed, not closed, so the buffered bytes reach the file before this method returns.
// NOTE(review): presumably left unclosed because closing outb would also close
// the descriptor that raf still uses - confirm.
+ outb.flush();
}
public void print(final PrintStream out) {
}
}
- public static final class IndexEntry implements RAFSerializable<Index.IndexEntry> {
+ public static final class IndexEntry {
public final String token;
private final String normalizedToken;
public final int startRow;
public List<HtmlEntry> htmlEntries;
/**
 * Creates an entry from values supplied by the caller.
 *
 * @param index not read by any statement in this constructor
 * @param token the entry's token; expected to be non-empty and already trimmed
 *        (enforced only by {@code assert}, so unchecked when assertions are disabled)
 * @param normalizedToken stored as given
 * @param startRow stored as given
 * @param numRows stored as given
 * @param htmlEntries stored by reference; no defensive copy is made
 */
public IndexEntry(final Index index, final String token, final String normalizedToken,
- final int startRow, final int numRows) {
+ final int startRow, final int numRows, final List<HtmlEntry> htmlEntries) {
// Sanity checks only; these are skipped unless the JVM runs with assertions enabled.
assert token.equals(token.trim());
assert token.length() > 0;
this.token = token;
this.normalizedToken = normalizedToken;
this.startRow = startRow;
this.numRows = numRows;
- this.htmlEntries = new ArrayList<>();
// The caller's list instance is kept as-is.
+ this.htmlEntries = htmlEntries;
}
IndexEntry(final Index index, final DataInput raf) throws IOException {
public HtmlEntry get(int i) {
// Position i is translated through htmlEntryIndices into an element of index.dict.htmlEntries.
return index.dict.htmlEntries.get(htmlEntryIndices[i]);
}
+
/** Returns the number of indices held in {@code htmlEntryIndices}. */
@Override
public int size() {
return htmlEntryIndices.length;
}
private int findInsertionPointIndex(String token, final AtomicBoolean interrupted) {
+ String orig_token = token;
token = normalizeToken(token);
int start = 0;
if (comp == 0)
comp = sortCollator.compare(token, midEntry.normalizedToken());
if (comp == 0) {
- return windBackCase(token, mid, interrupted);
+ start = end = mid;
+ break;
} else if (comp < 0) {
// System.out.println("Upper bound: " + midEntry + ", norm=" +
// midEntry.normalizedToken() + ", mid=" + mid);
start--;
}
+ // If the search term was normalized, try to find an exact match first
+ if (!orig_token.equalsIgnoreCase(token)) {
+ int matchLen = findMatchLen(sortCollator, token, sortedIndexEntries.get(start).normalizedToken());
+ int scan = start;
+ while (scan >= 0 && scan < sortedIndexEntries.size()) {
+ IndexEntry e = sortedIndexEntries.get(scan);
+ if (e.token.equalsIgnoreCase(orig_token))
+ {
+ return scan;
+ }
+ if (matchLen > findMatchLen(sortCollator, token, e.normalizedToken()))
+ break;
+ if (interrupted.get()) return start;
+ scan++;
+ }
+ }
+
// If we search for a substring of a string that's in there, return
// that.
int result = Math.min(start, sortedIndexEntries.size() - 1);
return result;
}
- private final int windBackCase(final String token, int result, final AtomicBoolean interrupted) {
+ private int windBackCase(final String token, int result, final AtomicBoolean interrupted) {
while (result > 0 && sortedIndexEntries.get(result - 1).normalizedToken().equals(token)) {
--result;
if (interrupted.get()) {
private final Map<String, Integer> prefixToNumRows = new HashMap<>();
- private synchronized final int getUpperBoundOnRowsStartingWith(final String normalizedPrefix,
- final int maxRows, final AtomicBoolean interrupted) {
+ private synchronized int getUpperBoundOnRowsStartingWith(final String normalizedPrefix,
+ final int maxRows, final AtomicBoolean interrupted) {
final Integer numRows = prefixToNumRows.get(normalizedPrefix);
if (numRows != null) {
return numRows;
return normalizer.transliterate(searchToken);
} else {
// Do our best since the Transliterators aren't up yet.
- return searchToken.toLowerCase();
+ return searchToken.toLowerCase(Locale.US);
}
}