package com.hughes.android.dictionary.engine;
+import java.io.BufferedOutputStream;
import java.io.DataInput;
import java.io.DataOutput;
+import java.io.DataOutputStream;
+import java.io.FileOutputStream;
import java.io.IOException;
import java.io.PrintStream;
import java.io.RandomAccessFile;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
+import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.atomic.AtomicBoolean;
stoplist.add(raf.readUTF());
}
} else if (dict.dictFileVersion >= 4) {
- raf.readInt(); // length
- raf.skipBytes(52);
stoplist = new HashSet<>();
- byte b;
+ raf.readInt(); // length
+ raf.skipBytes(18);
+ byte b = raf.readByte();
+ raf.skipBytes(b == 'L' ? 71 : 33);
while ((b = raf.readByte()) == 0x74) {
stoplist.add(raf.readUTF());
}
for (String i : stoplist) {
raf.writeUTF(i);
}
- UniformRAFList.write(raf, rows, new RowBase.Serializer(this), 3 /* bytes per entry */);
+ DataOutputStream outb = new DataOutputStream(new BufferedOutputStream(new FileOutputStream(raf.getFD())));
+ UniformRAFList.write(outb, rows, new RowBase.Serializer(this), 3 /* bytes per entry */);
+ outb.flush();
}
public void print(final PrintStream out) {
public List<HtmlEntry> htmlEntries;
/**
 * Creates an index entry directly from its field values.
 *
 * <p>With assertions enabled, the token must already be trimmed and must
 * not be empty.
 *
 * @param index           not referenced anywhere in the visible constructor body
 * @param token           the entry's token; stored as-is
 * @param normalizedToken the normalized form of the token; stored as-is
 * @param startRow        stored as-is
 * @param numRows         stored as-is
 * @param htmlEntries     (present only in the "+" variant of the signature)
 *                        stored by reference, without copying; the "-" variant
 *                        created a fresh empty ArrayList instead
 */
public IndexEntry(final Index index, final String token, final String normalizedToken,
- final int startRow, final int numRows) {
+ final int startRow, final int numRows, final List<HtmlEntry> htmlEntries) {
// Sanity checks only: these do nothing unless the JVM runs with -ea.
assert token.equals(token.trim());
assert token.length() > 0;
this.token = token;
this.normalizedToken = normalizedToken;
this.startRow = startRow;
this.numRows = numRows;
- this.htmlEntries = new ArrayList<>();
// NOTE(review): the caller's list is now shared with this entry; a null
// argument would leave htmlEntries null — confirm that callers never pass null.
+ this.htmlEntries = htmlEntries;
}
IndexEntry(final Index index, final DataInput raf) throws IOException {
}
private int findInsertionPointIndex(String token, final AtomicBoolean interrupted) {
+ String orig_token = token;
token = normalizeToken(token);
int start = 0;
if (comp == 0)
comp = sortCollator.compare(token, midEntry.normalizedToken());
if (comp == 0) {
- return windBackCase(token, mid, interrupted);
+ start = end = mid;
+ break;
} else if (comp < 0) {
// System.out.println("Upper bound: " + midEntry + ", norm=" +
// midEntry.normalizedToken() + ", mid=" + mid);
start--;
}
+ // If the search term was normalized, try to find an exact match first
// Runs only when normalization changed the token by more than letter case.
// Starting at `start`, walks forward through the sorted entries and returns
// the first one whose raw token equals the original search term (ignoring
// case). The walk stops as soon as an entry shares less with the normalized
// token (per findMatchLen) than the entry at `start` does.
+ if (!orig_token.equalsIgnoreCase(token)) {
// NOTE(review): `start` is used as an index here, before the
// Math.min(start, sortedIndexEntries.size() - 1) clamp that follows this
// block. If `start` can equal sortedIndexEntries.size() (or the list is
// empty), this get(start) would throw IndexOutOfBoundsException — confirm
// against the search loop above.
+ int matchLen = findMatchLen(sortCollator, token, sortedIndexEntries.get(start).normalizedToken());
+ int scan = start;
// `scan` is only ever incremented below, so the upper bound is the
// effective loop limit.
+ while (scan >= 0 && scan < sortedIndexEntries.size()) {
+ IndexEntry e = sortedIndexEntries.get(scan);
+ if (e.token.equalsIgnoreCase(orig_token))
+ {
+ return scan;
+ }
+ if (matchLen > findMatchLen(sortCollator, token, e.normalizedToken()))
+ break;
// On interruption, give up on the exact match and return the
// insertion point found so far.
+ if (interrupted.get()) return start;
+ scan++;
+ }
+ }
+
// If we search for a substring of a string that's in there, return
// that.
int result = Math.min(start, sortedIndexEntries.size() - 1);
return normalizer.transliterate(searchToken);
} else {
// Do our best since the Transliterators aren't up yet.
- return searchToken.toLowerCase();
+ return searchToken.toLowerCase(Locale.US);
}
}