}
sortedIndexEntries = CachingList.create(
RAFList.create(inp, new IndexEntrySerializer(dict.dictFileVersion == 6 ? inp : null), inp.position(),
- dict.dictFileVersion, dict.dictInfo + " idx " + languageCode + ": "), CACHE_SIZE);
+ dict.dictFileVersion, dict.dictInfo + " idx " + languageCode + ": "), CACHE_SIZE, true);
if (dict.dictFileVersion >= 7) {
int count = StringUtil.readVarInt(raf);
stoplist = new HashSet<String>(count);
}
rows = CachingList.create(
UniformRAFList.create(inp, new RowBase.Serializer(this), inp.position()),
- CACHE_SIZE);
+ CACHE_SIZE, true);
}
/** Writes the entry {@code t} to {@code raf} by delegating to {@code t.write(raf)}. */
@Override
public void write(DataOutput raf, IndexEntry t) throws IOException {
t.write(raf);
}
- };
+ }
public static final class IndexEntry implements RAFSerializable<Index.IndexEntry> {
public final String token;
this.htmlEntries = CachingList.create(
RAFList.create(ch, index.dict.htmlEntryIndexSerializer,
ch.position(), index.dict.dictFileVersion,
- index.dict.dictInfo + " htmlEntries: "), 1);
+ index.dict.dictInfo + " htmlEntries: "), 1, false);
} else {
this.htmlEntries = Collections.emptyList();
}
return NormalizeComparator.compareWithoutDash(token, entry.normalizedToken(), sortCollator, dict.dictFileVersion);
}
+ private int findMatchLen(final Comparator sortCollator, String a, String b) {
+ int start = 0;
+ int end = Math.min(a.length(), b.length());
+ while (start < end)
+ {
+ int mid = (start + end + 1) / 2;
+ if (sortCollator.compare(a.substring(0, mid), b.substring(0, mid)) == 0)
+ start = mid;
+ else
+ end = mid - 1;
+ }
+ return start;
+ }
+
public int findInsertionPointIndex(String token, final AtomicBoolean interrupted) {
token = normalizeToken(token);
}
}
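+ // If the preceding entry shares at least as long a matching prefix with
+ // the token as the entry at the insertion point (ties included), move
+ // our result back to it.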
+ if (start > 0 && start < sortedIndexEntries.size()) {
+ String prev = sortedIndexEntries.get(start - 1).normalizedToken();
+ String next = sortedIndexEntries.get(start).normalizedToken();
+ if (findMatchLen(sortCollator, token, prev) >= findMatchLen(sortCollator, token, next))
+ start--;
+ }
+
// If we search for a substring of a string that's in there, return
// that.
int result = Math.min(start, sortedIndexEntries.size() - 1);