import com.hughes.util.raf.RAFSerializer;
import com.hughes.util.raf.SerializableSerializer;
import com.hughes.util.raf.UniformRAFList;
-import com.ibm.icu.text.Collator;
+import java.text.Collator;
import com.ibm.icu.text.Transliterator;
import java.io.DataInput;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
+import java.util.Comparator;
import java.util.EnumMap;
import java.util.HashSet;
import java.util.LinkedHashMap;
* normalizations.
*/
public NormalizeComparator getSortComparator() {
// Patch note: the removed (-) line constructs the NormalizeComparator from normalizer() and
// sortLanguage.getCollator() only; the added (+) line passes dict.dictFileVersion as a third
// constructor argument. NOTE(review): presumably the comparator's ordering depends on the
// dictionary file version — confirm against NormalizeComparator.
- return new NormalizeComparator(normalizer(), sortLanguage.getCollator());
+ return new NormalizeComparator(normalizer(), sortLanguage.getCollator(), dict.dictFileVersion);
}
public Index(final Dictionary dict, final DataInput inp) throws IOException {
mainTokenCount = raf.readInt();
}
sortedIndexEntries = CachingList.create(
- RAFList.create(raf, indexEntrySerializer, raf.getFilePointer(), dict.dictFileVersion,
- dict.dictFileVersion >= 7 ? 16 : 1, dict.dictFileVersion >= 7), CACHE_SIZE);
- if (dict.dictFileVersion >= 4) {
+ RAFList.create(raf, indexEntrySerializer, raf.getFilePointer(),
+ dict.dictFileVersion), CACHE_SIZE);
+ if (dict.dictFileVersion >= 7) {
+ int count = StringUtil.readVarInt(raf);
+ stoplist = new HashSet<String>(count);
+ for (int i = 0; i < count; ++i) {
+ stoplist.add(raf.readUTF());
+ }
+ } else if (dict.dictFileVersion >= 4) {
stoplist = new SerializableSerializer<Set<String>>().read(raf);
} else {
stoplist = Collections.emptySet();
if (dict.dictFileVersion >= 2) {
raf.writeInt(mainTokenCount);
}
- RAFList.write(raf, sortedIndexEntries, indexEntrySerializer, 16, true);
- new SerializableSerializer<Set<String>>().write(raf, stoplist);
+ RAFList.write(raf, sortedIndexEntries, indexEntrySerializer, 32, true);
+ StringUtil.writeVarInt(raf, stoplist.size());
+ for (String i : stoplist) {
+ raf.writeUTF(i);
+ }
UniformRAFList.write(raf, rows, new RowBase.Serializer(this), 3 /* bytes per entry */);
}
int start = 0;
int end = sortedIndexEntries.size();
- final Collator sortCollator = sortLanguage.getCollator();
+ final Comparator sortCollator = sortLanguage.getCollator();
while (start < end) {
final int mid = (start + end) / 2;
if (interrupted.get()) {
}
final IndexEntry midEntry = sortedIndexEntries.get(mid);
- final int comp = sortCollator.compare(token, midEntry.normalizedToken());
+ int comp = NormalizeComparator.compareWithoutLeadingDash(token, midEntry.normalizedToken(), sortCollator, dict.dictFileVersion);
+ if (comp == 0)
+ comp = sortCollator.compare(token, midEntry.normalizedToken());
if (comp == 0) {
final int result = windBackCase(token, mid, interrupted);
return result;