// limitations under the License.
/**
- *
+ *
*/
package com.hughes.android.dictionary.engine;
import com.hughes.android.dictionary.DictionaryInfo.IndexInfo;
import com.hughes.android.dictionary.engine.RowBase.RowKey;
import com.hughes.util.CachingList;
+import com.hughes.util.StringUtil;
import com.hughes.util.TransformingList;
import com.hughes.util.raf.RAFList;
import com.hughes.util.raf.RAFSerializable;
import com.hughes.util.raf.RAFSerializer;
import com.hughes.util.raf.SerializableSerializer;
import com.hughes.util.raf.UniformRAFList;
-import com.ibm.icu.text.Collator;
+import java.text.Collator;
import com.ibm.icu.text.Transliterator;
+import java.io.DataInput;
+import java.io.DataOutput;
import java.io.IOException;
import java.io.PrintStream;
import java.io.RandomAccessFile;
+import java.util.AbstractList;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
+import java.util.Comparator;
import java.util.EnumMap;
import java.util.HashSet;
-import java.util.LinkedHashMap;
-import java.util.LinkedHashSet;
+import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
*/
public synchronized Transliterator normalizer() {
// Lazy initialization: the transliterator is obtained on the first call and the
// cached field is returned afterwards. The method is synchronized, so callers on
// the same Index instance are serialized around the null check and assignment.
if (normalizer == null) {
- normalizer = Transliterator
- .createFromRules("", normalizerRules, Transliterator.FORWARD);
// NOTE(review): TransliteratorManager is not visible here; presumably it caches or
// shares instances per rule string instead of building a new one - confirm.
+ normalizer = TransliteratorManager.get(normalizerRules);
}
return normalizer;
}
* normalizations.
*/
public NormalizeComparator getSortComparator() {
// A new NormalizeComparator is constructed on every call from the (lazily created)
// normalizer and the sort language's collator. The added version of this line also
// passes the dictionary file version as a third constructor argument.
- return new NormalizeComparator(normalizer(), sortLanguage.getCollator());
+ return new NormalizeComparator(normalizer(), sortLanguage.getCollator(), dict.dictFileVersion);
}
- public Index(final Dictionary dict, final RandomAccessFile raf) throws IOException {
+ public Index(final Dictionary dict, final DataInput inp) throws IOException {
this.dict = dict;
+ RandomAccessFile raf = (RandomAccessFile)inp;
shortName = raf.readUTF();
longName = raf.readUTF();
final String languageCode = raf.readUTF();
mainTokenCount = raf.readInt();
}
sortedIndexEntries = CachingList.create(
- RAFList.create(raf, indexEntrySerializer, raf.getFilePointer()), CACHE_SIZE);
- if (dict.dictFileVersion >= 4) {
+ RAFList.create(raf, indexEntrySerializer, raf.getFilePointer(),
+ dict.dictFileVersion, dict.dictInfo + " idx " + languageCode + ": "), CACHE_SIZE);
+ if (dict.dictFileVersion >= 7) {
+ int count = StringUtil.readVarInt(raf);
+ stoplist = new HashSet<String>(count);
+ for (int i = 0; i < count; ++i) {
+ stoplist.add(raf.readUTF());
+ }
+ } else if (dict.dictFileVersion >= 4) {
stoplist = new SerializableSerializer<Set<String>>().read(raf);
} else {
stoplist = Collections.emptySet();
}
/**
* Serializes this index in order: short name, long name, the sort language's
* ISO code, the main token count (only when dictFileVersion >= 2), the sorted
* index entries, the stoplist, and finally the rows.
*
* @param out destination of the serialized index; it is cast to RandomAccessFile
* below, so passing any other DataOutput implementation fails with a
* ClassCastException
* @throws IOException if writing to the underlying file fails
*/
@Override
- public void write(final RandomAccessFile raf) throws IOException {
+ public void write(final DataOutput out) throws IOException {
// NOTE(review): the signature accepts any DataOutput, but the body still needs a
// RandomAccessFile (RAFList/UniformRAFList are given raf); the cast is unchecked.
+ RandomAccessFile raf = (RandomAccessFile)out;
raf.writeUTF(shortName);
raf.writeUTF(longName);
raf.writeUTF(sortLanguage.getIsoCode());
if (dict.dictFileVersion >= 2) {
raf.writeInt(mainTokenCount);
}
- RAFList.write(raf, sortedIndexEntries, indexEntrySerializer);
- new SerializableSerializer<Set<String>>().write(raf, stoplist);
- UniformRAFList.write(raf, (Collection<RowBase>) rows, new RowBase.Serializer(this), 5 /*
- * bytes
- * per
- * entry
- */);
// NOTE(review): the meaning of the extra arguments 32 and true is defined by
// RAFList.write, which is not visible here.
+ RAFList.write(raf, sortedIndexEntries, indexEntrySerializer, 32, true);
// The stoplist is stored as a var-int element count followed by one UTF string per
// element. The reading constructor in this file only parses that layout when
// dictFileVersion >= 7, and this method emits it unconditionally.
// NOTE(review): presumably writing is only ever done at version >= 7 - confirm.
+ StringUtil.writeVarInt(raf, stoplist.size());
+ for (String i : stoplist) {
+ raf.writeUTF(i);
+ }
+ UniformRAFList.write(raf, rows, new RowBase.Serializer(this), 3 /* bytes per entry */);
}
public void print(final PrintStream out) {
// Serializer adapter for IndexEntry values: reading constructs a new IndexEntry
// bound to the enclosing Index, and writing delegates to IndexEntry.write.
private final RAFSerializer<IndexEntry> indexEntrySerializer = new RAFSerializer<IndexEntry>() {
@Override
- public IndexEntry read(RandomAccessFile raf) throws IOException {
+ public IndexEntry read(DataInput raf) throws IOException {
// Index.this is passed so the entry can consult the owning dictionary while parsing.
return new IndexEntry(Index.this, raf);
}
@Override
- public void write(RandomAccessFile raf, IndexEntry t) throws IOException {
+ public void write(DataOutput raf, IndexEntry t) throws IOException {
t.write(raf);
}
};
public static final class IndexEntry implements RAFSerializable<Index.IndexEntry> {
- private final Index index;
public final String token;
private final String normalizedToken;
public final int startRow;
public final int numRows; // doesn't count the token row!
- public final List<HtmlEntry> htmlEntries;
+ public List<HtmlEntry> htmlEntries;
public IndexEntry(final Index index, final String token, final String normalizedToken,
final int startRow, final int numRows) {
- this.index = index;
assert token.equals(token.trim());
assert token.length() > 0;
this.token = token;
this.htmlEntries = new ArrayList<HtmlEntry>();
}
/**
* Deserializing constructor. The layout that is read depends on
* {@code index.dict.dictFileVersion}:
* version >= 7 reads startRow, numRows and the HTML entry indices as var-ints;
* version 6 reads fixed-width ints and an RAFList of HTML entry indices;
* older versions read fixed-width ints and get an empty HTML entry list.
*
* @param index the owning index; used to reach its dictionary (file version and
* HTML entries)
* @param raf source of the serialized entry
* @throws IOException if reading from {@code raf} fails
*/
- public IndexEntry(final Index index, final RandomAccessFile raf) throws IOException {
- this.index = index;
+ public IndexEntry(final Index index, final DataInput raf) throws IOException {
token = raf.readUTF();
- startRow = raf.readInt();
- numRows = raf.readInt();
+ if (index.dict.dictFileVersion >= 7) {
+ startRow = StringUtil.readVarInt(raf);
+ numRows = StringUtil.readVarInt(raf);
+ } else {
+ startRow = raf.readInt();
+ numRows = raf.readInt();
+ }
// The normalized token is only stored when it differs from the token itself;
// otherwise the token doubles as its own normalized form.
final boolean hasNormalizedForm = raf.readBoolean();
normalizedToken = hasNormalizedForm ? raf.readUTF() : token;
- if (index.dict.dictFileVersion >= 6) {
+ if (index.dict.dictFileVersion >= 7) {
+ int size = StringUtil.readVarInt(raf);
+ if (size == 0) {
+ this.htmlEntries = Collections.emptyList();
+ } else {
// Only the integer indices are read eagerly; each HtmlEntry is looked up in
// index.dict.htmlEntries when the list element is accessed.
+ final int[] htmlEntryIndices = new int[size];
+ for (int i = 0; i < size; ++i) {
+ htmlEntryIndices[i] = StringUtil.readVarInt(raf);
+ }
// The anonymous list captures the index parameter (the index field is removed by
// this change) and overrides only get/size, so it is a read-only view.
+ this.htmlEntries = new AbstractList<HtmlEntry>() {
+ @Override
+ public HtmlEntry get(int i) {
+ return index.dict.htmlEntries.get(htmlEntryIndices[i]);
+ }
+ @Override
+ public int size() {
+ return htmlEntryIndices.length;
+ }
+ };
+ }
+ } else if (index.dict.dictFileVersion >= 6) {
// Version 6 path: RAFList needs a file pointer, so raf is cast to RandomAccessFile;
// a different DataInput implementation would fail here with ClassCastException.
this.htmlEntries = CachingList.create(
- RAFList.create(raf, index.dict.htmlEntryIndexSerializer,
- raf.getFilePointer()), 1);
+ RAFList.create((RandomAccessFile)raf, index.dict.htmlEntryIndexSerializer,
+ ((RandomAccessFile)raf).getFilePointer(), index.dict.dictFileVersion,
+ index.dict.dictInfo + " htmlEntries: "), 1);
} else {
this.htmlEntries = Collections.emptyList();
}
}
/**
* Writes this entry: the token, startRow and numRows as var-ints, a boolean
* telling whether a distinct normalized token follows, that normalized token
* when present, then the number of HTML entries followed by each entry's
* index, all as var-ints.
*
* NOTE(review): this matches the layout the deserializing constructor reads for
* dictFileVersion >= 7; no older layout is emitted here - confirm that entries
* are only written at that version.
*
* @param raf destination of the serialized entry
* @throws IOException if writing fails
*/
- public void write(RandomAccessFile raf) throws IOException {
+ public void write(DataOutput raf) throws IOException {
raf.writeUTF(token);
- raf.writeInt(startRow);
- raf.writeInt(numRows);
+ StringUtil.writeVarInt(raf, startRow);
+ StringUtil.writeVarInt(raf, numRows);
// Store the normalized token only when it differs from the token.
final boolean hasNormalizedForm = !token.equals(normalizedToken);
raf.writeBoolean(hasNormalizedForm);
if (hasNormalizedForm) {
raf.writeUTF(normalizedToken);
}
- RAFList.write(raf, htmlEntries, index.dict.htmlEntryIndexSerializer);
// HTML entries are referenced by their index() value, not serialized inline.
+ StringUtil.writeVarInt(raf, htmlEntries.size());
+ for (HtmlEntry e : htmlEntries)
+ StringUtil.writeVarInt(raf, e.index());
}
public String toString() {
int start = 0;
int end = sortedIndexEntries.size();
- final Collator sortCollator = sortLanguage.getCollator();
+ final Comparator sortCollator = sortLanguage.getCollator();
while (start < end) {
final int mid = (start + end) / 2;
if (interrupted.get()) {
}
final IndexEntry midEntry = sortedIndexEntries.get(mid);
- final int comp = sortCollator.compare(token, midEntry.normalizedToken());
+ int comp = NormalizeComparator.compareWithoutDash(token, midEntry.normalizedToken(), sortCollator, dict.dictFileVersion);
+ if (comp == 0)
+ comp = sortCollator.compare(token, midEntry.normalizedToken());
if (comp == 0) {
final int result = windBackCase(token, mid, interrupted);
return result;
private static final int MAX_SEARCH_ROWS = 1000;
- private final Map<String, Integer> prefixToNumRows = new LinkedHashMap<String, Integer>();
+ private final Map<String, Integer> prefixToNumRows = new HashMap<String, Integer>();
private synchronized final int getUpperBoundOnRowsStartingWith(final String normalizedPrefix,
final int maxRows, final AtomicBoolean interrupted) {
final long startMills = System.currentTimeMillis();
final List<RowBase> result = new ArrayList<RowBase>();
- final Set<String> normalizedNonStoplist = new LinkedHashSet<String>();
+ final Set<String> normalizedNonStoplist = new HashSet<String>();
String bestPrefix = null;
int leastRows = Integer.MAX_VALUE;