*/
package com.hughes.android.dictionary.engine;
+import com.hughes.android.dictionary.DictionaryInfo;
+import com.hughes.android.dictionary.DictionaryInfo.IndexInfo;
+import com.hughes.android.dictionary.engine.RowBase.RowKey;
+import com.hughes.util.CachingList;
+import com.hughes.util.CollectionUtil;
+import com.hughes.util.TransformingList;
+import com.hughes.util.raf.RAFList;
+import com.hughes.util.raf.RAFSerializable;
+import com.hughes.util.raf.RAFSerializer;
+import com.hughes.util.raf.SerializableSerializer;
+import com.hughes.util.raf.UniformRAFList;
+import com.ibm.icu.text.Collator;
+import com.ibm.icu.text.Transliterator;
+
import java.io.IOException;
import java.io.PrintStream;
import java.io.RandomAccessFile;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.regex.Pattern;
-import com.hughes.android.dictionary.DictionaryInfo;
-import com.hughes.android.dictionary.DictionaryInfo.IndexInfo;
-import com.hughes.android.dictionary.engine.RowBase.RowKey;
-import com.hughes.util.CachingList;
-import com.hughes.util.TransformingList;
-import com.hughes.util.raf.RAFList;
-import com.hughes.util.raf.RAFSerializable;
-import com.hughes.util.raf.RAFSerializer;
-import com.hughes.util.raf.SerializableSerializer;
-import com.hughes.util.raf.UniformRAFList;
-import com.ibm.icu.text.Collator;
-import com.ibm.icu.text.Transliterator;
-
public final class Index implements RAFSerializable<Index> {
static final int CACHE_SIZE = 5000;
- final Dictionary dict;
+ public final Dictionary dict;
public final String shortName; // Typically the ISO code for the language.
public final String longName;
if (dict.dictFileVersion >= 2) {
mainTokenCount = raf.readInt();
}
- sortedIndexEntries = CachingList.create(RAFList.create(raf, IndexEntry.SERIALIZER, raf.getFilePointer()), CACHE_SIZE);
+ sortedIndexEntries = CachingList.create(RAFList.create(raf, indexEntrySerializer, raf.getFilePointer()), CACHE_SIZE);
if (dict.dictFileVersion >= 4) {
stoplist = new SerializableSerializer<Set<String>>().read(raf);
} else {
if (dict.dictFileVersion >= 2) {
raf.writeInt(mainTokenCount);
}
- RAFList.write(raf, sortedIndexEntries, IndexEntry.SERIALIZER);
+ RAFList.write(raf, sortedIndexEntries, indexEntrySerializer);
new SerializableSerializer<Set<String>>().write(raf, stoplist);
UniformRAFList.write(raf, (Collection<RowBase>) rows, new RowBase.Serializer(this), 5 /* bytes per entry */);
}
}
}
- public static final class IndexEntry implements RAFSerializable<Index.IndexEntry> {
- public final String token;
- private final String normalizedToken;
- public final int startRow;
- public final int numRows; // doesn't count the token row!
-
-
- static final RAFSerializer<IndexEntry> SERIALIZER = new RAFSerializer<IndexEntry> () {
+ private final RAFSerializer<IndexEntry> indexEntrySerializer = new RAFSerializer<IndexEntry> () { // Instance-level (non-static) serializer for IndexEntry, so read() can refer to the enclosing Index.
@Override
public IndexEntry read(RandomAccessFile raf) throws IOException {
- return new IndexEntry(raf);
+ return new IndexEntry(Index.this, raf); // Passes the enclosing Index along with the file to the deserializing constructor.
}
@Override
public void write(RandomAccessFile raf, IndexEntry t) throws IOException {
t.write(raf); // Serialization is delegated to the entry's own write(raf).
}};
- public IndexEntry(final String token, final String normalizedToken, final int startRow, final int numRows) {
+
+ public static final class IndexEntry implements RAFSerializable<Index.IndexEntry> {
+ private final Index index; // The Index handed to the constructor; write() reaches index.dict.htmlEntryIndexSerializer through it.
+ public final String token;
+ private final String normalizedToken; // When read from a file, this is token itself if no separate normalized form was stored.
+ public final int startRow;
+ public final int numRows; // doesn't count the token row!
+ public final List<HtmlEntry> htmlEntries; // A new ArrayList for in-memory entries; for entries read from a file, a cached RAF-backed list (dictFileVersion >= 6) or Collections.emptyList() otherwise.
+
+
+ public IndexEntry(final Index index, final String token, final String normalizedToken, final int startRow, final int numRows) { // In-memory constructor: all field values are supplied by the caller.
+ this.index = index; // Kept so write() can look up index.dict.htmlEntryIndexSerializer.
assert token.equals(token.trim()); // Token must have no leading/trailing whitespace (only checked when assertions are enabled).
assert token.length() > 0; // Token must be non-empty (only checked when assertions are enabled).
this.token = token;
this.normalizedToken = normalizedToken;
this.startRow = startRow;
this.numRows = numRows;
+ this.htmlEntries = new ArrayList<HtmlEntry>(); // Starts out as an empty, mutable list.
}
- public IndexEntry(final RandomAccessFile raf) throws IOException {
+ public IndexEntry(final Index index, final RandomAccessFile raf) throws IOException { // Deserializing constructor: reads the entry from raf starting at its current file pointer.
+ this.index = index;
token = raf.readUTF(); // Read order: token, startRow, numRows, normalized-form flag, optional normalizedToken, then (version >= 6) the html entry list.
startRow = raf.readInt();
numRows = raf.readInt();
final boolean hasNormalizedForm = raf.readBoolean();
normalizedToken = hasNormalizedForm ? raf.readUTF() : token; // No separate normalized form stored: reuse token.
+ if (index.dict.dictFileVersion >= 6) { // The html entry list is only read for dictionary file version 6 and above.
+ this.htmlEntries = CachingList.create(RAFList.create(raf, index.dict.htmlEntryIndexSerializer, raf.getFilePointer()), 1); // RAF-backed list created at the current file offset, wrapped in a CachingList created with size argument 1.
+ } else {
+ this.htmlEntries = Collections.emptyList(); // Older file versions: the entry gets an empty list.
+ }
}
public void write(RandomAccessFile raf) throws IOException {
if (hasNormalizedForm) {
raf.writeUTF(normalizedToken);
}
+ RAFList.write(raf, htmlEntries, index.dict.htmlEntryIndexSerializer);
}
public String toString() {
final int result = windBackCase(token, mid, interrupted);
return result;
} else if (comp < 0) {
- //System.out.println("Upper bound: " + midEntry + ", norm=" + midEntry.normalizedToken() + ", mid=" + mid);
+ // System.out.println("Upper bound: " + midEntry + ", norm=" + midEntry.normalizedToken() + ", mid=" + mid);
end = mid;
} else {
- //System.out.println("Lower bound: " + midEntry + ", norm=" + midEntry.normalizedToken() + ", mid=" + mid);
+ // System.out.println("Lower bound: " + midEntry + ", norm=" + midEntry.normalizedToken() + ", mid=" + mid);
start = mid + 1;
}
}
}
- public final List<RowBase> multiWordSearch(final List<String> searchTokens, final AtomicBoolean interrupted) {
+ public final List<RowBase> multiWordSearch(
+ final String searchText, final List<String> searchTokens, final AtomicBoolean interrupted) {
final long startMills = System.currentTimeMillis();
final List<RowBase> result = new ArrayList<RowBase>();
String bestPrefix = null;
int leastRows = Integer.MAX_VALUE;
- final StringBuilder regex = new StringBuilder();
+ final StringBuilder searchTokensRegex = new StringBuilder();
for (int i = 0; i < searchTokens.size(); ++i) {
if (interrupted.get()) { return null; }
final String searchToken = searchTokens.get(i);
}
}
- if (regex.length() > 0) {
- regex.append("[\\s]*");
+ if (searchTokensRegex.length() > 0) {
+ searchTokensRegex.append("[\\s]*");
}
- regex.append(Pattern.quote(normalized));
+ searchTokensRegex.append(Pattern.quote(normalized));
}
- final Pattern pattern = Pattern.compile(regex.toString());
+ final Pattern pattern = Pattern.compile(searchTokensRegex.toString());
if (bestPrefix == null) {
bestPrefix = searchTokens.get(0);
}
int matchCount = 0;
- final Set<RowKey> cachedRowKeys = new HashSet<RowBase.RowKey>();
-// for (final String searchToken : searchTokens) {
- final String searchToken = bestPrefix;
+ final int exactMatchIndex = findInsertionPointIndex(searchText, interrupted);
+ if (exactMatchIndex != -1) {
+ final IndexEntry exactMatch = sortedIndexEntries.get(exactMatchIndex);
+ if (pattern.matcher(exactMatch.token).matches()) {
+ matches.get(RowMatchType.TITLE_MATCH).add(rows.get(exactMatch.startRow));
+ }
+ }
+
+ final String searchToken = bestPrefix;
final int insertionPointIndex = findInsertionPointIndex(searchToken, interrupted);
-
- for (int index = insertionPointIndex; index < sortedIndexEntries.size() && matchCount < MAX_SEARCH_ROWS; ++index) {
+ final Set<RowKey> rowsAlreadySeen = new HashSet<RowBase.RowKey>();
+ for (int index = insertionPointIndex;
+ index < sortedIndexEntries.size() && matchCount < MAX_SEARCH_ROWS;
+ ++index) {
if (interrupted.get()) { return null; }
final IndexEntry indexEntry = sortedIndexEntries.get(index);
if (!indexEntry.normalizedToken.startsWith(searchToken)) {
// System.out.println("Searching indexEntry: " + indexEntry.token);
// Extra +1 to skip token row.
- for (int rowIndex = indexEntry.startRow + 1; rowIndex < indexEntry.startRow + 1 + indexEntry.numRows && rowIndex < rows.size(); ++rowIndex) {
+ for (int rowIndex = indexEntry.startRow + 1;
+ rowIndex < indexEntry.startRow + 1 + indexEntry.numRows && rowIndex < rows.size();
+ ++rowIndex) {
if (interrupted.get()) { return null; }
final RowBase row = rows.get(rowIndex);
final RowBase.RowKey rowKey = row.getRowKey();
- if (cachedRowKeys.contains(rowKey)) {
+ if (rowsAlreadySeen.contains(rowKey)) {
continue;
}
- cachedRowKeys.add(rowKey);
+ rowsAlreadySeen.add(rowKey);
final RowMatchType matchType = row.matches(searchTokens, pattern, normalizer(), swapPairEntries);
if (matchType != RowMatchType.NO_MATCH) {
matches.get(matchType).add(row);