+// Copyright 2011 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
/**
*
*/
import java.util.List;
import java.util.concurrent.atomic.AtomicBoolean;
+import com.hughes.android.dictionary.DictionaryInfo;
+import com.hughes.android.dictionary.DictionaryInfo.IndexInfo;
import com.hughes.util.CachingList;
import com.hughes.util.raf.RAFList;
import com.hughes.util.raf.RAFSerializable;
// Owning dictionary; its dictFileVersion decides whether mainTokenCount is read and written.
final Dictionary dict;
- public final String shortName;
+ public final String shortName; // Typically the ISO code for the language.
public final String longName;
// persisted: tells how the entries are sorted.
final String normalizerRules;
// Built from the two above.
- final Transliterator normalizer;
// No longer final: left null by the constructor and created on first use by normalizer().
+ private Transliterator normalizer;
// persisted
public final List<IndexEntry> sortedIndexEntries;
// Various sub-types.
// persisted
public final List<RowBase> rows;
-
public final boolean swapPairEntries;
+ // Version 2:
// Persisted only when dict.dictFileVersion >= 2; keeps the default -1 otherwise.
+ int mainTokenCount = -1;
+
// --------------------------------------------------------------------------
// In-memory constructor: the entry and row lists start out as empty ArrayLists.
public Index(final Dictionary dict, final String shortName, final String longName, final Language sortLanguage, final String normalizerRules, final boolean swapPairEntries) {
sortedIndexEntries = new ArrayList<IndexEntry>();
rows = new ArrayList<RowBase>();
- normalizer = Transliterator.createFromRules("", normalizerRules, Transliterator.FORWARD);
// The Transliterator is no longer built here; normalizer() creates it on first use.
+ normalizer = null;
+ }
+
// Returns the Transliterator for this index, building it from normalizerRules on the first call
// and returning the same instance afterwards. The method is synchronized on this Index.
+ public synchronized Transliterator normalizer() {
+ if (normalizer == null) {
+ normalizer = Transliterator.createFromRules("", normalizerRules, Transliterator.FORWARD);
+ }
+ return normalizer;
}
// Reads an index from raf; the trailing fields are read in the same order write() emits them.
public Index(final Dictionary dict, final RandomAccessFile raf) throws IOException {
// A null sortLanguage is reported as an IOException rather than left to fail later.
if (sortLanguage == null) {
throw new IOException("Unsupported language: " + languageCode);
}
// mainTokenCount is read only when dictFileVersion >= 2; otherwise it keeps its default of -1.
+ if (dict.dictFileVersion >= 2) {
+ mainTokenCount = raf.readInt();
+ }
// Entry list first, then row list; each is created at the current file pointer and wrapped by
// CachingList.create with CACHE_SIZE.
sortedIndexEntries = CachingList.create(RAFList.create(raf, IndexEntry.SERIALIZER, raf.getFilePointer()), CACHE_SIZE);
rows = CachingList.create(UniformRAFList.create(raf, new RowBase.Serializer(this), raf.getFilePointer()), CACHE_SIZE);
-
- normalizer = Transliterator.createFromRules("", normalizerRules, Transliterator.FORWARD);
// The Transliterator is no longer created while loading; see normalizer().
}
// Serializes this index to raf in the order: shortName, longName, sort language ISO code,
// normalizerRules, swapPairEntries, mainTokenCount (version >= 2 only), index entries, rows.
@Override
public void write(final RandomAccessFile raf) throws IOException {
raf.writeUTF(shortName);
raf.writeUTF(longName);
- raf.writeUTF(sortLanguage.getSymbol());
// The sort language is now identified by getIsoCode() instead of getSymbol().
+ raf.writeUTF(sortLanguage.getIsoCode());
raf.writeUTF(normalizerRules);
raf.writeBoolean(swapPairEntries);
// Written only when dictFileVersion >= 2, matching the conditional read in the RandomAccessFile constructor.
+ if (dict.dictFileVersion >= 2) {
+ raf.writeInt(mainTokenCount);
+ }
RAFList.write(raf, sortedIndexEntries, IndexEntry.SERIALIZER);
// NOTE(review): the trailing 5 is presumably the fixed per-row size UniformRAFList expects — confirm.
UniformRAFList.write(raf, (Collection<RowBase>) rows, new RowBase.Serializer(this), 5);
}
// A token of the index plus startRow/numRows (presumably the range of rows belonging to the
// token — confirm), along with the token's normalized form.
public static final class IndexEntry implements RAFSerializable<Index.IndexEntry> {
public final String token;
// Now final and set at construction (or read from the file) instead of being computed lazily.
+ private final String normalizedToken;
public final int startRow;
public final int numRows;
- private String normalizedToken;
static final RAFSerializer<IndexEntry> SERIALIZER = new RAFSerializer<IndexEntry> () {
@Override
t.write(raf);
}};
- public IndexEntry(final String token, final int startRow, final int numRows) {
// Callers supply the normalized form up front. The asserts (active only when assertions are
// enabled) require token to be already trimmed and non-empty.
// NOTE(review): normalizedToken is not null-checked; write() compares it with token and would
// pass a null on to writeUTF — confirm callers never pass null.
+ public IndexEntry(final String token, final String normalizedToken, final int startRow, final int numRows) {
assert token.equals(token.trim());
assert token.length() > 0;
this.token = token;
+ this.normalizedToken = normalizedToken;
this.startRow = startRow;
this.numRows = numRows;
}
token = raf.readUTF();
startRow = raf.readInt();
numRows = raf.readInt();
// A boolean flag says whether a separate normalized form follows; when it does not, the token
// itself is used as the normalized form.
// NOTE(review): the flag is read unconditionally, with no dictFileVersion check like the one
// guarding mainTokenCount in Index — confirm older files are never read through this path.
+ final boolean hasNormalizedForm = raf.readBoolean();
+ normalizedToken = hasNormalizedForm ? raf.readUTF() : token;
}
// Layout: token, startRow, numRows, flag, then the normalized token only when it differs from
// token, so an identical string is not stored twice.
public void write(RandomAccessFile raf) throws IOException {
raf.writeUTF(token);
raf.writeInt(startRow);
raf.writeInt(numRows);
+ final boolean hasNormalizedForm = !token.equals(normalizedToken);
+ raf.writeBoolean(hasNormalizedForm);
+ if (hasNormalizedForm) {
+ raf.writeUTF(normalizedToken);
+ }
}
// Renders as token@startRow(numRows).
public String toString() {
return String.format("%s@%d(%d)", token, startRow, numRows);
}
- public synchronized String normalizedToken(final Transliterator normalizer) {
- if (normalizedToken == null) {
- normalizedToken = normalizer.transform(token);
- }
// Plain getter: the field is final, so no Transliterator argument or synchronization is needed.
+ public String normalizedToken() {
return normalizedToken;
}
}
// Searches sortedIndexEntries for the given token. The token is normalized first, then compared
// with each probed entry's stored normalized token using sortCollator. Returns the first entry
// of an exact match, or the entry at the position where the search ended.
public IndexEntry findInsertionPoint(String token, final AtomicBoolean interrupted) {
- token = normalizer.transliterate(token);
// NOTE(review): init(null) presumably returns true once the Transliterators are usable — confirm.
+ if (TransliteratorManager.init(null)) {
+ final Transliterator normalizer = normalizer();
+ token = normalizer.transliterate(token);
+ } else {
+ // Do our best since the Transliterators aren't up yet.
// NOTE(review): toLowerCase() uses the default locale and is only an approximation of the
// normalizer rules, so results in this branch may differ from the transliterated lookup.
+ token = token.toLowerCase();
+ }
// Search window is [start, end).
int start = 0;
int end = sortedIndexEntries.size();
}
final IndexEntry midEntry = sortedIndexEntries.get(mid);
- final int comp = sortCollator.compare(token, midEntry.normalizedToken(normalizer));
+ final int comp = sortCollator.compare(token, midEntry.normalizedToken());
if (comp == 0) {
// Exact match: step back to the first of any preceding entries with an equal normalized token.
final int result = windBackCase(token, mid, interrupted);
return sortedIndexEntries.get(result);
} else if (comp < 0) {
- System.out.println("Upper bound: " + midEntry + ", norm=" + midEntry.normalizedToken(normalizer) + ", mid=" + mid);
+ //System.out.println("Upper bound: " + midEntry + ", norm=" + midEntry.normalizedToken() + ", mid=" + mid);
end = mid;
} else {
- System.out.println("Lower bound: " + midEntry + ", norm=" + midEntry.normalizedToken(normalizer) + ", mid=" + mid);
+ //System.out.println("Lower bound: " + midEntry + ", norm=" + midEntry.normalizedToken() + ", mid=" + mid);
start = mid + 1;
}
}
// If we search for a substring of a string that's in there, return that.
// start may equal size() when the token sorts after every entry, so clamp to the last index.
// NOTE(review): with an empty sortedIndexEntries this yields -1 and get(-1) — confirm an index
// always holds at least one entry.
int result = Math.min(start, sortedIndexEntries.size() - 1);
- result = windBackCase(sortedIndexEntries.get(result).normalizedToken(normalizer), result, interrupted);
+ result = windBackCase(sortedIndexEntries.get(result).normalizedToken(), result, interrupted);
return sortedIndexEntries.get(result);
}
-
- public static final class SearchResult {
- public final IndexEntry insertionPoint;
- public final IndexEntry longestPrefix;
- public final String longestPrefixString;
- public final boolean success;
- public SearchResult(IndexEntry insertionPoint, IndexEntry longestPrefix,
- String longestPrefixString, boolean success) {
- this.insertionPoint = insertionPoint;
- this.longestPrefix = longestPrefix;
- this.longestPrefixString = longestPrefixString;
- this.success = success;
- }
-
- @Override
- public String toString() {
- return String.format("inerstionPoint=%s,longestPrefix=%s,longestPrefixString=%s,success=%b", insertionPoint.toString(), longestPrefix.toString(), longestPrefixString, success);
- }
- }
-
-// public SearchResult findLongestSubstring(String token, final AtomicBoolean interrupted) {
-// token = normalizer.transliterate(token);
-// if (token.length() == 0) {
-// return new SearchResult(sortedIndexEntries.get(0), sortedIndexEntries.get(0), "", true);
-// }
-// IndexEntry insertionPoint = null;
-// IndexEntry result = null;
-// boolean unmodified = true;
-// while (!interrupted.get() && token.length() > 0) {
-// result = findInsertionPoint(token, interrupted);
-// if (result == null) {
-// return null;
-// }
-// if (unmodified) {
-// insertionPoint = result;
-// }
-// if (result.normalizedToken(normalizer).startsWith(token)) {
-// return new SearchResult(insertionPoint, result, token, unmodified);
-// }
-// unmodified = false;
-// token = token.substring(0, token.length() - 1);
-// }
-// return new SearchResult(insertionPoint, sortedIndexEntries.get(0), "", false);
-// }
-
// Moves result backwards while the preceding entry's normalized token equals token, so the
// returned index is the first of a run of equal entries. Returns the current position early
// if interrupted becomes true.
private final int windBackCase(final String token, int result, final AtomicBoolean interrupted) {
- while (result > 0 && sortedIndexEntries.get(result - 1).normalizedToken(normalizer).equals(token)) {
+ while (result > 0 && sortedIndexEntries.get(result - 1).normalizedToken().equals(token)) {
--result;
if (interrupted.get()) {
return result;
return result;
}
// Builds a DictionaryInfo.IndexInfo from shortName, the number of entries in
// sortedIndexEntries, and mainTokenCount (which is -1 unless it was set or read from a
// version >= 2 file).
+ public IndexInfo getIndexInfo() {
+ return new DictionaryInfo.IndexInfo(shortName, sortedIndexEntries.size(), mainTokenCount);
+ }
}
\ No newline at end of file