// Copyright 2011 Google Inc. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. /** * */ package com.hughes.android.dictionary.engine; import java.io.IOException; import java.io.PrintStream; import java.io.RandomAccessFile; import java.util.ArrayList; import java.util.Collection; import java.util.List; import java.util.concurrent.atomic.AtomicBoolean; import com.hughes.util.CachingList; import com.hughes.util.raf.RAFList; import com.hughes.util.raf.RAFSerializable; import com.hughes.util.raf.RAFSerializer; import com.hughes.util.raf.UniformRAFList; import com.ibm.icu.text.Collator; import com.ibm.icu.text.Transliterator; public final class Index implements RAFSerializable { static final int CACHE_SIZE = 5000; final Dictionary dict; public final String shortName; public final String longName; // persisted: tells how the entries are sorted. public final Language sortLanguage; final String normalizerRules; // Built from the two above. private Transliterator normalizer; // persisted public final List sortedIndexEntries; // One big list! // Various sub-types. // persisted public final List rows; public final boolean swapPairEntries; // -------------------------------------------------------------------------- public Index(final Dictionary dict, final String shortName, final String longName, final Language sortLanguage, final String normalizerRules, final boolean swapPairEntries) { this.dict = dict; this.shortName = shortName; this.longName = longName; this.sortLanguage = sortLanguage; this.normalizerRules = normalizerRules; this.swapPairEntries = swapPairEntries; sortedIndexEntries = new ArrayList(); rows = new ArrayList(); normalizer = null; } public synchronized Transliterator normalizer() { if (normalizer == null) { normalizer = Transliterator.createFromRules("", normalizerRules, Transliterator.FORWARD); } return normalizer; } public Index(final Dictionary dict, final RandomAccessFile raf) throws IOException { this.dict = dict; shortName = raf.readUTF(); longName = raf.readUTF(); final String languageCode = raf.readUTF(); sortLanguage = Language.lookup(languageCode); normalizerRules = raf.readUTF(); swapPairEntries = raf.readBoolean(); if (sortLanguage == null) { throw new IOException("Unsupported language: " + languageCode); } sortedIndexEntries = CachingList.create(RAFList.create(raf, IndexEntry.SERIALIZER, raf.getFilePointer()), CACHE_SIZE); rows = CachingList.create(UniformRAFList.create(raf, new RowBase.Serializer(this), raf.getFilePointer()), CACHE_SIZE); } @Override public void write(final RandomAccessFile raf) throws IOException { raf.writeUTF(shortName); raf.writeUTF(longName); raf.writeUTF(sortLanguage.getIsoCode()); raf.writeUTF(normalizerRules); raf.writeBoolean(swapPairEntries); RAFList.write(raf, sortedIndexEntries, IndexEntry.SERIALIZER); UniformRAFList.write(raf, (Collection) rows, new RowBase.Serializer(this), 5); } public void print(final PrintStream out) { for (final RowBase row : rows) { row.print(out); } } public static final class IndexEntry implements RAFSerializable { public final String token; private final String normalizedToken; public final int startRow; public final int numRows; static final RAFSerializer SERIALIZER = new RAFSerializer () { @Override public IndexEntry read(RandomAccessFile raf) throws IOException { return new IndexEntry(raf); } @Override public void write(RandomAccessFile raf, IndexEntry t) throws IOException { t.write(raf); }}; public IndexEntry(final String token, final String normalizedToken, final int startRow, final int numRows) { assert token.equals(token.trim()); assert token.length() > 0; this.token = token; this.normalizedToken = normalizedToken; this.startRow = startRow; this.numRows = numRows; } public IndexEntry(final RandomAccessFile raf) throws IOException { token = raf.readUTF(); startRow = raf.readInt(); numRows = raf.readInt(); final boolean hasNormalizedForm = raf.readBoolean(); normalizedToken = hasNormalizedForm ? raf.readUTF() : token; } public void write(RandomAccessFile raf) throws IOException { raf.writeUTF(token); raf.writeInt(startRow); raf.writeInt(numRows); final boolean hasNormalizedForm = !token.equals(normalizedToken); raf.writeBoolean(hasNormalizedForm); if (hasNormalizedForm) { raf.writeUTF(normalizedToken); } } public String toString() { return String.format("%s@%d(%d)", token, startRow, numRows); } public String normalizedToken() { return normalizedToken; } } public IndexEntry findInsertionPoint(String token, final AtomicBoolean interrupted) { final Transliterator normalizer = normalizer(); if (TransliteratorManager.init(null)) { token = normalizer.transliterate(token); } else { // Do our best since the Transliterators aren't up yet. token = token.toLowerCase(); } int start = 0; int end = sortedIndexEntries.size(); final Collator sortCollator = sortLanguage.getCollator(); while (start < end) { final int mid = (start + end) / 2; if (interrupted.get()) { return null; } final IndexEntry midEntry = sortedIndexEntries.get(mid); final int comp = sortCollator.compare(token, midEntry.normalizedToken()); if (comp == 0) { final int result = windBackCase(token, mid, interrupted); return sortedIndexEntries.get(result); } else if (comp < 0) { //System.out.println("Upper bound: " + midEntry + ", norm=" + midEntry.normalizedToken() + ", mid=" + mid); end = mid; } else { //System.out.println("Lower bound: " + midEntry + ", norm=" + midEntry.normalizedToken() + ", mid=" + mid); start = mid + 1; } } // If we search for a substring of a string that's in there, return that. int result = Math.min(start, sortedIndexEntries.size() - 1); result = windBackCase(sortedIndexEntries.get(result).normalizedToken(), result, interrupted); return sortedIndexEntries.get(result); } private final int windBackCase(final String token, int result, final AtomicBoolean interrupted) { while (result > 0 && sortedIndexEntries.get(result - 1).normalizedToken().equals(token)) { --result; if (interrupted.get()) { return result; } } return result; } }