Major refactor of how dictionary list is stored by app.

[Dictionary.git] / src / com / hughes / android / dictionary / engine / Index.java
diff --git a/src/com/hughes/android/dictionary/engine/Index.java b/src/com/hughes/android/dictionary/engine/Index.java

index 7cee7460c8e2f90702aaf98ca66ca8bcbb7171b2..19e0ecc8ed63d8285710596482950a26af45655d 100644 (file)
--- a/src/com/hughes/android/dictionary/engine/Index.java
+++ b/src/com/hughes/android/dictionary/engine/Index.java
@@ -1,3 +1,17 @@
+// Copyright 2011 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
  /**
   * 
   */
@@ -11,6 +25,8 @@ import java.util.Collection;
  import java.util.List;
  import java.util.concurrent.atomic.AtomicBoolean;
  
+import com.hughes.android.dictionary.DictionaryInfo;
+import com.hughes.android.dictionary.DictionaryInfo.IndexInfo;
  import com.hughes.util.CachingList;
  import com.hughes.util.raf.RAFList;
  import com.hughes.util.raf.RAFSerializable;
@@ -25,7 +41,7 @@ public final class Index implements RAFSerializable<Index> {
    
    final Dictionary dict;
    
-  public final String shortName;
+  public final String shortName;  // Typically the ISO code for the language.
    public final String longName;
    
    // persisted: tells how the entries are sorted.
@@ -33,7 +49,7 @@ public final class Index implements RAFSerializable<Index> {
    final String normalizerRules;
    
    // Built from the two above.
-  final Transliterator normalizer;
+  private Transliterator normalizer;
      
    // persisted
    public final List<IndexEntry> sortedIndexEntries;
@@ -42,9 +58,11 @@ public final class Index implements RAFSerializable<Index> {
    // Various sub-types.
    // persisted
    public final List<RowBase> rows;
-  
    public final boolean swapPairEntries;
    
+  // Version 2:
+  int mainTokenCount = -1;
+  
    // --------------------------------------------------------------------------
    
    public Index(final Dictionary dict, final String shortName, final String longName, final Language sortLanguage, final String normalizerRules, final boolean swapPairEntries) {
@@ -57,7 +75,14 @@ public final class Index implements RAFSerializable<Index> {
      sortedIndexEntries = new ArrayList<IndexEntry>();
      rows = new ArrayList<RowBase>();
      
-    normalizer = Transliterator.createFromRules("", normalizerRules, Transliterator.FORWARD);
+    normalizer = null;
+  }
+  
+  public synchronized Transliterator normalizer() {
+    if (normalizer == null) {
+      normalizer = Transliterator.createFromRules("", normalizerRules, Transliterator.FORWARD);
+    }
+    return normalizer;
    }
    
    public Index(final Dictionary dict, final RandomAccessFile raf) throws IOException {
@@ -71,19 +96,23 @@ public final class Index implements RAFSerializable<Index> {
      if (sortLanguage == null) {
        throw new IOException("Unsupported language: " + languageCode);
      }
+    if (dict.dictFileVersion >= 2) {
+      mainTokenCount = raf.readInt();
+    }
      sortedIndexEntries = CachingList.create(RAFList.create(raf, IndexEntry.SERIALIZER, raf.getFilePointer()), CACHE_SIZE);
      rows = CachingList.create(UniformRAFList.create(raf, new RowBase.Serializer(this), raf.getFilePointer()), CACHE_SIZE);
-
-    normalizer = Transliterator.createFromRules("", normalizerRules, Transliterator.FORWARD);
    }
    
    @Override
    public void write(final RandomAccessFile raf) throws IOException {
      raf.writeUTF(shortName);
      raf.writeUTF(longName);
-    raf.writeUTF(sortLanguage.getSymbol());
+    raf.writeUTF(sortLanguage.getIsoCode());
      raf.writeUTF(normalizerRules);
      raf.writeBoolean(swapPairEntries);
+    if (dict.dictFileVersion >= 2) {
+      raf.writeInt(mainTokenCount);
+    }
      RAFList.write(raf, sortedIndexEntries, IndexEntry.SERIALIZER);
      UniformRAFList.write(raf, (Collection<RowBase>) rows, new RowBase.Serializer(this), 5);
    }
@@ -96,10 +125,10 @@ public final class Index implements RAFSerializable<Index> {
    
    public static final class IndexEntry implements RAFSerializable<Index.IndexEntry> {
      public final String token;
+    private final String normalizedToken;
      public final int startRow;
      public final int numRows;
      
-    private String normalizedToken;
      
      static final RAFSerializer<IndexEntry> SERIALIZER = new RAFSerializer<IndexEntry> () {
        @Override
@@ -111,10 +140,11 @@ public final class Index implements RAFSerializable<Index> {
          t.write(raf);
        }};
        
-    public IndexEntry(final String token, final int startRow, final int numRows) {
+    public IndexEntry(final String token, final String normalizedToken, final int startRow, final int numRows) {
        assert token.equals(token.trim());
        assert token.length() > 0;
        this.token = token;
+      this.normalizedToken = normalizedToken;
        this.startRow = startRow;
        this.numRows = numRows;
      }
@@ -123,28 +153,38 @@ public final class Index implements RAFSerializable<Index> {
        token = raf.readUTF();
        startRow = raf.readInt();
        numRows = raf.readInt();
+      final boolean hasNormalizedForm = raf.readBoolean();
+      normalizedToken = hasNormalizedForm ? raf.readUTF() : token;
      }
      
      public void write(RandomAccessFile raf) throws IOException {
        raf.writeUTF(token);
        raf.writeInt(startRow);
        raf.writeInt(numRows);
+      final boolean hasNormalizedForm = !token.equals(normalizedToken);
+      raf.writeBoolean(hasNormalizedForm);
+      if (hasNormalizedForm) {
+        raf.writeUTF(normalizedToken);
+      }
      }
  
      public String toString() {
        return String.format("%s@%d(%d)", token, startRow, numRows);
      }
  
-    public synchronized String normalizedToken(final Transliterator normalizer) {
-      if (normalizedToken == null) {
-        normalizedToken = normalizer.transform(token);
-      }
+    public String normalizedToken() {
        return normalizedToken;
      }
    }
    
    public IndexEntry findInsertionPoint(String token, final AtomicBoolean interrupted) {
-    token = normalizer.transliterate(token);
+    if (TransliteratorManager.init(null)) {
+      final Transliterator normalizer = normalizer();
+      token = normalizer.transliterate(token);
+    } else {
+      // Do our best since the Transliterators aren't up yet.
+      token = token.toLowerCase();
+    }
  
      int start = 0;
      int end = sortedIndexEntries.size();
@@ -157,72 +197,27 @@ public final class Index implements RAFSerializable<Index> {
        }
        final IndexEntry midEntry = sortedIndexEntries.get(mid);
  
-      final int comp = sortCollator.compare(token, midEntry.normalizedToken(normalizer));
+      final int comp = sortCollator.compare(token, midEntry.normalizedToken());
        if (comp == 0) {
          final int result = windBackCase(token, mid, interrupted);
          return sortedIndexEntries.get(result);
        } else if (comp < 0) {
-        System.out.println("Upper bound: " + midEntry + ", norm=" + midEntry.normalizedToken(normalizer) + ", mid=" + mid);
+        //System.out.println("Upper bound: " + midEntry + ", norm=" + midEntry.normalizedToken() + ", mid=" + mid);
          end = mid;
        } else {
-        System.out.println("Lower bound: " + midEntry + ", norm=" + midEntry.normalizedToken(normalizer) + ", mid=" + mid);
+        //System.out.println("Lower bound: " + midEntry + ", norm=" + midEntry.normalizedToken() + ", mid=" + mid);
          start = mid + 1;
        }
      }
  
      // If we search for a substring of a string that's in there, return that.
      int result = Math.min(start, sortedIndexEntries.size() - 1);
-    result = windBackCase(sortedIndexEntries.get(result).normalizedToken(normalizer), result, interrupted);
+    result = windBackCase(sortedIndexEntries.get(result).normalizedToken(), result, interrupted);
      return sortedIndexEntries.get(result);
    }
-  
-  public static final class SearchResult {
-    public final IndexEntry insertionPoint;
-    public final IndexEntry longestPrefix;
-    public final String longestPrefixString;
-    public final boolean success;
      
-    public SearchResult(IndexEntry insertionPoint, IndexEntry longestPrefix,
-        String longestPrefixString, boolean success) {
-      this.insertionPoint = insertionPoint;
-      this.longestPrefix = longestPrefix;
-      this.longestPrefixString = longestPrefixString;
-      this.success = success;
-    }
-    
-    @Override
-    public String toString() {
-      return String.format("inerstionPoint=%s,longestPrefix=%s,longestPrefixString=%s,success=%b", insertionPoint.toString(), longestPrefix.toString(), longestPrefixString, success);
-    }
-  }
-  
-//  public SearchResult findLongestSubstring(String token, final AtomicBoolean interrupted) {
-//    token = normalizer.transliterate(token);
-//    if (token.length() == 0) {
-//      return new SearchResult(sortedIndexEntries.get(0), sortedIndexEntries.get(0), "", true);
-//    }
-//    IndexEntry insertionPoint = null;
-//    IndexEntry result = null;
-//    boolean unmodified = true;
-//    while (!interrupted.get() && token.length() > 0) {
-//      result = findInsertionPoint(token, interrupted);
-//      if (result == null) {
-//        return null;
-//      }
-//      if (unmodified) {
-//        insertionPoint = result;
-//      }
-//      if (result.normalizedToken(normalizer).startsWith(token)) {
-//        return new SearchResult(insertionPoint, result, token, unmodified);
-//      }
-//      unmodified = false;
-//      token = token.substring(0, token.length() - 1);      
-//    }
-//    return new SearchResult(insertionPoint, sortedIndexEntries.get(0), "", false);
-//  }
-  
    private final int windBackCase(final String token, int result, final AtomicBoolean interrupted) {
-    while (result > 0 && sortedIndexEntries.get(result - 1).normalizedToken(normalizer).equals(token)) {
+    while (result > 0 && sortedIndexEntries.get(result - 1).normalizedToken().equals(token)) {
        --result;
        if (interrupted.get()) {
          return result;
@@ -231,5 +226,8 @@ public final class Index implements RAFSerializable<Index> {
      return result;
    }
  
+  public IndexInfo getIndexInfo() {
+    return new DictionaryInfo.IndexInfo(shortName, sortedIndexEntries.size(), mainTokenCount);
+  }
  
  }
 \ No newline at end of file