]> gitweb.fperrin.net Git - Dictionary.git/blobdiff - src/com/hughes/android/dictionary/engine/Index.java
Attempt to fix issue #49.
[Dictionary.git] / src / com / hughes / android / dictionary / engine / Index.java
index ce6947768ee77a8fd345eedbb53fb65e6c7f1d96..069cca798219be68e046be0bccd0115843c6dd92 100644 (file)
@@ -13,7 +13,7 @@
 // limitations under the License.
 
 /**
- * 
+ *
  */
 
 package com.hughes.android.dictionary.engine;
@@ -22,13 +22,14 @@ import com.hughes.android.dictionary.DictionaryInfo;
 import com.hughes.android.dictionary.DictionaryInfo.IndexInfo;
 import com.hughes.android.dictionary.engine.RowBase.RowKey;
 import com.hughes.util.CachingList;
+import com.hughes.util.StringUtil;
 import com.hughes.util.TransformingList;
 import com.hughes.util.raf.RAFList;
 import com.hughes.util.raf.RAFSerializable;
 import com.hughes.util.raf.RAFSerializer;
 import com.hughes.util.raf.SerializableSerializer;
 import com.hughes.util.raf.UniformRAFList;
-import com.ibm.icu.text.Collator;
+import java.text.Collator;
 import com.ibm.icu.text.Transliterator;
 
 import java.io.DataInput;
@@ -36,9 +37,11 @@ import java.io.DataOutput;
 import java.io.IOException;
 import java.io.PrintStream;
 import java.io.RandomAccessFile;
+import java.util.AbstractList;
 import java.util.ArrayList;
 import java.util.Collection;
 import java.util.Collections;
+import java.util.Comparator;
 import java.util.EnumMap;
 import java.util.HashSet;
 import java.util.LinkedHashMap;
@@ -114,7 +117,7 @@ public final class Index implements RAFSerializable<Index> {
      * normalizations.
      */
     public NormalizeComparator getSortComparator() {
-        return new NormalizeComparator(normalizer(), sortLanguage.getCollator());
+        return new NormalizeComparator(normalizer(), sortLanguage.getCollator(), dict.dictFileVersion);
     }
 
     public Index(final Dictionary dict, final DataInput inp) throws IOException {
@@ -133,8 +136,15 @@ public final class Index implements RAFSerializable<Index> {
             mainTokenCount = raf.readInt();
         }
         sortedIndexEntries = CachingList.create(
-                RAFList.create(raf, indexEntrySerializer, raf.getFilePointer()), CACHE_SIZE);
-        if (dict.dictFileVersion >= 4) {
+                RAFList.create(raf, indexEntrySerializer, raf.getFilePointer(),
+                               dict.dictFileVersion, dict.dictInfo + " idx " + languageCode + ": "), CACHE_SIZE);
+        if (dict.dictFileVersion >= 7) {
+            int count = StringUtil.readVarInt(raf);
+            stoplist = new HashSet<String>(count);
+            for (int i = 0; i < count; ++i) {
+                stoplist.add(raf.readUTF());
+            }
+       } else if (dict.dictFileVersion >= 4) {
             stoplist = new SerializableSerializer<Set<String>>().read(raf);
         } else {
             stoplist = Collections.emptySet();
@@ -155,13 +165,12 @@ public final class Index implements RAFSerializable<Index> {
         if (dict.dictFileVersion >= 2) {
             raf.writeInt(mainTokenCount);
         }
-        RAFList.write(raf, sortedIndexEntries, indexEntrySerializer);
-        new SerializableSerializer<Set<String>>().write(raf, stoplist);
-        UniformRAFList.write(raf, rows, new RowBase.Serializer(this), 5 /*
-                                                                                               * bytes
-                                                                                               * per
-                                                                                               * entry
-                                                                                               */);
+        RAFList.write(raf, sortedIndexEntries, indexEntrySerializer, 32, true);
+        StringUtil.writeVarInt(raf, stoplist.size());
+        for (String i : stoplist) {
+            raf.writeUTF(i);
+        }
+        UniformRAFList.write(raf, rows, new RowBase.Serializer(this), 3 /* bytes per entry */);
     }
 
     public void print(final PrintStream out) {
@@ -188,7 +197,8 @@ public final class Index implements RAFSerializable<Index> {
         private final String normalizedToken;
         public final int startRow;
         public final int numRows; // doesn't count the token row!
-        public final List<HtmlEntry> htmlEntries;
+        public List<HtmlEntry> htmlEntries;
+        private int[] htmlEntryIndices;
 
         public IndexEntry(final Index index, final String token, final String normalizedToken,
                 final int startRow, final int numRows) {
@@ -202,34 +212,57 @@ public final class Index implements RAFSerializable<Index> {
             this.htmlEntries = new ArrayList<HtmlEntry>();
         }
 
-        public IndexEntry(final Index index, final DataInput inp) throws IOException {
+        public IndexEntry(final Index index, final DataInput raf) throws IOException {
             this.index = index;
-            RandomAccessFile raf = (RandomAccessFile)inp;
             token = raf.readUTF();
-            startRow = raf.readInt();
-            numRows = raf.readInt();
+            if (index.dict.dictFileVersion >= 7) {
+                startRow = StringUtil.readVarInt(raf);
+                numRows = StringUtil.readVarInt(raf);
+            } else {
+                startRow = raf.readInt();
+                numRows = raf.readInt();
+            }
             final boolean hasNormalizedForm = raf.readBoolean();
             normalizedToken = hasNormalizedForm ? raf.readUTF() : token;
-            if (index.dict.dictFileVersion >= 6) {
+            htmlEntryIndices = null;
+            if (index.dict.dictFileVersion >= 7) {
+                int size = StringUtil.readVarInt(raf);
+                htmlEntryIndices = new int[size];
+                for (int i = 0; i < size; ++i) {
+                    htmlEntryIndices[i] = StringUtil.readVarInt(raf);
+                }
+                this.htmlEntries = CachingList.create(new AbstractList<HtmlEntry>() {
+                    @Override
+                    public HtmlEntry get(int i) {
+                        return index.dict.htmlEntries.get(htmlEntryIndices[i]);
+                    }
+                    @Override
+                    public int size() {
+                        return htmlEntryIndices.length;
+                    }
+                    }, 1);
+            } else if (index.dict.dictFileVersion >= 6) {
                 this.htmlEntries = CachingList.create(
-                        RAFList.create(raf, index.dict.htmlEntryIndexSerializer,
-                                raf.getFilePointer()), 1);
+                        RAFList.create((RandomAccessFile)raf, index.dict.htmlEntryIndexSerializer,
+                                ((RandomAccessFile)raf).getFilePointer(), index.dict.dictFileVersion,
+                                index.dict.dictInfo + " htmlEntries: "), 1);
             } else {
                 this.htmlEntries = Collections.emptyList();
             }
         }
 
-        public void write(DataOutput out) throws IOException {
-            RandomAccessFile raf = (RandomAccessFile)out;
+        public void write(DataOutput raf) throws IOException {
             raf.writeUTF(token);
-            raf.writeInt(startRow);
-            raf.writeInt(numRows);
+            StringUtil.writeVarInt(raf, startRow);
+            StringUtil.writeVarInt(raf, numRows);
             final boolean hasNormalizedForm = !token.equals(normalizedToken);
             raf.writeBoolean(hasNormalizedForm);
             if (hasNormalizedForm) {
                 raf.writeUTF(normalizedToken);
             }
-            RAFList.write(raf, htmlEntries, index.dict.htmlEntryIndexSerializer);
+            StringUtil.writeVarInt(raf, htmlEntries.size());
+            for (HtmlEntry e : htmlEntries)
+                StringUtil.writeVarInt(raf, e.index());
         }
 
         public String toString() {
@@ -269,7 +302,7 @@ public final class Index implements RAFSerializable<Index> {
         int start = 0;
         int end = sortedIndexEntries.size();
 
-        final Collator sortCollator = sortLanguage.getCollator();
+        final Comparator sortCollator = sortLanguage.getCollator();
         while (start < end) {
             final int mid = (start + end) / 2;
             if (interrupted.get()) {
@@ -277,7 +310,9 @@ public final class Index implements RAFSerializable<Index> {
             }
             final IndexEntry midEntry = sortedIndexEntries.get(mid);
 
-            final int comp = sortCollator.compare(token, midEntry.normalizedToken());
+            int comp = NormalizeComparator.compareWithoutDash(token, midEntry.normalizedToken(), sortCollator, dict.dictFileVersion);
+            if (comp == 0)
+                comp = sortCollator.compare(token, midEntry.normalizedToken());
             if (comp == 0) {
                 final int result = windBackCase(token, mid, interrupted);
                 return result;