gitweb.fperrin.net Git - Dictionary.git/commitdiff
Update to new mmapped IO.
author Reimar Döffinger <Reimar.Doeffinger@gmx.de>
Wed, 15 Apr 2020 06:47:16 +0000 (08:47 +0200)
committer Reimar Döffinger <Reimar.Doeffinger@gmx.de>
Wed, 15 Apr 2020 15:51:49 +0000 (17:51 +0200)
Util
src/com/hughes/android/dictionary/engine/Dictionary.java
src/com/hughes/android/dictionary/engine/HtmlEntry.java
src/com/hughes/android/dictionary/engine/Index.java

diff --git a/Util b/Util
index 93327017669a7620ad62344e8e5fe4ddfde7bff3..585113b2b62a7cfb8daac6936f55082fbb1bca12 160000 (submodule)
--- a/Util
+++ b/Util
@@ -1 +1 @@
-Subproject commit 93327017669a7620ad62344e8e5fe4ddfde7bff3
+Subproject commit 585113b2b62a7cfb8daac6936f55082fbb1bca12
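diff --git a/src/com/hughes/android/dictionary/engine/Dictionary.java b/src/com/hughes/android/dictionary/engine/Dictionary.java
index cde3eb9ca04fd521f1575a2f1a5294fecb928401..c8e6cb197d6e51c5fe5a06e0d209762c1697ccd9 100644 (file)
--- a/src/com/hughes/android/dictionary/engine/Dictionary.java
+++ b/src/com/hughes/android/dictionary/engine/Dictionary.java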
@@ -22,6 +22,7 @@ import java.io.IOException;
 import java.io.ObjectOutputStream;
 import java.io.PrintStream;
 import java.io.RandomAccessFile;
+import java.nio.MappedByteBuffer;
 import java.nio.channels.Channels;
 import java.nio.channels.FileChannel;
 import java.nio.charset.StandardCharsets;
@@ -31,6 +32,7 @@ import java.util.List;
 
 import com.hughes.android.dictionary.DictionaryInfo;
 import com.hughes.util.CachingList;
+import com.hughes.util.DataInputBuffer;
 import com.hughes.util.raf.RAFList;
 import com.hughes.util.raf.RAFListSerializer;
 import com.hughes.util.raf.RAFSerializable;
@@ -49,7 +51,7 @@ public class Dictionary implements RAFSerializable<Dictionary> {
     public final List<PairEntry> pairEntries;
     public final List<TextEntry> textEntries;
     public final List<HtmlEntry> htmlEntries;
-    public final List<byte[]> htmlData;
+    public final List<DataInputBuffer> htmlData;
     public final List<EntrySource> sources;
     public final List<Index> indices;
 
@@ -71,46 +73,47 @@ public class Dictionary implements RAFSerializable<Dictionary> {
     }
 
     public Dictionary(final FileChannel ch) throws IOException {
-        DataInput raf = new DataInputStream(Channels.newInputStream(ch));
-        dictFileVersion = raf.readInt();
+        MappedByteBuffer wholefile = ch.map(FileChannel.MapMode.READ_ONLY, 0, ch.size());
+        DataInputBuffer in = new DataInputBuffer(wholefile, 0);
+        dictFileVersion = in.readInt();
         if (dictFileVersion < 0 || dictFileVersion > CURRENT_DICT_VERSION) {
             throw new IOException("Invalid dictionary version: " + dictFileVersion);
         }
-        creationMillis = raf.readLong();
-        dictInfo = raf.readUTF();
+        creationMillis = in.readLong();
+        dictInfo = in.readUTF();
 
         // Load the sources, then seek past them, because reading them later
         // disrupts the offset.
         try {
-            final RAFList<EntrySource> rafSources = RAFList.create(ch, new EntrySource.Serializer(
-                    this), ch.position(), dictFileVersion, dictInfo + " sources: ");
+            final RAFList<EntrySource> rafSources = RAFList.create(in, new EntrySource.Serializer(
+                    this), dictFileVersion, dictInfo + " sources: ");
             sources = new ArrayList<>(rafSources);
             ch.position(rafSources.getEndOffset());
 
             pairEntries = CachingList.create(
-                              RAFList.create(ch, new PairEntry.Serializer(this), ch.position(), dictFileVersion, dictInfo + " pairs: "),
+                              RAFList.create(in, new PairEntry.Serializer(this), dictFileVersion, dictInfo + " pairs: "),
                               CACHE_SIZE, false);
             textEntries = CachingList.create(
-                              RAFList.create(ch, new TextEntry.Serializer(this), ch.position(), dictFileVersion, dictInfo + " text: "),
+                              RAFList.create(in, new TextEntry.Serializer(this), dictFileVersion, dictInfo + " text: "),
                               CACHE_SIZE, true);
             if (dictFileVersion >= 5) {
                 htmlEntries = CachingList.create(
-                                  RAFList.create(ch, new HtmlEntry.Serializer(this, ch), ch.position(), dictFileVersion, dictInfo + " html: "),
+                                  RAFList.create(in, new HtmlEntry.Serializer(this), dictFileVersion, dictInfo + " html: "),
                                   CACHE_SIZE, false);
             } else {
                 htmlEntries = Collections.emptyList();
             }
             if (dictFileVersion >= 7) {
-                htmlData = RAFList.create(ch, new HtmlEntry.DataDeserializer(), ch.position(), dictFileVersion, dictInfo + " html: ");
+                htmlData = RAFList.create(in, new HtmlEntry.DataDeserializer(), dictFileVersion, dictInfo + " html: ");
             } else {
                 htmlData = null;
             }
-            indices = CachingList.createFullyCached(RAFList.create(ch, new IndexSerializer(ch),
-                                                    ch.position(), dictFileVersion, dictInfo + " index: "));
+            indices = CachingList.createFullyCached(RAFList.create(in, new IndexSerializer(),
+                                                    dictFileVersion, dictInfo + " index: "));
         } catch (RuntimeException e) {
             throw new IOException("RuntimeException loading dictionary", e);
         }
-        final String end = raf.readUTF();
+        final String end = in.readUTF();
         if (!end.equals(END_OF_DICTIONARY)) {
             throw new IOException("Dictionary seems corrupt: " + end);
         }
@@ -130,26 +133,20 @@ public class Dictionary implements RAFSerializable<Dictionary> {
         System.out.println("text start: " + raf.getFilePointer());
         RAFList.write(raf, textEntries, new TextEntry.Serializer(this));
         System.out.println("html index start: " + raf.getFilePointer());
-        RAFList.write(raf, htmlEntries, new HtmlEntry.Serializer(this, null), 64, true);
+        RAFList.write(raf, htmlEntries, new HtmlEntry.Serializer(this), 64, true);
         System.out.println("html data start: " + raf.getFilePointer());
         assert htmlData == null;
         RAFList.write(raf, htmlEntries, new HtmlEntry.DataSerializer(), 128, true);
         System.out.println("indices start: " + raf.getFilePointer());
-        RAFList.write(raf, indices, new IndexSerializer(null));
+        RAFList.write(raf, indices, new IndexSerializer());
         System.out.println("end: " + raf.getFilePointer());
         raf.writeUTF(END_OF_DICTIONARY);
     }
 
     private final class IndexSerializer implements RAFListSerializer<Index> {
-        private final FileChannel ch;
-
-        IndexSerializer(FileChannel ch) {
-            this.ch = ch;
-        }
-
         @Override
         public Index read(DataInput raf, final int readIndex) throws IOException {
-            return new Index(Dictionary.this, ch, raf);
+            return new Index(Dictionary.this, (DataInputBuffer)raf);
         }
 
         @Override
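diff --git a/src/com/hughes/android/dictionary/engine/HtmlEntry.java b/src/com/hughes/android/dictionary/engine/HtmlEntry.java
index 59ed59b3c54c08004308a7b8559f2298aca820fc..1b191a93ac4b80000672876b4f453cfe5c1c097e 100644 (file)
--- a/src/com/hughes/android/dictionary/engine/HtmlEntry.java
+++ b/src/com/hughes/android/dictionary/engine/HtmlEntry.java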
@@ -6,11 +6,13 @@ import java.io.DataOutput;
 import java.io.IOException;
 import java.io.PrintStream;
 import java.lang.ref.SoftReference;
+import java.nio.ByteBuffer;
 import java.nio.channels.FileChannel;
 import java.nio.charset.StandardCharsets;
 import java.util.List;
 import java.util.regex.Pattern;
 
+import com.hughes.util.DataInputBuffer;
 import com.hughes.util.StringUtil;
 import com.hughes.util.raf.RAFListSerializer;
 import com.hughes.util.raf.RAFListSerializerSkippable;
@@ -30,11 +32,11 @@ public class HtmlEntry extends AbstractEntry implements Comparable<HtmlEntry> {
         lazyHtmlLoader = null;
     }
 
-    public HtmlEntry(Dictionary dictionary, FileChannel ch, DataInput raf, final int index)
+    public HtmlEntry(Dictionary dictionary, DataInput raf, final int index)
     throws IOException {
         super(dictionary, raf, index);
         title = raf.readUTF();
-        lazyHtmlLoader = new LazyHtmlLoader(ch, raf, dictionary.htmlData, index);
+        lazyHtmlLoader = new LazyHtmlLoader(raf, dictionary.htmlData, index);
         html = null;
     }
 
@@ -49,11 +51,9 @@ public class HtmlEntry extends AbstractEntry implements Comparable<HtmlEntry> {
         raf.write(bytes);
     }
 
-    private static byte[] readData(DataInput raf) throws IOException {
+    private static DataInputBuffer readData(DataInput raf) throws IOException {
         int len = StringUtil.readVarInt(raf);
-        final byte[] bytes = new byte[Math.min(len, 20 * 1024 * 1024)];
-        raf.readFully(bytes);
-        return bytes;
+        return ((DataInputBuffer)raf).slice(len);
     }
 
     String getHtml() {
@@ -75,16 +75,14 @@ public class HtmlEntry extends AbstractEntry implements Comparable<HtmlEntry> {
     static final class Serializer implements RAFListSerializerSkippable<HtmlEntry> {
 
         final Dictionary dictionary;
-        final FileChannel ch;
 
-        Serializer(Dictionary dictionary, FileChannel ch) {
+        Serializer(Dictionary dictionary) {
             this.dictionary = dictionary;
-            this.ch = ch;
         }
 
         @Override
         public HtmlEntry read(DataInput raf, final int index) throws IOException {
-            return new HtmlEntry(dictionary, ch, raf, index);
+            return new HtmlEntry(dictionary, raf, index);
         }
 
         @Override
@@ -120,14 +118,14 @@ public class HtmlEntry extends AbstractEntry implements Comparable<HtmlEntry> {
         }
     }
 
-    static final class DataDeserializer implements RAFListSerializer<byte[]> {
+    static final class DataDeserializer implements RAFListSerializer<DataInputBuffer> {
         @Override
-        public byte[] read(DataInput raf, final int index) throws IOException {
+        public DataInputBuffer read(DataInput raf, final int index) throws IOException {
             return HtmlEntry.readData(raf);
         }
 
         @Override
-        public void write(DataOutput raf, byte[] t) {
+        public void write(DataOutput raf, DataInputBuffer t) {
             assert false;
         }
     }
@@ -228,34 +226,26 @@ public class HtmlEntry extends AbstractEntry implements Comparable<HtmlEntry> {
 
     @SuppressWarnings("WeakerAccess")
     public static final class LazyHtmlLoader {
-        final DataInput raf;
-        final FileChannel ch;
-        final long offset;
+        final DataInputBuffer buf;
         final int numBytes;
-        final int numZipBytes;
-        final List<byte[]> data;
+        final List<DataInputBuffer> data;
         final int index;
 
         // Not sure this volatile is right, but oh well.
         volatile SoftReference<String> htmlRef = new SoftReference<>(null);
 
-        private LazyHtmlLoader(FileChannel ch, final DataInput inp, List<byte[]> data, int index) throws IOException {
+        private LazyHtmlLoader(final DataInput inp, List<DataInputBuffer> data, int index) throws IOException {
             this.data = data;
             this.index = index;
             if (data != null) {
-                this.raf = null;
-                this.ch = null;
-                this.offset = 0;
+                buf = null;
                 this.numBytes = -1;
-                this.numZipBytes = -1;
                 return;
             }
-            raf = inp;
-            this.ch = ch;
-            numBytes = Math.min(raf.readInt(), 20 * 1024 * 1024);
-            numZipBytes = Math.min(raf.readInt(), 20 * 1024 * 1024);
-            offset = ch.position();
-            raf.skipBytes(numZipBytes);
+            numBytes = Math.min(inp.readInt(), 20 * 1024 * 1024);
+            int numZipBytes = Math.min(inp.readInt(), 20 * 1024 * 1024);
+            DataInputBuffer b = (DataInputBuffer)inp;
+            buf = b.slice(numZipBytes);
         }
 
         String getHtml() {
@@ -264,21 +254,15 @@ public class HtmlEntry extends AbstractEntry implements Comparable<HtmlEntry> {
                 return html;
             }
             if (data != null) {
-                html = new String(data.get(index), StandardCharsets.UTF_8);
+                html = data.get(index).asString();
                 htmlRef = new SoftReference<>(html);
                 return html;
             }
             System.out.println("Loading Html: numBytes=" + numBytes + ", numZipBytes="
-                               + numZipBytes);
-            final byte[] zipBytes = new byte[numZipBytes];
-            synchronized (ch) {
-                try {
-                    ch.position(offset);
-                    raf.readFully(zipBytes);
-                } catch (IOException e) {
-                    throw new RuntimeException("Failed to read HTML data from dictionary", e);
-                }
-            }
+                               + buf.limit());
+            final byte[] zipBytes = new byte[buf.limit()];
+            buf.rewind();
+            buf.readFully(zipBytes);
             try {
                 final byte[] bytes = StringUtil.unzipFully(zipBytes, numBytes);
                 html = new String(bytes, StandardCharsets.UTF_8);
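diff --git a/src/com/hughes/android/dictionary/engine/Index.java b/src/com/hughes/android/dictionary/engine/Index.java
index 5812b685aafb458b1fbd038071d1520955fcb3fb..db44b41546569ee39e178511fdbf5f5bf92b4bea 100644 (file)
--- a/src/com/hughes/android/dictionary/engine/Index.java
+++ b/src/com/hughes/android/dictionary/engine/Index.java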
@@ -19,7 +19,6 @@ import java.io.DataOutput;
 import java.io.IOException;
 import java.io.PrintStream;
 import java.io.RandomAccessFile;
-import java.nio.channels.FileChannel;
 import java.util.AbstractList;
 import java.util.ArrayList;
 import java.util.Collection;
@@ -38,6 +37,7 @@ import com.hughes.android.dictionary.DictionaryInfo;
 import com.hughes.android.dictionary.DictionaryInfo.IndexInfo;
 import com.hughes.android.dictionary.engine.RowBase.RowKey;
 import com.hughes.util.CachingList;
+import com.hughes.util.DataInputBuffer;
 import com.hughes.util.StringUtil;
 import com.hughes.util.TransformingList;
 import com.hughes.util.raf.RAFList;
@@ -117,7 +117,7 @@ public final class Index implements RAFSerializable<Index> {
         return new NormalizeComparator(normalizer(), sortLanguage.getCollator(), dict.dictFileVersion);
     }
 
-    public Index(final Dictionary dict, final FileChannel inp, final DataInput raf) throws IOException {
+    public Index(final Dictionary dict, final DataInputBuffer raf) throws IOException {
         this.dict = dict;
         shortName = raf.readUTF();
         longName = raf.readUTF();
@@ -129,7 +129,7 @@ public final class Index implements RAFSerializable<Index> {
             mainTokenCount = raf.readInt();
         }
         sortedIndexEntries = CachingList.create(
-                                 RAFList.create(inp, new IndexEntrySerializer(dict.dictFileVersion == 6 ? inp : null), inp.position(),
+                                 RAFList.create(raf, new IndexEntrySerializer(),
                                                 dict.dictFileVersion, dict.dictInfo + " idx " + languageCode + ": "), CACHE_SIZE, true);
         if (dict.dictFileVersion >= 7) {
             int count = StringUtil.readVarInt(raf);
@@ -143,7 +143,7 @@ public final class Index implements RAFSerializable<Index> {
             stoplist = Collections.emptySet();
         }
         rows = CachingList.create(
-                   UniformRAFList.create(inp, new RowBase.Serializer(this), inp.position()),
+                   UniformRAFList.create(raf, new RowBase.Serializer(this)),
                    CACHE_SIZE, true);
     }
 
@@ -156,7 +156,7 @@ public final class Index implements RAFSerializable<Index> {
         raf.writeUTF(normalizerRules);
         raf.writeBoolean(swapPairEntries);
         raf.writeInt(mainTokenCount);
-        RAFList.write(raf, sortedIndexEntries, new IndexEntrySerializer(null), 32, true);
+        RAFList.write(raf, sortedIndexEntries, new IndexEntrySerializer(), 32, true);
         StringUtil.writeVarInt(raf, stoplist.size());
         for (String i : stoplist) {
             raf.writeUTF(i);
@@ -171,15 +171,9 @@ public final class Index implements RAFSerializable<Index> {
     }
 
     private final class IndexEntrySerializer implements RAFSerializer<IndexEntry> {
-        private final FileChannel ch;
-
-        IndexEntrySerializer(FileChannel ch) {
-            this.ch = ch;
-        }
-
         @Override
         public IndexEntry read(DataInput raf) throws IOException {
-            return new IndexEntry(Index.this, ch, raf);
+            return new IndexEntry(Index.this, raf);
         }
 
         @Override
@@ -206,7 +200,7 @@ public final class Index implements RAFSerializable<Index> {
             this.htmlEntries = new ArrayList<>();
         }
 
-        IndexEntry(final Index index, final FileChannel ch, final DataInput raf) throws IOException {
+        IndexEntry(final Index index, final DataInput raf) throws IOException {
             token = raf.readUTF();
             if (index.dict.dictFileVersion >= 7) {
                 startRow = StringUtil.readVarInt(raf);
@@ -239,8 +233,8 @@ public final class Index implements RAFSerializable<Index> {
                 }
             } else if (index.dict.dictFileVersion >= 6) {
                 this.htmlEntries = CachingList.create(
-                                       RAFList.create(ch, index.dict.htmlEntryIndexSerializer,
-                                                      ch.position(), index.dict.dictFileVersion,
+                                       RAFList.create((DataInputBuffer)raf, index.dict.htmlEntryIndexSerializer,
+                                                      index.dict.dictFileVersion,
                                                       index.dict.dictInfo + " htmlEntries: "), 1, false);
             } else {
                 this.htmlEntries = Collections.emptyList();