From: Reimar Döffinger Date: Wed, 15 Apr 2020 06:47:16 +0000 (+0200) Subject: Update to new mmapped IO. X-Git-Url: http://gitweb.fperrin.net/?p=Dictionary.git;a=commitdiff_plain;h=35b7b7dc537441278934398a6b81009c1ec42bbf Update to new mmapped IO. --- diff --git a/Util b/Util index 9332701..585113b 160000 --- a/Util +++ b/Util @@ -1 +1 @@ -Subproject commit 93327017669a7620ad62344e8e5fe4ddfde7bff3 +Subproject commit 585113b2b62a7cfb8daac6936f55082fbb1bca12 diff --git a/src/com/hughes/android/dictionary/engine/Dictionary.java b/src/com/hughes/android/dictionary/engine/Dictionary.java index cde3eb9..c8e6cb1 100644 --- a/src/com/hughes/android/dictionary/engine/Dictionary.java +++ b/src/com/hughes/android/dictionary/engine/Dictionary.java @@ -22,6 +22,7 @@ import java.io.IOException; import java.io.ObjectOutputStream; import java.io.PrintStream; import java.io.RandomAccessFile; +import java.nio.MappedByteBuffer; import java.nio.channels.Channels; import java.nio.channels.FileChannel; import java.nio.charset.StandardCharsets; @@ -31,6 +32,7 @@ import java.util.List; import com.hughes.android.dictionary.DictionaryInfo; import com.hughes.util.CachingList; +import com.hughes.util.DataInputBuffer; import com.hughes.util.raf.RAFList; import com.hughes.util.raf.RAFListSerializer; import com.hughes.util.raf.RAFSerializable; @@ -49,7 +51,7 @@ public class Dictionary implements RAFSerializable { public final List pairEntries; public final List textEntries; public final List htmlEntries; - public final List htmlData; + public final List htmlData; public final List sources; public final List indices; @@ -71,46 +73,47 @@ public class Dictionary implements RAFSerializable { } public Dictionary(final FileChannel ch) throws IOException { - DataInput raf = new DataInputStream(Channels.newInputStream(ch)); - dictFileVersion = raf.readInt(); + MappedByteBuffer wholefile = ch.map(FileChannel.MapMode.READ_ONLY, 0, ch.size()); + DataInputBuffer in = new DataInputBuffer(wholefile, 0); + dictFileVersion = in.readInt(); if (dictFileVersion < 0 || dictFileVersion > CURRENT_DICT_VERSION) { throw new IOException("Invalid dictionary version: " + dictFileVersion); } - creationMillis = raf.readLong(); - dictInfo = raf.readUTF(); + creationMillis = in.readLong(); + dictInfo = in.readUTF(); // Load the sources, then seek past them, because reading them later // disrupts the offset. try { - final RAFList rafSources = RAFList.create(ch, new EntrySource.Serializer( - this), ch.position(), dictFileVersion, dictInfo + " sources: "); + final RAFList rafSources = RAFList.create(in, new EntrySource.Serializer( + this), dictFileVersion, dictInfo + " sources: "); sources = new ArrayList<>(rafSources); ch.position(rafSources.getEndOffset()); pairEntries = CachingList.create( - RAFList.create(ch, new PairEntry.Serializer(this), ch.position(), dictFileVersion, dictInfo + " pairs: "), + RAFList.create(in, new PairEntry.Serializer(this), dictFileVersion, dictInfo + " pairs: "), CACHE_SIZE, false); textEntries = CachingList.create( - RAFList.create(ch, new TextEntry.Serializer(this), ch.position(), dictFileVersion, dictInfo + " text: "), + RAFList.create(in, new TextEntry.Serializer(this), dictFileVersion, dictInfo + " text: "), CACHE_SIZE, true); if (dictFileVersion >= 5) { htmlEntries = CachingList.create( - RAFList.create(ch, new HtmlEntry.Serializer(this, ch), ch.position(), dictFileVersion, dictInfo + " html: "), + RAFList.create(in, new HtmlEntry.Serializer(this), dictFileVersion, dictInfo + " html: "), CACHE_SIZE, false); } else { htmlEntries = Collections.emptyList(); } if (dictFileVersion >= 7) { - htmlData = RAFList.create(ch, new HtmlEntry.DataDeserializer(), ch.position(), dictFileVersion, dictInfo + " html: "); + htmlData = RAFList.create(in, new HtmlEntry.DataDeserializer(), dictFileVersion, dictInfo + " html: "); } else { htmlData = null; } - indices = CachingList.createFullyCached(RAFList.create(ch, new IndexSerializer(ch), - ch.position(), dictFileVersion, dictInfo + " index: ")); + indices = CachingList.createFullyCached(RAFList.create(in, new IndexSerializer(), + dictFileVersion, dictInfo + " index: ")); } catch (RuntimeException e) { throw new IOException("RuntimeException loading dictionary", e); } - final String end = raf.readUTF(); + final String end = in.readUTF(); if (!end.equals(END_OF_DICTIONARY)) { throw new IOException("Dictionary seems corrupt: " + end); } @@ -130,26 +133,20 @@ public class Dictionary implements RAFSerializable { System.out.println("text start: " + raf.getFilePointer()); RAFList.write(raf, textEntries, new TextEntry.Serializer(this)); System.out.println("html index start: " + raf.getFilePointer()); - RAFList.write(raf, htmlEntries, new HtmlEntry.Serializer(this, null), 64, true); + RAFList.write(raf, htmlEntries, new HtmlEntry.Serializer(this), 64, true); System.out.println("html data start: " + raf.getFilePointer()); assert htmlData == null; RAFList.write(raf, htmlEntries, new HtmlEntry.DataSerializer(), 128, true); System.out.println("indices start: " + raf.getFilePointer()); - RAFList.write(raf, indices, new IndexSerializer(null)); + RAFList.write(raf, indices, new IndexSerializer()); System.out.println("end: " + raf.getFilePointer()); raf.writeUTF(END_OF_DICTIONARY); } private final class IndexSerializer implements RAFListSerializer { - private final FileChannel ch; - - IndexSerializer(FileChannel ch) { - this.ch = ch; - } - @Override public Index read(DataInput raf, final int readIndex) throws IOException { - return new Index(Dictionary.this, ch, raf); + return new Index(Dictionary.this, (DataInputBuffer)raf); } @Override diff --git a/src/com/hughes/android/dictionary/engine/HtmlEntry.java b/src/com/hughes/android/dictionary/engine/HtmlEntry.java index 59ed59b..1b191a9 100644 --- a/src/com/hughes/android/dictionary/engine/HtmlEntry.java +++ b/src/com/hughes/android/dictionary/engine/HtmlEntry.java @@ -6,11 +6,13 @@ import java.io.DataOutput; import java.io.IOException; import java.io.PrintStream; import java.lang.ref.SoftReference; +import java.nio.ByteBuffer; import java.nio.channels.FileChannel; import java.nio.charset.StandardCharsets; import java.util.List; import java.util.regex.Pattern; +import com.hughes.util.DataInputBuffer; import com.hughes.util.StringUtil; import com.hughes.util.raf.RAFListSerializer; import com.hughes.util.raf.RAFListSerializerSkippable; @@ -30,11 +32,11 @@ public class HtmlEntry extends AbstractEntry implements Comparable { lazyHtmlLoader = null; } - public HtmlEntry(Dictionary dictionary, FileChannel ch, DataInput raf, final int index) + public HtmlEntry(Dictionary dictionary, DataInput raf, final int index) throws IOException { super(dictionary, raf, index); title = raf.readUTF(); - lazyHtmlLoader = new LazyHtmlLoader(ch, raf, dictionary.htmlData, index); + lazyHtmlLoader = new LazyHtmlLoader(raf, dictionary.htmlData, index); html = null; } @@ -49,11 +51,9 @@ public class HtmlEntry extends AbstractEntry implements Comparable { raf.write(bytes); } - private static byte[] readData(DataInput raf) throws IOException { + private static DataInputBuffer readData(DataInput raf) throws IOException { int len = StringUtil.readVarInt(raf); - final byte[] bytes = new byte[Math.min(len, 20 * 1024 * 1024)]; - raf.readFully(bytes); - return bytes; + return ((DataInputBuffer)raf).slice(len); } String getHtml() { @@ -75,16 +75,14 @@ public class HtmlEntry extends AbstractEntry implements Comparable { static final class Serializer implements RAFListSerializerSkippable { final Dictionary dictionary; - final FileChannel ch; - Serializer(Dictionary dictionary, FileChannel ch) { + Serializer(Dictionary dictionary) { this.dictionary = dictionary; - this.ch = ch; } @Override public HtmlEntry read(DataInput raf, final int index) throws IOException { - return new HtmlEntry(dictionary, ch, raf, index); + return new HtmlEntry(dictionary, raf, index); } @Override @@ -120,14 +118,14 @@ public class HtmlEntry extends AbstractEntry implements Comparable { } } - static final class DataDeserializer implements RAFListSerializer { + static final class DataDeserializer implements RAFListSerializer { @Override - public byte[] read(DataInput raf, final int index) throws IOException { + public DataInputBuffer read(DataInput raf, final int index) throws IOException { return HtmlEntry.readData(raf); } @Override - public void write(DataOutput raf, byte[] t) { + public void write(DataOutput raf, DataInputBuffer t) { assert false; } } @@ -228,34 +226,26 @@ public class HtmlEntry extends AbstractEntry implements Comparable { @SuppressWarnings("WeakerAccess") public static final class LazyHtmlLoader { - final DataInput raf; - final FileChannel ch; - final long offset; + final DataInputBuffer buf; final int numBytes; - final int numZipBytes; - final List data; + final List data; final int index; // Not sure this volatile is right, but oh well. volatile SoftReference htmlRef = new SoftReference<>(null); - private LazyHtmlLoader(FileChannel ch, final DataInput inp, List data, int index) throws IOException { + private LazyHtmlLoader(final DataInput inp, List data, int index) throws IOException { this.data = data; this.index = index; if (data != null) { - this.raf = null; - this.ch = null; - this.offset = 0; + buf = null; this.numBytes = -1; - this.numZipBytes = -1; return; } - raf = inp; - this.ch = ch; - numBytes = Math.min(raf.readInt(), 20 * 1024 * 1024); - numZipBytes = Math.min(raf.readInt(), 20 * 1024 * 1024); - offset = ch.position(); - raf.skipBytes(numZipBytes); + numBytes = Math.min(inp.readInt(), 20 * 1024 * 1024); + int numZipBytes = Math.min(inp.readInt(), 20 * 1024 * 1024); + DataInputBuffer b = (DataInputBuffer)inp; + buf = b.slice(numZipBytes); } String getHtml() { @@ -264,21 +254,15 @@ public class HtmlEntry extends AbstractEntry implements Comparable { return html; } if (data != null) { - html = new String(data.get(index), StandardCharsets.UTF_8); + html = data.get(index).asString(); htmlRef = new SoftReference<>(html); return html; } System.out.println("Loading Html: numBytes=" + numBytes + ", numZipBytes=" - + numZipBytes); - final byte[] zipBytes = new byte[numZipBytes]; - synchronized (ch) { - try { - ch.position(offset); - raf.readFully(zipBytes); - } catch (IOException e) { - throw new RuntimeException("Failed to read HTML data from dictionary", e); - } - } + + buf.limit()); + final byte[] zipBytes = new byte[buf.limit()]; + buf.rewind(); + buf.readFully(zipBytes); try { final byte[] bytes = StringUtil.unzipFully(zipBytes, numBytes); html = new String(bytes, StandardCharsets.UTF_8); diff --git a/src/com/hughes/android/dictionary/engine/Index.java b/src/com/hughes/android/dictionary/engine/Index.java index 5812b68..db44b41 100644 --- a/src/com/hughes/android/dictionary/engine/Index.java +++ b/src/com/hughes/android/dictionary/engine/Index.java @@ -19,7 +19,6 @@ import java.io.DataOutput; import java.io.IOException; import java.io.PrintStream; import java.io.RandomAccessFile; -import java.nio.channels.FileChannel; import java.util.AbstractList; import java.util.ArrayList; import java.util.Collection; @@ -38,6 +37,7 @@ import com.hughes.android.dictionary.DictionaryInfo; import com.hughes.android.dictionary.DictionaryInfo.IndexInfo; import com.hughes.android.dictionary.engine.RowBase.RowKey; import com.hughes.util.CachingList; +import com.hughes.util.DataInputBuffer; import com.hughes.util.StringUtil; import com.hughes.util.TransformingList; import com.hughes.util.raf.RAFList; @@ -117,7 +117,7 @@ public final class Index implements RAFSerializable { return new NormalizeComparator(normalizer(), sortLanguage.getCollator(), dict.dictFileVersion); } - public Index(final Dictionary dict, final FileChannel inp, final DataInput raf) throws IOException { + public Index(final Dictionary dict, final DataInputBuffer raf) throws IOException { this.dict = dict; shortName = raf.readUTF(); longName = raf.readUTF(); @@ -129,7 +129,7 @@ public final class Index implements RAFSerializable { mainTokenCount = raf.readInt(); } sortedIndexEntries = CachingList.create( - RAFList.create(inp, new IndexEntrySerializer(dict.dictFileVersion == 6 ? inp : null), inp.position(), + RAFList.create(raf, new IndexEntrySerializer(), dict.dictFileVersion, dict.dictInfo + " idx " + languageCode + ": "), CACHE_SIZE, true); if (dict.dictFileVersion >= 7) { int count = StringUtil.readVarInt(raf); @@ -143,7 +143,7 @@ public final class Index implements RAFSerializable { stoplist = Collections.emptySet(); } rows = CachingList.create( - UniformRAFList.create(inp, new RowBase.Serializer(this), inp.position()), + UniformRAFList.create(raf, new RowBase.Serializer(this)), CACHE_SIZE, true); } @@ -156,7 +156,7 @@ public final class Index implements RAFSerializable { raf.writeUTF(normalizerRules); raf.writeBoolean(swapPairEntries); raf.writeInt(mainTokenCount); - RAFList.write(raf, sortedIndexEntries, new IndexEntrySerializer(null), 32, true); + RAFList.write(raf, sortedIndexEntries, new IndexEntrySerializer(), 32, true); StringUtil.writeVarInt(raf, stoplist.size()); for (String i : stoplist) { raf.writeUTF(i); @@ -171,15 +171,9 @@ public final class Index implements RAFSerializable { } private final class IndexEntrySerializer implements RAFSerializer { - private final FileChannel ch; - - IndexEntrySerializer(FileChannel ch) { - this.ch = ch; - } - @Override public IndexEntry read(DataInput raf) throws IOException { - return new IndexEntry(Index.this, ch, raf); + return new IndexEntry(Index.this, raf); } @Override @@ -206,7 +200,7 @@ public final class Index implements RAFSerializable { this.htmlEntries = new ArrayList<>(); } - IndexEntry(final Index index, final FileChannel ch, final DataInput raf) throws IOException { + IndexEntry(final Index index, final DataInput raf) throws IOException { token = raf.readUTF(); if (index.dict.dictFileVersion >= 7) { startRow = StringUtil.readVarInt(raf); @@ -239,8 +233,8 @@ public final class Index implements RAFSerializable { } } else if (index.dict.dictFileVersion >= 6) { this.htmlEntries = CachingList.create( - RAFList.create(ch, index.dict.htmlEntryIndexSerializer, - ch.position(), index.dict.dictFileVersion, + RAFList.create((DataInputBuffer)raf, index.dict.htmlEntryIndexSerializer, + index.dict.dictFileVersion, index.dict.dictInfo + " htmlEntries: "), 1, false); } else { this.htmlEntries = Collections.emptyList();