-Subproject commit 93327017669a7620ad62344e8e5fe4ddfde7bff3
+Subproject commit 585113b2b62a7cfb8daac6936f55082fbb1bca12
import java.io.ObjectOutputStream;
import java.io.PrintStream;
import java.io.RandomAccessFile;
+import java.nio.MappedByteBuffer;
import java.nio.channels.Channels;
import java.nio.channels.FileChannel;
import java.nio.charset.StandardCharsets;
import com.hughes.android.dictionary.DictionaryInfo;
import com.hughes.util.CachingList;
+import com.hughes.util.DataInputBuffer;
import com.hughes.util.raf.RAFList;
import com.hughes.util.raf.RAFListSerializer;
import com.hughes.util.raf.RAFSerializable;
public final List<PairEntry> pairEntries;
public final List<TextEntry> textEntries;
// Assigned Collections.emptyList() by the FileChannel constructor when dictFileVersion < 5.
public final List<HtmlEntry> htmlEntries;
// Patch note: the element type changes from byte[] to DataInputBuffer.
// May be null: the FileChannel constructor assigns null when dictFileVersion < 7.
- public final List<byte[]> htmlData;
+ public final List<DataInputBuffer> htmlData;
public final List<EntrySource> sources;
public final List<Index> indices;
}
public Dictionary(final FileChannel ch) throws IOException {
- DataInput raf = new DataInputStream(Channels.newInputStream(ch));
- dictFileVersion = raf.readInt();
+ MappedByteBuffer wholefile = ch.map(FileChannel.MapMode.READ_ONLY, 0, ch.size());
+ DataInputBuffer in = new DataInputBuffer(wholefile, 0);
+ dictFileVersion = in.readInt();
if (dictFileVersion < 0 || dictFileVersion > CURRENT_DICT_VERSION) {
throw new IOException("Invalid dictionary version: " + dictFileVersion);
}
- creationMillis = raf.readLong();
- dictInfo = raf.readUTF();
+ creationMillis = in.readLong();
+ dictInfo = in.readUTF();
// Load the sources, then seek past them, because reading them later
// disrupts the offset.
try {
- final RAFList<EntrySource> rafSources = RAFList.create(ch, new EntrySource.Serializer(
- this), ch.position(), dictFileVersion, dictInfo + " sources: ");
+ final RAFList<EntrySource> rafSources = RAFList.create(in, new EntrySource.Serializer(
+ this), dictFileVersion, dictInfo + " sources: ");
sources = new ArrayList<>(rafSources);
ch.position(rafSources.getEndOffset());
pairEntries = CachingList.create(
- RAFList.create(ch, new PairEntry.Serializer(this), ch.position(), dictFileVersion, dictInfo + " pairs: "),
+ RAFList.create(in, new PairEntry.Serializer(this), dictFileVersion, dictInfo + " pairs: "),
CACHE_SIZE, false);
textEntries = CachingList.create(
- RAFList.create(ch, new TextEntry.Serializer(this), ch.position(), dictFileVersion, dictInfo + " text: "),
+ RAFList.create(in, new TextEntry.Serializer(this), dictFileVersion, dictInfo + " text: "),
CACHE_SIZE, true);
if (dictFileVersion >= 5) {
htmlEntries = CachingList.create(
- RAFList.create(ch, new HtmlEntry.Serializer(this, ch), ch.position(), dictFileVersion, dictInfo + " html: "),
+ RAFList.create(in, new HtmlEntry.Serializer(this), dictFileVersion, dictInfo + " html: "),
CACHE_SIZE, false);
} else {
htmlEntries = Collections.emptyList();
}
if (dictFileVersion >= 7) {
- htmlData = RAFList.create(ch, new HtmlEntry.DataDeserializer(), ch.position(), dictFileVersion, dictInfo + " html: ");
+ htmlData = RAFList.create(in, new HtmlEntry.DataDeserializer(), dictFileVersion, dictInfo + " html: ");
} else {
htmlData = null;
}
- indices = CachingList.createFullyCached(RAFList.create(ch, new IndexSerializer(ch),
- ch.position(), dictFileVersion, dictInfo + " index: "));
+ indices = CachingList.createFullyCached(RAFList.create(in, new IndexSerializer(),
+ dictFileVersion, dictInfo + " index: "));
} catch (RuntimeException e) {
throw new IOException("RuntimeException loading dictionary", e);
}
- final String end = raf.readUTF();
+ final String end = in.readUTF();
if (!end.equals(END_OF_DICTIONARY)) {
throw new IOException("Dictionary seems corrupt: " + end);
}
System.out.println("text start: " + raf.getFilePointer());
RAFList.write(raf, textEntries, new TextEntry.Serializer(this));
System.out.println("html index start: " + raf.getFilePointer());
- RAFList.write(raf, htmlEntries, new HtmlEntry.Serializer(this, null), 64, true);
+ RAFList.write(raf, htmlEntries, new HtmlEntry.Serializer(this), 64, true);
System.out.println("html data start: " + raf.getFilePointer());
assert htmlData == null;
RAFList.write(raf, htmlEntries, new HtmlEntry.DataSerializer(), 128, true);
System.out.println("indices start: " + raf.getFilePointer());
- RAFList.write(raf, indices, new IndexSerializer(null));
+ RAFList.write(raf, indices, new IndexSerializer());
System.out.println("end: " + raf.getFilePointer());
raf.writeUTF(END_OF_DICTIONARY);
}
private final class IndexSerializer implements RAFListSerializer<Index> {
- private final FileChannel ch;
-
- IndexSerializer(FileChannel ch) {
- this.ch = ch;
- }
-
// Builds one Index for the enclosing Dictionary from the given input.
// readIndex is not used by this method.
// Patch note: the removed line handed the serializer's FileChannel field to Index;
// the added line instead downcasts raf to DataInputBuffer, so a ClassCastException
// is thrown if raf is any other DataInput implementation.
@Override
public Index read(DataInput raf, final int readIndex) throws IOException {
- return new Index(Dictionary.this, ch, raf);
+ return new Index(Dictionary.this, (DataInputBuffer)raf);
}
@Override
import java.io.IOException;
import java.io.PrintStream;
import java.lang.ref.SoftReference;
+import java.nio.ByteBuffer;
import java.nio.channels.FileChannel;
import java.nio.charset.StandardCharsets;
import java.util.List;
import java.util.regex.Pattern;
+import com.hughes.util.DataInputBuffer;
import com.hughes.util.StringUtil;
import com.hughes.util.raf.RAFListSerializer;
import com.hughes.util.raf.RAFListSerializerSkippable;
lazyHtmlLoader = null;
}
// Deserializing constructor: lets the superclass consume its part of raf first,
// then reads the title as a UTF string and creates a LazyHtmlLoader for the body.
// Patch note: the FileChannel parameter is dropped; the loader now receives only
// raf, the dictionary's htmlData list and this entry's index.
- public HtmlEntry(Dictionary dictionary, FileChannel ch, DataInput raf, final int index)
+ public HtmlEntry(Dictionary dictionary, DataInput raf, final int index)
throws IOException {
super(dictionary, raf, index);
title = raf.readUTF();
- lazyHtmlLoader = new LazyHtmlLoader(ch, raf, dictionary.htmlData, index);
+ lazyHtmlLoader = new LazyHtmlLoader(raf, dictionary.htmlData, index);
// The HTML text itself is not read here; html stays null.
html = null;
}
raf.write(bytes);
}
// Reads one length-prefixed data record: a var-int length followed by the payload.
// Patch note: the removed code copied the payload into a new byte[] whose size was
// capped at 20 MiB; the added code returns slice(len) of the input instead.
- private static byte[] readData(DataInput raf) throws IOException {
+ private static DataInputBuffer readData(DataInput raf) throws IOException {
int len = StringUtil.readVarInt(raf);
- final byte[] bytes = new byte[Math.min(len, 20 * 1024 * 1024)];
- raf.readFully(bytes);
- return bytes;
// NOTE(review): the 20 MiB cap is no longer applied to len - confirm that
// DataInputBuffer.slice rejects negative or out-of-range lengths.
// NOTE(review): presumably slice also advances raf past the payload, as the
// removed readFully did - confirm in DataInputBuffer.
// The cast throws ClassCastException if raf is not a DataInputBuffer.
+ return ((DataInputBuffer)raf).slice(len);
}
String getHtml() {
static final class Serializer implements RAFListSerializerSkippable<HtmlEntry> {
final Dictionary dictionary;
- final FileChannel ch;
// Creates a serializer bound to the given dictionary.
// Patch note: the FileChannel parameter and the assignment to the ch field are removed.
- Serializer(Dictionary dictionary, FileChannel ch) {
+ Serializer(Dictionary dictionary) {
this.dictionary = dictionary;
- this.ch = ch;
}
// Deserializes one HtmlEntry by delegating to the HtmlEntry deserializing
// constructor with this serializer's dictionary, the input and the entry index.
// Patch note: the FileChannel argument is no longer passed through.
@Override
public HtmlEntry read(DataInput raf, final int index) throws IOException {
- return new HtmlEntry(dictionary, ch, raf, index);
+ return new HtmlEntry(dictionary, raf, index);
}
@Override
}
}
// Read-only list serializer for the HTML data records.
// Patch note: the element type changes from byte[] to DataInputBuffer.
- static final class DataDeserializer implements RAFListSerializer<byte[]> {
+ static final class DataDeserializer implements RAFListSerializer<DataInputBuffer> {
// Delegates to HtmlEntry.readData; index is not used.
@Override
- public byte[] read(DataInput raf, final int index) throws IOException {
+ public DataInputBuffer read(DataInput raf, final int index) throws IOException {
return HtmlEntry.readData(raf);
}
// Writing is not supported. The assert only fires when assertions are enabled
// (java -ea); otherwise this method returns without writing anything.
@Override
- public void write(DataOutput raf, byte[] t) {
+ public void write(DataOutput raf, DataInputBuffer t) {
assert false;
}
}
@SuppressWarnings("WeakerAccess")
public static final class LazyHtmlLoader {
- final DataInput raf;
- final FileChannel ch;
- final long offset;
+ final DataInputBuffer buf;
final int numBytes;
- final int numZipBytes;
- final List<byte[]> data;
+ final List<DataInputBuffer> data;
final int index;
// Not sure this volatile is right, but oh well.
volatile SoftReference<String> htmlRef = new SoftReference<>(null);
// Sets up lazy loading of one entry's HTML in one of two modes:
//  - data != null: nothing is read from inp; the loader keeps only the list and index.
//  - data == null: two ints are read from inp, then a slice of inp is kept in buf.
// Patch note: the FileChannel/offset bookkeeping is removed; instead of recording
// ch.position() and skipping numZipBytes, the added code keeps a slice of the input.
- private LazyHtmlLoader(FileChannel ch, final DataInput inp, List<byte[]> data, int index) throws IOException {
+ private LazyHtmlLoader(final DataInput inp, List<DataInputBuffer> data, int index) throws IOException {
this.data = data;
this.index = index;
if (data != null) {
- this.raf = null;
- this.ch = null;
- this.offset = 0;
// In this mode buf is unused and numBytes is set to -1.
+ buf = null;
this.numBytes = -1;
- this.numZipBytes = -1;
return;
}
- raf = inp;
- this.ch = ch;
- numBytes = Math.min(raf.readInt(), 20 * 1024 * 1024);
- numZipBytes = Math.min(raf.readInt(), 20 * 1024 * 1024);
- offset = ch.position();
- raf.skipBytes(numZipBytes);
// Both values are clamped to at most 20 MiB. Presumably numBytes is the
// uncompressed size and numZipBytes the compressed size - confirm against the writer.
+ numBytes = Math.min(inp.readInt(), 20 * 1024 * 1024);
+ int numZipBytes = Math.min(inp.readInt(), 20 * 1024 * 1024);
// The cast throws ClassCastException if inp is not a DataInputBuffer.
+ DataInputBuffer b = (DataInputBuffer)inp;
// NOTE(review): presumably slice also advances inp past the payload, as the
// removed skipBytes call did - confirm in DataInputBuffer.
+ buf = b.slice(numZipBytes);
}
String getHtml() {
return html;
}
if (data != null) {
- html = new String(data.get(index), StandardCharsets.UTF_8);
+ html = data.get(index).asString();
htmlRef = new SoftReference<>(html);
return html;
}
System.out.println("Loading Html: numBytes=" + numBytes + ", numZipBytes="
- + numZipBytes);
- final byte[] zipBytes = new byte[numZipBytes];
- synchronized (ch) {
- try {
- ch.position(offset);
- raf.readFully(zipBytes);
- } catch (IOException e) {
- throw new RuntimeException("Failed to read HTML data from dictionary", e);
- }
- }
+ + buf.limit());
+ final byte[] zipBytes = new byte[buf.limit()];
+ buf.rewind();
+ buf.readFully(zipBytes);
try {
final byte[] bytes = StringUtil.unzipFully(zipBytes, numBytes);
html = new String(bytes, StandardCharsets.UTF_8);
import java.io.IOException;
import java.io.PrintStream;
import java.io.RandomAccessFile;
-import java.nio.channels.FileChannel;
import java.util.AbstractList;
import java.util.ArrayList;
import java.util.Collection;
import com.hughes.android.dictionary.DictionaryInfo.IndexInfo;
import com.hughes.android.dictionary.engine.RowBase.RowKey;
import com.hughes.util.CachingList;
+import com.hughes.util.DataInputBuffer;
import com.hughes.util.StringUtil;
import com.hughes.util.TransformingList;
import com.hughes.util.raf.RAFList;
return new NormalizeComparator(normalizer(), sortLanguage.getCollator(), dict.dictFileVersion);
}
- public Index(final Dictionary dict, final FileChannel inp, final DataInput raf) throws IOException {
+ public Index(final Dictionary dict, final DataInputBuffer raf) throws IOException {
this.dict = dict;
shortName = raf.readUTF();
longName = raf.readUTF();
mainTokenCount = raf.readInt();
}
sortedIndexEntries = CachingList.create(
- RAFList.create(inp, new IndexEntrySerializer(dict.dictFileVersion == 6 ? inp : null), inp.position(),
+ RAFList.create(raf, new IndexEntrySerializer(),
dict.dictFileVersion, dict.dictInfo + " idx " + languageCode + ": "), CACHE_SIZE, true);
if (dict.dictFileVersion >= 7) {
int count = StringUtil.readVarInt(raf);
stoplist = Collections.emptySet();
}
rows = CachingList.create(
- UniformRAFList.create(inp, new RowBase.Serializer(this), inp.position()),
+ UniformRAFList.create(raf, new RowBase.Serializer(this)),
CACHE_SIZE, true);
}
raf.writeUTF(normalizerRules);
raf.writeBoolean(swapPairEntries);
raf.writeInt(mainTokenCount);
- RAFList.write(raf, sortedIndexEntries, new IndexEntrySerializer(null), 32, true);
+ RAFList.write(raf, sortedIndexEntries, new IndexEntrySerializer(), 32, true);
StringUtil.writeVarInt(raf, stoplist.size());
for (String i : stoplist) {
raf.writeUTF(i);
}
private final class IndexEntrySerializer implements RAFSerializer<IndexEntry> {
- private final FileChannel ch;
-
- IndexEntrySerializer(FileChannel ch) {
- this.ch = ch;
- }
-
// Deserializes one IndexEntry belonging to the enclosing Index from the given input.
// Patch note: the serializer's FileChannel field is no longer passed to IndexEntry.
@Override
public IndexEntry read(DataInput raf) throws IOException {
- return new IndexEntry(Index.this, ch, raf);
+ return new IndexEntry(Index.this, raf);
}
@Override
this.htmlEntries = new ArrayList<>();
}
- IndexEntry(final Index index, final FileChannel ch, final DataInput raf) throws IOException {
+ IndexEntry(final Index index, final DataInput raf) throws IOException {
token = raf.readUTF();
if (index.dict.dictFileVersion >= 7) {
startRow = StringUtil.readVarInt(raf);
}
} else if (index.dict.dictFileVersion >= 6) {
this.htmlEntries = CachingList.create(
- RAFList.create(ch, index.dict.htmlEntryIndexSerializer,
- ch.position(), index.dict.dictFileVersion,
+ RAFList.create((DataInputBuffer)raf, index.dict.htmlEntryIndexSerializer,
+ index.dict.dictFileVersion,
index.dict.dictInfo + " htmlEntries: "), 1, false);
} else {
this.htmlEntries = Collections.emptyList();