From: Reimar Döffinger Date: Tue, 8 Dec 2015 18:58:16 +0000 (+0100) Subject: Vastly improve HtmlEntry compression. X-Git-Url: http://gitweb.fperrin.net/?p=Dictionary.git;a=commitdiff_plain;h=9aefce5e3be1428ef19e1cb7f94a1fb34e0de3e0 Vastly improve HtmlEntry compression. --- diff --git a/res/raw/dictionary_info.txt b/res/raw/dictionary_info.txt index 00a2268..d21e885 100644 --- a/res/raw/dictionary_info.txt +++ b/res/raw/dictionary_info.txt @@ -2,7 +2,7 @@ AR-DE.quickdic http://github.com/rdoeffinger/Dictionary/releases/download/v0.2-d DE-CA.quickdic http://github.com/rdoeffinger/Dictionary/releases/download/v0.2-dictionaries/DE-CA.quickdic.v006.zip 1442056078905 4811184 2807340 2 DE 18490 17773 CA 15490 14102 (DE)Wiktionary-based DE-CA dictionary. DE-CS.quickdic http://github.com/rdoeffinger/Dictionary/releases/download/v0.2-dictionaries/DE-CS.quickdic.v006.zip 1442056193569 8280756 5450496 2 DE 23504 22264 CS 24042 19644 (DE)Wiktionary-based DE-CS dictionary. DE-EO.quickdic http://github.com/rdoeffinger/Dictionary/releases/download/v0.2-dictionaries/DE-EO.quickdic.v006.zip 1442056315613 4811254 2404134 2 DE 14741 14061 EO 17158 16319 (DE)Wiktionary-based DE-EO dictionary. -DE-ES.quickdic http://github.com/rdoeffinger/Dictionary/releases/download/v0.2-dictionaries/DE-ES.quickdic.v007.zip 1449523882659 4000266 3273614 2 DE 34399 32197 ES 29606 26911 (DE)Wiktionary-based DE-ES dictionary. +DE-ES.quickdic http://github.com/rdoeffinger/Dictionary/releases/download/v0.2-dictionaries/DE-ES.quickdic.v006.zip 1442056427916 7416740 3343255 2 DE 34399 32197 ES 29606 26911 (DE)Wiktionary-based DE-ES dictionary. DE-FR.quickdic http://github.com/rdoeffinger/Dictionary/releases/download/v0.2-dictionaries/DE-FR.quickdic.v006.zip 1442056552895 20761086 15018822 2 DE 48386 45902 FR 32854 29253 (DE)Wiktionary-based DE-FR dictionary. DE-HE.quickdic http://github.com/rdoeffinger/Dictionary/releases/download/v0.2-dictionaries/DE-HE.quickdic.v006.zip 1357252532185 1672312 577438 2 DE 8503 7935 HE 12590 5651 Wikitionary-based DE-HE dictionary. DE-HU.quickdic http://github.com/rdoeffinger/Dictionary/releases/download/v0.2-dictionaries/DE-HU.quickdic.v006.zip 1442056707554 3771060 1667008 2 DE 19685 18595 HU 17566 15923 (DE)Wiktionary-based DE-HU dictionary. @@ -99,7 +99,7 @@ FR-BG.quickdic http://github.com/rdoeffinger/Dictionary/releases/download/v0.2-d FR-CS.quickdic http://github.com/rdoeffinger/Dictionary/releases/download/v0.2-dictionaries/FR-CS.quickdic.v006.zip 1442254346613 18429081 14030906 2 FR 16874 15501 CS 44564 42900 (FR)Wiktionary-based FR-CS dictionary. FR-EO.quickdic http://github.com/rdoeffinger/Dictionary/releases/download/v0.2-dictionaries/FR-EO.quickdic.v006.zip 1442254492317 24003758 15169444 2 FR 11413 10566 EO 76377 75778 (FR)Wiktionary-based FR-EO dictionary. FR-ES.quickdic http://github.com/rdoeffinger/Dictionary/releases/download/v0.2-dictionaries/FR-ES.quickdic.v006.zip 1442254654705 11757104 7409077 2 FR 25597 23149 ES 37072 34887 (FR)Wiktionary-based FR-ES dictionary. -FR-IS.quickdic http://github.com/rdoeffinger/Dictionary/releases/download/v0.2-dictionaries/FR-IS.quickdic.v006.zip 1442254796905 6183005 3940577 2 FR 8601 7788 IS 22387 21244 (FR)Wiktionary-based FR-IS dictionary. +FR-IS.quickdic http://github.com/rdoeffinger/Dictionary/releases/download/v0.2-dictionaries/FR-IS.quickdic.v007.zip 1449600233503 1606289 1541404 2 FR 8601 7788 IS 22387 21244 (FR)Wiktionary-based FR-IS dictionary. FR-IT.quickdic http://github.com/rdoeffinger/Dictionary/releases/download/v0.2-dictionaries/FR-IT.quickdic.v006.zip 1442254922263 25484806 17114112 2 FR 27542 25759 IT 68481 66981 (FR)Wiktionary-based FR-IT dictionary. FR-JA.quickdic http://github.com/rdoeffinger/Dictionary/releases/download/v0.2-dictionaries/FR-JA.quickdic.v006.zip 1442255093894 16876289 10667997 2 FR 19970 17691 JA 82978 45912 (FR)Wiktionary-based FR-JA dictionary. FR-LA.quickdic http://github.com/rdoeffinger/Dictionary/releases/download/v0.2-dictionaries/FR-LA.quickdic.v006.zip 1442255330498 29529636 20990153 2 FR 7432 6911 LA 85255 84555 (FR)Wiktionary-based FR-LA dictionary. diff --git a/src/com/hughes/android/dictionary/engine/AbstractEntry.java b/src/com/hughes/android/dictionary/engine/AbstractEntry.java index 2db0a3e..2e71135 100644 --- a/src/com/hughes/android/dictionary/engine/AbstractEntry.java +++ b/src/com/hughes/android/dictionary/engine/AbstractEntry.java @@ -15,6 +15,7 @@ package com.hughes.android.dictionary.engine; import com.hughes.util.IndexedObject; +import com.hughes.util.StringUtil; import java.io.DataInput; import java.io.DataOutput; @@ -33,7 +34,7 @@ public abstract class AbstractEntry extends IndexedObject { throws IOException { super(index); if (dictionary.dictFileVersion >= 1) { - final int entrySouceIdx = raf.readShort(); + final int entrySouceIdx = dictionary.dictFileVersion >= 7 ? StringUtil.readVarInt(raf) : raf.readShort(); this.entrySource = dictionary.sources.get(entrySouceIdx); } else { this.entrySource = null; @@ -41,7 +42,7 @@ public abstract class AbstractEntry extends IndexedObject { } public void write(DataOutput raf) throws IOException { - raf.writeShort(entrySource.index()); + StringUtil.writeVarInt(raf, entrySource.index()); } public abstract void addToDictionary(final Dictionary dictionary); diff --git a/src/com/hughes/android/dictionary/engine/Dictionary.java b/src/com/hughes/android/dictionary/engine/Dictionary.java index 57ae6e7..cb7d32f 100644 --- a/src/com/hughes/android/dictionary/engine/Dictionary.java +++ b/src/com/hughes/android/dictionary/engine/Dictionary.java @@ -45,6 +45,7 @@ public class Dictionary implements RAFSerializable { public final List pairEntries; public final List textEntries; public final List htmlEntries; + public final List htmlData; public final List sources; public final List indices; @@ -60,6 +61,7 @@ public class Dictionary implements RAFSerializable { pairEntries = new ArrayList(); textEntries = new ArrayList(); htmlEntries = new ArrayList(); + htmlData = null; sources = new ArrayList(); indices = new ArrayList(); } @@ -88,11 +90,16 @@ public class Dictionary implements RAFSerializable { CACHE_SIZE); if (dictFileVersion >= 5) { htmlEntries = CachingList.create( - RAFList.create(raf, new HtmlEntry.Serializer(this), raf.getFilePointer(), dictFileVersion), + RAFList.create(raf, new HtmlEntry.Serializer(this), raf.getFilePointer(), dictFileVersion, dictFileVersion >= 7 ? 64 : 1, dictFileVersion >= 7), CACHE_SIZE); } else { htmlEntries = Collections.emptyList(); } + if (dictFileVersion >= 7) { + htmlData = RAFList.create(raf, new HtmlEntry.DataDeserializer(), raf.getFilePointer(), dictFileVersion, 16, true); + } else { + htmlData = null; + } indices = CachingList.createFullyCached(RAFList.create(raf, indexSerializer, raf.getFilePointer(), dictFileVersion)); } catch (RuntimeException e) { @@ -119,7 +126,9 @@ public class Dictionary implements RAFSerializable { System.out.println("text start: " + raf.getFilePointer()); RAFList.write(raf, textEntries, new TextEntry.Serializer(this)); System.out.println("html start: " + raf.getFilePointer()); - RAFList.write(raf, htmlEntries, new HtmlEntry.Serializer(this)); + RAFList.write(raf, htmlEntries, new HtmlEntry.Serializer(this), 64, true); + assert htmlData == null; + RAFList.write(raf, htmlEntries, new HtmlEntry.DataSerializer(), 16, true); System.out.println("indices start: " + raf.getFilePointer()); RAFList.write(raf, indices, indexSerializer); System.out.println("end: " + raf.getFilePointer()); diff --git a/src/com/hughes/android/dictionary/engine/HtmlEntry.java b/src/com/hughes/android/dictionary/engine/HtmlEntry.java index 573a969..8e2b9ba 100644 --- a/src/com/hughes/android/dictionary/engine/HtmlEntry.java +++ b/src/com/hughes/android/dictionary/engine/HtmlEntry.java @@ -11,6 +11,7 @@ import java.io.DataOutput; import java.io.IOException; import java.io.PrintStream; import java.io.RandomAccessFile; +import java.io.UnsupportedEncodingException; import java.lang.ref.SoftReference; import java.util.List; import java.util.regex.Pattern; @@ -33,7 +34,7 @@ public class HtmlEntry extends AbstractEntry implements RAFSerializable { + @Override + public HtmlEntry read(DataInput raf, final int index) throws IOException { + assert false; + return null; + } + + @Override + public void write(DataOutput raf, HtmlEntry t) throws IOException { + t.writeData(raf); + } + } + + static final class DataDeserializer implements RAFListSerializer { + @Override + public byte[] read(DataInput raf, final int index) throws IOException { + return HtmlEntry.readData(raf); + } + + @Override + public void write(DataOutput raf, byte[] t) throws IOException { + assert false; } } @@ -183,19 +227,25 @@ public class HtmlEntry extends AbstractEntry implements RAFSerializable data; + final int index; // Not sure this volatile is right, but oh well. volatile SoftReference htmlRef = new SoftReference(null); - private LazyHtmlLoader(final DataInput inp, int version) throws IOException { - raf = (RandomAccessFile)inp; - if (version >= 7) { - numBytes = -1; - numZipBytes = StringUtil.readVarInt(raf); - } else { - numBytes = raf.readInt(); - numZipBytes = raf.readInt(); + private LazyHtmlLoader(final DataInput inp, List data, int index) throws IOException { + this.data = data; + this.index = index; + if (data != null) { + this.raf = null; + this.offset = 0; + this.numBytes = -1; + this.numZipBytes = -1; + return; } + raf = (RandomAccessFile)inp; + numBytes = raf.readInt(); + numZipBytes = raf.readInt(); offset = raf.getFilePointer(); raf.skipBytes(numZipBytes); } @@ -205,6 +255,15 @@ public class HtmlEntry extends AbstractEntry implements RAFSerializable(html); + return html; + } System.out.println("Loading Html: numBytes=" + numBytes + ", numZipBytes=" + numZipBytes); final byte[] zipBytes = new byte[numZipBytes];