gitweb.fperrin.net Git - Dictionary.git/commitdiff
Vastly improve HtmlEntry compression.
author Reimar Döffinger <Reimar.Doeffinger@gmx.de>
Tue, 8 Dec 2015 18:58:16 +0000 (19:58 +0100)
committer Reimar Döffinger <Reimar.Doeffinger@gmx.de>
Tue, 8 Dec 2015 18:58:16 +0000 (19:58 +0100)
res/raw/dictionary_info.txt
src/com/hughes/android/dictionary/engine/AbstractEntry.java
src/com/hughes/android/dictionary/engine/Dictionary.java
src/com/hughes/android/dictionary/engine/HtmlEntry.java

index 00a226896cf8bb0f9a90741971f277c28c14ded7..d21e885783a8e969eadc0ab8fcf39730904d41ec 100644 (file)
@@ -2,7 +2,7 @@ AR-DE.quickdic  http://github.com/rdoeffinger/Dictionary/releases/download/v0.2-d
 DE-CA.quickdic http://github.com/rdoeffinger/Dictionary/releases/download/v0.2-dictionaries/DE-CA.quickdic.v006.zip    1442056078905   4811184 2807340 2       DE      18490   17773   CA      15490   14102   (DE)Wiktionary-based DE-CA dictionary.
 DE-CS.quickdic http://github.com/rdoeffinger/Dictionary/releases/download/v0.2-dictionaries/DE-CS.quickdic.v006.zip    1442056193569   8280756 5450496 2       DE      23504   22264   CS      24042   19644   (DE)Wiktionary-based DE-CS dictionary.
 DE-EO.quickdic http://github.com/rdoeffinger/Dictionary/releases/download/v0.2-dictionaries/DE-EO.quickdic.v006.zip    1442056315613   4811254 2404134 2       DE      14741   14061   EO      17158   16319   (DE)Wiktionary-based DE-EO dictionary.
-DE-ES.quickdic http://github.com/rdoeffinger/Dictionary/releases/download/v0.2-dictionaries/DE-ES.quickdic.v007.zip    1449523882659   4000266 3273614 2       DE      34399   32197   ES      29606   26911   (DE)Wiktionary-based DE-ES dictionary.
+DE-ES.quickdic http://github.com/rdoeffinger/Dictionary/releases/download/v0.2-dictionaries/DE-ES.quickdic.v006.zip    1442056427916   7416740 3343255 2       DE      34399   32197   ES      29606   26911   (DE)Wiktionary-based DE-ES dictionary.
 DE-FR.quickdic http://github.com/rdoeffinger/Dictionary/releases/download/v0.2-dictionaries/DE-FR.quickdic.v006.zip    1442056552895   20761086        15018822        2       DE      48386   45902   FR      32854   29253   (DE)Wiktionary-based DE-FR dictionary.
 DE-HE.quickdic http://github.com/rdoeffinger/Dictionary/releases/download/v0.2-dictionaries/DE-HE.quickdic.v006.zip    1357252532185   1672312 577438  2       DE      8503    7935    HE      12590   5651    Wikitionary-based DE-HE dictionary.
 DE-HU.quickdic http://github.com/rdoeffinger/Dictionary/releases/download/v0.2-dictionaries/DE-HU.quickdic.v006.zip    1442056707554   3771060 1667008 2       DE      19685   18595   HU      17566   15923   (DE)Wiktionary-based DE-HU dictionary.
@@ -99,7 +99,7 @@ FR-BG.quickdic        http://github.com/rdoeffinger/Dictionary/releases/download/v0.2-d
 FR-CS.quickdic http://github.com/rdoeffinger/Dictionary/releases/download/v0.2-dictionaries/FR-CS.quickdic.v006.zip    1442254346613   18429081        14030906        2       FR      16874   15501   CS      44564   42900   (FR)Wiktionary-based FR-CS dictionary.
 FR-EO.quickdic http://github.com/rdoeffinger/Dictionary/releases/download/v0.2-dictionaries/FR-EO.quickdic.v006.zip    1442254492317   24003758        15169444        2       FR      11413   10566   EO      76377   75778   (FR)Wiktionary-based FR-EO dictionary.
 FR-ES.quickdic http://github.com/rdoeffinger/Dictionary/releases/download/v0.2-dictionaries/FR-ES.quickdic.v006.zip    1442254654705   11757104        7409077 2       FR      25597   23149   ES      37072   34887   (FR)Wiktionary-based FR-ES dictionary.
-FR-IS.quickdic http://github.com/rdoeffinger/Dictionary/releases/download/v0.2-dictionaries/FR-IS.quickdic.v006.zip    1442254796905   6183005 3940577 2       FR      8601    7788    IS      22387   21244   (FR)Wiktionary-based FR-IS dictionary.
+FR-IS.quickdic http://github.com/rdoeffinger/Dictionary/releases/download/v0.2-dictionaries/FR-IS.quickdic.v007.zip    1449600233503   1606289 1541404 2       FR      8601    7788    IS      22387   21244   (FR)Wiktionary-based FR-IS dictionary.
 FR-IT.quickdic http://github.com/rdoeffinger/Dictionary/releases/download/v0.2-dictionaries/FR-IT.quickdic.v006.zip    1442254922263   25484806        17114112        2       FR      27542   25759   IT      68481   66981   (FR)Wiktionary-based FR-IT dictionary.
 FR-JA.quickdic http://github.com/rdoeffinger/Dictionary/releases/download/v0.2-dictionaries/FR-JA.quickdic.v006.zip    1442255093894   16876289        10667997        2       FR      19970   17691   JA      82978   45912   (FR)Wiktionary-based FR-JA dictionary.
 FR-LA.quickdic http://github.com/rdoeffinger/Dictionary/releases/download/v0.2-dictionaries/FR-LA.quickdic.v006.zip    1442255330498   29529636        20990153        2       FR      7432    6911    LA      85255   84555   (FR)Wiktionary-based FR-LA dictionary.
index 2db0a3e4a83c3c4c8c24c5163e7b950ef374ada0..2e711356475a4e68418906c627ce6076a7b01e7d 100644 (file)
@@ -15,6 +15,7 @@
 package com.hughes.android.dictionary.engine;
 
 import com.hughes.util.IndexedObject;
+import com.hughes.util.StringUtil;
 
 import java.io.DataInput;
 import java.io.DataOutput;
@@ -33,7 +34,7 @@ public abstract class AbstractEntry extends IndexedObject {
             throws IOException {
         super(index);
         if (dictionary.dictFileVersion >= 1) {
-            final int entrySouceIdx = raf.readShort();
+            final int entrySouceIdx = dictionary.dictFileVersion >= 7 ? StringUtil.readVarInt(raf) : raf.readShort();
             this.entrySource = dictionary.sources.get(entrySouceIdx);
         } else {
             this.entrySource = null;
@@ -41,7 +42,7 @@ public abstract class AbstractEntry extends IndexedObject {
     }
 
     public void write(DataOutput raf) throws IOException {
-        raf.writeShort(entrySource.index());
+        StringUtil.writeVarInt(raf, entrySource.index());
     }
 
     public abstract void addToDictionary(final Dictionary dictionary);
index 57ae6e73cd5f6c41ed392fa7d2f050eaa4b76251..cb7d32fc3599094c60a8f55bf6d5c8cc5dceea58 100644 (file)
@@ -45,6 +45,7 @@ public class Dictionary implements RAFSerializable<Dictionary> {
     public final List<PairEntry> pairEntries;
     public final List<TextEntry> textEntries;
     public final List<HtmlEntry> htmlEntries;
+    public final List<byte[]> htmlData;
     public final List<EntrySource> sources;
     public final List<Index> indices;
 
@@ -60,6 +61,7 @@ public class Dictionary implements RAFSerializable<Dictionary> {
         pairEntries = new ArrayList<PairEntry>();
         textEntries = new ArrayList<TextEntry>();
         htmlEntries = new ArrayList<HtmlEntry>();
+        htmlData = null;
         sources = new ArrayList<EntrySource>();
         indices = new ArrayList<Index>();
     }
@@ -88,11 +90,16 @@ public class Dictionary implements RAFSerializable<Dictionary> {
                     CACHE_SIZE);
             if (dictFileVersion >= 5) {
                 htmlEntries = CachingList.create(
-                        RAFList.create(raf, new HtmlEntry.Serializer(this), raf.getFilePointer(), dictFileVersion),
+                        RAFList.create(raf, new HtmlEntry.Serializer(this), raf.getFilePointer(), dictFileVersion, dictFileVersion >= 7 ? 64 : 1, dictFileVersion >= 7),
                         CACHE_SIZE);
             } else {
                 htmlEntries = Collections.emptyList();
             }
+            if (dictFileVersion >= 7) {
+                htmlData = RAFList.create(raf, new HtmlEntry.DataDeserializer(), raf.getFilePointer(), dictFileVersion, 16, true);
+            } else {
+                htmlData = null;
+            }
             indices = CachingList.createFullyCached(RAFList.create(raf, indexSerializer,
                     raf.getFilePointer(), dictFileVersion));
         } catch (RuntimeException e) {
@@ -119,7 +126,9 @@ public class Dictionary implements RAFSerializable<Dictionary> {
         System.out.println("text start: " + raf.getFilePointer());
         RAFList.write(raf, textEntries, new TextEntry.Serializer(this));
         System.out.println("html start: " + raf.getFilePointer());
-        RAFList.write(raf, htmlEntries, new HtmlEntry.Serializer(this));
+        RAFList.write(raf, htmlEntries, new HtmlEntry.Serializer(this), 64, true);
+        assert htmlData == null;
+        RAFList.write(raf, htmlEntries, new HtmlEntry.DataSerializer(), 16, true);
         System.out.println("indices start: " + raf.getFilePointer());
         RAFList.write(raf, indices, indexSerializer);
         System.out.println("end: " + raf.getFilePointer());
index 573a96992d88e2f53e33c44206c9486e927d3f2a..8e2b9ba3686dfd6f7d66fb2b63d71e7ab160431b 100644 (file)
@@ -11,6 +11,7 @@ import java.io.DataOutput;
 import java.io.IOException;
 import java.io.PrintStream;
 import java.io.RandomAccessFile;
+import java.io.UnsupportedEncodingException;
 import java.lang.ref.SoftReference;
 import java.util.List;
 import java.util.regex.Pattern;
@@ -33,7 +34,7 @@ public class HtmlEntry extends AbstractEntry implements RAFSerializable<HtmlEntr
             throws IOException {
         super(dictionary, raf, index);
         title = raf.readUTF();
-        lazyHtmlLoader = new LazyHtmlLoader(raf, dictionary.dictFileVersion);
+        lazyHtmlLoader = new LazyHtmlLoader(raf, dictionary.htmlData, index);
         html = null;
     }
 
@@ -48,6 +49,24 @@ public class HtmlEntry extends AbstractEntry implements RAFSerializable<HtmlEntr
         raf.write(zipBytes);
     }
 
+    public void writeBase(DataOutput raf) throws IOException {
+        super.write(raf);
+        raf.writeUTF(title);
+    }
+
+    public void writeData(DataOutput raf) throws IOException {
+        final byte[] bytes = getHtml().getBytes("UTF-8");
+        StringUtil.writeVarInt(raf, bytes.length);
+        raf.write(bytes);
+    }
+
+    public static byte[] readData(DataInput raf) throws IOException {
+        int len = StringUtil.readVarInt(raf);
+        final byte[] bytes = new byte[len];
+        raf.readFully(bytes);
+        return bytes;
+    }
+
     String getHtml() {
         return html != null ? html : lazyHtmlLoader.getHtml();
     }
@@ -79,7 +98,32 @@ public class HtmlEntry extends AbstractEntry implements RAFSerializable<HtmlEntr
 
         @Override
         public void write(DataOutput raf, HtmlEntry t) throws IOException {
-            t.write(raf);
+            t.writeBase(raf);
+        }
+    }
+
+    static final class DataSerializer implements RAFListSerializer<HtmlEntry> {
+        @Override
+        public HtmlEntry read(DataInput raf, final int index) throws IOException {
+            assert false;
+            return null;
+        }
+
+        @Override
+        public void write(DataOutput raf, HtmlEntry t) throws IOException {
+            t.writeData(raf);
+        }
+    }
+
+    static final class DataDeserializer implements RAFListSerializer<byte[]> {
+        @Override
+        public byte[] read(DataInput raf, final int index) throws IOException {
+            return HtmlEntry.readData(raf);
+        }
+
+        @Override
+        public void write(DataOutput raf, byte[] t) throws IOException {
+            assert false;
         }
     }
 
@@ -183,19 +227,25 @@ public class HtmlEntry extends AbstractEntry implements RAFSerializable<HtmlEntr
         final long offset;
         final int numBytes;
         final int numZipBytes;
+        final List<byte[]> data;
+        final int index;
 
         // Not sure this volatile is right, but oh well.
         volatile SoftReference<String> htmlRef = new SoftReference<String>(null);
 
-        private LazyHtmlLoader(final DataInput inp, int version) throws IOException {
-            raf = (RandomAccessFile)inp;
-            if (version >= 7) {
-                numBytes = -1;
-                numZipBytes = StringUtil.readVarInt(raf);
-            } else {
-                numBytes = raf.readInt();
-                numZipBytes = raf.readInt();
+        private LazyHtmlLoader(final DataInput inp, List<byte[]> data, int index) throws IOException {
+            this.data = data;
+            this.index = index;
+            if (data != null) {
+                this.raf = null;
+                this.offset = 0;
+                this.numBytes = -1;
+                this.numZipBytes = -1;
+                return;
             }
+            raf = (RandomAccessFile)inp;
+            numBytes = raf.readInt();
+            numZipBytes = raf.readInt();
             offset = raf.getFilePointer();
             raf.skipBytes(numZipBytes);
         }
@@ -205,6 +255,15 @@ public class HtmlEntry extends AbstractEntry implements RAFSerializable<HtmlEntr
             if (html != null) {
                 return html;
             }
+            if (data != null) {
+                try {
+                    html = new String(data.get(index), "UTF-8");
+                } catch (UnsupportedEncodingException e) {
+                    throw new RuntimeException(e);
+                }
+                htmlRef = new SoftReference<String>(html);
+                return html;
+            }
             System.out.println("Loading Html: numBytes=" + numBytes + ", numZipBytes="
                     + numZipBytes);
             final byte[] zipBytes = new byte[numZipBytes];