gitweb.fperrin.net Git - DictionaryPC.git/commitdiff
Add read-ahead buffer to decompress in parallel.
author Reimar Döffinger <Reimar.Doeffinger@gmx.de>
Sat, 14 Oct 2017 17:55:06 +0000 (19:55 +0200)
committer Reimar Döffinger <Reimar.Doeffinger@gmx.de>
Sat, 14 Oct 2017 17:55:06 +0000 (19:55 +0200)
Allows using more than one CPU core for a good speedup.
Benchmarks:
Uncompressed files:    196.29 CPU, 5:18.34 wall clock time
xz-compressed, before: 299.19 CPU, 5:21.85 wall clock time
xz-compressed, after:  308.96 CPU, 3:29.60 wall clock time

(first was I/O limited, second was CPU-limited, now it is
almost only limited by CPU-time for XML parsing)

src/com/hughes/android/dictionary/engine/ReadAheadBuffer.java [new file with mode: 0644]
src/com/hughes/android/dictionary/engine/WiktionarySplitter.java

diff --git a/src/com/hughes/android/dictionary/engine/ReadAheadBuffer.java b/src/com/hughes/android/dictionary/engine/ReadAheadBuffer.java
new file mode 100644 (file)
index 0000000..d4b3ab5
--- /dev/null
@@ -0,0 +1,52 @@
+// Copyright 2017 Reimar Döffinger
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package com.hughes.android.dictionary.engine;
+
+import java.io.InputStream;
+import java.io.IOException;
+import java.io.PipedInputStream;
+import java.io.PipedOutputStream;
+
+/**
+ * Input stream that reads ahead from another stream on a background thread.
+ *
+ * <p>The constructor starts a thread that copies the source stream into this
+ * pipe in chunks of {@link #BLOCK_SIZE} bytes, so that producing the data
+ * (for example decompressing it) can overlap with consuming it.
+ *
+ * <p>A failure while reading the source is not silently turned into a
+ * premature end of stream: once the data buffered before the failure has been
+ * consumed, {@code read} throws an {@link IOException} carrying the original
+ * failure as its cause. The source stream is closed by the background thread
+ * when copying ends, whether normally, on error, or because the reader closed
+ * this stream.
+ */
+public class ReadAheadBuffer extends PipedInputStream {
+    /** Size in bytes of the chunks read from the source stream. */
+    static int BLOCK_SIZE = 1024 * 1024;
+
+    /**
+     * Creates the buffer and immediately starts reading ahead from {@code in}.
+     *
+     * @param in   stream to read ahead from; it is closed once copying ends
+     * @param size capacity of the pipe in bytes; asserted to be at least
+     *             {@code 2 * BLOCK_SIZE}
+     */
+    public ReadAheadBuffer(InputStream in, int size) {
+        super(size);
+        assert size >= 2 * BLOCK_SIZE;
+        this.in = in;
+        buffer = new byte[BLOCK_SIZE];
+        try {
+            pipe = new PipedOutputStream(this);
+        } catch (IOException e) {
+            // Connecting only fails if the pipe is already connected, which
+            // cannot be the case for a stream that is still being constructed.
+            throw new IllegalStateException("Could not connect read-ahead pipe", e);
+        }
+        Thread pump = new Thread(new Runnable() {
+            @Override
+            public void run() {
+                pumpSource();
+            }
+        }, "ReadAheadBuffer");
+        // An abandoned buffer must not keep the JVM alive.
+        pump.setDaemon(true);
+        pump.start();
+    }
+
+    /**
+     * Reads one byte, rethrowing a recorded read-ahead failure at end of stream.
+     */
+    @Override
+    public int read() throws IOException {
+        int value = super.read();
+        if (value < 0) {
+            throwIfFailed();
+        }
+        return value;
+    }
+
+    /**
+     * Reads up to {@code len} bytes, rethrowing a recorded read-ahead failure
+     * at end of stream.
+     */
+    @Override
+    public int read(byte[] b, int off, int len) throws IOException {
+        int count = super.read(b, off, len);
+        if (count < 0) {
+            throwIfFailed();
+        }
+        return count;
+    }
+
+    /** Copies the source into the pipe until it ends or an error occurs. */
+    private void pumpSource() {
+        try {
+            int read;
+            while ((read = in.read(buffer)) > 0) {
+                pipe.write(buffer, 0, read);
+                // flush() notifies a reader that is waiting for data.
+                pipe.flush();
+            }
+        } catch (IOException | RuntimeException e) {
+            // Must be recorded before the pipe is closed, so that a reader
+            // that observes end of stream also observes the failure.
+            failure = e;
+        } finally {
+            try {
+                pipe.close();
+            } catch (IOException ignored) {
+                // Nothing useful can be done; the reader will notice the dead writer.
+            }
+            try {
+                in.close();
+            } catch (IOException ignored) {
+                // Best effort: all data has already been handed over or has failed.
+            }
+        }
+    }
+
+    /** Throws an IOException wrapping the read-ahead failure, if there was one. */
+    private void throwIfFailed() throws IOException {
+        Throwable cause = failure;
+        if (cause != null) {
+            throw new IOException("Read-ahead from the underlying stream failed", cause);
+        }
+    }
+
+    // NOTE: "in" and "buffer" hide the protected PipedInputStream fields of the
+    // same name; the names are kept for compatibility with existing code.
+    InputStream in;
+    PipedOutputStream pipe;
+    byte buffer[];
+    /** Failure recorded by the background thread; null while none occurred. */
+    private volatile Throwable failure;
+}
index 290a58fccc1e38a6c36acdd44f2cb08cf42abb40..850dedee2018c7fcf2ea4bb558edefdfb27551a7 100644 (file)
@@ -96,6 +96,7 @@ public class WiktionarySplitter extends org.xml.sax.helpers.DefaultHandler {
                 } else {
                     InputStream compressedIn = new BufferedInputStream(new FileInputStream(input));
                     InputStream in = new CompressorStreamFactory().createCompressorInputStream(compressedIn);
                 } else {
                     InputStream compressedIn = new BufferedInputStream(new FileInputStream(input));
                     InputStream in = new CompressorStreamFactory().createCompressorInputStream(compressedIn);
+                    in = new ReadAheadBuffer(in, 20 * 1024 * 1024);
                     parser.parse(new BufferedInputStream(in), this);
                 }
             } catch (Exception e) {
                     parser.parse(new BufferedInputStream(in), this);
                 }
             } catch (Exception e) {