From b60bc4096cf591206cc7ad2e235b88030a9ee65f Mon Sep 17 00:00:00 2001 From: =?utf8?q?Reimar=20D=C3=B6ffinger?= Date: Sat, 14 Oct 2017 19:55:06 +0200 Subject: [PATCH] Add read-ahead buffer to decompress in parallel. Allows using more than one CPU core for a good speedup. Benchmarks: Uncompressed files: 196.29 CPU, 5:18.34 wall clock time xz-compressed, before: 299.19 CPU, 5:21.85 wall clock time xz-compressed, after: 308.96 CPU, 3:29.60 wall clock time (first was I/O limited, second was CPU-limited, now it is almost only limited by CPU-time for XML parsing) --- .../dictionary/engine/ReadAheadBuffer.java | 52 +++++++++++++++++++ .../dictionary/engine/WiktionarySplitter.java | 1 + 2 files changed, 53 insertions(+) create mode 100644 src/com/hughes/android/dictionary/engine/ReadAheadBuffer.java diff --git a/src/com/hughes/android/dictionary/engine/ReadAheadBuffer.java b/src/com/hughes/android/dictionary/engine/ReadAheadBuffer.java new file mode 100644 index 0000000..d4b3ab5 --- /dev/null +++ b/src/com/hughes/android/dictionary/engine/ReadAheadBuffer.java @@ -0,0 +1,52 @@ +// Copyright 2017 Reimar Döffinger +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
/**
 * Input stream that drains another stream on a background thread.
 *
 * <p>The constructor starts a worker thread that repeatedly reads up to
 * {@link #BLOCK_SIZE} bytes from the wrapped stream and writes them into the
 * pipe this object is the read end of. When the wrapped stream is expensive
 * to read (for example a decompressor) that work runs in parallel with
 * whatever consumes this stream.
 *
 * <p>Typical use:
 * <pre>
 *   in = new ReadAheadBuffer(in, 20 * 1024 * 1024);
 * </pre>
 *
 * <p>If reading the wrapped stream fails, the data buffered so far is still
 * delivered and the failure is then reported as an {@link IOException} from
 * {@code read} instead of being presented as a normal end of stream.
 */
public class ReadAheadBuffer extends PipedInputStream {
    /** Number of bytes requested from the wrapped stream per read call. */
    static final int BLOCK_SIZE = 1024 * 1024;

    // These are deliberately NOT called "in" and "buffer": PipedInputStream
    // already declares protected fields with those names, and hiding them
    // makes the code misleading to read.
    private final InputStream source;
    private final PipedOutputStream pipe;
    private final byte[] block = new byte[BLOCK_SIZE];

    /** Set by the worker thread when it stops because of an exception. */
    private volatile Exception failure;

    /**
     * Wraps {@code in} and immediately starts reading ahead from it.
     *
     * @param in   stream to read from on the background thread
     * @param size capacity of the pipe in bytes; asserted to be at least
     *             {@code 2 * BLOCK_SIZE} when assertions are enabled
     * @throws IllegalStateException if the internal pipe cannot be connected
     */
    public ReadAheadBuffer(InputStream in, int size) {
        super(size);
        assert size >= 2 * BLOCK_SIZE;
        source = in;
        try {
            pipe = new PipedOutputStream(this);
        } catch (IOException e) {
            // Previously swallowed, which left an unconnected, unusable object.
            throw new IllegalStateException("Could not connect read-ahead pipe", e);
        }
        Thread worker = new Thread(new Runnable() {
            @Override
            public void run() {
                pump();
            }
        }, "ReadAheadBuffer");
        // The worker only exists to feed this stream; it must not keep the
        // JVM alive if the consumer abandons the stream.
        worker.setDaemon(true);
        worker.start();
    }

    /**
     * Reads one byte from the pipe.
     *
     * @return the byte read, or -1 at the end of the stream
     * @throws IOException if the pipe fails, or if the end of the stream was
     *                     caused by a failure while reading the wrapped stream
     */
    @Override
    public synchronized int read() throws IOException {
        int value = super.read();
        if (value < 0) {
            rethrowFailure();
        }
        return value;
    }

    /**
     * Reads up to {@code len} bytes from the pipe into {@code b}.
     *
     * @return the number of bytes read, or -1 at the end of the stream
     * @throws IOException if the pipe fails, or if the end of the stream was
     *                     caused by a failure while reading the wrapped stream
     */
    @Override
    public synchronized int read(byte[] b, int off, int len) throws IOException {
        int count = super.read(b, off, len);
        if (count < 0) {
            rethrowFailure();
        }
        return count;
    }

    /** Worker loop: copies the wrapped stream into the pipe block by block. */
    private void pump() {
        try {
            int count;
            while ((count = source.read(block)) > 0) {
                pipe.write(block, 0, count);
                // flush() wakes up a consumer waiting for data.
                pipe.flush();
            }
        } catch (IOException | RuntimeException e) {
            // Remember why we stopped so the consumer does not mistake a
            // broken source for a complete one. Must be set before close().
            failure = e;
        }
        try {
            pipe.close();
        } catch (IOException ignored) {
            // The pipe is being torn down anyway; nothing useful to report.
        }
    }

    /** Throws the recorded worker failure, if any, wrapped in an IOException. */
    private void rethrowFailure() throws IOException {
        Exception cause = failure;
        if (cause != null) {
            throw new IOException("Read-ahead from the underlying stream failed", cause);
        }
    }
}