From: Reimar Döffinger Date: Sun, 15 Oct 2017 10:08:25 +0000 (+0200) Subject: Support compressed input for parsers. X-Git-Url: http://gitweb.fperrin.net/?p=DictionaryPC.git;a=commitdiff_plain;h=f4144760af0445ac225ed3b5a2373723bf78e4d1 Support compressed input for parsers. --- diff --git a/src/com/hughes/android/dictionary/parser/wiktionary/AbstractWiktionaryParser.java b/src/com/hughes/android/dictionary/parser/wiktionary/AbstractWiktionaryParser.java index b77c341..c66a0d7 100644 --- a/src/com/hughes/android/dictionary/parser/wiktionary/AbstractWiktionaryParser.java +++ b/src/com/hughes/android/dictionary/parser/wiktionary/AbstractWiktionaryParser.java @@ -19,6 +19,7 @@ import java.io.DataInputStream; import java.io.EOFException; import java.io.File; import java.io.FileInputStream; +import java.io.InputStream; import java.io.IOException; import java.util.LinkedHashMap; import java.util.LinkedHashSet; @@ -32,10 +33,14 @@ import java.util.logging.Level; import java.util.logging.Logger; import java.util.regex.Pattern; +import org.apache.commons.compress.compressors.CompressorStreamFactory; +import org.apache.commons.compress.compressors.CompressorException; + import com.hughes.android.dictionary.engine.EntrySource; import com.hughes.android.dictionary.engine.EntryTypeName; import com.hughes.android.dictionary.engine.IndexBuilder; import com.hughes.android.dictionary.engine.IndexedEntry; +import com.hughes.android.dictionary.engine.ReadAheadBuffer; import com.hughes.android.dictionary.parser.Parser; import com.hughes.android.dictionary.parser.WikiTokenizer; import com.hughes.util.EnumUtil; @@ -59,7 +64,23 @@ public abstract class AbstractWiktionaryParser implements Parser { public void parse(final File file, final EntrySource entrySource, final int pageLimit) throws IOException { this.entrySource = entrySource; int pageCount = 0; - final DataInputStream dis = new DataInputStream(new BufferedInputStream(new FileInputStream(file))); + File input = new File(file.getPath() + ".bz2"); + if (!input.exists()) input = new File(file.getPath() + ".gz"); + if (!input.exists()) input = new File(file.getPath() + ".xz"); + DataInputStream dis; + if (!input.exists()) { + // Fallback to uncompressed file + dis = new DataInputStream(new BufferedInputStream(new FileInputStream(file))); + } else { + InputStream compressedIn = new BufferedInputStream(new FileInputStream(input)); + try { + InputStream in = new CompressorStreamFactory().createCompressorInputStream(compressedIn); + in = new ReadAheadBuffer(in, 20 * 1024 * 1024); + dis = new DataInputStream(in); + } catch (CompressorException e) { + throw new IOException(e); + } + } try { while (true) { if (pageLimit >= 0 && pageCount >= pageLimit) {