gitweb.fperrin.net Git - DictionaryPC.git/blobdiff - src/com/hughes/android/dictionary/engine/WiktionarySplitter.java
Compress WiktionarySplitter output files.
[DictionaryPC.git] / src / com / hughes / android / dictionary / engine / WiktionarySplitter.java
index 850dedee2018c7fcf2ea4bb558edefdfb27551a7..0624025387b7f134242de98736ec4082e4915029 100644 (file)
@@ -22,6 +22,7 @@ import java.io.FileInputStream;
 import java.io.FileOutputStream;
 import java.io.InputStream;
 import java.io.IOException;
+import java.io.OutputStream;
 import java.util.ArrayList;
 import java.util.LinkedHashMap;
 import java.util.List;
@@ -82,7 +83,11 @@ public class WiktionarySplitter extends org.xml.sax.helpers.DefaultHandler {
             currentSelectors = pathToSelectorsEntry.getValue();
 
             for (final Selector selector : currentSelectors) {
-                selector.out = new DataOutputStream(new BufferedOutputStream(new FileOutputStream(selector.outFilename)));
+                OutputStream tmp = new FileOutputStream(selector.outFilename + ".gz");
+                tmp = new BufferedOutputStream(tmp);
+                tmp = new CompressorStreamFactory().createCompressorOutputStream(CompressorStreamFactory.GZIP, tmp);
+                tmp = new WriteBuffer(tmp, 20 * 1024 * 1024);
+                selector.out = new DataOutputStream(tmp);
             }
 
             // Do it.
@@ -114,6 +119,30 @@ public class WiktionarySplitter extends org.xml.sax.helpers.DefaultHandler {
 
     String lastPageTitle = null;
     int pageCount = 0;
+    // Cache of compiled end-of-section patterns, indexed by depth (see getEndPattern).
+    Pattern endPatterns[] = new Pattern[100];
+
+    /**
+     * Returns the pattern matching a line that starts with 1 to {@code depth}
+     * '=' characters followed by a non-'=' character, compiling it on first use.
+     *
+     * @param depth maximum number of leading '=' characters the pattern accepts
+     * @return the MULTILINE pattern for {@code depth}
+     */
+    private Pattern getEndPattern(int depth) {
+        // Depths at or beyond the cache size are compiled on demand rather than
+        // indexing past the end of the array.
+        final boolean cacheable = depth < endPatterns.length;
+        if (cacheable && endPatterns[depth] != null) {
+            return endPatterns[depth];
+        }
+        final Pattern compiled = Pattern.compile(String.format("^={1,%d}[^=].*$", depth), Pattern.MULTILINE);
+        if (cacheable) {
+            endPatterns[depth] = compiled;
+        }
+        return compiled;
+    }
+
     private void endPage() {
         final String title = titleBuilder.toString();
         lastPageTitle = title;
@@ -200,7 +213,7 @@ public class WiktionarySplitter extends org.xml.sax.helpers.DefaultHandler {
                 if (heading.indexOf("Translingual") != -1) {
                     // Find end.
                     final int depth = startMatcher.group(1).length();
-                    final Pattern endPattern = Pattern.compile(String.format("^={1,%d}[^=].*$", depth), Pattern.MULTILINE);
+                    final Pattern endPattern = getEndPattern(depth);
 
                     final Matcher endMatcher = endPattern.matcher(text);
                     if (endMatcher.find()) {
@@ -214,7 +227,7 @@ public class WiktionarySplitter extends org.xml.sax.helpers.DefaultHandler {
 
                     // Find end.
                     final int depth = startMatcher.group(1).length();
-                    final Pattern endPattern = Pattern.compile(String.format("^={1,%d}[^=].*$", depth), Pattern.MULTILINE);
+                    final Pattern endPattern = getEndPattern(depth);
 
                     final Matcher endMatcher = endPattern.matcher(text);
                     final int end;