gitweb.fperrin.net Git - DictionaryPC.git/blobdiff - src/com/hughes/android/dictionary/engine/WiktionarySplitter.java
Minor optimizations for endPage function.
[DictionaryPC.git] / src / com / hughes / android / dictionary / engine / WiktionarySplitter.java
index 097643f1af7e3762cb1d76b16e0a44c4e0f3e240..3cee85da6a2a89743f53be952239e4aea8f87d0f 100644 (file)
@@ -83,7 +83,9 @@ public class WiktionarySplitter extends org.xml.sax.helpers.DefaultHandler {
             currentSelectors = pathToSelectorsEntry.getValue();
 
             for (final Selector selector : currentSelectors) {
-                OutputStream tmp = new FileOutputStream(selector.outFilename);
+                OutputStream tmp = new FileOutputStream(selector.outFilename + ".gz");
+                tmp = new BufferedOutputStream(tmp);
+                tmp = new CompressorStreamFactory().createCompressorOutputStream(CompressorStreamFactory.GZIP, tmp);
                 tmp = new WriteBuffer(tmp, 20 * 1024 * 1024);
                 selector.out = new DataOutputStream(tmp);
             }
@@ -197,45 +199,49 @@ public class WiktionarySplitter extends org.xml.sax.helpers.DefaultHandler {
 
         String text = textBuilder.toString();
         String translingual = "";
+        int start = 0;
+        final Matcher startMatcher = headingStart.matcher(text);
 
-        while (text.length() > 0) {
+        while (start < text.length()) {
             // Find start.
-            final Matcher startMatcher = headingStart.matcher(text);
-            if (!startMatcher.find()) {
+            if (!startMatcher.find(start)) {
                 return;
             }
-            text = text.substring(startMatcher.end());
+            start = startMatcher.end();
 
             final String heading = startMatcher.group();
-            for (final Selector selector : currentSelectors) {
-                if (heading.indexOf("Translingual") != -1) {
-                    // Find end.
-                    final int depth = startMatcher.group(1).length();
-                    final Pattern endPattern = getEndPattern(depth);
 
-                    final Matcher endMatcher = endPattern.matcher(text);
-                    if (endMatcher.find()) {
-                        int end = endMatcher.start();
-                        translingual = text.substring(0, endMatcher.start());
-                        text = text.substring(end);
-                        break;
-                    }
+            // For Translingual entries just store the text for later
+            // use in the per-language sections
+            if (heading.indexOf("Translingual") != -1) {
+                // Find end.
+                final int depth = startMatcher.group(1).length();
+                final Pattern endPattern = getEndPattern(depth);
+
+                final Matcher endMatcher = endPattern.matcher(text);
+                if (endMatcher.find(start)) {
+                    int end = endMatcher.start();
+                    translingual = text.substring(start, end);
+                    start = end;
+                    continue;
                 }
-                if (selector.pattern.matcher(heading).find()) {
+            }
 
+            for (final Selector selector : currentSelectors) {
+                if (selector.pattern.matcher(heading).find()) {
                     // Find end.
                     final int depth = startMatcher.group(1).length();
                     final Pattern endPattern = getEndPattern(depth);
 
                     final Matcher endMatcher = endPattern.matcher(text);
                     final int end;
-                    if (endMatcher.find()) {
+                    if (endMatcher.find(start)) {
                         end = endMatcher.start();
                     } else {
                         end = text.length();
                     }
 
-                    String sectionText = text.substring(0, end);
+                    String sectionText = text.substring(start, end);
                     // Hack to remove empty dummy section from French
                     if (sectionText.startsWith("\n=== {{S|étymologie}} ===\n: {{ébauche-étym")) {
                         int dummy_end = sectionText.indexOf("}}", 41) + 2;
@@ -257,7 +263,7 @@ public class WiktionarySplitter extends org.xml.sax.helpers.DefaultHandler {
                         throw new RuntimeException(e);
                     }
 
-                    text = text.substring(end);
+                    start = end;
                     break;
                 }
             }