gitweb.fperrin.net Git - DictionaryPC.git/blobdiff - src/com/hughes/android/dictionary/engine/WiktionarySplitter.java
Cache compiled patterns.
[DictionaryPC.git] / src / com / hughes / android / dictionary / engine / WiktionarySplitter.java
index 290a58fccc1e38a6c36acdd44f2cb08cf42abb40..c1cb09feee66380514a1de7a4aca2a933855e2a6 100644 (file)
@@ -96,6 +96,7 @@ public class WiktionarySplitter extends org.xml.sax.helpers.DefaultHandler {
                 } else {
                     InputStream compressedIn = new BufferedInputStream(new FileInputStream(input));
                     InputStream in = new CompressorStreamFactory().createCompressorInputStream(compressedIn);
+                    in = new ReadAheadBuffer(in, 20 * 1024 * 1024);
                     parser.parse(new BufferedInputStream(in), this);
                 }
             } catch (Exception e) {
@@ -113,6 +114,14 @@ public class WiktionarySplitter extends org.xml.sax.helpers.DefaultHandler {
 
     String lastPageTitle = null;
     int pageCount = 0;
+    Pattern endPatterns[] = new Pattern[100];
+
+    private Pattern getEndPattern(int depth) {
+        if (endPatterns[depth] == null)
+            endPatterns[depth] = Pattern.compile(String.format("^={1,%d}[^=].*$", depth), Pattern.MULTILINE);
+        return endPatterns[depth];
+    }
+
     private void endPage() {
         final String title = titleBuilder.toString();
         lastPageTitle = title;
@@ -199,7 +208,7 @@ public class WiktionarySplitter extends org.xml.sax.helpers.DefaultHandler {
                 if (heading.indexOf("Translingual") != -1) {
                     // Find end.
                     final int depth = startMatcher.group(1).length();
-                    final Pattern endPattern = Pattern.compile(String.format("^={1,%d}[^=].*$", depth), Pattern.MULTILINE);
+                    final Pattern endPattern = getEndPattern(depth);
 
                     final Matcher endMatcher = endPattern.matcher(text);
                     if (endMatcher.find()) {
@@ -213,7 +222,7 @@ public class WiktionarySplitter extends org.xml.sax.helpers.DefaultHandler {
 
                     // Find end.
                     final int depth = startMatcher.group(1).length();
-                    final Pattern endPattern = Pattern.compile(String.format("^={1,%d}[^=].*$", depth), Pattern.MULTILINE);
+                    final Pattern endPattern = getEndPattern(depth);
 
                     final Matcher endMatcher = endPattern.matcher(text);
                     final int end;