gitweb.fperrin.net Git - DictionaryPC.git/commitdiff
Minor optimizations for endPage function.
author Reimar Döffinger <Reimar.Doeffinger@gmx.de>
Sun, 15 Oct 2017 14:03:59 +0000 (16:03 +0200)
committer Reimar Döffinger <Reimar.Doeffinger@gmx.de>
Sun, 15 Oct 2017 14:03:59 +0000 (16:03 +0200)
WiktionarySplitter.sh
generate_dictionaries.sh
src/com/hughes/android/dictionary/engine/WiktionarySplitter.java

index 57b16cbbe95fb81056eabc58cc2251a0cfd93865..f1dcae4051e1a1f459d69d0117b1bf1508495ab9 100755 (executable)
@@ -7,4 +7,4 @@ test -r "$XERCES" || XERCES=/usr/share/xerces-2/lib/xercesImpl.jar
 COMMONS_COMPRESS=/usr/share/java/commons-compress-1.13.jar
 JAVA=/usr/lib/jvm/java-8-openjdk-amd64/jre/bin/java
 test -x "$JAVA" || JAVA=java
-"$JAVA" -classpath src:../Util/src/:../Dictionary/src/:"$ICU4J":"$XERCES":"$COMMONS_COMPRESS" com.hughes.android.dictionary.engine.WiktionarySplitter "$@"
+"$JAVA" -Xverify:none -classpath src:../Util/src/:../Dictionary/src/:"$ICU4J":"$XERCES":"$COMMONS_COMPRESS" com.hughes.android.dictionary.engine.WiktionarySplitter "$@"
index 17136d0d9b95eb285419975b5693c14166a0feaa..01038e23fc9d9f8065f0081cd45f57a46b0923cd 100755 (executable)
@@ -1,16 +1,16 @@
 #!/bin/sh
 DE_DICTS=true
-#DE_DICTS=false
+DE_DICTS=false
 EN_DICTS=true
-#EN_DICTS=false
+EN_DICTS=false
 FR_DICTS=true
-#FR_DICTS=false
+FR_DICTS=false
 IT_DICTS=true
-#IT_DICTS=false
+IT_DICTS=false
 EN_TRANS_DICTS=true
-#EN_TRANS_DICTS=false
+EN_TRANS_DICTS=false
 SINGLE_DICTS="en de fr it es pt"
-#SINGLE_DICTS=""
+SINGLE_DICTS="en"
 
 VERSION=v007
 
index 28b11bc669f3cbdb146a4a41c4dc2afc802ae2a9..3cee85da6a2a89743f53be952239e4aea8f87d0f 100644 (file)
@@ -199,14 +199,15 @@ public class WiktionarySplitter extends org.xml.sax.helpers.DefaultHandler {
 
         String text = textBuilder.toString();
         String translingual = "";
+        int start = 0;
+        final Matcher startMatcher = headingStart.matcher(text);
 
-        while (text.length() > 0) {
+        while (start < text.length()) {
             // Find start.
-            final Matcher startMatcher = headingStart.matcher(text);
-            if (!startMatcher.find()) {
+            if (!startMatcher.find(start)) {
                 return;
             }
-            text = text.substring(startMatcher.end());
+            start = startMatcher.end();
 
             final String heading = startMatcher.group();
 
@@ -218,10 +219,10 @@ public class WiktionarySplitter extends org.xml.sax.helpers.DefaultHandler {
                 final Pattern endPattern = getEndPattern(depth);
 
                 final Matcher endMatcher = endPattern.matcher(text);
-                if (endMatcher.find()) {
+                if (endMatcher.find(start)) {
                     int end = endMatcher.start();
-                    translingual = text.substring(0, endMatcher.start());
-                    text = text.substring(end);
+                    translingual = text.substring(start, end);
+                    start = end;
                     continue;
                 }
             }
@@ -234,13 +235,13 @@ public class WiktionarySplitter extends org.xml.sax.helpers.DefaultHandler {
 
                     final Matcher endMatcher = endPattern.matcher(text);
                     final int end;
-                    if (endMatcher.find()) {
+                    if (endMatcher.find(start)) {
                         end = endMatcher.start();
                     } else {
                         end = text.length();
                     }
 
-                    String sectionText = text.substring(0, end);
+                    String sectionText = text.substring(start, end);
                     // Hack to remove empty dummy section from French
                     if (sectionText.startsWith("\n=== {{S|étymologie}} ===\n: {{ébauche-étym")) {
                         int dummy_end = sectionText.indexOf("}}", 41) + 2;
@@ -262,7 +263,7 @@ public class WiktionarySplitter extends org.xml.sax.helpers.DefaultHandler {
                         throw new RuntimeException(e);
                     }
 
-                    text = text.substring(end);
+                    start = end;
                     break;
                 }
             }