From: Reimar Döffinger Date: Sun, 15 Oct 2017 14:03:59 +0000 (+0200) Subject: Minor optimizations for endPage function. X-Git-Url: http://gitweb.fperrin.net/?p=DictionaryPC.git;a=commitdiff_plain;h=4c7701228aff69ceb4dffd23ed319382b5f0a55c Minor optimizations for endPage function. --- diff --git a/WiktionarySplitter.sh b/WiktionarySplitter.sh index 57b16cb..f1dcae4 100755 --- a/WiktionarySplitter.sh +++ b/WiktionarySplitter.sh @@ -7,4 +7,4 @@ test -r "$XERCES" || XERCES=/usr/share/xerces-2/lib/xercesImpl.jar COMMONS_COMPRESS=/usr/share/java/commons-compress-1.13.jar JAVA=/usr/lib/jvm/java-8-openjdk-amd64/jre/bin/java test -x "$JAVA" || JAVA=java -"$JAVA" -classpath src:../Util/src/:../Dictionary/src/:"$ICU4J":"$XERCES":"$COMMONS_COMPRESS" com.hughes.android.dictionary.engine.WiktionarySplitter "$@" +"$JAVA" -Xverify:none -classpath src:../Util/src/:../Dictionary/src/:"$ICU4J":"$XERCES":"$COMMONS_COMPRESS" com.hughes.android.dictionary.engine.WiktionarySplitter "$@" diff --git a/generate_dictionaries.sh b/generate_dictionaries.sh index 17136d0..01038e2 100755 --- a/generate_dictionaries.sh +++ b/generate_dictionaries.sh @@ -1,16 +1,16 @@ #!/bin/sh DE_DICTS=true -#DE_DICTS=false +DE_DICTS=false EN_DICTS=true -#EN_DICTS=false +EN_DICTS=false FR_DICTS=true -#FR_DICTS=false +FR_DICTS=false IT_DICTS=true -#IT_DICTS=false +IT_DICTS=false EN_TRANS_DICTS=true -#EN_TRANS_DICTS=false +EN_TRANS_DICTS=false SINGLE_DICTS="en de fr it es pt" -#SINGLE_DICTS="" +SINGLE_DICTS="en" VERSION=v007 diff --git a/src/com/hughes/android/dictionary/engine/WiktionarySplitter.java b/src/com/hughes/android/dictionary/engine/WiktionarySplitter.java index 28b11bc..3cee85d 100644 --- a/src/com/hughes/android/dictionary/engine/WiktionarySplitter.java +++ b/src/com/hughes/android/dictionary/engine/WiktionarySplitter.java @@ -199,14 +199,15 @@ public class WiktionarySplitter extends org.xml.sax.helpers.DefaultHandler { String text = textBuilder.toString(); String translingual = ""; + int start = 0; + final Matcher startMatcher = headingStart.matcher(text); - while (text.length() > 0) { + while (start < text.length()) { // Find start. - final Matcher startMatcher = headingStart.matcher(text); - if (!startMatcher.find()) { + if (!startMatcher.find(start)) { return; } - text = text.substring(startMatcher.end()); + start = startMatcher.end(); final String heading = startMatcher.group(); @@ -218,10 +219,10 @@ public class WiktionarySplitter extends org.xml.sax.helpers.DefaultHandler { final Pattern endPattern = getEndPattern(depth); final Matcher endMatcher = endPattern.matcher(text); - if (endMatcher.find()) { + if (endMatcher.find(start)) { int end = endMatcher.start(); - translingual = text.substring(0, endMatcher.start()); - text = text.substring(end); + translingual = text.substring(start, end); + start = end; continue; } } @@ -234,13 +235,13 @@ public class WiktionarySplitter extends org.xml.sax.helpers.DefaultHandler { final Matcher endMatcher = endPattern.matcher(text); final int end; - if (endMatcher.find()) { + if (endMatcher.find(start)) { end = endMatcher.start(); } else { end = text.length(); } - String sectionText = text.substring(0, end); + String sectionText = text.substring(start, end); // Hack to remove empty dummy section from French if (sectionText.startsWith("\n=== {{S|étymologie}} ===\n: {{ébauche-étym")) { int dummy_end = sectionText.indexOf("}}", 41) + 2; @@ -262,7 +263,7 @@ public class WiktionarySplitter extends org.xml.sax.helpers.DefaultHandler { throw new RuntimeException(e); } - text = text.substring(end); + start = end; break; } }