From eec1a89b6cdffe7048aefa3cb2b3497b1744be99 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Reimar=20D=C3=B6ffinger?= Date: Sun, 15 Oct 2017 16:25:32 +0200 Subject: [PATCH] Reduce progress prints and optimize title check. --- .../dictionary/engine/WiktionarySplitter.java | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/src/com/hughes/android/dictionary/engine/WiktionarySplitter.java b/src/com/hughes/android/dictionary/engine/WiktionarySplitter.java index 3cee85d..37344ca 100644 --- a/src/com/hughes/android/dictionary/engine/WiktionarySplitter.java +++ b/src/com/hughes/android/dictionary/engine/WiktionarySplitter.java @@ -130,10 +130,12 @@ public class WiktionarySplitter extends org.xml.sax.helpers.DefaultHandler { private void endPage() { final String title = titleBuilder.toString(); lastPageTitle = title; - if (++pageCount % 1000 == 0) { + if (++pageCount % 100000 == 0) { System.out.println("endPage: " + title + ", count=" + pageCount); } - if (title.startsWith("Wiktionary:") || + if (title.startsWith("Unsupported titles/")) return; + if (title.contains(":")) { + if (title.startsWith("Wiktionary:") || title.startsWith("Appendix:") || title.startsWith("Help:") || title.startsWith("Index:") || @@ -144,7 +146,6 @@ public class WiktionarySplitter extends org.xml.sax.helpers.DefaultHandler { title.startsWith("Rhymes:") || title.startsWith("Category:") || title.startsWith("Wikisaurus:") || - title.startsWith("Unsupported titles/") || title.startsWith("Transwiki:") || title.startsWith("File:") || title.startsWith("Thread:") || @@ -188,10 +189,7 @@ public class WiktionarySplitter extends org.xml.sax.helpers.DefaultHandler { // sentinel false - ) { - return; - } - if (title.contains(":")) { + ) return; if (!title.startsWith("Sign gloss:")) { System.err.println("title with colon: " + title); } -- 2.43.0