From 8df3f0a9fdbae32221cc7d552b70f80f40872d0c Mon Sep 17 00:00:00 2001 From: =?utf8?q?Reimar=20D=C3=B6ffinger?= Date: Wed, 5 Oct 2016 21:56:18 +0200 Subject: [PATCH] Partial progress to fix frwiktionary parsing. --- .../parser/wiktionary/EnForeignParser.java | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/src/com/hughes/android/dictionary/parser/wiktionary/EnForeignParser.java b/src/com/hughes/android/dictionary/parser/wiktionary/EnForeignParser.java index c6743f2..7dd933e 100644 --- a/src/com/hughes/android/dictionary/parser/wiktionary/EnForeignParser.java +++ b/src/com/hughes/android/dictionary/parser/wiktionary/EnForeignParser.java @@ -54,6 +54,13 @@ public final class EnForeignParser extends EnParser { incrementCount("WARNING: Translations not in English section"); } else if (headingName.equals("Pronunciation")) { //doPronunciation(wikiLineReader); + } else if (headingName.startsWith(" {{S|")) { + // HACK to support parsing frwiktionary + String[] parts = headingName.split("\\|"); + if (parts.length > 2 && langCodePattern.matcher(parts[2]).find() && + (parts.length < 4 || !parts[3].startsWith("flexion"))) { + doForeignPartOfSpeech(lang, headingName, wikiTokenizer.headingDepth(), wikiTokenizer); + } } else if (partOfSpeechHeader.matcher(headingName).matches()) { doForeignPartOfSpeech(lang, headingName, wikiTokenizer.headingDepth(), wikiTokenizer); } @@ -263,13 +270,14 @@ public final class EnForeignParser extends EnParser { pairEntry.pairs.add(pair); } lastForeign = null; - } else if (nextPrefix.equals("#:") || nextPrefix.equals("##:")){ + // TODO: make #* and #*: work + } else if (nextPrefix.equals("#:") || nextPrefix.equals("##:")/* || nextPrefix.equals("#*")*/){ final Pair pair = new Pair("--", formatAndIndexExampleString(nextLine, null, indexedEntry), swap); lastForeign = nextLine; if (pair.lang1 != "--" && pair.lang1 != "--") { pairEntry.pairs.add(pair); } - } else if (nextPrefix.equals("#::") || nextPrefix.equals("#**")) { + } else if (nextPrefix.equals("#::") || nextPrefix.equals("#**")/* || nextPrefix.equals("#*:")*/) { if (lastForeign != null && pairEntry.pairs.size() > 0) { if (i + 1 < listSection.nextPrefixes.size()) { // Chinese has sometimes multiple foreign lines -- 2.43.0