From cd86f5f7abf5e545daf89c03d9dbbfd4e5bc65a4 Mon Sep 17 00:00:00 2001 From: thadh Date: Mon, 24 Sep 2012 22:47:22 -0700 Subject: [PATCH] it-noun. --- .../engine/DictionaryBuilderMain.java | 2 +- .../wiktionary/AbstractWiktionaryParser.java | 2 +- .../wiktionary/EnFunctionCallbacks.java | 16 ++++++- .../wiktionary/WholeSectionToHtmlParser.java | 28 +++++++++++ .../goldens/wiktionary.it_it.quickdic.text | 48 +++++++++---------- 5 files changed, 69 insertions(+), 27 deletions(-) diff --git a/src/com/hughes/android/dictionary/engine/DictionaryBuilderMain.java b/src/com/hughes/android/dictionary/engine/DictionaryBuilderMain.java index 41edc82..981e6b5 100644 --- a/src/com/hughes/android/dictionary/engine/DictionaryBuilderMain.java +++ b/src/com/hughes/android/dictionary/engine/DictionaryBuilderMain.java @@ -174,8 +174,8 @@ public class DictionaryBuilderMain extends TestCase { final String[][] nonEnPairs = new String[][] { // The 3 I use most: - {"DE", "EN" }, {"IT", "EN" }, + {"DE", "EN" }, {"DE", "IT" }, diff --git a/src/com/hughes/android/dictionary/parser/wiktionary/AbstractWiktionaryParser.java b/src/com/hughes/android/dictionary/parser/wiktionary/AbstractWiktionaryParser.java index 7fff4b7..765ea9a 100644 --- a/src/com/hughes/android/dictionary/parser/wiktionary/AbstractWiktionaryParser.java +++ b/src/com/hughes/android/dictionary/parser/wiktionary/AbstractWiktionaryParser.java @@ -110,7 +110,7 @@ public abstract class AbstractWiktionaryParser implements Parser { } public void addLinkToCurrentEntry(final String token, final EntryTypeName entryTypeName) { - assert false; + assert false : token; } diff --git a/src/com/hughes/android/dictionary/parser/wiktionary/EnFunctionCallbacks.java b/src/com/hughes/android/dictionary/parser/wiktionary/EnFunctionCallbacks.java index 955e957..dbd158b 100644 --- a/src/com/hughes/android/dictionary/parser/wiktionary/EnFunctionCallbacks.java +++ b/src/com/hughes/android/dictionary/parser/wiktionary/EnFunctionCallbacks.java @@ -234,6 +234,7 @@ class EnFunctionCallbacks { final Map namedArgs, final T parser, final AppendAndIndexWikiCallback appendAndIndexWikiCallback) { + //namedArgs.remove("lang"); if (!namedArgs.isEmpty()) { EnParser.LOG.warning("weird encoding: " + wikiTokenizer.token()); return false; @@ -617,6 +618,12 @@ class EnFunctionCallbacks { appendAndIndexWikiCallback.builder.append(" {").append(gender).append("}, "); appendAndIndexWikiCallback.dispatch(plural, null, null); appendAndIndexWikiCallback.builder.append(" {pl}"); + final String f = namedArgs.remove("f"); + if (f != null) { + appendAndIndexWikiCallback.builder.append(", "); + appendAndIndexWikiCallback.dispatch(f, null, null); + appendAndIndexWikiCallback.builder.append(" {f}"); + } parser.wordForms.add(singular); parser.wordForms.add(plural); if (!namedArgs.isEmpty() || args.size() > 4) { @@ -1060,7 +1067,14 @@ static final class it_conj_are implements Fu if (!namedArgs.isEmpty()) { System.err.println("NON-EMPTY namedArgs: " + namedArgs); - assert false; + if ("muovesse".equals(namedArgs.get("impsib3s2"))) { + return false; + } + if ("percuotesse".equals(namedArgs.get("impsib3s2"))) { + return false; + } + // Too many to deal with: + //assert false; return false; } diff --git a/src/com/hughes/android/dictionary/parser/wiktionary/WholeSectionToHtmlParser.java b/src/com/hughes/android/dictionary/parser/wiktionary/WholeSectionToHtmlParser.java index 0a702d0..70a02c3 100644 --- a/src/com/hughes/android/dictionary/parser/wiktionary/WholeSectionToHtmlParser.java +++ b/src/com/hughes/android/dictionary/parser/wiktionary/WholeSectionToHtmlParser.java @@ -58,6 +58,34 @@ public class WholeSectionToHtmlParser extends AbstractWiktionaryParser { Map> functionCallbacks) { EnFunctionCallbacks.addGenericCallbacks(functionCallbacks); }}); + + final LangConfig basicLangConfig = new LangConfig() { + @Override + public boolean skipSection(String headingText) { + return false; + } + + @Override + public boolean skipWikiLink(WikiTokenizer wikiTokenizer) { + final String wikiText = wikiTokenizer.wikiLinkText(); + if (wikiText.startsWith("Category:")) { + return true; + } + return false; + } + @Override + public String adjustWikiLink(String wikiLinkDest) { + return wikiLinkDest; + } + + @Override + public void addFunctionCallbacks( + Map> functionCallbacks) { + } + }; + isoToLangConfig.put("FR", basicLangConfig); + isoToLangConfig.put("DE", basicLangConfig); + isoToLangConfig.put("IT", basicLangConfig); } final IndexBuilder titleIndexBuilder; diff --git a/testdata/goldens/wiktionary.it_it.quickdic.text b/testdata/goldens/wiktionary.it_it.quickdic.text index 5315caa..0c989e5 100644 --- a/testdata/goldens/wiktionary.it_it.quickdic.text +++ b/testdata/goldens/wiktionary.it_it.quickdic.text @@ -177,9 +177,9 @@ Index: IT IT->EN accusare {it-verb} {transitive} :: To accuse, to reproach, to impeach. accusato {{it-pp|accusat}} :: {past participle of|accusare} ===accusati=== - accusato {m}, accusati {pl} :: accused person, accused + accusato {m}, accusati {pl}, accusata {f} :: accused person, accused ***accusato*** - accusato {m}, accusati {pl} :: accused person, accused + accusato {m}, accusati {pl}, accusata {f} :: accused person, accused accusato {{it-adj|accusat}} :: accused accusato {{it-pp|accusat}} :: {past participle of|accusare} ===accuse=== @@ -518,10 +518,10 @@ Index: IT IT->EN ===banane=== banana {f}, banane {pl} :: banana (fruit) ===bancari=== - bancario {m}, bancari {pl} :: Bank employee + bancario {m}, bancari {pl}, bancaria {f} :: Bank employee ***bancario*** bancario {{it-adj|bancar|io|ia|i|ie}} :: bank (attributive) - bancario {m}, bancari {pl} :: Bank employee + bancario {m}, bancari {pl}, bancaria {f} :: Bank employee ***Bangkok*** Bangkok {it-proper noun} :: Bangkok (capital of Thailand) ***Bangladesh*** @@ -784,13 +784,13 @@ Index: IT IT->EN cane (adjective) {inv} :: freezing, biting (cold) Oggi fa un freddo cane! :: Today is freezing cold! cane (adjective) {inv} :: terrible, dreadful, awful - cane {m}, cani {pl} :: dog in general, male dog - cane {m}, cani {pl} :: {{context|firearms}} hammer + cane {m}, cani {pl}, cagna {f} :: dog in general, male dog + cane {m}, cani {pl}, cagna {f} :: {{context|firearms}} hammer can {m}, cani {pl} :: {{context|poetic|_|and literary form of cane}} dog ===cani=== can {m}, cani {pl} :: {{context|poetic|_|and literary form of cane}} dog - cane {m}, cani {pl} :: dog in general, male dog - cane {m}, cani {pl} :: {{context|firearms}} hammer + cane {m}, cani {pl}, cagna {f} :: dog in general, male dog + cane {m}, cani {pl}, cagna {f} :: {{context|firearms}} hammer ===cannella=== cannelle {f} :: {plural of|cannella} ===cannelle=== @@ -3036,11 +3036,11 @@ Index: IT IT->EN ===prepose=== prepose :: {conjugation of|preporre|3|s|past historic} ***presidente*** - presidente {m}, presidenti {pl} :: chairman, chairperson, chair, chief - presidente {m}, presidenti {pl} :: president + presidente {m}, presidenti {pl}, presidentessa {f} :: chairman, chairperson, chair, chief + presidente {m}, presidenti {pl}, presidentessa {f} :: president ===presidenti=== - presidente {m}, presidenti {pl} :: chairman, chairperson, chair, chief - presidente {m}, presidenti {pl} :: president + presidente {m}, presidenti {pl}, presidentessa {f} :: chairman, chairperson, chair, chief + presidente {m}, presidenti {pl}, presidentessa {f} :: president ***primavera*** primavera {f}, primavere {pl} :: spring, the season primavera {f}, primavere {pl} :: (plural, familiar) years, winters @@ -4302,7 +4302,7 @@ Index: EN EN->IT ===accuse=== accusare {it-verb} {transitive} :: To accuse, to reproach, to impeach. ===accused=== - accusato {m}, accusati {pl} :: accused person, accused + accusato {m}, accusati {pl}, accusata {f} :: accused person, accused accusato {{it-adj|accusat}} :: accused ===acerbity=== amarezza {f}, amarezze {pl} :: {figuratively} bitterness, acerbity, sadness @@ -4587,7 +4587,7 @@ Index: EN EN->IT ===bank=== bancario {{it-adj|bancar|io|ia|i|ie}} :: bank (attributive) ===Bank=== - bancario {m}, bancari {pl} :: Bank employee + bancario {m}, bancari {pl}, bancaria {f} :: Bank employee ===bar=== bar {m} (noun) {inv} :: bar (place serving drinks) C'è un bar qui vicino? :: Is there a bar nearby? @@ -4916,11 +4916,11 @@ Index: EN EN->IT ===chaff=== pula {f}, pule {pl} :: chaff ===chair=== - presidente {m}, presidenti {pl} :: chairman, chairperson, chair, chief + presidente {m}, presidenti {pl}, presidentessa {f} :: chairman, chairperson, chair, chief ===chairman=== - presidente {m}, presidenti {pl} :: chairman, chairperson, chair, chief + presidente {m}, presidenti {pl}, presidentessa {f} :: chairman, chairperson, chair, chief ===chairperson=== - presidente {m}, presidenti {pl} :: chairman, chairperson, chair, chief + presidente {m}, presidenti {pl}, presidentessa {f} :: chairman, chairperson, chair, chief ===chamber=== camera {f}, camere {pl} :: room, chamber ===chance=== @@ -4941,7 +4941,7 @@ Index: EN EN->IT ===chicken=== pollo {m}, polli {pl} :: {{context|meats}} chicken (especially chicken meat) ===chief=== - presidente {m}, presidenti {pl} :: chairman, chairperson, chair, chief + presidente {m}, presidenti {pl}, presidentessa {f} :: chairman, chairperson, chair, chief generalissimo {m}, generalissimi {pl} :: commander-in-chief leader {m|f} (noun) {inv} :: leader (chief; one in front) ===Chieti=== @@ -5341,7 +5341,7 @@ Index: EN EN->IT Ci vuole poco a farmi felice. :: It doesn't take much to make me happy. ===dog=== can {m}, cani {pl} :: {{context|poetic|_|and literary form of cane}} dog - cane {m}, cani {pl} :: dog in general, male dog + cane {m}, cani {pl}, cagna {f} :: dog in general, male dog ===doll=== pupa {f}, pupe {pl} :: doll (child's toy) ===Dominica=== @@ -5445,7 +5445,7 @@ Index: EN EN->IT ===emphasised=== te (pronoun) :: (emphasised objective of tu) you ===employee=== - bancario {m}, bancari {pl} :: Bank employee + bancario {m}, bancari {pl}, bancaria {f} :: Bank employee ===end=== finale {m}, finali {pl} :: end, ending, conclusion ===ending=== @@ -5873,7 +5873,7 @@ Index: EN EN->IT ===gelded=== castrato {{it-adj|castrat}} :: castrated, gelded, neutered ===general=== - cane {m}, cani {pl} :: dog in general, male dog + cane {m}, cani {pl}, cagna {f} :: dog in general, male dog ===generale=== PG (initialism) :: procuratore generale PG (initialism) :: procura generale @@ -6018,7 +6018,7 @@ Index: EN EN->IT ===hamburger=== hamburger {m} (noun) {inv} :: hamburger ===hammer=== - cane {m}, cani {pl} :: {{context|firearms}} hammer + cane {m}, cani {pl}, cagna {f} :: {{context|firearms}} hammer ===handicap=== handicap {m} (noun) {inv} :: handicap (disability; horserace) ===hang=== @@ -6637,7 +6637,7 @@ Index: EN EN->IT Malawi {{it-proper noun|m}} :: Malawi ===male=== tigre {f}, tigri {pl} :: tiger (male) - cane {m}, cani {pl} :: dog in general, male dog + cane {m}, cani {pl}, cagna {f} :: dog in general, male dog boy {m} (noun), inv :: A male ballet dancer. regina {f}, regine {pl} :: queen (monarch, male homosexual) poeta {m}, poeti {pl} Feminine poetessa :: poet (male or unspecified sex) @@ -7457,7 +7457,7 @@ Index: EN EN->IT ===preposition=== lui (pronoun) :: him (indirect form of lui used after a preposition) ===president=== - presidente {m}, presidenti {pl} :: president + presidente {m}, presidenti {pl}, presidentessa {f} :: president ===presto=== uno {m} ({f} una) :: Sono uno a cui piace alzarsi presto - I’m someone who likes getting up early or I’m a person who likes getting up early ===priests=== -- 2.43.0