From 0eaf1a63bc6d1145490b64d8c68e5a545401ec16 Mon Sep 17 00:00:00 2001 From: thadh Date: Mon, 16 Jul 2012 21:09:50 -0700 Subject: [PATCH] Updated unit tests, added WholeSectionToHtmlParser. --- .classpath | 2 +- data/inputs/stoplists/de.txt | 1 + data/inputs/stoplists/en.txt | 6 ++ data/wikipedia_info.txt | 9 +++ .../engine/DictionaryBuilderMain.java | 29 +++++++-- .../SimpleSingleWiktionaryParser.java | 7 -- .../wiktionary/WholeSectionToHtmlParser.java | 25 ++++++++ .../parser/wiktionary/WiktionaryLangs.java | 3 + .../goldens/wiktionary.ar_ar.quickdic.text | 14 +--- .../goldens/wiktionary.de_de.quickdic.text | 43 +------------ .../goldens/wiktionary.de_en.quickdic.text | 15 +---- .../goldens/wiktionary.fr_fr.quickdic.text | 36 +---------- .../goldens/wiktionary.it_en.quickdic.text | 12 +--- .../goldens/wiktionary.it_it.quickdic.text | 64 +------------------ .../goldens/wiktionary.zh_en.quickdic.text | 13 +--- .../goldens/wiktionary.zh_zh.quickdic.text | 5 +- 16 files changed, 76 insertions(+), 208 deletions(-) create mode 100644 data/wikipedia_info.txt delete mode 100644 src/com/hughes/android/dictionary/parser/wiktionary/SimpleSingleWiktionaryParser.java create mode 100644 src/com/hughes/android/dictionary/parser/wiktionary/WholeSectionToHtmlParser.java diff --git a/.classpath b/.classpath index 0aa1483..409e896 100755 --- a/.classpath +++ b/.classpath @@ -2,7 +2,7 @@ - + diff --git a/data/inputs/stoplists/de.txt b/data/inputs/stoplists/de.txt index 41a060b..b78eeff 100644 --- a/data/inputs/stoplists/de.txt +++ b/data/inputs/stoplists/de.txt @@ -46,6 +46,7 @@ einer hat Ich sie +sport er ab aus diff --git a/data/inputs/stoplists/en.txt b/data/inputs/stoplists/en.txt index b23066a..9f96dce 100644 --- a/data/inputs/stoplists/en.txt +++ b/data/inputs/stoplists/en.txt @@ -1,4 +1,5 @@ of +form and Compound Plural @@ -11,7 +12,10 @@ the in gerund person +le +li A +ne imperative s with @@ -27,6 +31,7 @@ from that by on +gli singular something up @@ -38,6 +43,7 @@ being as who second +tu is I Br diff --git a/data/wikipedia_info.txt b/data/wikipedia_info.txt new file mode 100644 index 0000000..e126255 --- /dev/null +++ b/data/wikipedia_info.txt @@ -0,0 +1,9 @@ +Download Mac OS mysql installation. +Unzip in ~, link ~/mysql to it. +cd mysql +scripts/mysql_install_db --user=thadh +./bin/mysqld_safe --user=thadh + +in mysql client: +source enwiki-20120702-langlinks.sql.sql +source enwiki-20120702-page.sql.sql diff --git a/src/com/hughes/android/dictionary/engine/DictionaryBuilderMain.java b/src/com/hughes/android/dictionary/engine/DictionaryBuilderMain.java index 159513d..0faf144 100644 --- a/src/com/hughes/android/dictionary/engine/DictionaryBuilderMain.java +++ b/src/com/hughes/android/dictionary/engine/DictionaryBuilderMain.java @@ -70,20 +70,26 @@ public class DictionaryBuilderMain extends TestCase { // Build the non EN ones. final String[][] nonEnPairs = new String[][] { + + /* {"AR", "DE" }, {"AR", "ES" }, {"AR", "FR" }, + {"AR", "HE" }, {"AR", "IT" }, {"AR", "JA" }, {"AR", "RU" }, + {"AR", "TR" }, // Turkish {"AR", "ZH" }, + {"DE", "AR" }, {"DE", "FR" }, {"DE", "CA" }, // Catalan {"DE", "CS" }, // Czech {"DE", "EO" }, // Esperanto {"DE", "ES" }, {"DE", "FR" }, + {"DE", "HE" }, {"DE", "HU" }, // Hungarian {"DE", "IT" }, {"DE", "JA" }, @@ -91,6 +97,7 @@ public class DictionaryBuilderMain extends TestCase { {"DE", "PL" }, // Polish {"DE", "RU" }, {"DE", "SV" }, // Swedish + {"DE", "TR" }, // Turkish {"DE", "ZH" }, @@ -103,6 +110,7 @@ public class DictionaryBuilderMain extends TestCase { {"FR", "LA" }, {"FR", "NL" }, // Dutch {"FR", "RU" }, + {"FR", "TR" }, // Turkish {"FR", "ZH" }, {"IT", "DE" }, @@ -117,24 +125,33 @@ public class DictionaryBuilderMain extends TestCase { {"IT", "PL" }, {"IT", "RU" }, {"IT", "SV" }, + {"IT", "TR" }, // Turkish {"IT", "ZH" }, {"JA", "ZH" }, {"JA", "AR" }, + {"JA", "KO" }, {"ZH", "AR" }, {"ZH", "DE" }, {"ZH", "ES" }, {"ZH", "FR" }, {"ZH", "IT" }, + {"ZH", "KO" }, {"NO", "SV" }, {"NO", "FI" }, {"FI", "SV" }, - {"AR", "HE" }, - {"KO", "JA" }, - {"KO", "ZH" }, + + {"PL", "FR" }, // Polish + {"PL", "RU" }, // Polish + {"PL", "HU" }, // Polish + {"PL", "ES" }, // Polish + + */ + + }; final Set> done = new LinkedHashSet>(); @@ -176,7 +193,7 @@ public class DictionaryBuilderMain extends TestCase { }); } if (1==1) { - return; + //return; } @@ -184,9 +201,9 @@ public class DictionaryBuilderMain extends TestCase { // isoToWikiName.keySet().retainAll(Arrays.asList("UK", "HR", "FI")); //isoToWikiName.clear(); - boolean go = true; + boolean go = false; for (final String foreignIso : isoToWikiName.keySet()) { - if (foreignIso.equals("GD")) { + if (foreignIso.equals("SL")) { go = true; } if (!go) { diff --git a/src/com/hughes/android/dictionary/parser/wiktionary/SimpleSingleWiktionaryParser.java b/src/com/hughes/android/dictionary/parser/wiktionary/SimpleSingleWiktionaryParser.java deleted file mode 100644 index d82b276..0000000 --- a/src/com/hughes/android/dictionary/parser/wiktionary/SimpleSingleWiktionaryParser.java +++ /dev/null @@ -1,7 +0,0 @@ -package com.hughes.android.dictionary.parser.wiktionary; - -public class SimpleSingleWiktionaryParser { - - // Just does everything about a word, minus translations. - -} diff --git a/src/com/hughes/android/dictionary/parser/wiktionary/WholeSectionToHtmlParser.java b/src/com/hughes/android/dictionary/parser/wiktionary/WholeSectionToHtmlParser.java new file mode 100644 index 0000000..dcf6f49 --- /dev/null +++ b/src/com/hughes/android/dictionary/parser/wiktionary/WholeSectionToHtmlParser.java @@ -0,0 +1,25 @@ +package com.hughes.android.dictionary.parser.wiktionary; + +import java.util.Map; +import java.util.regex.Pattern; + +import com.hughes.android.dictionary.engine.IndexBuilder; + +public class WholeSectionToHtmlParser extends AbstractWiktionaryParser { + + final IndexBuilder thisIndexBuilder; + final IndexBuilder foreignIndexBuilder; + final Pattern langPattern; + final Pattern langCodePattern; + + + @Override + void parseSection(String heading, String text) { + + } + + @Override + void removeUselessArgs(Map namedArgs) { + } + +} diff --git a/src/com/hughes/android/dictionary/parser/wiktionary/WiktionaryLangs.java b/src/com/hughes/android/dictionary/parser/wiktionary/WiktionaryLangs.java index 83fa0a1..e2caa41 100644 --- a/src/com/hughes/android/dictionary/parser/wiktionary/WiktionaryLangs.java +++ b/src/com/hughes/android/dictionary/parser/wiktionary/WiktionaryLangs.java @@ -58,7 +58,9 @@ public class WiktionaryLangs { isoCodeToWikiName.put("JA", "Japanese"); isoCodeToWikiName.put("KO", "Korean"); isoCodeToWikiName.put("KU", "Kurdish"); + isoCodeToWikiName.put("LO", "Lao"); isoCodeToWikiName.put("MS", "Malay"); + isoCodeToWikiName.put("ML", "Malayalam"); isoCodeToWikiName.put("MI", "Maori"); isoCodeToWikiName.put("MN", "Mongolian"); isoCodeToWikiName.put("NE", "Nepali"); @@ -72,6 +74,7 @@ public class WiktionaryLangs { isoCodeToWikiName.put("SA", "Sanskrit"); isoCodeToWikiName.put("SR", "Serbian"); isoCodeToWikiName.put("SK", "Slovak"); + isoCodeToWikiName.put("SL", "Slovene|Slovenian"); isoCodeToWikiName.put("SO", "Somali"); isoCodeToWikiName.put("ES", "Spanish"); isoCodeToWikiName.put("SW", "Swahili"); diff --git a/testdata/goldens/wiktionary.ar_ar.quickdic.text b/testdata/goldens/wiktionary.ar_ar.quickdic.text index a107f98..f9c259f 100644 --- a/testdata/goldens/wiktionary.ar_ar.quickdic.text +++ b/testdata/goldens/wiktionary.ar_ar.quickdic.text @@ -1,5 +1,5 @@ dictInfo=SomeWikiData -EntrySource: enwiktionary.arabic 15359 +EntrySource: enwiktionary.arabic 15348 Index: AR AR->EN ***٠*** @@ -13621,18 +13621,6 @@ Index: EN EN->AR ===forget=== ذهب (ḏáhaba) {{ar-verb|form=1|impf=يذهب|impftr=yaḏhabu}} :: to escape, to slip, to lose sight of, to forget نَامَ (nāma) {{ar-verb|form=I|impfhead=يَنامُ|impf=ينام|impftr=yanāmu|II=و}} :: to forget -===form=== - سن {{ar-verb (old)|I|سن|sánna}}{{ar-verb (old)|II|سن|sánna}}{{ar-verb (old)|IV|اسن|’ásanna}}{{ar-verb (old)|VIII|استن|istánna}} :: to mold, to shape, to form - جبل {{ar-verb (old)|I|جبل|jábala}} :: to mold, to form, to shape, to fashion - عن {{ar-verb (old)|I|عَنّ|ʕánna}} :: to take shape, to form, to arise, to spring up - قطر {{ar-verb (old)|I|قطر|qáṭara}}{{ar-verb (old)|II|قطر|qáṭṭara}}{{ar-verb (old)|V|تقطر|taqáṭṭara}}{{ar-verb (old)|VI|تقاطر|taqāṭara}}{{ar-verb (old)|X|استقطر|istáqṭara}} :: to form a train of camels, to line up camels in single file (connected with halters) - عربية (ʕarabíyya) {f} or {p} :: Arabic (feminine or plural form of عربي) - ﻫ (initial form of ه) (hā’) :: Normally the twenty-sixth letter of the Arabic alphabet, when this letter is used in this initial form as an enumerator, it is interpreted as the fifth letter in traditional abjad order, equivalent to our Roman numeral V or Ⅴ (see abjad numerals). It is preceded by د and followed by و. - عرب {{ar-verb (old)|II|عَرّبَ|{LR}3arraba}}{{ar-verb (old)|IV|أعرب|'á3raba}}{{ar-verb (old)|V|تعرب|ta3árraba}}{{ar-verb (old)|X|استعرب|istá3raba}} :: to Arabicize, Arabize; to give an Arabic form. - عرب {{ar-verb (old)|II|عَرّبَ|{LR}3arraba}}{{ar-verb (old)|IV|أعرب|'á3raba}}{{ar-verb (old)|V|تعرب|ta3árraba}}{{ar-verb (old)|X|استعرب|istá3raba}} :: to Arabicize, Arabize; to give an Arabic form. - زبر (zúbar) {p} :: Plural form of زبرة. - ازهر أزْهُر (’áz-hur) :: flowers, blossoms (Plural form of زهر) - اعراب (iʕrāb) {m}اعراب{p} :: Arabs (Plural form of عرب). ===formal=== فتوى (fatwā) {f}, فتاو (fatāwin) {p}, فتاوى (fatāwā) {p} :: fatwa, formal legal opinion ===formerly=== diff --git a/testdata/goldens/wiktionary.de_de.quickdic.text b/testdata/goldens/wiktionary.de_de.quickdic.text index b32792a..3dcb59f 100644 --- a/testdata/goldens/wiktionary.de_de.quickdic.text +++ b/testdata/goldens/wiktionary.de_de.quickdic.text @@ -1,5 +1,5 @@ dictInfo=SomeWikiData -EntrySource: enwiktionary.german 5579 +EntrySource: enwiktionary.german 5547 Index: DE DE->EN ===001=== @@ -6703,47 +6703,6 @@ Index: EN EN->DE (Old High German) wat {{goh-noun|g=n}} :: ford ===fork=== Gabel {{de-noun|g=f|plural=Gabeln}} :: fork -===form=== - biegen {de-verb} :: {{transitive|auxiliary: “haben”}} to bend; to form (something) into a curve. - (Low German) was (verb form) :: wash; apocoped form of wasse, singular imperative of wassen; mainly used in the Netherlands, equivalent to other dialekts' wasche/waske - (Low German) was (verb form) :: wax; apocoped form of wasse, singular imperative of wassen - (Low German) was (verb form) :: grow; apocoped form of wasse, singular imperative of wassen - deutscher {m} (adjective form) :: male form of deutsch - ein deutscher Wein :: -- - (Middle Low German) sîn (pronoun) :: {possessive} his; possessive form of he - (Middle Low German) sîn (pronoun) :: of his; genitive form of he - lohant ret her Zeno hen na Verona to dem vader sin. :: -- - John rode Sir Zeno to Verona, to the father of his. :: -- - (Middle Low German) sîn (pronoun) :: sometimes used to form the genitive - Deme könnink sin land, dat is: des könninges land. :: -- - The king his land, that is: the king's land. :: -- - (Middle Low German) sîn (pronoun) :: {possessive} its; possessive form of it - (Middle Low German) sîn (pronoun) :: of it; genitive form of it - deutsche (adjective form) :: form of deutsch after a definite article - die deutsche Sprache; der deutsche Bundespräsident; das deutsche Gesundheitssystem :: -- - meine {f/pl} (pronoun form) :: {possessive} Feminine nominative and accusative singular form of mein. - meine {f/pl} (pronoun form) :: {possessive} Nominative and accusative plural form of mein. - meine (verb form) :: First-person singular indicative present form of meinen. - meine (verb form) :: First-person singular subjunctive present form of meinen. - meine (verb form) :: Third-person singular subjunctive present form of meinen. - meine (verb form) :: Second-person singular imperative form of meinen. - grub (verb form) :: singular past imperfect form of graben - esse (verb form) :: First-person singular indicative present form of essen. - esse (verb form) :: First-person singular subjunctive present form of essen. - esse (verb form) :: Third-person singular subjunctive present form of essen. - -er (suffix) :: Forming agent nouns from verbs with the sense of ‘person or thing which does’, suffixed to the first-person singular indicative present form from which the E is dropped. - arbeiten 'to work'; (ich) arbeit(e) + -er '-er' -> Arbeiter 'worker' :: -- - englische :: nominative singular form of englisch (English) used after the definite article. - englische :: nominative singular feminine form of englisch (English) used after the indefinite article. - englische :: accusative singular feminine and neuter form of englisch (English) used after the definite article. - englische :: accusative singular feminine form of englisch (English) used after the indefinite article. - ward (verb form) :: {archaic} First-person singular indicative past form of werden. - ward (verb form) :: {archaic} Third-person singular indicative past form of werden. - Und Gott sprach: »Es werde Licht!« Und es ward Licht. [http://www.bibledbdata.org/onlinebibles/german_l/01_001.htm] :: And God said: "Let there be light." And there was light. - sorg :: imperative singular form of sorgen (‘to worry’, ‘to care’) - Claudia (proper noun) :: {{given name|female}} from the Latin feminine form of Claudius; quite popular from the 1960s to the 1980s. - junges (adjective form) :: Neuter form of jung. - Adam (proper noun) :: {{given name|male}}. Pet form: Adi ===formal=== sie (pl.) :: {personal} you, used to refer to any number of persons in formal conversations ===former=== diff --git a/testdata/goldens/wiktionary.de_en.quickdic.text b/testdata/goldens/wiktionary.de_en.quickdic.text index 8383451..461ba0d 100644 --- a/testdata/goldens/wiktionary.de_en.quickdic.text +++ b/testdata/goldens/wiktionary.de_en.quickdic.text @@ -1,5 +1,5 @@ dictInfo=SomeWikiData -EntrySource: enwiktionary.english 5036 +EntrySource: enwiktionary.english 5024 Index: DE DE->EN ===2=== @@ -4799,19 +4799,6 @@ Index: EN EN->DE Haupt- :: head (foremost in rank or importance) (adjective) ===forgiven=== Absolution {f} :: absolution (Exercise of priestly jurisdiction in the sacrament of penance, by which Catholics believe the sins of the truly penitent are forgiven) (noun) -===form=== - Abkürzung {f}, Kurzbezeichnung {f}, Kürzel {n}, Kurzform {f} :: abbreviation (shortened or contracted form of a word or phrase) (noun) - Plural {m}, Mehrzahl {f} :: plural (word in plural form) (noun) - Außerirdischer {m}, Außerirdische {f} :: alien (life form of non-Earth origin) (noun) - Alphabetismus {m} :: alphabetism (form of literacy) (noun) - sein :: be (elliptical form of "be here", or similar) (verb) - werden :: be (used to form the passive voice) (verb) - sein :: be (used to form the continuous forms of various tenses) (verb) - sein :: be ((archaic) used to form the perfect aspect with certain intransitive verbs) (verb) - sein :: be (used to form future tenses, especially the future subjunctive) (verb) - -ität {f} :: -ity (Used to form nouns from adjectives.) (suffix) - qualitativ :: qualitative ((chemistry) of a form of analysis that yields the identity of a compound) (adjective) - Einzahl {f}, Singular {m} :: singular (grammar: form of a word that refers to only one thing) (noun) ===formal=== Marsch {m} :: march (formal, rhythmic way of walking) (noun) ===formalism=== diff --git a/testdata/goldens/wiktionary.fr_fr.quickdic.text b/testdata/goldens/wiktionary.fr_fr.quickdic.text index d25d88a..7b80a41 100644 --- a/testdata/goldens/wiktionary.fr_fr.quickdic.text +++ b/testdata/goldens/wiktionary.fr_fr.quickdic.text @@ -1,5 +1,5 @@ dictInfo=SomeWikiData -EntrySource: enwiktionary.french 6554 +EntrySource: enwiktionary.french 6529 Index: FR FR->EN ===00=== @@ -8301,29 +8301,6 @@ Index: EN EN->FR en {fr-prep} :: of, made of (used to describe composition) une chaise en hêtre :: a chair made of beech/a beech chair une fourchette en métal :: a fork made of metal/a metal fork -===form=== - former {fr-verb} :: to form (generic sense) - abrasive {f} :: Feminine singular form of abrasif - abusive {f} :: Feminine singular form of abusif - Fanny {fr-proper noun} :: {{given name|female}} borrowed from English; also used as a pet form of Stéphanie. - case {{fr-noun|f}} :: box (on form) - rata :: third-person singular past historic form of rater - vomit :: third-person singular present indicative form of vomir - vomit :: third-person singular past historic form of vomir - rate {fr-verb-form} :: first-person singular indicative present form of rater - rate {fr-verb-form} :: third-person singular indicative present form of rater - rate {fr-verb-form} :: first-person singular subjunctive present form of rater - rate {fr-verb-form} :: third-person singular subjunctive present form of rater - suit :: third-person singular present indicative form of suivre - être {{fr-verb|type=auxiliary}} :: {auxiliary} Used to form the perfect and pluperfect tense of certain verbs (including all reflexive verbs) - Après être allé au yoga, je suis rentré chez moi. :: After having gone to yoga, I came back home. - être {{fr-verb|type=auxiliary}} :: {auxiliary} to be (Used to form the passive voice) - Il peut être battu ce soir. :: He could be beaten this evening. - avoir {{fr-verb|type=auxiliary}} :: {{context|auxiliary}} to have (auxiliary verb to form compound past tenses of most verbs) - J'ai parlé. :: I have spoken. - (Middle French) avoir (verb) :: {{context|auxiliary verb}} to have (verb used to form the perfect tense) - (Old French) avoir (verb) :: {{context|auxiliary verb}} to have (verb used to form the perfect tense) - dame {{fr-noun|f}} :: A polite form of address for a woman. ===Form=== bel {fr-adj-form} :: Form of beau to be used before masculine nouns starting with a vowel. quatre-vingt {{fr-noun-inv|m}} :: {France} Form of quatre-vingts (eighty) used in compounds (the numbers 81 to 99, larger numbers ending in numbers from 81 to 99, and the ordinal numbers corresponding to any of these numbers). @@ -9653,13 +9630,6 @@ Index: EN EN->FR robin {{fr-noun|m}} :: {{archaic|pejorative| lang=fr}} lawyer ===lay=== civil {fr-adj} :: {politics} lay -===le=== - (Old French) al (contraction) :: contraction of a + le (to the) - (Old French) del (contraction) :: contraction of de + le (of the) - du (contraction) :: contraction of de + le (of the). - du (contraction) :: contraction of de + le, forms the partitive article. - The partitive article signifies "some", but it often is not translated in English, Dutch, or German. :: -- - (Old French) du (contraction) :: contraction of de + le (of the) ===Le=== (Old French) face {{fro-noun|f}} :: {anatomy} face circa 1170, {{w|Chrétien de Troyes}}, Érec et Énide: :: Le chief li desarme et la face. @@ -9719,10 +9689,6 @@ Index: EN EN->FR circa 1170, {{w|Chrétien de Troyes}}, Érec et Énide: :: quel haste avez,
Qui a tel ore vos levez? What haste do you have :: -- That wakes up at this time of day? :: -- -===li=== - (Old French) face {{fro-noun|f}} :: {anatomy} face - circa 1170, {{w|Chrétien de Troyes}}, Érec et Énide: :: Le chief li desarme et la face. - He exposed his head and his face. :: -- ===liberty=== libre {fr-adj-mf} :: free, at liberty Un homme libre. :: A free man. diff --git a/testdata/goldens/wiktionary.it_en.quickdic.text b/testdata/goldens/wiktionary.it_en.quickdic.text index f1b0a7b..b19caa2 100644 --- a/testdata/goldens/wiktionary.it_en.quickdic.text +++ b/testdata/goldens/wiktionary.it_en.quickdic.text @@ -1,5 +1,5 @@ dictInfo=SomeWikiData -EntrySource: enwiktionary.english 3497 +EntrySource: enwiktionary.english 3488 Index: IT IT->EN ===15=== @@ -3221,16 +3221,6 @@ Index: EN EN->IT all'estero :: abroad (in foreign countries) (adverb) ===foreigner=== straniero {m}, forestiero {m} :: alien (foreigner) (noun) -===form=== - abbreviazione {f} :: abbreviation (shortened or contracted form of a word or phrase) (noun) - plurale {m} :: plural (word in plural form) (noun) - extraterrestre, alieno :: alien (life form of non-Earth origin) (noun) - raffazzonato :: crude (statistics: in an unanalyzed form) (adjective) - andare, venire :: be (elliptical form of "be here", or similar) (verb) - essere :: be (used to form the passive voice) (verb) - stare :: be (used to form the continuous forms of various tenses) (verb) - -ità {f} :: -ity (Used to form nouns from adjectives.) (suffix) - singolare {m} :: singular (grammar: form of a word that refers to only one thing) (noun) ===formal=== marcia {f} :: march (formal, rhythmic way of walking) (noun) ===forms=== diff --git a/testdata/goldens/wiktionary.it_it.quickdic.text b/testdata/goldens/wiktionary.it_it.quickdic.text index 1ec8583..c51c6bd 100644 --- a/testdata/goldens/wiktionary.it_it.quickdic.text +++ b/testdata/goldens/wiktionary.it_it.quickdic.text @@ -1,5 +1,5 @@ dictInfo=SomeWikiData -EntrySource: enwiktionary.italian 5239 +EntrySource: enwiktionary.italian 5184 Index: IT IT->EN ===1963=== @@ -5726,68 +5726,6 @@ Index: EN EN->IT Forli {it-proper noun} :: Forli (town) ===Forlì=== Forlì-Cesena {it-proper noun} :: Forlì-Cesena -===form=== - pie {f} :: Feminine plural form of pio - fa (verb form) :: Third-person singular indicative present form of fare. - fa (verb form) :: Second-person singular imperative form of fare. - ablative {f} :: Feminine plural form of ablativo - abortive {f} :: Feminine plural form of abortivo. - abrasive {f} :: Feminine plural form of abrasivo - abrogative {f} :: Feminine plural form of abrogativo - lente (adjective form) {f}{p} :: (feminine plural form of lento) slow - abusive {f} :: Feminine plural form of abusivo - premature :: Feminine plural form of prematuro - qualitative {f} :: Feminine plural form of qualitativo - transfinite {f} :: Feminine plural form of transfinito - accentuate {f} :: Feminine plural form of accentuato - derivative {f} :: Feminine plural form of derivativo - cube {f} (adjective form) :: Feminine plural form of cubo - nominative {f} :: Feminine plural form of nominativo. - vocative {f} :: Feminine plural form of vocativo - quadruple {f} :: Feminine plural form of quadruplo - ultramarine {f} :: Feminine plural form of ultramarino - scalene {f} :: Feminine plural form of scaleno - ben {it-adv} :: Short form of bene. - ben fatto :: well done - Alberta {{it-proper noun|g=f}} :: {{given name|female}}, feminine form of Alberto. - dissociative {f} :: Feminine plural form of dissociativo - trite {f} :: Feminine plural form of trito - simulate {f} :: Feminine plural form of simulato - fricative {f} :: Feminine plural form of fricativo - dark (adjective) {inv} :: dark (used especially to describe a form of punk music) - none {f|p} (adjective form) :: (feminine plural form of nono) ninth - none {f|p} :: (feminine plural form of nono) ninth (the one in the ninth position; fraction) - venturi {m} :: Plural form of venturo - delegate {f} :: Feminine plural form of delegato - obsolete {{{f|p}}} (adjective form), feminine plural form of: obsoleto :: Feminine plural form of obsoleto - locative {f} :: Feminine plural form of locativo - lui (pronoun) :: him (indirect form of lui used after a preposition) - asinine {f} :: Feminine plural form of asinino - sei (verb form) :: second-person singular indicative present form of essere - accelerative {f} :: Feminine plural form of accelerativo - si (pronoun) :: (the so-called si passivante, used to form the passive voice of a verb) it (but also see note below) - Example: Si dice che Maria voleva uccidere Giovanni (It is said that Maria wanted to kill Giovanni). :: -- - Note: In this sense, verb + si is often translated as become or get + past participle in English. :: -- - Examples: :: -- - Ci vuole un po’ di tempo per abituarsi (It takes a while to become accustomed) :: -- - A Luca piace ubriacarsi (Luca likes to get drunk) :: -- - meri {m} :: Plural form of mero - argentine {f} :: Feminine plural form of argentino - info {m} (noun) {inv} :: {informal} Short form of informazione. - generative {f} :: Feminine plural form of generativo - laureate {f} :: Feminine plural form of laureato - secure {f} :: Feminine plural form of securo - obtrusive {f} :: Feminine plural form of obtrusivo - secrete {f} :: Feminine plural form of secreto - acetose {f} :: Feminine plural form of acetoso - furtive {f} :: Feminine plural form of furtivo - associative {f} :: Feminine plural form of associativo - porcine {f} :: Feminine plural form of porcino - erudite {f} :: Feminine plural form of erudito - grate {f} :: Feminine plural form of grato - nude {f} :: Feminine plural form of nudo - creole {f} :: Feminine plural form of creolo - ubique {f} :: Feminine plural form of ubiquo ===former=== Ragusa {{it-proper noun|g=f}} :: former name, before 1918, of Dubrovnik ===formula=== diff --git a/testdata/goldens/wiktionary.zh_en.quickdic.text b/testdata/goldens/wiktionary.zh_en.quickdic.text index 4f0bb78..dcd8dfb 100644 --- a/testdata/goldens/wiktionary.zh_en.quickdic.text +++ b/testdata/goldens/wiktionary.zh_en.quickdic.text @@ -1,5 +1,5 @@ dictInfo=SomeWikiData -EntrySource: enwiktionary.english 4625 +EntrySource: enwiktionary.english 4615 Index: ZH ZH->EN ===1=== @@ -5255,17 +5255,6 @@ Index: EN EN->ZH 外國人, 外国人 (wàiguórén), 外人 (wàirén), 老外 (lǎowài) (colloquial) :: alien (foreigner) (noun) ===foreigners=== 外債, 外债 (wàizhài), 對外債務, 对外债务 (duìwài zhàiwù) :: foreign debt (a debt owed to foreigners) (noun) -===form=== - 縮寫, 缩写 (suōxiě); 簡寫, 简写 (jiǎnxiě); 略語, 略语 (lüèyǔ) :: abbreviation (shortened or contracted form of a word or phrase) (noun) - 複數, 复数 (fùshù), 眾數, 众数 (zhòngshù) :: plural (word in plural form) (noun) - 外星人 (wàixīngrén), 宇宙人 (yǔzhòurén) :: alien (life form of non-Earth origin) (noun) - 是 (shì), 有 (yǒu), 在 (zài), 來, 来 (lái) :: be (elliptical form of "be here", or similar) (verb) - 被 (bèi) + verb (particle) :: be (used to form the passive voice) (verb) - 在 (zài), 正在 (zhèngzài); verb + 著 / 着 (zhe) :: be (used to form the continuous forms of various tenses) (verb) - (not used) :: be ((archaic) used to form the perfect aspect with certain intransitive verbs) (verb) - (not used) :: be (used to form future tenses, especially the future subjunctive) (verb) - 性 (xìng) :: -ity (Used to form nouns from adjectives.) (suffix) - 單數, 单数 (dānshù) :: singular (grammar: form of a word that refers to only one thing) (noun) ===formal=== 行進, 行进 (xíngjìn) :: march (formal, rhythmic way of walking) (noun) ===forms=== diff --git a/testdata/goldens/wiktionary.zh_zh.quickdic.text b/testdata/goldens/wiktionary.zh_zh.quickdic.text index f62e461..938cb26 100644 --- a/testdata/goldens/wiktionary.zh_zh.quickdic.text +++ b/testdata/goldens/wiktionary.zh_zh.quickdic.text @@ -1,5 +1,5 @@ dictInfo=SomeWikiData -EntrySource: enwiktionary.chinese 631 +EntrySource: enwiktionary.chinese 629 Index: ZH ZH->EN ===3=== @@ -1202,9 +1202,6 @@ Index: EN EN->ZH 五 {{cmn-car-num|ts|pin=wǔ|pint=wu3|rs=二02}} :: five ===fond=== 愛 {{cmn-verb|t|pin=ài|pint=ai4|tra=愛|sim=爱|rs=心09}} :: {{Beginning Mandarin|script=traditional|skey=心09}} to love; to be fond of -===form=== - 书 {{cmn-hanzi|tra=書|pin=qián (qian2), shū (shu1)|wg=ch'ien2, shu1}} :: form of a written or printed character;script - 円 (yuán) :: archaic form of 圆 ===found=== 東京 {{cmn-noun|t|pin=Dōngjīng|pint=dong1jing1|tra=東京|sim=东京|rs=木04}} :: {historical} secondary capital, usually found to the east of the main capital ===four=== -- 2.43.0