From: Thad Hughes Date: Tue, 24 Jan 2012 05:33:01 +0000 (-0800) Subject: Added Urdu! X-Git-Url: http://gitweb.fperrin.net/?p=DictionaryPC.git;a=commitdiff_plain;h=372a902551f43fc66b2a5e1c378392c84514c4d2 Added Urdu! --- diff --git a/data/downloadInputs.sh b/data/downloadInputs.sh index 412ab18..8dcad04 100755 --- a/data/downloadInputs.sh +++ b/data/downloadInputs.sh @@ -16,9 +16,9 @@ echo "Note that unzipping is slow." L=en echo "Downloading from: http://dumps.wikimedia.org/${L}wiktionary/" WIKI=${L}wiktionary-20120109-pages-articles.xml -curl --remote-name http://dumps.wikimedia.org/${L}wiktionary/20120109/${WIKI}.bz2 -bunzip2 ${WIKI}.bz2 -mv ${WIKI} inputs/${L}wiktionary-pages-articles.xml +#curl --remote-name http://dumps.wikimedia.org/${L}wiktionary/20120109/${WIKI}.bz2 +#bunzip2 ${WIKI}.bz2 +#mv ${WIKI} inputs/${L}wiktionary-pages-articles.xml L=fr echo "Downloading from: http://dumps.wikimedia.org/${L}wiktionary/" diff --git a/src/com/hughes/android/dictionary/engine/CheckDictionariesMain.java b/src/com/hughes/android/dictionary/engine/CheckDictionariesMain.java index 898f462..811ebb0 100644 --- a/src/com/hughes/android/dictionary/engine/CheckDictionariesMain.java +++ b/src/com/hughes/android/dictionary/engine/CheckDictionariesMain.java @@ -11,7 +11,7 @@ import com.hughes.android.dictionary.DictionaryInfo; public class CheckDictionariesMain { static final String BASE_URL = "http://quickdic-dictionary.googlecode.com/files/"; - static final String VERSION_CODE = "v003"; + static final String VERSION_CODE = "v002"; public static void main(String[] args) throws IOException { final File dictDir = new File(DictionaryBuilderMain.OUTPUTS); diff --git a/src/com/hughes/android/dictionary/engine/DictionaryBuilder.java b/src/com/hughes/android/dictionary/engine/DictionaryBuilder.java index 5196637..8051a90 100644 --- a/src/com/hughes/android/dictionary/engine/DictionaryBuilder.java +++ b/src/com/hughes/android/dictionary/engine/DictionaryBuilder.java @@ -42,8 +42,8 @@ public class DictionaryBuilder { public final Dictionary dictionary; public final List indexBuilders = new ArrayList(); - public DictionaryBuilder(final String dictInfo, final Language lang0, final Language lang1, final String normalizerRules1, final String normalizerRules2, final Set lang1Stoplist, final Set lang2Stoplist) { - dictionary = new Dictionary(dictInfo); + public DictionaryBuilder(final String dictInfoString, final Language lang0, final Language lang1, final String normalizerRules1, final String normalizerRules2, final Set lang1Stoplist, final Set lang2Stoplist) { + dictionary = new Dictionary(dictInfoString); indexBuilders.add(new IndexBuilder(this, lang0.getIsoCode(), lang0.getIsoCode() + "->" + lang1.getIsoCode(), lang0, normalizerRules1, lang1Stoplist, false)); indexBuilders.add(new IndexBuilder(this, lang1.getIsoCode(), lang1.getIsoCode() + "->" + lang0.getIsoCode(), lang1, normalizerRules2, lang2Stoplist, true)); } @@ -128,7 +128,7 @@ public class DictionaryBuilder { fatalError("Must specify human readable name for: " + prefix + "Name"); } - final EntrySource entrySource = new EntrySource(dictionaryBuilder.dictionary.sources.size(), inputName); + final EntrySource entrySource = new EntrySource(dictionaryBuilder.dictionary.sources.size(), inputName, 0); System.out.println(""); String inputFormat = keyValueArgs.remove(prefix + "Format"); diff --git a/src/com/hughes/android/dictionary/engine/IndexBuilder.java b/src/com/hughes/android/dictionary/engine/IndexBuilder.java index 44bfa76..1140b64 100644 --- a/src/com/hughes/android/dictionary/engine/IndexBuilder.java +++ b/src/com/hughes/android/dictionary/engine/IndexBuilder.java @@ -68,6 +68,7 @@ public class IndexBuilder { } if (tokenEntryDatas.add(entryData)) { rows.add(new PairEntry.Row(entryData.index(), rows.size(), index)); + ++entryData.entry.entrySource.numEntries; ++numRows; // System.out.print(" " + typeToEntry.getKey() + ": "); diff --git a/src/com/hughes/android/dictionary/parser/enwiktionary/EnWiktionaryLangs.java b/src/com/hughes/android/dictionary/parser/enwiktionary/EnWiktionaryLangs.java index 80f47ed..83d5a7b 100644 --- a/src/com/hughes/android/dictionary/parser/enwiktionary/EnWiktionaryLangs.java +++ b/src/com/hughes/android/dictionary/parser/enwiktionary/EnWiktionaryLangs.java @@ -65,6 +65,7 @@ public class EnWiktionaryLangs { isoCodeToWikiName.put("BO", "Tibetan"); isoCodeToWikiName.put("TR", "Turkish"); isoCodeToWikiName.put("UK", "Ukrainian"); + isoCodeToWikiName.put("UR", "Urdu"); isoCodeToWikiName.put("VI", "Vietnamese"); isoCodeToWikiName.put("CI", "Welsh"); isoCodeToWikiName.put("YI", "Yiddish"); diff --git a/testdata/goldens/de-en.quickdic.text b/testdata/goldens/de-en.quickdic.text index 6a16c6b..29f7c0b 100644 --- a/testdata/goldens/de-en.quickdic.text +++ b/testdata/goldens/de-en.quickdic.text @@ -3,6 +3,9 @@ Version: devel, 2009-08-12 Source: http://dict.tu-chemnitz.de/ Thanks to Frank Richter. +EntrySource: chemnitz 980 +EntrySource: dictcc 13 + Index: DE DE->EN ***40*** 40 :: 40 diff --git a/testdata/goldens/wiktionary.ar_ar.quickdic.text b/testdata/goldens/wiktionary.ar_ar.quickdic.text index 1193e0e..e8c8354 100644 --- a/testdata/goldens/wiktionary.ar_ar.quickdic.text +++ b/testdata/goldens/wiktionary.ar_ar.quickdic.text @@ -1,4 +1,6 @@ dictInfo=SomeWikiData +EntrySource: enwiktionary.arabic 13363 + Index: AR AR->EN ***أ*** أ / ‍أ (’álifu hámzatin) :: The first letter of the Arabic alphabet is the small hamza (ء) that sits on top of أ, and the tall column is its bearer. The composite letter is called الف (’álif) and the hamza represents a glottal stop (/ʔ/). (For the pronunciation without hamza, see ا.) It is followed by ب. diff --git a/testdata/goldens/wiktionary.de_de.quickdic.text b/testdata/goldens/wiktionary.de_de.quickdic.text index 592109e..edf8ca9 100644 --- a/testdata/goldens/wiktionary.de_de.quickdic.text +++ b/testdata/goldens/wiktionary.de_de.quickdic.text @@ -1,4 +1,6 @@ dictInfo=SomeWikiData +EntrySource: enwiktionary.german 5303 + Index: DE DE->EN ===001=== ward (verb form) :: {archaic} Third-person singular indicative past form of werden. diff --git a/testdata/goldens/wiktionary.de_en.quickdic.text b/testdata/goldens/wiktionary.de_en.quickdic.text index 6de44b1..f7a8038 100644 --- a/testdata/goldens/wiktionary.de_en.quickdic.text +++ b/testdata/goldens/wiktionary.de_en.quickdic.text @@ -1,4 +1,6 @@ dictInfo=SomeWikiData +EntrySource: enwiktionary.english 4965 + Index: DE DE->EN ===2=== Zehn {f} (2) :: ten (the number following nine) (noun) diff --git a/testdata/goldens/wiktionary.fr_fr.quickdic.text b/testdata/goldens/wiktionary.fr_fr.quickdic.text index 460d5b3..6fd55d4 100644 --- a/testdata/goldens/wiktionary.fr_fr.quickdic.text +++ b/testdata/goldens/wiktionary.fr_fr.quickdic.text @@ -1,4 +1,6 @@ dictInfo=SomeWikiData +EntrySource: enwiktionary.french 6667 + Index: FR FR->EN ===00=== de {fr-prep} :: from (used to indicate the start of a time or range) diff --git a/testdata/goldens/wiktionary.it_en.quickdic.text b/testdata/goldens/wiktionary.it_en.quickdic.text index 6fe85b1..eaf5b1a 100644 --- a/testdata/goldens/wiktionary.it_en.quickdic.text +++ b/testdata/goldens/wiktionary.it_en.quickdic.text @@ -1,4 +1,6 @@ dictInfo=SomeWikiData +EntrySource: enwiktionary.english 3462 + Index: IT IT->EN ===15=== (periodo di) due settimane ; quindicina {f} (actually 15 days) :: fortnight (period of two weeks) (noun) diff --git a/testdata/goldens/wiktionary.it_it.quickdic.text b/testdata/goldens/wiktionary.it_it.quickdic.text index 3b6f946..c9fc3cf 100644 --- a/testdata/goldens/wiktionary.it_it.quickdic.text +++ b/testdata/goldens/wiktionary.it_it.quickdic.text @@ -1,4 +1,6 @@ dictInfo=SomeWikiData +EntrySource: enwiktionary.italian 5146 + Index: IT IT->EN ===1963=== dal :: since diff --git a/testdata/goldens/wiktionary.zh_en.quickdic.text b/testdata/goldens/wiktionary.zh_en.quickdic.text index 63351c7..0e1e948 100644 --- a/testdata/goldens/wiktionary.zh_en.quickdic.text +++ b/testdata/goldens/wiktionary.zh_en.quickdic.text @@ -1,4 +1,6 @@ dictInfo=SomeWikiData +EntrySource: enwiktionary.english 4579 + Index: ZH ZH->EN ===1=== (Cantonese) 今日 (gam1yat6) :: today (on the current day) (adverb) diff --git a/testdata/goldens/wiktionary.zh_zh.quickdic.text b/testdata/goldens/wiktionary.zh_zh.quickdic.text index 45382cd..e027b76 100644 --- a/testdata/goldens/wiktionary.zh_zh.quickdic.text +++ b/testdata/goldens/wiktionary.zh_zh.quickdic.text @@ -1,4 +1,6 @@ dictInfo=SomeWikiData +EntrySource: enwiktionary.chinese 628 + Index: ZH ZH->EN ===3=== NB {{cmn-adj|p|pint=nb}} :: {{slang|skey=nb}} fucking awesome diff --git a/todo.txt b/todo.txt index 68289df..23295b2 100644 --- a/todo.txt +++ b/todo.txt @@ -1,4 +1,6 @@ For next release: +help screen +eng_urdu fix up dictionary manager: thread that handles unzipping, downloading for the life of the application (so screen changes don't screw it up). check over UI.