From: Thad Hughes Date: Sun, 25 Dec 2011 08:53:07 +0000 (-0800) Subject: Moved data here. X-Git-Url: https://gitweb.fperrin.net/?a=commitdiff_plain;h=d4f4b7eed6992cf3527a8d5ef9460c223f4644a4;hp=6e550bafb1cb6a916d94c15ded707c865678e5b4;p=DictionaryPC.git Moved data here. --- diff --git a/data/.gitignore b/data/.gitignore new file mode 100644 index 0000000..e69de29 diff --git a/data/inputs/.gitignore b/data/inputs/.gitignore new file mode 100644 index 0000000..ac761eb --- /dev/null +++ b/data/inputs/.gitignore @@ -0,0 +1,3 @@ +NONFREE/* +de-en_chemnitz.txt +enwiktionary-* diff --git a/data/inputs/de-en_all.info b/data/inputs/de-en_all.info new file mode 100644 index 0000000..e574333 --- /dev/null +++ b/data/inputs/de-en_all.info @@ -0,0 +1 @@ +Chemnitz + DictCC. Do not distribute!!! diff --git a/data/inputs/de-en_chemnitz_enwiktionary.info b/data/inputs/de-en_chemnitz_enwiktionary.info new file mode 100644 index 0000000..9f1d34e --- /dev/null +++ b/data/inputs/de-en_chemnitz_enwiktionary.info @@ -0,0 +1,13 @@ +German-English dictionary dedicated to my parents Bob and Judy Hughes, +my German teachers, Suzanne Blount and Steven Sidore, +my friends Max Haeberlin, Severin Obertuefer, and Ben Keck, +and all my friends and students at Munich International School. +Und Die Toten Hosen. + +German-English dictionary contains information from: +de-en.txt - a German-English dictionary +Version: devel 2011-06-21 +Source: http://dict.tu-chemnitz.de/ +Thanks to Frank Richter. +And from: +(EN)Wiktionary diff --git a/data/inputs/enWikiSplit/.gitignore b/data/inputs/enWikiSplit/.gitignore new file mode 100644 index 0000000..f9e8176 --- /dev/null +++ b/data/inputs/enWikiSplit/.gitignore @@ -0,0 +1 @@ +*.data diff --git a/data/inputs/flag_graphics/americanFlag.jpg b/data/inputs/flag_graphics/americanFlag.jpg new file mode 100755 index 0000000..8d85b25 Binary files /dev/null and b/data/inputs/flag_graphics/americanFlag.jpg differ diff --git a/data/inputs/flag_graphics/as-lgflag.gif b/data/inputs/flag_graphics/as-lgflag.gif new file mode 100644 index 0000000..07cc33b Binary files /dev/null and b/data/inputs/flag_graphics/as-lgflag.gif differ diff --git a/data/inputs/flag_graphics/au-lgflag.gif b/data/inputs/flag_graphics/au-lgflag.gif new file mode 100644 index 0000000..ff3e618 Binary files /dev/null and b/data/inputs/flag_graphics/au-lgflag.gif differ diff --git a/data/inputs/flag_graphics/be-lgflag.gif b/data/inputs/flag_graphics/be-lgflag.gif new file mode 100644 index 0000000..bc57368 Binary files /dev/null and b/data/inputs/flag_graphics/be-lgflag.gif differ diff --git a/data/inputs/flag_graphics/br-lgflag.gif b/data/inputs/flag_graphics/br-lgflag.gif new file mode 100644 index 0000000..940432b Binary files /dev/null and b/data/inputs/flag_graphics/br-lgflag.gif differ diff --git a/data/inputs/flag_graphics/ca-lgflag.gif b/data/inputs/flag_graphics/ca-lgflag.gif new file mode 100644 index 0000000..efc4496 Binary files /dev/null and b/data/inputs/flag_graphics/ca-lgflag.gif differ diff --git a/data/inputs/flag_graphics/ee-lgflag.gif b/data/inputs/flag_graphics/ee-lgflag.gif new file mode 100644 index 0000000..97c742b Binary files /dev/null and b/data/inputs/flag_graphics/ee-lgflag.gif differ diff --git a/data/inputs/flag_graphics/ei-lgflag.gif b/data/inputs/flag_graphics/ei-lgflag.gif new file mode 100644 index 0000000..68a9273 Binary files /dev/null and b/data/inputs/flag_graphics/ei-lgflag.gif differ diff --git a/data/inputs/flag_graphics/flags.xcf b/data/inputs/flag_graphics/flags.xcf new file mode 100755 index 0000000..b322c28 Binary files /dev/null and b/data/inputs/flag_graphics/flags.xcf differ diff --git a/data/inputs/flag_graphics/fr-lgflag.gif b/data/inputs/flag_graphics/fr-lgflag.gif new file mode 100644 index 0000000..9fa5027 Binary files /dev/null and b/data/inputs/flag_graphics/fr-lgflag.gif differ diff --git a/data/inputs/flag_graphics/germanFlag.jpg b/data/inputs/flag_graphics/germanFlag.jpg new file mode 100755 index 0000000..b796c87 Binary files /dev/null and b/data/inputs/flag_graphics/germanFlag.jpg differ diff --git a/data/inputs/flag_graphics/gm-lgflag.gif b/data/inputs/flag_graphics/gm-lgflag.gif new file mode 100644 index 0000000..ef82b20 Binary files /dev/null and b/data/inputs/flag_graphics/gm-lgflag.gif differ diff --git a/data/inputs/flag_graphics/gr-lgflag.gif b/data/inputs/flag_graphics/gr-lgflag.gif new file mode 100644 index 0000000..f8f35d0 Binary files /dev/null and b/data/inputs/flag_graphics/gr-lgflag.gif differ diff --git a/data/inputs/flag_graphics/it-lgflag.gif b/data/inputs/flag_graphics/it-lgflag.gif new file mode 100644 index 0000000..18b6f03 Binary files /dev/null and b/data/inputs/flag_graphics/it-lgflag.gif differ diff --git a/data/inputs/flag_graphics/ja-lgflag.gif b/data/inputs/flag_graphics/ja-lgflag.gif new file mode 100644 index 0000000..af4419a Binary files /dev/null and b/data/inputs/flag_graphics/ja-lgflag.gif differ diff --git a/data/inputs/flag_graphics/mx-lgflag.gif b/data/inputs/flag_graphics/mx-lgflag.gif new file mode 100644 index 0000000..2569250 Binary files /dev/null and b/data/inputs/flag_graphics/mx-lgflag.gif differ diff --git a/data/inputs/flag_graphics/nl-lgflag.gif b/data/inputs/flag_graphics/nl-lgflag.gif new file mode 100644 index 0000000..e6fa805 Binary files /dev/null and b/data/inputs/flag_graphics/nl-lgflag.gif differ diff --git a/data/inputs/flag_graphics/no-lgflag.gif b/data/inputs/flag_graphics/no-lgflag.gif new file mode 100644 index 0000000..c29a5eb Binary files /dev/null and b/data/inputs/flag_graphics/no-lgflag.gif differ diff --git a/data/inputs/flag_graphics/po-lgflag.gif b/data/inputs/flag_graphics/po-lgflag.gif new file mode 100644 index 0000000..e7a49d8 Binary files /dev/null and b/data/inputs/flag_graphics/po-lgflag.gif differ diff --git a/data/inputs/flag_graphics/rs-lgflag.gif b/data/inputs/flag_graphics/rs-lgflag.gif new file mode 100644 index 0000000..c958629 Binary files /dev/null and b/data/inputs/flag_graphics/rs-lgflag.gif differ diff --git a/data/inputs/flag_graphics/sf-lgflag.gif b/data/inputs/flag_graphics/sf-lgflag.gif new file mode 100644 index 0000000..3750169 Binary files /dev/null and b/data/inputs/flag_graphics/sf-lgflag.gif differ diff --git a/data/inputs/flag_graphics/sp-lgflag.gif b/data/inputs/flag_graphics/sp-lgflag.gif new file mode 100644 index 0000000..7cf2cb7 Binary files /dev/null and b/data/inputs/flag_graphics/sp-lgflag.gif differ diff --git a/data/inputs/flag_graphics/sw-lgflag.gif b/data/inputs/flag_graphics/sw-lgflag.gif new file mode 100644 index 0000000..4fdb247 Binary files /dev/null and b/data/inputs/flag_graphics/sw-lgflag.gif differ diff --git a/data/inputs/flag_graphics/sz-lgflag.gif b/data/inputs/flag_graphics/sz-lgflag.gif new file mode 100644 index 0000000..8184466 Binary files /dev/null and b/data/inputs/flag_graphics/sz-lgflag.gif differ diff --git a/data/inputs/flag_graphics/ts-lgflag.gif b/data/inputs/flag_graphics/ts-lgflag.gif new file mode 100644 index 0000000..ae0c7f9 Binary files /dev/null and b/data/inputs/flag_graphics/ts-lgflag.gif differ diff --git a/data/inputs/flag_graphics/tw-lgflag.gif b/data/inputs/flag_graphics/tw-lgflag.gif new file mode 100644 index 0000000..4c9c400 Binary files /dev/null and b/data/inputs/flag_graphics/tw-lgflag.gif differ diff --git a/data/inputs/flag_graphics/uk-lgflag.gif b/data/inputs/flag_graphics/uk-lgflag.gif new file mode 100644 index 0000000..17b15b7 Binary files /dev/null and b/data/inputs/flag_graphics/uk-lgflag.gif differ diff --git a/data/inputs/flag_graphics/us-lgflag.gif b/data/inputs/flag_graphics/us-lgflag.gif new file mode 100644 index 0000000..7269199 Binary files /dev/null and b/data/inputs/flag_graphics/us-lgflag.gif differ diff --git a/data/inputs/flag_graphics/wa-lgflag.gif b/data/inputs/flag_graphics/wa-lgflag.gif new file mode 100644 index 0000000..63a7799 Binary files /dev/null and b/data/inputs/flag_graphics/wa-lgflag.gif differ diff --git a/data/inputs/stoplists/de.txt b/data/inputs/stoplists/de.txt new file mode 100644 index 0000000..b78eeff --- /dev/null +++ b/data/inputs/stoplists/de.txt @@ -0,0 +1,59 @@ +ornith +techn +med +in +etw +der +von +sich +die +mit +auf +nicht +ein +am +zu +min +geogr +ist +sein +zool +ugs +eine +cook +und +für +den +comp +bot +an +jdn +geol +im +mach +fin +übtr +ich +mil +econ +aus +mus +jdm +das +chem +Sie +einer +hat +Ich +sie +sport +er +ab +aus +an +ein +auf +Er +einen +Das +jur diff --git a/data/inputs/stoplists/empty.txt b/data/inputs/stoplists/empty.txt new file mode 100644 index 0000000..e69de29 diff --git a/data/inputs/stoplists/en.txt b/data/inputs/stoplists/en.txt new file mode 100644 index 0000000..c11bb7e --- /dev/null +++ b/data/inputs/stoplists/en.txt @@ -0,0 +1,58 @@ +of +form +and +Compound +Plural +To +plural +to +Feminine +or +a +the +in +gerund +person +lo +la +le +li +A +ne +imperative +s +with +one +an +for +female +attributive +be +Variant +past +etc +from +that +by +on +gli +singular +something +up +present +participle +The +all +being +as +who +second +tu +is +I +Br +Am +sth +sb +at +it diff --git a/data/inputs/stoplists/es.txt b/data/inputs/stoplists/es.txt new file mode 100644 index 0000000..1102879 --- /dev/null +++ b/data/inputs/stoplists/es.txt @@ -0,0 +1,7 @@ +de +a +la +en +del +y +el diff --git a/data/inputs/stoplists/fr.txt b/data/inputs/stoplists/fr.txt new file mode 100644 index 0000000..08f8b30 --- /dev/null +++ b/data/inputs/stoplists/fr.txt @@ -0,0 +1,10 @@ +de +à +de +en +la +du +le +l +un +a diff --git a/data/inputs/stoplists/it.txt b/data/inputs/stoplists/it.txt new file mode 100644 index 0000000..9b75292 --- /dev/null +++ b/data/inputs/stoplists/it.txt @@ -0,0 +1,32 @@ +di +a +in +da +del +d +per +della +e +1 +il +all +la +al +2 +non +dell +alla +un +che +delle +le +dei +l +i +the +una +of +con +ref +su +ad diff --git a/data/outputs/.gitignore b/data/outputs/.gitignore new file mode 100644 index 0000000..61f6cdb --- /dev/null +++ b/data/outputs/.gitignore @@ -0,0 +1,2 @@ +*quickdic* +*quickdic* diff --git a/googlecode_upload.py b/googlecode_upload.py old mode 100644 new mode 100755 diff --git a/src/com/hughes/android/dictionary/engine/DictionaryBuilderMain.java b/src/com/hughes/android/dictionary/engine/DictionaryBuilderMain.java index 52c937a..3bb66b0 100644 --- a/src/com/hughes/android/dictionary/engine/DictionaryBuilderMain.java +++ b/src/com/hughes/android/dictionary/engine/DictionaryBuilderMain.java @@ -68,9 +68,9 @@ public class DictionaryBuilderMain extends TestCase { // isoToWikiName.keySet().retainAll(Arrays.asList("UK", "HR", "FI")); //isoToWikiName.clear(); - boolean go = true; + boolean go = false; for (final String foreignIso : isoToWikiName.keySet()) { - if (foreignIso.equals("SV")) { + if (foreignIso.equals("HE")) { go = true; } if (!go) { diff --git a/src/com/hughes/android/dictionary/parser/EnWiktionaryXmlParser.java b/src/com/hughes/android/dictionary/parser/EnWiktionaryXmlParser.java index f5018ed..0fa3c92 100644 --- a/src/com/hughes/android/dictionary/parser/EnWiktionaryXmlParser.java +++ b/src/com/hughes/android/dictionary/parser/EnWiktionaryXmlParser.java @@ -742,7 +742,11 @@ public class EnWiktionaryXmlParser { name.contains("plural of")) { String formName = name; if (name.equals("form of")) { - formName = args.remove(0); + formName = remove(args, 0, null); + } + if (formName == null) { + LOG.warning("Missing form name: " + title); + formName = "form of"; } String baseForm = get(args, 1, ""); if ("".equals(baseForm)) { @@ -753,8 +757,12 @@ public class EnWiktionaryXmlParser { } namedArgs.keySet().removeAll(USELESS_WIKI_ARGS); WikiTokenizer.appendFunction(englishBuilder.append("{"), formName, args, namedArgs).append("}"); - otherIndexBuilder.addEntryWithString(indexedEntry, baseForm, EntryTypeName.WIKTIONARY_BASE_FORM_SINGLE, EntryTypeName.WIKTIONARY_BASE_FORM_MULTI); - + if (baseForm != null) { + otherIndexBuilder.addEntryWithString(indexedEntry, baseForm, EntryTypeName.WIKTIONARY_BASE_FORM_SINGLE, EntryTypeName.WIKTIONARY_BASE_FORM_MULTI); + } else { + // null baseForm happens in Danish. + LOG.warning("Null baseform: " + title); + } } else { namedArgs.keySet().removeAll(USELESS_WIKI_ARGS); if (args.size() == 0 && namedArgs.isEmpty()) { diff --git a/todo.txt b/todo.txt index c6a74b1..a2b306b 100644 --- a/todo.txt +++ b/todo.txt @@ -1,8 +1,19 @@ +random word jump +move dict to top of list when downloaded +pronunciation +speech recognition +flashcards +text to speech / audio from wiktionary +font size +synonyms + + + **** UI: ! multi search X version number ! enter should hide keyboard -icons +icons inside dictionaries **** PC: @@ -12,6 +23,7 @@ handle word-info in English. Handle other sections: Pronunciation Synonyms + Usage notes. Chinese: handle "Compounds" section {{count page|[[Wiktionary:Page count]]}}