From: Thad Hughes Date: Sun, 16 Oct 2011 01:57:30 +0000 (-0700) Subject: go X-Git-Url: http://gitweb.fperrin.net/?p=DictionaryPC.git;a=commitdiff_plain;h=8e3f4c62f8056258237c86998728072d202455b1 go --- diff --git a/src/com/hughes/android/dictionary/engine/EntryData.java b/src/com/hughes/android/dictionary/engine/EntryData.java deleted file mode 100644 index 19521f2..0000000 --- a/src/com/hughes/android/dictionary/engine/EntryData.java +++ /dev/null @@ -1,14 +0,0 @@ -/** - * - */ -package com.hughes.android.dictionary.engine; - -import com.hughes.util.IndexedObject; - -public class EntryData extends IndexedObject { - public EntryData(final int index, final Entry entry) { - super(index); - this.entry = entry; - } - Entry entry; -} \ No newline at end of file diff --git a/src/com/hughes/android/dictionary/engine/IndexBuilder.java b/src/com/hughes/android/dictionary/engine/IndexBuilder.java index 4f64fa2..172be90 100644 --- a/src/com/hughes/android/dictionary/engine/IndexBuilder.java +++ b/src/com/hughes/android/dictionary/engine/IndexBuilder.java @@ -28,7 +28,7 @@ public class IndexBuilder { } public void build() { - final Set tokenEntryDatas = new HashSet(); + final Set tokenEntryDatas = new HashSet(); final List rows = index.rows; for (final TokenData tokenData : tokenToData.values()) { tokenEntryDatas.clear(); @@ -38,8 +38,8 @@ public class IndexBuilder { // System.out.println("Added TokenRow: " + rows.get(rows.size() - 1)); int numRows = 0; // System.out.println("TOKEN: " + tokenData.token); - for (final Map.Entry> typeToEntry : tokenData.typeToEntries.entrySet()) { - for (final EntryData entryData : typeToEntry.getValue()) { + for (final Map.Entry> typeToEntry : tokenData.typeToEntries.entrySet()) { + for (final IndexedEntry entryData : typeToEntry.getValue()) { if (tokenEntryDatas.add(entryData)) { rows.add(new PairEntry.Row(entryData.index(), rows.size(), index)); ++numRows; @@ -69,7 +69,7 @@ public class IndexBuilder { static class TokenData { final String token; - final Map> typeToEntries = new EnumMap>(EntryTypeName.class); + final Map> typeToEntries = new EnumMap>(EntryTypeName.class); TokenData(final String token) { assert token.equals(token.trim()); @@ -87,17 +87,17 @@ public class IndexBuilder { return tokenData; } - public List getOrCreateEntries(final String token, final EntryTypeName entryTypeName) { + public List getOrCreateEntries(final String token, final EntryTypeName entryTypeName) { final TokenData tokenData = getOrCreateTokenData(token); - List entries = tokenData.typeToEntries.get(entryTypeName); + List entries = tokenData.typeToEntries.get(entryTypeName); if (entries == null) { - entries = new ArrayList(); + entries = new ArrayList(); tokenData.typeToEntries.put(entryTypeName, entries); } return entries; } - public void addEntryWithTokens(final EntryData entryData, final Set tokens, + public void addEntryWithTokens(final IndexedEntry entryData, final Set tokens, final EntryTypeName entryTypeName) { for (final String token : tokens) { getOrCreateEntries(token, entryTypeName).add(entryData); diff --git a/src/com/hughes/android/dictionary/parser/DictFileParser.java b/src/com/hughes/android/dictionary/parser/DictFileParser.java index bb5d47d..909a6b7 100644 --- a/src/com/hughes/android/dictionary/parser/DictFileParser.java +++ b/src/com/hughes/android/dictionary/parser/DictFileParser.java @@ -15,7 +15,7 @@ import java.util.regex.Matcher; import java.util.regex.Pattern; import com.hughes.android.dictionary.engine.DictionaryBuilder; -import com.hughes.android.dictionary.engine.EntryData; +import com.hughes.android.dictionary.engine.IndexedEntry; import com.hughes.android.dictionary.engine.EntryTypeName; import com.hughes.android.dictionary.engine.IndexBuilder; import com.hughes.android.dictionary.engine.Language; @@ -122,8 +122,7 @@ public class DictFileParser { subfields[1][i] = subfields[1][i].trim(); pairEntry.pairs.add(new Pair(subfields[0][i], subfields[1][i])); } - final EntryData entryData = new EntryData(dictBuilder.dictionary.pairEntries.size(), pairEntry); - dictBuilder.dictionary.pairEntries.add(pairEntry); + final IndexedEntry entryData = new IndexedEntry(pairEntry); for (int l = 0; l < 2; ++l) { // alreadyDone.clear(); @@ -142,7 +141,7 @@ public class DictFileParser { } private void parseFieldGeneric(final IndexBuilder indexBuilder, String field, - final EntryData entryData, final int subfieldIdx, final int numSubFields) { + final IndexedEntry entryData, final int subfieldIdx, final int numSubFields) { // remove bracketed and parenthesized stuff. final StringBuilder bracketed = new StringBuilder(); final StringBuilder parenthesized = new StringBuilder(); @@ -198,7 +197,7 @@ public class DictFileParser { for (String token : tokens) { token = TRIM_PUNC.matcher(token).replaceAll(""); if (/*!alreadyDone.contains(token) && */token.length() > 0) { - final List entries = indexBuilder.getOrCreateEntries(token, entryTypeName); + final List entries = indexBuilder.getOrCreateEntries(token, entryTypeName); entries.add(entryData); // alreadyDone.add(token); @@ -207,7 +206,7 @@ public class DictFileParser { final String[] dashed = token.split("-"); for (final String dashedToken : dashed) { if (/*!alreadyDone.contains(dashedToken) && */dashedToken.length() > 0) { - final List dashEntries = indexBuilder.getOrCreateEntries(dashedToken, EntryTypeName.PART_OF_HYPHENATED); + final List dashEntries = indexBuilder.getOrCreateEntries(dashedToken, EntryTypeName.PART_OF_HYPHENATED); dashEntries.add(entryData); } } @@ -221,7 +220,7 @@ public class DictFileParser { for (final String token : bracketedTokens) { assert !token.contains("-"); if (/*!alreadyDone.contains(token) && */token.length() > 0) { - final List entries = indexBuilder.getOrCreateEntries(token, EntryTypeName.BRACKETED); + final List entries = indexBuilder.getOrCreateEntries(token, EntryTypeName.BRACKETED); entries.add(entryData); } } @@ -231,7 +230,7 @@ public class DictFileParser { for (final String token : parenTokens) { assert !token.contains("-"); if (/*!alreadyDone.contains(token) && */token.length() > 0) { - final List entries = indexBuilder.getOrCreateEntries(token, EntryTypeName.PARENTHESIZED); + final List entries = indexBuilder.getOrCreateEntries(token, EntryTypeName.PARENTHESIZED); entries.add(entryData); } } @@ -239,7 +238,7 @@ public class DictFileParser { } private String parseField_DE(final IndexBuilder indexBuilder, String field, - final EntryData entryData, final int subfieldIdx) { + final IndexedEntry entryData, final int subfieldIdx) { // final Matcher matcher = DE_NOUN.matcher(field); // while (matcher.find()) { @@ -259,7 +258,7 @@ public class DictFileParser { } private String parseField_EN(final IndexBuilder indexBuilder, String field, - final EntryData entryData, final int subfieldIdx) { + final IndexedEntry entryData, final int subfieldIdx) { if (field.startsWith("to ")) { field = field.substring(3); } diff --git a/src/com/hughes/android/dictionary/parser/EnWiktionaryXmlParser.java b/src/com/hughes/android/dictionary/parser/EnWiktionaryXmlParser.java index cb9f3f5..cfdf0f1 100644 --- a/src/com/hughes/android/dictionary/parser/EnWiktionaryXmlParser.java +++ b/src/com/hughes/android/dictionary/parser/EnWiktionaryXmlParser.java @@ -190,8 +190,6 @@ public class EnWiktionaryXmlParser { String rest = line.substring(colonIndex + 1); final StringBuilder lineText = new StringBuilder(); - final - boolean ttbc = false; WikiFunction wikiFunction; while ((wikiFunction = WikiFunction.getFunction(line)) != null) { @@ -207,7 +205,7 @@ public class EnWiktionaryXmlParser { final String transliteration = wikiFunction.getNamedArg("tr"); } } else if (wikiFunction.name.equals("qualifier")) { - qualifier = wikiFunction.getArg(0); + String qualifier = wikiFunction.getArg(0); } else if (encodings.contains(wikiFunction.name)) { rest = wikiFunction.replaceWith(rest, wikiFunction.getArg(0)); wikiFunction = null; diff --git a/src/com/hughes/android/dictionary/parser/WikiFunction.java b/src/com/hughes/android/dictionary/parser/WikiFunction.java index e52a470..770d1ef 100644 --- a/src/com/hughes/android/dictionary/parser/WikiFunction.java +++ b/src/com/hughes/android/dictionary/parser/WikiFunction.java @@ -7,8 +7,6 @@ import java.util.Map; import java.util.regex.Matcher; import java.util.regex.Pattern; -import com.sun.org.apache.bcel.internal.generic.NamedAndTyped; - public class WikiFunction { public int start; diff --git a/src/com/hughes/android/dictionary/parser/WikiWord.java b/src/com/hughes/android/dictionary/parser/WikiWord.java deleted file mode 100644 index 66adeed..0000000 --- a/src/com/hughes/android/dictionary/parser/WikiWord.java +++ /dev/null @@ -1,339 +0,0 @@ -package com.hughes.android.dictionary.parser; - -import java.util.ArrayList; -import java.util.LinkedHashMap; -import java.util.LinkedHashSet; -import java.util.List; -import java.util.Map; -import java.util.Set; -import java.util.regex.Pattern; - -import com.hughes.android.dictionary.engine.DictionaryBuilder; -import com.hughes.android.dictionary.engine.EntryData; -import com.hughes.android.dictionary.engine.EntryTypeName; -import com.hughes.android.dictionary.engine.IndexBuilder; -import com.hughes.android.dictionary.engine.PairEntry; -import com.hughes.android.dictionary.engine.PairEntry.Pair; -import com.hughes.util.ListUtil; - -public class WikiWord { - final int depth; - - final String title; - String language; - - int index; - - final Map accentToPronunciation = new LinkedHashMap(); - StringBuilder currentPronunciation = null; - - final List partsOfSpeech = new ArrayList(); - - public WikiWord(final String title, int depth) { - this.title = title.intern(); - this.depth = depth; - } - - static class PartOfSpeech { - final int depth; - final String name; - - final List meanings = new ArrayList(); - - final List translationSenses = new ArrayList(); - - final List formOfs = new ArrayList(); - - public PartOfSpeech(final int depth, String name) { - this.depth = depth; - this.name = name.intern(); - } - - public Meaning newMeaning() { - final Meaning meaning = new Meaning(); - meanings.add(meaning); - return meaning; - } - - public Meaning lastMeaning() { - return meanings.isEmpty() ? newMeaning() : ListUtil.getLast(meanings); - } - } - - static class TranslationSense { - String sense; - List> translations = new ArrayList>(); - { - translations.add(new ArrayList()); - translations.add(new ArrayList()); - } - } - - static class Translation { - String language; - String text; - - public Translation(final String language, final String text) { - this.language = language; - this.text = text; - } - - @Override - public String toString() { - return language + ": " + text; - } - } - - static class FormOf { - final String grammarForm; - final String target; - - public FormOf(final String grammarForm, final String token) { - this.grammarForm = grammarForm; - this.target = token; - } - } - - static class Meaning { - String meaning; - final List examples = new ArrayList(); - - public Example newExample() { - final Example example = new Example(); - this.examples.add(example); - return example; - } - - public Example lastExample() { - return examples.isEmpty() ? newExample() : ListUtil.getLast(examples); - } - } - - static class Example { - String source; - final StringBuilder example = new StringBuilder(); - final StringBuilder exampleInEnglish = new StringBuilder(); - } - - // ------------------------------------------------------------------------- - - void wikiWordToQuickDic(final DictionaryBuilder dictBuilder, final int enIndexBuilder) { - //System.out.println("\n" + title + ", " + language + ", pron=" + accentToPronunciation); - if (partsOfSpeech.isEmpty() && title.indexOf(":") == -1 && !language.equals("Translingual")) { - System.err.println("Word with no POS: " + title); - } - for (final WikiWord.PartOfSpeech partOfSpeech : partsOfSpeech) { - partOfSpeechToQuickDic(dictBuilder, enIndexBuilder, partOfSpeech); - } // PartOfSpeech - - // Pronunciation. - if (index != -1) { - final PairEntry pronEntry = new PairEntry(); - for (final Map.Entry accentToPron : accentToPronunciation.entrySet()) { - String accent = accentToPron.getKey(); - if (accent.length() > 0) { - accent = accent + ": "; - } - pronEntry.pairs.add(new Pair(accent + accentToPron.getValue(), "", index != 0)); - } - if (pronEntry.pairs.size() > 0) { - final EntryData entryData = new EntryData(dictBuilder.dictionary.pairEntries.size(), pronEntry); - dictBuilder.dictionary.pairEntries.add(pronEntry); - final Set tokens = DictFileParser.tokenize(title, DictFileParser.NON_CHAR); - dictBuilder.indexBuilders.get(index).addEntryWithTokens(entryData, tokens, EntryTypeName.WIKTIONARY_PRONUNCIATION); - } - } - } - - - static final Pattern templateName = Pattern.compile("\\{[^,]*,"); - private void partOfSpeechToQuickDic(final DictionaryBuilder dictBuilder, - final int enIndexBuilder, final WikiWord.PartOfSpeech partOfSpeech) { - //System.out.println(" pos: " + partOfSpeech.name); - - for (final WikiWord.Meaning meaning : partOfSpeech.meanings) { - //System.out.println(" meaning: " + meaning.meaning); - for (final WikiWord.Example example : meaning.examples) { - if (example.example.length() > 0) { - //System.out.println(" example: " + example.example); - } - if (example.exampleInEnglish.length() > 0) { - //System.out.println(" exampleInEnglish: " + example.exampleInEnglish); - } - } - } - - if (index != -1) { - final boolean formOfSwap = index != 0; - for (final FormOf formOf : partOfSpeech.formOfs) { - final Pair pair = new Pair(title + ": " + formOf.grammarForm + ": " + formOf.target, "", formOfSwap); - final PairEntry pairEntry = new PairEntry(); - pairEntry.pairs.add(pair); - final EntryData entryData = new EntryData(dictBuilder.dictionary.pairEntries.size(), pairEntry); - dictBuilder.dictionary.pairEntries.add(pairEntry); - - // File under title token. - final Set tokens = DictFileParser.tokenize(formOf.target, DictFileParser.NON_CHAR); - dictBuilder.indexBuilders.get(index).addEntryWithTokens(entryData, tokens, EntryTypeName.WIKTIONARY_FORM_OF); - } - } - - - if (enIndexBuilder != -1 && index != -1 && enIndexBuilder != index) { - final String entryBase = title + " (" + partOfSpeech.name.toLowerCase() + ")"; - final boolean swap = enIndexBuilder == 1; - - // Meanings. - for (final Meaning meaning : partOfSpeech.meanings) { - final PairEntry pairEntry = new PairEntry(); - final List pairs = pairEntry.pairs; - - final List> exampleTokens = new ArrayList>(); - exampleTokens.add(new LinkedHashSet()); - exampleTokens.add(new LinkedHashSet()); - - if (meaning.meaning != null && meaning.meaning.length() > 0) { - final Pair meaningPair = new Pair(meaning.meaning, entryBase, swap); - pairs.add(meaningPair); - } else { - System.err.println("Empty meaning: " + title + ", " + language + ", " + partOfSpeech.name); - } - - // Examples - for (final Example example : meaning.examples) { - final int dashIndex = example.example.indexOf("—"); - if (example.exampleInEnglish.length() == 0 && dashIndex != -1) { - System.out.println("Splitting example: title=" + title + ", "+ example.example); - example.exampleInEnglish.append(example.example.substring(dashIndex + 1).trim()); - example.example.delete(dashIndex, example.example.length()); - } - - if (example.example.length() > 0 && example.exampleInEnglish.length() > 0) { - final Pair pair = new Pair(example.exampleInEnglish.toString(), example.example.toString(), swap); - pairs.add(pair); - - for (int i = 0; i < 2; ++i) { - exampleTokens.get(i).addAll(DictFileParser.tokenize(pair.get(i), DictFileParser.NON_CHAR)); - } - } - } - - // Create EntryData with the PairEntry. - final EntryData entryData = new EntryData(dictBuilder.dictionary.pairEntries.size(), pairEntry); - dictBuilder.dictionary.pairEntries.add(pairEntry); - - // File under title token. - final Set titleTokens = DictFileParser.tokenize(title, DictFileParser.NON_CHAR); - dictBuilder.indexBuilders.get(index).addEntryWithTokens(entryData, titleTokens, titleTokens.size() == 1 ? EntryTypeName.WIKTIONARY_TITLE_ONE_WORD : EntryTypeName.WIKTIONARY_TITLE_MULTI_WORD); - - // File under the meaning tokens (English): - if (meaning.meaning != null) { - // If the meaning contains any templates, strip out the template name - // so we don't index it. - final String meaningToIndex = templateName.matcher(meaning.meaning).replaceAll(""); - final Set meaningTokens = DictFileParser.tokenize(meaningToIndex, DictFileParser.NON_CHAR); - dictBuilder.indexBuilders.get(enIndexBuilder).addEntryWithTokens(entryData, meaningTokens, meaningTokens.size() == 1 ? EntryTypeName.WIKTIONARY_MEANING_ONE_WORD : EntryTypeName.WIKTIONARY_MEANING_MULTI_WORD); - } - - // File under other tokens that we saw. - for (int i = 0; i < 2; ++i) { - dictBuilder.indexBuilders.get(i).addEntryWithTokens(entryData, exampleTokens.get(i), EntryTypeName.WIKTIONARY_EXAMPLE_OTHER_WORDS); - } - - - } // Meanings. - - } - - translationSensesToQuickDic(dictBuilder, enIndexBuilder, partOfSpeech); - } - - - private void translationSensesToQuickDic(final DictionaryBuilder dictBuilder, - final int enIndexBuilder, final WikiWord.PartOfSpeech partOfSpeech) { - if (!partOfSpeech.translationSenses.isEmpty()) { - if (!language.equals("English")) { - System.err.println("Translation sections not in English."); - } - - final String englishBase = title + " (" + partOfSpeech.name.toLowerCase() + "%s)"; - - for (final TranslationSense translationSense : partOfSpeech.translationSenses) { - //System.out.println(" sense: " + translationSense.sense); - if (translationSense.sense == null) { - //System.err.println(" null sense: " + title); - } - String englishSense = String.format(englishBase, translationSense.sense != null ? (": " + translationSense.sense) : ""); - - final StringBuilder[] sideBuilders = new StringBuilder[2]; - final List>> sideTokens = new ArrayList>>(); - for (int i = 0; i < 2; ++i) { - sideBuilders[i] = new StringBuilder(); - sideTokens.add(new LinkedHashMap>()); - } - - if (enIndexBuilder != -1) { - sideBuilders[enIndexBuilder].append(englishSense); - addTokens(title, sideTokens.get(enIndexBuilder), EntryTypeName.WIKTIONARY_TITLE_ONE_WORD); - } - - // Get the entries from the translation section. - for (int i = 0; i < 2; ++i) { - //System.out.println(" lang: " + i); - for (final Translation translation : translationSense.translations.get(i)) { - //System.out.println(" translation: " + translation); - sideBuilders[i].append(sideBuilders[i].length() > 0 ? "\n" : ""); - if (translationSense.translations.get(i).size() > 1) { - sideBuilders[i].append(translation.language).append(": "); - } - sideBuilders[i].append(translation.text); - - // TODO: Don't index {m}, {f} - // TODO: Don't even show: (1), (1-2), etc. - addTokens(translation.text, sideTokens.get(i), EntryTypeName.WIKTIONARY_TRANSLATION_ONE_WORD); - } - } - - // Construct the Translations-based QuickDic entry for this TranslationSense. - if (sideBuilders[0].length() > 0 && sideBuilders[1].length() > 0) { - final Pair pair = new Pair(sideBuilders[0].toString(), sideBuilders[1].toString()); - final PairEntry pairEntry = new PairEntry(); - pairEntry.pairs.add(pair); - final EntryData entryData = new EntryData(dictBuilder.dictionary.pairEntries.size(), pairEntry); - dictBuilder.dictionary.pairEntries.add(pairEntry); - - // Add the EntryData to the indices under the correct tokens. - for (int i = 0; i < 2; ++i) { - final IndexBuilder indexBuilder = dictBuilder.indexBuilders.get(i); - for (final Map.Entry> entry : sideTokens.get(i).entrySet()) { - for (final String token : entry.getValue()) { - final List entries = indexBuilder.getOrCreateEntries(token, entry.getKey()); - entries.add(entryData); - } - } - - } - - } - } // Senses - } // Translations - } - - - static void addTokens(final String text, final Map> map, - EntryTypeName entryTypeName) { - final Set tokens = DictFileParser.tokenize(text, DictFileParser.NON_CHAR); - if (tokens.size() > 1 && entryTypeName == EntryTypeName.WIKTIONARY_TITLE_ONE_WORD) { - entryTypeName = EntryTypeName.WIKTIONARY_TITLE_MULTI_WORD; - } - List tokenList = map.get(entryTypeName); - if (tokenList == null) { - tokenList = new ArrayList(); - map.put(entryTypeName, tokenList); - } - tokenList.addAll(tokens); - } - - - -}