X-Git-Url: http://gitweb.fperrin.net/?a=blobdiff_plain;ds=sidebyside;f=src%2Fcom%2Fhughes%2Fandroid%2Fdictionary%2Fengine%2FIndexBuilder.java;h=1140b64ac7579a8067937095c9331949d99f8e8e;hb=372a902551f43fc66b2a5e1c378392c84514c4d2;hp=87ea308b363b60fdb2a9f00786c92b66739aeb0d;hpb=a8052a74747df9244c098041dc82c745f64d51c6;p=DictionaryPC.git diff --git a/src/com/hughes/android/dictionary/engine/IndexBuilder.java b/src/com/hughes/android/dictionary/engine/IndexBuilder.java index 87ea308..1140b64 100644 --- a/src/com/hughes/android/dictionary/engine/IndexBuilder.java +++ b/src/com/hughes/android/dictionary/engine/IndexBuilder.java @@ -32,23 +32,31 @@ public class IndexBuilder { final DictionaryBuilder dictionaryBuilder; public final Index index; + final Set stoplist; final SortedMap tokenToData; - IndexBuilder(final DictionaryBuilder dictionaryBuilder, final String shortName, final String longName, final Language language, final String normalizerRules, final boolean swapPairEntries) { + IndexBuilder(final DictionaryBuilder dictionaryBuilder, final String shortName, final String longName, final Language language, final String normalizerRules, final Set stoplist, final boolean swapPairEntries) { this.dictionaryBuilder = dictionaryBuilder; index = new Index(dictionaryBuilder.dictionary, shortName, longName, language, normalizerRules, swapPairEntries); tokenToData = new TreeMap(new NormalizeComparator(index.normalizer(), language.getCollator())); + this.stoplist = stoplist; } public void build() { final Set tokenEntryDatas = new HashSet(); final List rows = index.rows; + index.mainTokenCount = 0; for (final TokenData tokenData : tokenToData.values()) { tokenEntryDatas.clear(); final int indexIndex = index.sortedIndexEntries.size(); final int startRow = rows.size(); - rows.add(new TokenRow(indexIndex, rows.size(), index)); + + final TokenRow tokenRow = new TokenRow(indexIndex, rows.size(), index, tokenData.hasMainEntry); + rows.add(tokenRow); + if (tokenRow.hasMainEntry) { + index.mainTokenCount++; + } // System.out.println("Added TokenRow: " + rows.get(rows.size() - 1)); int numRows = 0; // System.out.println("TOKEN: " + tokenData.token); @@ -60,6 +68,7 @@ public class IndexBuilder { } if (tokenEntryDatas.add(entryData)) { rows.add(new PairEntry.Row(entryData.index(), rows.size(), index)); + ++entryData.entry.entrySource.numEntries; ++numRows; // System.out.print(" " + typeToEntry.getKey() + ": "); @@ -72,15 +81,15 @@ public class IndexBuilder { .normalizer().transliterate(tokenData.token), startRow, numRows)); } - final List sortedEntries = new ArrayList(index.sortedIndexEntries); - Collections.sort(sortedEntries, new Comparator() { + final List entriesSortedByNumRows = new ArrayList(index.sortedIndexEntries); + Collections.sort(entriesSortedByNumRows, new Comparator() { @Override public int compare(IndexEntry object1, IndexEntry object2) { return object2.numRows - object1.numRows; }}); System.out.println("Most common tokens:"); - for (int i = 0; i < 50 && i < sortedEntries.size(); ++i) { - System.out.println(" " + sortedEntries.get(i)); + for (int i = 0; i < 50 && i < entriesSortedByNumRows.size(); ++i) { + System.out.println(" " + entriesSortedByNumRows.get(i)); } } @@ -88,6 +97,7 @@ public class IndexBuilder { final String token; final Map> typeToEntries = new EnumMap>(EntryTypeName.class); + boolean hasMainEntry = false; TokenData(final String token) { assert token.equals(token.trim()); @@ -96,7 +106,7 @@ public class IndexBuilder { } } - public TokenData getOrCreateTokenData(final String token) { + private TokenData getOrCreateTokenData(final String token) { TokenData tokenData = tokenToData.get(token); if (tokenData == null) { tokenData = new TokenData(token); @@ -105,9 +115,12 @@ public class IndexBuilder { return tokenData; } - public List getOrCreateEntries(final String token, final EntryTypeName entryTypeName) { + private List getOrCreateEntries(final String token, final EntryTypeName entryTypeName) { final TokenData tokenData = getOrCreateTokenData(token); List entries = tokenData.typeToEntries.get(entryTypeName); + if (entryTypeName.overridesStopList) { + tokenData.hasMainEntry = true; + } if (entries == null) { entries = new ArrayList(); tokenData.typeToEntries.put(entryTypeName, entries); @@ -117,19 +130,26 @@ public class IndexBuilder { public void addEntryWithTokens(final IndexedEntry indexedEntry, final Set tokens, final EntryTypeName entryTypeName) { + if (indexedEntry == null) { + System.out.println("asdfasdf"); + } + assert indexedEntry != null; for (final String token : tokens) { - getOrCreateEntries(token, entryTypeName).add(indexedEntry); + if (entryTypeName.overridesStopList || !stoplist.contains(token)) { + getOrCreateEntries(token, entryTypeName).add(indexedEntry); + } } } public void addEntryWithString(final IndexedEntry indexedEntry, final String untokenizedString, - final EntryTypeName singleTokenEntryTypeName, final EntryTypeName multiTokenEntryTypeName) { + final EntryTypeName entryTypeName) { final Set tokens = DictFileParser.tokenize(untokenizedString, DictFileParser.NON_CHAR); - addEntryWithTokens(indexedEntry, tokens, tokens.size() == 1 ? singleTokenEntryTypeName : multiTokenEntryTypeName); + addEntryWithTokens(indexedEntry, tokens, tokens.size() == 1 ? entryTypeName.singleWordInstance : entryTypeName); } - public void addEntryWithString(final IndexedEntry indexedEntry, final String untokenizedString, + public void addEntryWithStringNoSingle(final IndexedEntry indexedEntry, final String untokenizedString, final EntryTypeName entryTypeName) { - addEntryWithString(indexedEntry, untokenizedString, entryTypeName, entryTypeName); + final Set tokens = DictFileParser.tokenize(untokenizedString, DictFileParser.NON_CHAR); + addEntryWithTokens(indexedEntry, tokens, entryTypeName); } }