X-Git-Url: http://gitweb.fperrin.net/?a=blobdiff_plain;f=src%2Fcom%2Fhughes%2Fandroid%2Fdictionary%2Fengine%2FIndexBuilder.java;h=9fe234b7c7723c80ec11c02446a3bbdde84cd3a3;hb=2b238094993e8348bafddf30bcb88ee0bf9ed899;hp=81de5a2901e6d534a4ea63617dda451973613d24;hpb=794c2989d4ff4c456c9aa1066150c6d51a5aae84;p=DictionaryPC.git diff --git a/src/com/hughes/android/dictionary/engine/IndexBuilder.java b/src/com/hughes/android/dictionary/engine/IndexBuilder.java index 81de5a2..9fe234b 100644 --- a/src/com/hughes/android/dictionary/engine/IndexBuilder.java +++ b/src/com/hughes/android/dictionary/engine/IndexBuilder.java @@ -38,30 +38,48 @@ public class IndexBuilder { IndexBuilder(final DictionaryBuilder dictionaryBuilder, final String shortName, final String longName, final Language language, final String normalizerRules, final Set stoplist, final boolean swapPairEntries) { this.dictionaryBuilder = dictionaryBuilder; - index = new Index(dictionaryBuilder.dictionary, shortName, longName, language, normalizerRules, swapPairEntries); - tokenToData = new TreeMap(new NormalizeComparator(index.normalizer(), language.getCollator())); + index = new Index(dictionaryBuilder.dictionary, shortName, longName, language, normalizerRules, swapPairEntries, stoplist); + tokenToData = new TreeMap(index.getSortComparator()); this.stoplist = stoplist; } public void build() { - final Set tokenEntryDatas = new HashSet(); + final Set tokenIndexedEntries = new HashSet(); final List rows = index.rows; + index.mainTokenCount = 0; for (final TokenData tokenData : tokenToData.values()) { - tokenEntryDatas.clear(); + tokenIndexedEntries.clear(); final int indexIndex = index.sortedIndexEntries.size(); final int startRow = rows.size(); - rows.add(new TokenRow(indexIndex, rows.size(), index)); -// System.out.println("Added TokenRow: " + rows.get(rows.size() - 1)); - int numRows = 0; + + TokenRow tokenRow = null; + if (!tokenData.htmlEntries.isEmpty()) { + tokenRow = new TokenRow(indexIndex, rows.size(), index, /* hasMainEntry */ true); + rows.add(tokenRow); + } + +// System.out.println("Added TokenRow: " + rows.get(rows.size() - 1)); + + int numRows = 0; // off by one--doesn't count the token row! // System.out.println("TOKEN: " + tokenData.token); - for (final Map.Entry> typeToEntry : tokenData.typeToEntries.entrySet()) { - for (final IndexedEntry entryData : typeToEntry.getValue()) { - if (entryData.index() == -1) { - entryData.addToDictionary(dictionaryBuilder.dictionary); - assert entryData.index() >= 0; + for (final Map.Entry> typeToIndexedEntries : tokenData.typeToEntries.entrySet()) { + for (final IndexedEntry indexedEntry : typeToIndexedEntries.getValue()) { + if (!indexedEntry.isValid) { + continue; + } + + if (tokenRow == null) { + tokenRow = new TokenRow(indexIndex, rows.size(), index, tokenData.hasMainEntry); + rows.add(tokenRow); + } + + if (indexedEntry.entry.index() == -1) { + indexedEntry.entry.addToDictionary(dictionaryBuilder.dictionary); + assert indexedEntry.entry.index() >= 0; } - if (tokenEntryDatas.add(entryData)) { - rows.add(new PairEntry.Row(entryData.index(), rows.size(), index)); + if (tokenIndexedEntries.add(indexedEntry)) { + rows.add(indexedEntry.entry.CreateRow(rows.size(), index)); + ++indexedEntry.entry.entrySource.numEntries; ++numRows; // System.out.print(" " + typeToEntry.getKey() + ": "); @@ -70,26 +88,38 @@ public class IndexBuilder { } } } - index.sortedIndexEntries.add(new Index.IndexEntry(tokenData.token, index - .normalizer().transliterate(tokenData.token), startRow, numRows)); + + if (tokenRow != null) { + if (tokenRow.hasMainEntry) { + index.mainTokenCount++; + } + + final Index.IndexEntry indexEntry = new Index.IndexEntry(index, tokenData.token, index + .normalizer().transliterate(tokenData.token), startRow, numRows); + indexEntry.htmlEntries.addAll(tokenData.htmlEntries); + index.sortedIndexEntries.add(indexEntry); + } } - final List entriesSortedByRows = new ArrayList(index.sortedIndexEntries); - Collections.sort(entriesSortedByRows, new Comparator() { + final List entriesSortedByNumRows = new ArrayList(index.sortedIndexEntries); + Collections.sort(entriesSortedByNumRows, new Comparator() { @Override public int compare(IndexEntry object1, IndexEntry object2) { return object2.numRows - object1.numRows; }}); System.out.println("Most common tokens:"); - for (int i = 0; i < 50 && i < entriesSortedByRows.size(); ++i) { - System.out.println(" " + entriesSortedByRows.get(i)); + for (int i = 0; i < 50 && i < entriesSortedByNumRows.size(); ++i) { + System.out.println(" " + entriesSortedByNumRows.get(i)); } } - static class TokenData { + public static class TokenData { final String token; final Map> typeToEntries = new EnumMap>(EntryTypeName.class); + boolean hasMainEntry = false; + + public List htmlEntries = new ArrayList(); TokenData(final String token) { assert token.equals(token.trim()); @@ -98,7 +128,7 @@ public class IndexBuilder { } } - private TokenData getOrCreateTokenData(final String token) { + public TokenData getOrCreateTokenData(final String token) { TokenData tokenData = tokenToData.get(token); if (tokenData == null) { tokenData = new TokenData(token); @@ -110,6 +140,9 @@ public class IndexBuilder { private List getOrCreateEntries(final String token, final EntryTypeName entryTypeName) { final TokenData tokenData = getOrCreateTokenData(token); List entries = tokenData.typeToEntries.get(entryTypeName); + if (entryTypeName.mainWord) { + tokenData.hasMainEntry = true; + } if (entries == null) { entries = new ArrayList(); tokenData.typeToEntries.put(entryTypeName, entries); @@ -119,9 +152,14 @@ public class IndexBuilder { public void addEntryWithTokens(final IndexedEntry indexedEntry, final Set tokens, final EntryTypeName entryTypeName) { + if (indexedEntry == null) { + System.out.println("asdfasdf"); + } + assert indexedEntry != null; for (final String token : tokens) { - if (entryTypeName.overridesStopList || !stoplist.contains(token)) - getOrCreateEntries(token, entryTypeName).add(indexedEntry); + if (entryTypeName.overridesStopList || !stoplist.contains(token)) { + getOrCreateEntries(token, entryTypeName).add(indexedEntry); + } } }