package com.hughes.android.dictionary.engine;
import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Comparator;
import java.util.EnumMap;
import java.util.HashSet;
import java.util.List;
import java.util.SortedMap;
import java.util.TreeMap;
-import com.hughes.android.dictionary.Language;
+import com.hughes.android.dictionary.engine.Index.IndexEntry;
+
public class IndexBuilder {
// Builder that owns this index builder; its dictionary is handed to the Index in the constructor.
final DictionaryBuilder dictionaryBuilder;
// The Index that build() populates (rows and sortedIndexEntries).
// The '-' line is the earlier package-private declaration, the '+' line the public one.
- final Index index;
+ public final Index index;
// Token string -> TokenData, kept in the order of the comparator supplied in the constructor.
final SortedMap<String, TokenData> tokenToData;
/**
 * Creates the builder for a single index, along with the Index it will fill.
 *
 * Lines prefixed '-' show the earlier four-argument form; lines prefixed '+'
 * show the form that also takes normalizerRules and swapPairEntries.
 *
 * @param dictionaryBuilder owning builder; its dictionary is passed to the Index
 * @param shortName forwarded unchanged to the Index constructor
 * @param longName forwarded unchanged to the Index constructor
 * @param language forwarded to the Index; also supplies the collator used to order tokens
 * @param normalizerRules forwarded unchanged to the Index constructor ('+' form only)
 * @param swapPairEntries forwarded unchanged to the Index constructor ('+' form only)
 */
- @SuppressWarnings("unchecked")
- IndexBuilder(final DictionaryBuilder dictionaryBuilder, final String shortName, final String longName, final Language language) {
+ IndexBuilder(final DictionaryBuilder dictionaryBuilder, final String shortName, final String longName, final Language language, final String normalizerRules, final boolean swapPairEntries) {
this.dictionaryBuilder = dictionaryBuilder;
// Token ordering: the '-' form sorts with language.getSortCollator() directly; the '+' form
// sorts with a NormalizeComparator built from the new index's normalizer and language.collator.
- index = new Index(dictionaryBuilder.dictionary, shortName, longName, language);
- tokenToData = new TreeMap<String, TokenData>(language.getSortCollator());
+ index = new Index(dictionaryBuilder.dictionary, shortName, longName, language, normalizerRules, swapPairEntries);
+ tokenToData = new TreeMap<String, TokenData>(new NormalizeComparator(index.normalizer, language.collator));
}
/**
 * Walks tokenToData in its sorted order and, for every token, appends one TokenRow
 * followed by one PairEntry.Row per distinct EntryData to index.rows, and records a
 * matching IndexEntry in index.sortedIndexEntries.
 *
 * In the '+' form the IndexEntry also carries the number of PairEntry rows for the
 * token, and a list of the (up to) 50 tokens with the most rows is printed to
 * System.out once all tokens are processed. The '-' form instead printed a
 * per-token count line.
 */
public void build() {
final List<RowBase> rows = index.rows;
for (final TokenData tokenData : tokenToData.values()) {
// tokenEntryDatas is declared outside the lines visible here; since the result of its
// add() gates row creation below, it presumably de-duplicates entries per token -- confirm.
tokenEntryDatas.clear();
// Earlier form: the IndexEntry was added up front with only the start row.
- final int indexRow = index.sortedIndexEntries.size();
- index.sortedIndexEntries.add(new Index.IndexEntry(tokenData.token, rows.size()));
- rows.add(new TokenRow(indexRow, rows.size(), index));
- int count = 0;
- for (final List<EntryData> entryDatas : tokenData.typeToEntries.values()) {
- for (final EntryData entryData : entryDatas) {
// New form: indexIndex is the slot the IndexEntry will occupy once it is added after the
// inner loops; startRow is the position of this token's TokenRow, captured before the add.
+ final int indexIndex = index.sortedIndexEntries.size();
+ final int startRow = rows.size();
+ rows.add(new TokenRow(indexIndex, rows.size(), index));
+// System.out.println("Added TokenRow: " + rows.get(rows.size() - 1));
// Counts only the PairEntry rows appended below; the TokenRow itself is not included.
+ int numRows = 0;
+// System.out.println("TOKEN: " + tokenData.token);
+ for (final Map.Entry<EntryTypeName, List<EntryData>> typeToEntry : tokenData.typeToEntries.entrySet()) {
+ for (final EntryData entryData : typeToEntry.getValue()) {
// Only the first occurrence of an EntryData for this token produces a row.
if (tokenEntryDatas.add(entryData)) {
rows.add(new PairEntry.Row(entryData.index(), rows.size(), index));
- ++count;
+ ++numRows;
+
+// System.out.print(" " + typeToEntry.getKey() + ": ");
+ // rows.get(rows.size() - 1).print(System.out);
+// System.out.println();
}
}
}
- System.out.println(count + " ENTRIES FOR TOKEN " + tokenData.token);
// Added after the rows so that the final row count for the token is known.
+ index.sortedIndexEntries.add(new Index.IndexEntry(tokenData.token, startRow, numRows));
+ }
+
// Diagnostic summary: sort a copy (index.sortedIndexEntries itself keeps its order).
+ final List<IndexEntry> sortedEntries = new ArrayList<IndexEntry>(index.sortedIndexEntries);
+ Collections.sort(sortedEntries, new Comparator<IndexEntry>() {
+ @Override
+ public int compare(IndexEntry object1, IndexEntry object2) {
// Descending by numRows. The counts produced above start at 0 and only increase,
// so the subtraction cannot overflow for entries built by this method.
+ return object2.numRows - object1.numRows;
+ }});
+ System.out.println("Most common tokens:");
// Prints at most 50 entries, fewer if the index has fewer tokens.
+ for (int i = 0; i < 50 && i < sortedEntries.size(); ++i) {
+ System.out.println(" " + sortedEntries.get(i));
}
}
}
return entries;
}
+
/**
 * Registers one entry under each of the given tokens for a single entry type.
 *
 * For every token, the collection returned by getOrCreateEntries(token, entryTypeName)
 * receives entryData. getOrCreateEntries is defined outside the lines visible here;
 * presumably it creates the collection on first use -- confirm against its definition.
 *
 * @param entryData the entry to register
 * @param tokens the tokens under which the entry should be registered; iterated once
 * @param entryTypeName the entry type passed to getOrCreateEntries for every token
 */
+ public void addEntryWithTokens(final EntryData entryData, final Set<String> tokens,
+ final EntryTypeName entryTypeName) {
+ for (final String token : tokens) {
+ getOrCreateEntries(token, entryTypeName).add(entryData);
+ }
+ }
}