1 package com.hughes.android.dictionary;
\r
3 import java.util.ArrayList;
\r
4 import java.util.Arrays;
\r
5 import java.util.Collections;
\r
6 import java.util.Comparator;
\r
7 import java.util.LinkedHashSet;
\r
8 import java.util.List;
\r
9 import java.util.Set;
\r
10 import java.util.SortedMap;
\r
11 import java.util.TreeMap;
\r
12 import java.util.regex.Pattern;
\r
14 import com.hughes.android.dictionary.Dictionary.IndexEntry;
\r
15 import com.hughes.android.dictionary.Dictionary.Row;
\r
17 public class DictionaryBuilder {
\r
// Pre-compiled pattern matching one or more whitespace characters; createIndex uses it to split entry text into tokens.
19 static final Pattern WHITESPACE = Pattern.compile("\\s+");
\r
/**
 * Builds the token index of {@code dict} for the language slot {@code lang}.
 *
 * Steps visible in this listing:
 * 1. Each entry's indexable text is trimmed, split on whitespace and
 *    de-duplicated; every distinct token is mapped to the list of entry
 *    positions whose text contains it.
 * 2. Each token's entry list is sorted by the entry's distinct-token count,
 *    fewest first.
 * 3. For every token, in the iteration order of the sorted map, an
 *    IndexEntry and Row objects are created against dict.languages[lang].
 *
 * NOTE(review): several lines of this method are missing from this listing
 * (closing braces, and whatever follows the creation of tokenRow/entryRow),
 * so the comments below describe only the statements that are shown.
 *
 * @param dict dictionary whose {@code entries} list is read and whose
 *             {@code languages[lang]} rows / sortedIndex lists are used
 * @param lang passed to {@code getIndexableText} and used as the index into
 *             {@code dict.languages}
 */
21 public static void createIndex(final Dictionary dict, final byte lang) {
\r
// token -> accumulated data for that token; key order comes from the comparator supplied by EntryFactory.
23 final SortedMap<String, TokenData> sortedIndex = new TreeMap<String, TokenData>(
\r
24 EntryFactory.entryFactory.getEntryComparator());
\r
// One slot per entry, addressed by the entry's position in dict.entries.
25 final EntryData entryDatas[] = new EntryData[dict.entries.size()];
\r
27 for (int e = 0; e < dict.entries.size(); ++e) {
\r
28 final Entry entry = dict.entries.get(e);
\r
29 final String text = entry.getIndexableText(lang);
\r
// LinkedHashSet drops repeated tokens while keeping first-seen order.
// NOTE(review): for blank text, split() returns a single empty string, so such an entry is indexed under the "" token - confirm this is intended.
30 final Set<String> tokens = new LinkedHashSet<String>(Arrays
\r
31 .asList(WHITESPACE.split(text.trim())));
\r
// Remember how many distinct tokens this entry has; used as the sort key below.
32 entryDatas[e] = new EntryData(tokens.size());
\r
33 for (final String token : tokens) {
\r
// Get-or-create the TokenData for this token.
34 TokenData tokenData = sortedIndex.get(token);
\r
35 if (tokenData == null) {
\r
36 tokenData = new TokenData(token);
\r
37 sortedIndex.put(token, tokenData);
\r
// Record that entry e contains this token (e is autoboxed to Integer).
39 tokenData.entries.add(e);
\r
// Orders entry positions by ascending distinct-token count of the entry they refer to.
45 final Comparator<Integer> entryComparator = new Comparator<Integer>() {
\r
47 public int compare(Integer o1, Integer o2) {
\r
48 return entryDatas[o1].numTokens < entryDatas[o2].numTokens ? -1
\r
49 : entryDatas[o1].numTokens == entryDatas[o2].numTokens ? 0 : 1;
\r
// Collections.sort is stable, so entries with equal token counts keep their insertion (entry position) order.
53 for (final TokenData tokenData : sortedIndex.values()) {
\r
54 Collections.sort(tokenData.entries, entryComparator);
\r
57 // Put it all together: walk the tokens in sorted-map order and create the index entries and rows for this language.
\r
59 final List<Row> rows = dict.languages[lang].rows;
\r
60 final List<IndexEntry> indexEntries = dict.languages[lang].sortedIndex;
\r
// NOTE(review): the statement that advances tokenDataIndex is not visible in this listing.
62 int tokenDataIndex = 0;
\r
63 for (final TokenData tokenData : sortedIndex.values()) {
\r
// The index entry records the size of rows at the moment this token is reached.
64 final int startRow = rows.size();
\r
65 final IndexEntry indexEntry = new IndexEntry(tokenData.token, startRow);
\r
66 indexEntries.add(indexEntry);
\r
// Token row is built from a negative value, -(tokenDataIndex + 1); presumably this distinguishes token rows from entry rows - confirm against Row.
68 final Row tokenRow = new Row(-(tokenDataIndex + 1));
\r
// One Row per entry position, in the sorted order established above.
71 for (final Integer e : tokenData.entries) {
\r
72 final Row entryRow = new Row(e);
\r
/**
 * Per-entry value holder used while building the index. In createIndex it is
 * constructed with the number of distinct whitespace-separated tokens of the
 * entry's indexable text, and that count is read by the entry comparator.
 */
80 static final class EntryData {
\r
// Token count supplied at construction; never reassigned (final).
81 final int numTokens;
\r
/** Stores the given token count. */
83 public EntryData(int numTokens) {
\r
84 this.numTokens = numTokens;
\r
88 static final class TokenData {
\r
90 final List<Integer> entries = new ArrayList<Integer>();
\r
94 public TokenData(String token) {
\r