4 package com.hughes.android.dictionary.engine;
6 import java.io.IOException;
7 import java.io.PrintStream;
8 import java.io.RandomAccessFile;
9 import java.util.ArrayList;
10 import java.util.Collection;
11 import java.util.List;
12 import java.util.concurrent.atomic.AtomicBoolean;
14 import com.hughes.util.CachingList;
15 import com.hughes.util.raf.RAFList;
16 import com.hughes.util.raf.RAFSerializable;
17 import com.hughes.util.raf.RAFSerializer;
18 import com.hughes.util.raf.UniformRAFList;
19 import com.ibm.icu.text.Collator;
21 public final class Index implements RAFSerializable<Index> {
23 static final int CACHE_SIZE = 5000;
25 final Dictionary dict;
27 final String shortName;
28 final String longName;
30 // persisted: tells how the entries are sorted.
31 final Language sortLanguage;
34 final List<IndexEntry> sortedIndexEntries;
39 final List<RowBase> rows;
41 // --------------------------------------------------------------------------
43 public Index(final Dictionary dict, final String shortName, final String longName, final Language sortLanguage) {
45 this.shortName = shortName;
46 this.longName = longName;
47 this.sortLanguage = sortLanguage;
48 sortedIndexEntries = new ArrayList<IndexEntry>();
49 rows = new ArrayList<RowBase>();
52 public Index(final Dictionary dict, final RandomAccessFile raf) throws IOException {
54 shortName = raf.readUTF();
55 longName = raf.readUTF();
56 final String languageCode = raf.readUTF();
57 sortLanguage = Language.lookup(languageCode);
58 if (sortLanguage == null) {
59 throw new IOException("Unsupported language: " + languageCode);
61 sortedIndexEntries = CachingList.create(RAFList.create(raf, IndexEntry.SERIALIZER, raf.getFilePointer()), CACHE_SIZE);
62 rows = CachingList.create(UniformRAFList.create(raf, new RowBase.Serializer(this), raf.getFilePointer()), CACHE_SIZE);
65 public void print(final PrintStream out) {
66 for (final RowBase row : rows) {
72 public void write(final RandomAccessFile raf) throws IOException {
73 raf.writeUTF(shortName);
74 raf.writeUTF(longName);
75 raf.writeUTF(sortLanguage.getSymbol());
76 RAFList.write(raf, sortedIndexEntries, IndexEntry.SERIALIZER);
77 UniformRAFList.write(raf, (Collection<RowBase>) rows, new RowBase.Serializer(this), 5);
81 static final class IndexEntry implements RAFSerializable<Index.IndexEntry> {
85 static final RAFSerializer<IndexEntry> SERIALIZER = new RAFSerializer<IndexEntry> () {
87 public IndexEntry read(RandomAccessFile raf) throws IOException {
88 return new IndexEntry(raf);
91 public void write(RandomAccessFile raf, IndexEntry t) throws IOException {
95 public IndexEntry(final String token, final int startRow) {
96 assert token.equals(token.trim());
97 assert token.length() > 0;
99 this.startRow = startRow;
102 public IndexEntry(final RandomAccessFile raf) throws IOException {
103 token = raf.readUTF();
104 startRow = raf.readInt();
107 public void write(RandomAccessFile raf) throws IOException {
109 raf.writeInt(startRow);
112 public String toString() {
113 return token + "@" + startRow;
118 private TokenRow sortedIndexToToken(final int sortedIndex) {
119 final IndexEntry indexEntry = sortedIndexEntries.get(sortedIndex);
120 return (TokenRow) rows.get(indexEntry.startRow);
123 public TokenRow find(String token, final AtomicBoolean interrupted) {
124 token = sortLanguage.textNorm(token, true);
127 int end = sortedIndexEntries.size();
129 final Collator sortCollator = sortLanguage.getSortCollator();
130 while (start < end) {
131 final int mid = (start + end) / 2;
132 if (interrupted.get()) {
133 return sortedIndexToToken(mid);
135 final IndexEntry midEntry = sortedIndexEntries.get(mid);
137 final int comp = sortCollator.compare(token, sortLanguage.textNorm(midEntry.token, true));
139 final int result = windBack(token, mid, sortCollator, interrupted);
140 return sortedIndexToToken(result);
141 } else if (comp < 0) {
142 // Log.d("THAD", "Upper bound: " + midEntry);
145 // Log.d("THAD", "Lower bound: " + midEntry);
149 int result = Math.min(start, sortedIndexEntries.size() - 1);
150 result = windBack(token, result, sortCollator, interrupted);
151 if (result > 0 && sortCollator.compare(sortLanguage.textNorm(sortedIndexEntries.get(result).token, true), token) > 0) {
152 result = windBack(sortLanguage.textNorm(sortedIndexEntries.get(result - 1).token, true), result, sortCollator, interrupted);
154 return sortedIndexToToken(result);
157 private final int windBack(final String token, int result, final Collator sortCollator, final AtomicBoolean interrupted) {
158 while (result > 0 && sortCollator.compare(sortLanguage.textNorm(sortedIndexEntries.get(result - 1).token, true), token) >= 0) {
160 if (interrupted.get()) {