1 package com.hughes.android.dictionary;
\r
3 import java.io.IOException;
\r
4 import java.io.RandomAccessFile;
\r
5 import java.util.ArrayList;
\r
6 import java.util.List;
\r
7 import java.util.concurrent.atomic.AtomicBoolean;
\r
9 import com.hughes.android.dictionary.engine.Language;
\r
10 import com.hughes.util.CachingList;
\r
11 import com.hughes.util.raf.RAFList;
\r
12 import com.hughes.util.raf.RAFFactory;
\r
13 import com.hughes.util.raf.RAFSerializable;
\r
14 import com.hughes.util.raf.RAFSerializableSerializer;
\r
15 import com.hughes.util.raf.RAFSerializer;
\r
16 import com.hughes.util.raf.UniformRAFList;
\r
/**
 * A two-language dictionary that can be written to, and read back from, a
 * RandomAccessFile. It holds a free-form info string, a list of source
 * strings, a list of entries, and one LanguageData per language.
 */
18 public final class Dictionary implements RAFSerializable<Dictionary> {
\r
// Version marker: write() emits it last, and the RandomAccessFile constructor
// rejects any file whose trailing string differs from it.
20 private static final String VERSION_CODE = "DictionaryVersion=2.0";
\r
// NOTE(review): null here; the only visible reference is the commented-out
// RAFList.write(raf, entries, ENTRY_SERIALIZER) call in write().
22 static final RAFSerializer<SimpleEntry> ENTRY_SERIALIZER = null;
\r
// Serializer for Row values.
// NOTE(review): the constructor-argument line is not visible in this excerpt.
23 static final RAFSerializer<Row> ROW_SERIALIZER = new RAFSerializableSerializer<Row>(
\r
// Serializer for IndexEntry values, built from IndexEntry.RAF_FACTORY.
25 static final RAFSerializer<IndexEntry> INDEX_ENTRY_SERIALIZER = new RAFSerializableSerializer<IndexEntry>(
\r
26 IndexEntry.RAF_FACTORY);
\r
// Free-form description; it is the first value written by write().
28 final String dictionaryInfo;
\r
// Source strings; serialized with RAFSerializer.STRING.
29 final List<String> sources;
\r
// Dictionary entries.
// NOTE(review): write() does not serialize this list (the call is commented out).
30 final List<Entry> entries;
\r
// Exactly two slots: index 0 is built with SimpleEntry.LANG1, index 1 with SimpleEntry.LANG2.
31 final LanguageData[] languageDatas = new LanguageData[2];
\r
/**
 * Creates an empty in-memory dictionary for a pair of languages.
 * sources and entries start as empty ArrayLists; each LanguageData is built
 * with the language-based constructor, which starts with empty rows and index.
 *
 * @param dictionaryInfo free-form description stored with the dictionary
 * @param language0 language for languageDatas[0] (paired with SimpleEntry.LANG1)
 * @param language1 language for languageDatas[1] (paired with SimpleEntry.LANG2)
 */
33 public Dictionary(final String dictionaryInfo, final Language language0, final Language language1) {
\r
34 this.dictionaryInfo = dictionaryInfo;
\r
35 sources = new ArrayList<String>();
\r
36 languageDatas[0] = new LanguageData(this, language0, SimpleEntry.LANG1);
\r
37 languageDatas[1] = new LanguageData(this, language1, SimpleEntry.LANG2);
\r
38 entries = new ArrayList<Entry>();
\r
/**
 * Reads a dictionary from raf in the same order write() emits it: info
 * string, sources list, the two LanguageData sections, then the version
 * string.
 *
 * NOTE(review): no assignment to the final field `entries` is visible in this
 * excerpt; confirm how it is initialized on this path.
 *
 * @param raf file positioned at the start of a serialized dictionary
 * @throws IOException on read failure, or when the trailing version string
 *         does not equal VERSION_CODE
 */
41 public Dictionary(final RandomAccessFile raf) throws IOException {
\r
42 dictionaryInfo = raf.readUTF();
\r
// The RAF-backed list is copied into a plain ArrayList rather than kept as a view.
43 sources = new ArrayList<String>(RAFList.create(raf, RAFSerializer.STRING, raf.getFilePointer()));
\r
45 languageDatas[0] = new LanguageData(this, raf, SimpleEntry.LANG1);
\r
46 languageDatas[1] = new LanguageData(this, raf, SimpleEntry.LANG2);
\r
// The version marker is stored last, so it is only checked after everything else was read.
47 final String version = raf.readUTF();
\r
48 if (!VERSION_CODE.equals(version)) {
\r
49 throw new IOException("Invalid dictionary version, found " + version + ", expected: " + VERSION_CODE);
\r
/**
 * Serializes this dictionary to raf: info string, sources list, both
 * LanguageData sections, and finally VERSION_CODE. The entries list is not
 * written (that call is commented out below).
 *
 * @param raf destination file, written at its current position
 * @throws IOException if any underlying write fails
 */
53 public void write(RandomAccessFile raf) throws IOException {
\r
54 raf.writeUTF(dictionaryInfo);
\r
55 RAFList.write(raf, sources, RAFSerializer.STRING);
\r
56 //RAFList.write(raf, entries, ENTRY_SERIALIZER);
\r
57 languageDatas[0].write(raf);
\r
58 languageDatas[1].write(raf);
\r
// Written last; the reading constructor validates it after all other data.
59 raf.writeUTF(VERSION_CODE);
\r
/**
 * Per-language part of a Dictionary: the language itself, its list of rows,
 * and its index of IndexEntry values (named sortedIndex; lookup() binary
 * searches it).
 */
62 final class LanguageData implements RAFSerializable<LanguageData> {
\r
// Owning dictionary, passed explicitly to both constructors.
63 final Dictionary dictionary;
\r
// Supplies the symbol written to disk and the comparators used by lookup().
64 final Language language;
\r
// Rows for this language; a token row refers to an entry of sortedIndex via getIndex().
66 final List<Row> rows;
\r
// Index entries; each records a word and the row at which it starts.
67 final List<IndexEntry> sortedIndex;
\r
/**
 * Creates an empty LanguageData with empty, mutable ArrayLists for rows and
 * sortedIndex.
 *
 * NOTE(review): `lang` is not referenced in the lines visible in this excerpt.
 *
 * @param dictionary owning dictionary
 * @param language language this data belongs to
 * @param lang language selector passed by Dictionary (SimpleEntry.LANG1 or LANG2)
 */
69 LanguageData(final Dictionary dictionary, final Language language, final byte lang) {
\r
70 this.dictionary = dictionary;
\r
71 this.language = language;
\r
73 rows = new ArrayList<Row>();
\r
74 sortedIndex = new ArrayList<IndexEntry>();
\r
/**
 * Reads a LanguageData section from raf: the language symbol, then the rows
 * list, then the index list. Both lists stay backed by the file and are
 * wrapped in a CachingList.
 *
 * @param dictionary owning dictionary
 * @param raf file positioned at the start of this section
 * @param lang language selector passed by Dictionary (SimpleEntry.LANG1 or LANG2)
 * @throws IOException if reading from raf fails
 * @throws RuntimeException if the stored symbol does not resolve to a Language
 */
77 LanguageData(final Dictionary dictionary, final RandomAccessFile raf, final byte lang) throws IOException {
\r
78 this.dictionary = dictionary;
\r
// The symbol was stored by write() as language.symbol.
79 language = Language.lookup(raf.readUTF());
\r
80 if (language == null) {
\r
81 throw new RuntimeException("Unknown language.");
\r
// 10000 is presumably the cache capacity of CachingList -- TODO confirm.
84 rows = CachingList.create(UniformRAFList.create(raf, ROW_SERIALIZER, raf
\r
85 .getFilePointer()), 10000);
\r
86 sortedIndex = CachingList.create(RAFList.create(raf,
\r
87 INDEX_ENTRY_SERIALIZER, raf.getFilePointer()), 10000);
\r
/**
 * Serializes this LanguageData to raf: the language symbol, then the rows,
 * then the index -- the order the RandomAccessFile constructor reads them.
 *
 * @param raf destination file, written at its current position
 * @throws IOException if any underlying write fails
 */
90 public void write(final RandomAccessFile raf) throws IOException {
\r
91 raf.writeUTF(language.symbol);
\r
// 4 is presumably the fixed per-row size in bytes (Row.write emits one int) -- TODO confirm.
92 UniformRAFList.write(raf, rows, ROW_SERIALIZER, 4);
\r
93 RAFList.write(raf, sortedIndex, INDEX_ENTRY_SERIALIZER);
\r
/**
 * Produces display text for a row.
 *
 * NOTE(review): only commented-out code is visible for this method in this
 * excerpt; the live statement that produces the return value is not shown.
 * The disabled code used the index word for token rows and the entry's raw
 * text otherwise.
 *
 * @param row row to render
 * @param onlyFirstSubentry passed to getRawText in the disabled code below
 */
96 String rowToString(final Row row, final boolean onlyFirstSubentry) {
\r
98 //return row.isToken() ? sortedIndex.get(row.getIndex()).word : entries
\r
99 // .get(row.getIndex()).getRawText(onlyFirstSubentry);
\r
/**
 * Binary-searches sortedIndex for word, comparing lower-cased forms with
 * language.sortComparator. The interrupted flag is polled inside the loops
 * so a caller can cancel a running search.
 *
 * NOTE(review): several short lines of this method (the initialization of
 * `start`, the bodies of the interrupted checks, the match branch header and
 * the bound updates) are not visible in this excerpt.
 *
 * @param word text to look up; it is lower-cased before comparison
 * @param interrupted cancellation flag checked during the search
 * @return when the loop ends without an early return,
 *         Math.min(sortedIndex.size() - 1, start)
 */
102 int lookup(String word, final AtomicBoolean interrupted) {
\r
// NOTE(review): toLowerCase() without a Locale uses the default locale, so
// results can differ between devices (e.g. Turkish dotless i).
103 word = word.toLowerCase();
\r
106 int end = sortedIndex.size();
\r
107 while (start < end) {
\r
// NOTE(review): (start + end) / 2 can overflow int for very large indexes;
// (start + end) >>> 1 avoids that.
108 final int mid = (start + end) / 2;
\r
109 if (interrupted.get()) {
\r
112 final IndexEntry midEntry = sortedIndex.get(mid);
\r
// NOTE(review): looks like a leftover debugging hook (prints an empty line
// when the probe word is "pre-print"); candidate for removal.
113 if (midEntry.word.equals("pre-print")) {
\r
114 System.out.println();
\r
117 final int comp = language.sortComparator.compare(word, midEntry.word.toLowerCase());
\r
// Walks `result` backward while the preceding index entry still compares
// equal under findComparator (the loop body is not visible here).
120 while (result > 0 && language.findComparator.compare(word, sortedIndex.get(result - 1).word.toLowerCase()) == 0) {
\r
122 if (interrupted.get()) {
\r
127 } else if (comp < 0) {
\r
128 // Log.d("THAD", "Upper bound: " + midEntry);
\r
131 // Log.d("THAD", "Lower bound: " + midEntry);
\r
// Clamp so the result never points past the last index entry.
135 return Math.min(sortedIndex.size() - 1, start);
\r
/**
 * Returns the row to jump to when moving backward by token from rowIndex.
 * If rowIndex is itself the start row of its index entry and an earlier
 * index entry exists, that earlier entry's start row is returned; otherwise
 * the start row of the current index entry is returned.
 *
 * @param rowIndex index into rows
 * @return index into rows of a token start row
 */
138 public int getPrevTokenRow(final int rowIndex) {
\r
139 final IndexEntry indexEntry = getIndexEntryForRow(rowIndex);
\r
140 final Row tokenRow = rows.get(indexEntry.startRow);
\r
141 assert tokenRow.isToken();
\r
// For a token row, getIndex() is used as its position in sortedIndex.
142 final int prevTokenIndex = tokenRow.getIndex() - 1;
\r
143 if (indexEntry.startRow == rowIndex && prevTokenIndex >= 0) {
\r
144 return sortedIndex.get(prevTokenIndex).startRow;
\r
146 return indexEntry.startRow;
\r
/**
 * Returns the row to jump to when moving forward by token from rowIndex:
 * the start row of the index entry after the one containing rowIndex, or the
 * last row (rows.size() - 1) when there is no later index entry.
 *
 * @param rowIndex index into rows
 * @return index into rows
 */
149 public int getNextTokenRow(final int rowIndex) {
\r
150 final IndexEntry indexEntry = getIndexEntryForRow(rowIndex);
\r
151 final Row tokenRow = rows.get(indexEntry.startRow);
\r
152 assert tokenRow.isToken();
\r
// For a token row, getIndex() is used as its position in sortedIndex.
153 final int nextTokenIndex = tokenRow.getIndex() + 1;
\r
154 if (nextTokenIndex < sortedIndex.size()) {
\r
155 return sortedIndex.get(nextTokenIndex).startRow;
\r
157 return rows.size() - 1;
\r
/**
 * Finds the IndexEntry that the row at rowIndex belongs to and caches it on
 * the rows it passes over (Row.indexEntry).
 *
 * NOTE(review): the loop header, the declarations of `r` and `row`, and the
 * return statement are not visible in this excerpt. Presumably the search
 * scans backward from rowIndex until it reaches a token row or a row with a
 * cached entry -- confirm against the full source.
 *
 * @param rowIndex index into rows
 */
160 public IndexEntry getIndexEntryForRow(final int rowIndex) {
\r
161 // TODO: this kinda blows.
\r
165 row = rows.get(r);
\r
// Stop at a row that identifies its entry: a token row, or one already cached.
166 if (row.isToken() || row.indexEntry != null) {
\r
171 final IndexEntry indexEntry = row.isToken() ? sortedIndex.get(row.getIndex()) : row.indexEntry;
\r
// Cache the entry on every row from the stopping point up to rowIndex so
// later calls for these rows hit the cached branch above.
172 for (; r <= rowIndex; ++r) {
\r
173 rows.get(r).indexEntry = indexEntry;
\r
175 assert rows.get(indexEntry.startRow).isToken();
\r
/**
 * One row of a LanguageData's row list, serialized as a single int (index).
 *
 * NOTE(review): the declaration of `index` and the bodies of isToken() and
 * getIndex() are not visible in this excerpt.
 */
180 public static final class Row implements RAFSerializable<Row> {
\r
// Not serialized; filled in lazily by LanguageData.getIndexEntryForRow().
183 IndexEntry indexEntry = null;
\r
185 public Row(final int index) {
\r
186 this.index = index;
\r
// Deserializes a Row by reading the single int that write() emits.
189 static final RAFFactory<Row> RAF_FACTORY = new RAFFactory<Row>() {
\r
190 public Row create(RandomAccessFile raf) throws IOException {
\r
191 return new Row(raf.readInt());
\r
// Writes only the int index; the cached indexEntry is not persisted.
195 public void write(RandomAccessFile raf) throws IOException {
\r
196 raf.writeInt(index);
\r
199 boolean isToken() {
\r
203 public int getIndex() {
\r
/**
 * One index entry: a word and the row at which it starts (startRow).
 * Serialized as a UTF string followed by an int.
 *
 * NOTE(review): the declaration and assignment of `word` are not visible in
 * this excerpt.
 */
211 public static final class IndexEntry implements RAFSerializable<IndexEntry> {
\r
// Index into the owning LanguageData's rows list.
213 final int startRow;
\r
215 public IndexEntry(final String word, final int startRow) {
\r
217 this.startRow = startRow;
\r
// Deserializes an IndexEntry in the same field order write() uses.
220 static final RAFFactory<IndexEntry> RAF_FACTORY = new RAFFactory<IndexEntry>() {
\r
221 public IndexEntry create(RandomAccessFile raf) throws IOException {
\r
222 final String word = raf.readUTF();
\r
223 final int startRow = raf.readInt();
\r
224 return new IndexEntry(word, startRow);
\r
// Writes the word, then startRow.
228 public void write(final RandomAccessFile raf) throws IOException {
\r
229 raf.writeUTF(word);
\r
230 raf.writeInt(startRow);
\r
// Renders as "word@startRow".
234 public String toString() {
\r
235 return word + "@" + startRow;
\r