-package com.hughes.android.dictionary;\r
-\r
-import java.io.IOException;\r
-import java.io.RandomAccessFile;\r
-import java.util.ArrayList;\r
-import java.util.List;\r
-import java.util.concurrent.atomic.AtomicBoolean;\r
-\r
-import com.hughes.android.dictionary.engine.Language;\r
-import com.hughes.util.CachingList;\r
-import com.hughes.util.raf.RAFList;\r
-import com.hughes.util.raf.RAFFactory;\r
-import com.hughes.util.raf.RAFSerializable;\r
-import com.hughes.util.raf.RAFSerializableSerializer;\r
-import com.hughes.util.raf.RAFSerializer;\r
-import com.hughes.util.raf.UniformRAFList;\r
-\r
-public final class Dictionary implements RAFSerializable<Dictionary> {\r
- \r
- private static final String VERSION_CODE = "DictionaryVersion=2.0"; /* Marker that write() emits last and the file-reading constructor verifies last. */\r
-\r
- static final RAFSerializer<SimpleEntry> ENTRY_SERIALIZER = null; /* Null placeholder; the only reference to it in write() is commented out. */\r
- static final RAFSerializer<Row> ROW_SERIALIZER = new RAFSerializableSerializer<Row>(\r
- Row.RAF_FACTORY); /* Used by LanguageData to read and write its rows list. */\r
- static final RAFSerializer<IndexEntry> INDEX_ENTRY_SERIALIZER = new RAFSerializableSerializer<IndexEntry>(\r
- IndexEntry.RAF_FACTORY); /* Used by LanguageData to read and write its sortedIndex list. */\r
-\r
- final String dictionaryInfo; /* First value written by write() and first value read back from a file. */\r
- final List<String> sources; /* String list stored directly after dictionaryInfo. */\r
- final List<Entry> entries; /* Empty list for an in-memory dictionary; null when loaded from a file. */\r
- final LanguageData[] languageDatas = new LanguageData[2]; /* Slot 0 is built with SimpleEntry.LANG1, slot 1 with SimpleEntry.LANG2. */\r
-\r
- /**\r
-  * Builds an empty, in-memory dictionary for a pair of languages.\r
-  *\r
-  * @param dictionaryInfo description string kept with the dictionary\r
-  * @param language0 language for {@code languageDatas[0]} (tagged {@code SimpleEntry.LANG1})\r
-  * @param language1 language for {@code languageDatas[1]} (tagged {@code SimpleEntry.LANG2})\r
-  */\r
- public Dictionary(final String dictionaryInfo, final Language language0, final Language language1) {\r
-   this.dictionaryInfo = dictionaryInfo;\r
-   this.sources = new ArrayList<String>();\r
-   this.entries = new ArrayList<Entry>();\r
-   // The LanguageData constructor only stores the back-reference, so handing out "this" here is fine.\r
-   this.languageDatas[0] = new LanguageData(this, language0, SimpleEntry.LANG1);\r
-   this.languageDatas[1] = new LanguageData(this, language1, SimpleEntry.LANG2);\r
- }\r
-\r
- /**\r
-  * Loads a dictionary from {@code raf}, starting at the file's current position.\r
-  *\r
-  * <p>Values are consumed in the order {@code write} emits them: description, sources,\r
-  * the two language sections, and finally the version marker. {@code entries} is not\r
-  * read and is left {@code null}.\r
-  *\r
-  * @param raf file positioned at the start of a serialized dictionary\r
-  * @throws IOException if reading fails or the trailing version marker differs from\r
-  *     {@code VERSION_CODE}\r
-  */\r
- public Dictionary(final RandomAccessFile raf) throws IOException {\r
-   this.entries = null;\r
-   this.dictionaryInfo = raf.readUTF();\r
-\r
-   final long sourcesOffset = raf.getFilePointer();\r
-   this.sources = new ArrayList<String>(RAFList.create(raf, RAFSerializer.STRING, sourcesOffset));\r
-\r
-   this.languageDatas[0] = new LanguageData(this, raf, SimpleEntry.LANG1);\r
-   this.languageDatas[1] = new LanguageData(this, raf, SimpleEntry.LANG2);\r
-\r
-   // The marker sits at the very end of the data, so it can only be checked last.\r
-   final String version = raf.readUTF();\r
-   if (VERSION_CODE.equals(version)) {\r
-     return;\r
-   }\r
-   throw new IOException("Invalid dictionary version, found " + version + ", expected: " + VERSION_CODE);\r
- }\r
-\r
- /**\r
-  * Serializes this dictionary to {@code raf}: description, sources, each language\r
-  * section in array order, then the version marker.\r
-  *\r
-  * @param raf destination file, written from its current position\r
-  * @throws IOException if any write fails\r
-  */\r
- public void write(RandomAccessFile raf) throws IOException {\r
-   raf.writeUTF(dictionaryInfo);\r
-   RAFList.write(raf, sources, RAFSerializer.STRING);\r
-   // Note: the entries list is not written here.\r
-   for (final LanguageData languageData : languageDatas) {\r
-     languageData.write(raf);\r
-   }\r
-   raf.writeUTF(VERSION_CODE);\r
- }\r
-\r
- final class LanguageData implements RAFSerializable<LanguageData> {\r
- final Dictionary dictionary; /* Dictionary instance handed to the constructor. */\r
- final Language language; /* Supplies the symbol written to file and the comparators used by lookup(). */\r
- final byte lang; /* Tag handed to the constructor; Dictionary passes SimpleEntry.LANG1 or SimpleEntry.LANG2. */\r
- final List<Row> rows; /* Rows of this language; a token row's getIndex() is a position in sortedIndex. */\r
- final List<IndexEntry> sortedIndex; /* Index entries that lookup() binary-searches; each entry's startRow is a position in rows. */\r
-\r
- /**\r
-  * Creates an empty, in-memory language section with no rows and no index entries.\r
-  *\r
-  * @param dictionary dictionary to keep a reference to\r
-  * @param language language of this section\r
-  * @param lang byte tag stored alongside the language\r
-  */\r
- LanguageData(final Dictionary dictionary, final Language language, final byte lang) {\r
-   this.rows = new ArrayList<Row>();\r
-   this.sortedIndex = new ArrayList<IndexEntry>();\r
-   this.lang = lang;\r
-   this.language = language;\r
-   this.dictionary = dictionary;\r
- }\r
-\r
- LanguageData(final Dictionary dictionary, final RandomAccessFile raf, final byte lang) throws IOException {\r
- this.dictionary = dictionary;\r
- language = Language.lookup(raf.readUTF());\r
- if (language == null) {\r
- throw new RuntimeException("Unknown language.");\r
- }\r
- this.lang = lang;\r
- rows = CachingList.create(UniformRAFList.create(raf, ROW_SERIALIZER, raf\r
- .getFilePointer()), 10000);\r
- sortedIndex = CachingList.create(RAFList.create(raf,\r
- INDEX_ENTRY_SERIALIZER, raf.getFilePointer()), 10000);\r
- }\r
-\r
- /**\r
-  * Serializes this language section to {@code raf}: the language symbol, the rows,\r
-  * then the sorted index.\r
-  *\r
-  * @param raf destination file, written from its current position\r
-  * @throws IOException if any write fails\r
-  */\r
- public void write(final RandomAccessFile raf) throws IOException {\r
-   final String symbol = language.symbol;\r
-   raf.writeUTF(symbol);\r
-   // NOTE(review): the 4 is presumably the fixed per-row size in bytes (Row.write emits one int) -- confirm.\r
-   UniformRAFList.write(raf, rows, ROW_SERIALIZER, 4);\r
-   RAFList.write(raf, sortedIndex, INDEX_ENTRY_SERIALIZER);\r
- }\r
-\r
- String rowToString(final Row row, final boolean onlyFirstSubentry) { /* Stub: always returns null and ignores both parameters. */\r
- return null; /* The commented-out lines below are the disabled implementation. */\r
- //return row.isToken() ? sortedIndex.get(row.getIndex()).word : entries\r
- // .get(row.getIndex()).getRawText(onlyFirstSubentry);\r
- }\r
-\r
- /**\r
-  * Binary-searches {@code sortedIndex} for {@code word}, ignoring case.\r
-  *\r
-  * <p>The query and every probed entry are lower-cased and compared with\r
-  * {@code language.sortComparator}. On an exact hit the position is walked backwards for\r
-  * as long as {@code language.findComparator} reports the preceding entry as equal, so\r
-  * the first entry of a run of equal words is returned. Without a hit the insertion\r
-  * point is returned, clamped to the last valid index.\r
-  *\r
-  * @param word text to look up; must not be null\r
-  * @param interrupted polled on every step; once set, the search stops and the position\r
-  *     reached so far is returned instead of the final answer\r
-  * @return a position in {@code sortedIndex}, or -1 when the index is empty\r
-  */\r
- int lookup(String word, final AtomicBoolean interrupted) {\r
-   word = word.toLowerCase();\r
-\r
-   int start = 0;\r
-   int end = sortedIndex.size();\r
-   while (start < end) {\r
-     // Unsigned shift keeps the midpoint correct even if start + end overflows int.\r
-     final int mid = (start + end) >>> 1;\r
-     if (interrupted.get()) {\r
-       return mid;\r
-     }\r
-     final IndexEntry midEntry = sortedIndex.get(mid);\r
-\r
-     final int comp = language.sortComparator.compare(word, midEntry.word.toLowerCase());\r
-     if (comp == 0) {\r
-       // Adjacent entries may compare equal; back up to the first of them.\r
-       int result = mid;\r
-       while (result > 0\r
-           && language.findComparator.compare(word, sortedIndex.get(result - 1).word.toLowerCase()) == 0) {\r
-         --result;\r
-         if (interrupted.get()) {\r
-           return result;\r
-         }\r
-       }\r
-       return result;\r
-     }\r
-     if (comp < 0) {\r
-       end = mid;\r
-     } else {\r
-       start = mid + 1;\r
-     }\r
-   }\r
-   // No exact match: clamp the insertion point so it never points past the last entry.\r
-   return Math.min(sortedIndex.size() - 1, start);\r
- }\r
- \r
- /**\r
-  * Finds the token row to jump to when moving backwards from {@code rowIndex}.\r
-  *\r
-  * <p>If {@code rowIndex} is not the start row of its index entry, that start row is\r
-  * returned. If it is the start row and an earlier index entry exists, the earlier\r
-  * entry's start row is returned; otherwise the row's own start row is returned.\r
-  *\r
-  * @param rowIndex position in {@code rows}\r
-  * @return position in {@code rows} of the chosen index entry's start row\r
-  */\r
- public int getPrevTokenRow(final int rowIndex) {\r
-   final IndexEntry owner = getIndexEntryForRow(rowIndex);\r
-   final int ownerStart = owner.startRow;\r
-   final Row tokenRow = rows.get(ownerStart);\r
-   assert tokenRow.isToken();\r
-\r
-   final int preceding = tokenRow.getIndex() - 1;\r
-   final boolean atGroupStart = ownerStart == rowIndex;\r
-   if (!atGroupStart || preceding < 0) {\r
-     return ownerStart;\r
-   }\r
-   return sortedIndex.get(preceding).startRow;\r
- }\r
-\r
- /**\r
-  * Finds the token row to jump to when moving forwards from {@code rowIndex}.\r
-  *\r
-  * @param rowIndex position in {@code rows}\r
-  * @return the start row of the index entry after the one owning {@code rowIndex}, or\r
-  *     {@code rows.size() - 1} when there is no later index entry\r
-  */\r
- public int getNextTokenRow(final int rowIndex) {\r
-   final IndexEntry owner = getIndexEntryForRow(rowIndex);\r
-   final Row tokenRow = rows.get(owner.startRow);\r
-   assert tokenRow.isToken();\r
-\r
-   final int following = tokenRow.getIndex() + 1;\r
-   return following < sortedIndex.size()\r
-       ? sortedIndex.get(following).startRow\r
-       : rows.size() - 1;\r
- }\r
-\r
- /**\r
-  * Resolves the index entry that owns the row at {@code rowIndex}.\r
-  *\r
-  * <p>Scans backwards from {@code rowIndex} until it reaches a token row or a row whose\r
-  * {@code indexEntry} is already set, then stores the resolved entry on every row from\r
-  * that point up to and including {@code rowIndex}.\r
-  *\r
-  * @param rowIndex position in {@code rows}\r
-  * @return the owning index entry\r
-  */\r
- public IndexEntry getIndexEntryForRow(final int rowIndex) {\r
-   // TODO: replace this linear backward scan with something cheaper.\r
-   int cursor = rowIndex;\r
-   Row anchor = rows.get(cursor);\r
-   while (!anchor.isToken() && anchor.indexEntry == null) {\r
-     --cursor;\r
-     anchor = rows.get(cursor);\r
-   }\r
-\r
-   final IndexEntry resolved;\r
-   if (anchor.isToken()) {\r
-     resolved = sortedIndex.get(anchor.getIndex());\r
-   } else {\r
-     resolved = anchor.indexEntry;\r
-   }\r
-\r
-   // Remember the answer on every row that was visited, the anchor row included.\r
-   while (cursor <= rowIndex) {\r
-     rows.get(cursor).indexEntry = resolved;\r
-     ++cursor;\r
-   }\r
-   assert rows.get(resolved.startRow).isToken();\r
-   return resolved;\r
- }\r
- }\r
-\r
- /**\r
-  * One row of a language section, serialized as a single int.\r
-  *\r
-  * <p>A negative stored value marks a token row; {@link #getIndex()} decodes it to a\r
-  * non-negative position. A non-negative stored value is returned unchanged.\r
-  */\r
- public static final class Row implements RAFSerializable<Row> {\r
-   /** Reads a row back as the single int that {@link #write} emits. */\r
-   static final RAFFactory<Row> RAF_FACTORY = new RAFFactory<Row>() {\r
-     public Row create(RandomAccessFile raf) throws IOException {\r
-       final int storedIndex = raf.readInt();\r
-       return new Row(storedIndex);\r
-     }\r
-   };\r
-\r
-   /** Raw stored value; negative for token rows. */\r
-   final int index;\r
-\r
-   /** Owning index entry, filled in lazily by LanguageData.getIndexEntryForRow; never written to file. */\r
-   IndexEntry indexEntry = null;\r
-\r
-   public Row(final int index) {\r
-     this.index = index;\r
-   }\r
-\r
-   public void write(RandomAccessFile raf) throws IOException {\r
-     raf.writeInt(index);\r
-   }\r
-\r
-   /** Returns true when the stored value is negative. */\r
-   boolean isToken() {\r
-     return index < 0;\r
-   }\r
-\r
-   /** Returns the decoded, non-negative index: the stored value itself, or {@code -value - 1} for token rows. */\r
-   public int getIndex() {\r
-     // ~index is exactly -index - 1 in two's complement.\r
-     return isToken() ? ~index : index;\r
-   }\r
- }\r
-\r
- /**\r
-  * A word of the sorted index together with the position of its first row.\r
-  * Serialized as a UTF string followed by an int.\r
-  */\r
- public static final class IndexEntry implements RAFSerializable<IndexEntry> {\r
-   /** Reads an entry back in the order {@link #write} emits it: word first, then start row. */\r
-   static final RAFFactory<IndexEntry> RAF_FACTORY = new RAFFactory<IndexEntry>() {\r
-     public IndexEntry create(RandomAccessFile raf) throws IOException {\r
-       // Java evaluates arguments left to right, so the word is read before the int.\r
-       return new IndexEntry(raf.readUTF(), raf.readInt());\r
-     }\r
-   };\r
-\r
-   final String word;\r
-   final int startRow;\r
-\r
-   public IndexEntry(final String word, final int startRow) {\r
-     this.startRow = startRow;\r
-     this.word = word;\r
-   }\r
-\r
-   public void write(final RandomAccessFile raf) throws IOException {\r
-     raf.writeUTF(word);\r
-     raf.writeInt(startRow);\r
-   }\r
-\r
-   /** Formats the entry as {@code word@startRow}. */\r
-   @Override\r
-   public String toString() {\r
-     final StringBuilder text = new StringBuilder();\r
-     text.append(word).append("@").append(startRow);\r
-     return text.toString();\r
-   }\r
-\r
- }\r
-\r
-}\r