import java.io.IOException;\r
import java.io.RandomAccessFile;\r
import java.util.ArrayList;\r
-import java.util.Comparator;\r
import java.util.List;\r
import java.util.concurrent.atomic.AtomicBoolean;\r
\r
// Serializer for IndexEntry values, built from IndexEntry.RAF_FACTORY.\r
static final RAFSerializer<IndexEntry> INDEX_ENTRY_SERIALIZER = new RAFSerializableSerializer<IndexEntry>(\r
IndexEntry.RAF_FACTORY);\r
\r
// Info string for this dictionary; stored with writeUTF as the first item of the serialized form.\r
+ final String dictionaryInfo;\r
// All entries of the dictionary; an ArrayList when built in memory, a file-backed caching list when loaded.\r
final List<Entry> entries;\r
- final Language[] languages = new Language[2];\r
// Per-language data: slot 0 is created with Entry.LANG1, slot 1 with Entry.LANG2.\r
+ final LanguageData[] languageDatas = new LanguageData[2];\r
\r
// Creates an empty in-memory dictionary. The new signature stores dictionaryInfo and builds one\r
// LanguageData per language (language0 with Entry.LANG1, language1 with Entry.LANG2); the entry\r
// list starts out as an empty ArrayList.\r
- public Dictionary(final String lang0, final String lang1) {\r
- languages[0] = new Language(lang0, Entry.LANG1);\r
- languages[1] = new Language(lang1, Entry.LANG2);\r
+ public Dictionary(final String dictionaryInfo, final Language language0, final Language language1) {\r
+ this.dictionaryInfo = dictionaryInfo;\r
+ languageDatas[0] = new LanguageData(this, language0, Entry.LANG1);\r
+ languageDatas[1] = new LanguageData(this, language1, Entry.LANG2);\r
entries = new ArrayList<Entry>();\r
}\r
\r
// Loads a dictionary from raf, reading in the same order write() emits: the info string (readUTF),\r
// the entry list starting at the current file pointer, then the two LanguageData sections.\r
// The entry list is wrapped in a CachingList created with 10000 (presumably the cache capacity; confirm).\r
public Dictionary(final RandomAccessFile raf) throws IOException {\r
+ dictionaryInfo = raf.readUTF();\r
entries = CachingList.create(FileList.create(raf, ENTRY_SERIALIZER, raf\r
.getFilePointer()), 10000);\r
- languages[0] = new Language(raf, Entry.LANG1);\r
- languages[1] = new Language(raf, Entry.LANG2);\r
+ languageDatas[0] = new LanguageData(this, raf, Entry.LANG1);\r
+ languageDatas[1] = new LanguageData(this, raf, Entry.LANG2);\r
}\r
\r
// Serializes this dictionary to raf: the info string (writeUTF), the entry list, then\r
// languageDatas[0] followed by languageDatas[1]. The RandomAccessFile constructor reads the same order.\r
public void write(RandomAccessFile raf) throws IOException {\r
+ raf.writeUTF(dictionaryInfo);\r
FileList.write(raf, entries, ENTRY_SERIALIZER);\r
- languages[0].write(raf);\r
- languages[1].write(raf);\r
+ languageDatas[0].write(raf);\r
+ languageDatas[1].write(raf);\r
}\r
\r
- final class Language implements RAFSerializable<Language> {\r
+ final class LanguageData implements RAFSerializable<LanguageData> {\r
+ final Dictionary dictionary;\r
+ final Language language;\r
final byte lang;\r
- final String symbol;\r
final List<Row> rows;\r
final List<IndexEntry> sortedIndex;\r
- final Comparator<String> comparator = EntryFactory.entryFactory\r
- .getEntryComparator();\r
\r
// Creates an empty in-memory LanguageData: records the owning dictionary, the language and the\r
// lang byte, and starts with empty rows and sortedIndex lists.\r
- Language(final String symbol, final byte lang) {\r
+ LanguageData(final Dictionary dictionary, final Language language, final byte lang) {\r
+ this.dictionary = dictionary;\r
+ this.language = language;\r
this.lang = lang;\r
- this.symbol = symbol;\r
rows = new ArrayList<Row>();\r
sortedIndex = new ArrayList<IndexEntry>();\r
}\r
\r
- Language(final RandomAccessFile raf, final byte lang) throws IOException {\r
+ LanguageData(final Dictionary dictionary, final RandomAccessFile raf, final byte lang) throws IOException {\r
+ this.dictionary = dictionary;\r
+ language = Language.lookup(raf.readUTF());\r
+ if (language == null) {\r
+ throw new RuntimeException("Unknown language.");\r
+ }\r
this.lang = lang;\r
- symbol = raf.readUTF();\r
rows = CachingList.create(UniformFileList.create(raf, ROW_SERIALIZER, raf\r
.getFilePointer()), 10000);\r
sortedIndex = CachingList.create(FileList.create(raf,\r
}\r
\r
// Serializes this language section to raf: the language symbol (writeUTF), then the rows via\r
// UniformFileList, then the sorted index via FileList.\r
// NOTE(review): the trailing 4 passed to UniformFileList.write is presumably the fixed per-row size in bytes; confirm.\r
public void write(final RandomAccessFile raf) throws IOException {\r
- raf.writeUTF(symbol);\r
+ raf.writeUTF(language.symbol);\r
UniformFileList.write(raf, rows, ROW_SERIALIZER, 4);\r
FileList.write(raf, sortedIndex, INDEX_ENTRY_SERIALIZER);\r
}\r
return mid;\r
}\r
final IndexEntry midEntry = sortedIndex.get(mid);\r
+ if (midEntry.word.equals("pre-print")) {\r
+ System.out.println();\r
+ }\r
\r
- final int comp = comparator.compare(word, midEntry.word.toLowerCase());\r
+ final int comp = language.sortComparator.compare(word, midEntry.word.toLowerCase());\r
if (comp == 0) {\r
int result = mid;\r
- while (result > 0 && comparator.compare(word, sortedIndex.get(result - 1).word.toLowerCase()) == 0) {\r
+ while (result > 0 && language.findComparator.compare(word, sortedIndex.get(result - 1).word.toLowerCase()) == 0) {\r
--result;\r
if (interrupted.get()) {\r
return result;\r
}\r
return result;\r
} else if (comp < 0) {\r
+// Log.d("THAD", "Upper bound: " + midEntry);\r
end = mid;\r
} else {\r
+// Log.d("THAD", "Lower bound: " + midEntry);\r
start = mid + 1;\r
}\r
}\r
- return start;\r
+ return Math.min(sortedIndex.size() - 1, start);\r
+ }\r
+ \r
+ /**\r
+  * Finds the row to jump to when moving backwards from {@code rowIndex}.\r
+  *\r
+  * If {@code rowIndex} is itself the start row of its index entry and an earlier index entry\r
+  * exists, the start row of that earlier entry is returned; otherwise the start row of the\r
+  * entry owning {@code rowIndex} is returned.\r
+  *\r
+  * @param rowIndex index into {@code rows}\r
+  * @return start row of the chosen index entry\r
+  */\r
+ public int getPrevTokenRow(final int rowIndex) {\r
+   final IndexEntry current = getIndexEntryForRow(rowIndex);\r
+   final int currentStart = current.startRow;\r
+   final Row startRow = rows.get(currentStart);\r
+   assert startRow.isToken();\r
+   final int previous = startRow.getIndex() - 1;\r
+   final boolean atEntryStart = currentStart == rowIndex;\r
+   if (!atEntryStart || previous < 0) {\r
+     // Either we are inside an entry, or there is no earlier entry: go to this entry's start.\r
+     return currentStart;\r
+   }\r
+   return sortedIndex.get(previous).startRow;\r
+ }\r
+\r
+ /**\r
+  * Finds the row to jump to when moving forwards from {@code rowIndex}.\r
+  *\r
+  * Returns the start row of the index entry following the one that owns {@code rowIndex};\r
+  * when there is no following entry, returns the last row index ({@code rows.size() - 1}).\r
+  *\r
+  * @param rowIndex index into {@code rows}\r
+  * @return start row of the next index entry, or the last row index\r
+  */\r
+ public int getNextTokenRow(final int rowIndex) {\r
+   final IndexEntry current = getIndexEntryForRow(rowIndex);\r
+   final Row startRow = rows.get(current.startRow);\r
+   assert startRow.isToken();\r
+   final int following = startRow.getIndex() + 1;\r
+   return following < sortedIndex.size()\r
+       ? sortedIndex.get(following).startRow\r
+       : rows.size() - 1;\r
+ }\r
+\r
+ /**\r
+  * Returns the index entry that owns the given row.\r
+  *\r
+  * Scans backwards from {@code rowIndex} until it reaches a row that is a token row or that\r
+  * already carries a cached {@code indexEntry}, resolves the entry from that row, and then\r
+  * stores the entry on every row from that point up to {@code rowIndex} so later lookups\r
+  * stop sooner.\r
+  *\r
+  * @param rowIndex index into {@code rows}\r
+  * @return the index entry for the row\r
+  * @throws IndexOutOfBoundsException if no token or cached row exists at or before\r
+  *         {@code rowIndex} (including a negative {@code rowIndex})\r
+  */\r
+ public IndexEntry getIndexEntryForRow(final int rowIndex) {\r
+   // TODO: this kinda blows.\r
+   int r = rowIndex;\r
+   Row row = null;\r
+   // Bounded backward scan: stop at index 0 instead of running off the front of the list.\r
+   while (r >= 0) {\r
+     row = rows.get(r);\r
+     if (row.isToken() || row.indexEntry != null) {\r
+       break;\r
+     }\r
+     --r;\r
+   }\r
+   if (r < 0) {\r
+     throw new IndexOutOfBoundsException("No token row at or before row " + rowIndex);\r
+   }\r
+   final IndexEntry indexEntry = row.isToken() ? sortedIndex.get(row.getIndex()) : row.indexEntry;\r
+   // Cache the result on every row visited, from the stopping row up to rowIndex.\r
+   for (; r <= rowIndex; ++r) {\r
+     rows.get(r).indexEntry = indexEntry;\r
+   }\r
+   assert rows.get(indexEntry.startRow).isToken();\r
+   return indexEntry;\r
}\r
}\r
\r
public static final class Row implements RAFSerializable<Row> {\r
final int index;\r
\r
+ IndexEntry indexEntry = null;\r
+\r
public Row(final int index) {\r
this.index = index;\r
}\r