import com.hughes.util.raf.UniformFileList;\r
\r
public final class Dictionary implements RAFSerializable<Dictionary> {\r
+ \r
// Version marker: write() emits it as the last item of the serialized form and\r
// the RandomAccessFile constructor compares what it reads back against it.\r
+ private static final String VERSION_CODE = "DictionaryVersion=2.0";\r
\r
// Serializers for the persisted element types, each built from that type's RAF_FACTORY.\r
- static final RAFSerializer<Entry> ENTRY_SERIALIZER = new RAFSerializableSerializer<Entry>(\r
- Entry.RAF_FACTORY);\r
+ static final RAFSerializer<SimpleEntry> ENTRY_SERIALIZER = new RAFSerializableSerializer<SimpleEntry>(\r
+ SimpleEntry.RAF_FACTORY);\r
static final RAFSerializer<Row> ROW_SERIALIZER = new RAFSerializableSerializer<Row>(\r
Row.RAF_FACTORY);\r
static final RAFSerializer<IndexEntry> INDEX_ENTRY_SERIALIZER = new RAFSerializableSerializer<IndexEntry>(\r
IndexEntry.RAF_FACTORY);\r
\r
// Descriptive string for this dictionary; it is the first item read and written.\r
final String dictionaryInfo;\r
// List of strings persisted right after dictionaryInfo (presumably the names of\r
// the inputs the dictionary was built from - TODO confirm).\r
+ final List<String> sources;\r
// NOTE(review): still declared as List<Entry> although ENTRY_SERIALIZER is now\r
// typed on SimpleEntry - confirm the generic types line up at the call sites.\r
final List<Entry> entries;\r
// One LanguageData per language: slot 0 is built with LANG1, slot 1 with LANG2.\r
final LanguageData[] languageDatas = new LanguageData[2];\r
\r
// Creates an empty in-memory dictionary: stores dictionaryInfo, starts with empty\r
// sources and entries lists, and builds one LanguageData per language\r
// (slot 0 for language0, slot 1 for language1).\r
// NOTE(review): each LanguageData receives `this` before `entries` is assigned;\r
// presumably its constructor does not read entries - confirm.\r
public Dictionary(final String dictionaryInfo, final Language language0, final Language language1) {\r
this.dictionaryInfo = dictionaryInfo;\r
- languageDatas[0] = new LanguageData(this, language0, Entry.LANG1);\r
- languageDatas[1] = new LanguageData(this, language1, Entry.LANG2);\r
+ sources = new ArrayList<String>();\r
+ languageDatas[0] = new LanguageData(this, language0, SimpleEntry.LANG1);\r
+ languageDatas[1] = new LanguageData(this, language1, SimpleEntry.LANG2);\r
entries = new ArrayList<Entry>();\r
}\r
\r
// Loads a dictionary from raf, consuming the sections in the same order write()\r
// emits them: info string, sources list, entries list, the two LanguageData\r
// sections, and finally the version marker.\r
// Throws IOException when the marker read from the file differs from VERSION_CODE.\r
// Because the marker is stored last, a mismatch is only detected after every\r
// other section has already been read.\r
public Dictionary(final RandomAccessFile raf) throws IOException {\r
dictionaryInfo = raf.readUTF();\r
// The file-backed list is copied into a regular ArrayList.\r
+ sources = new ArrayList<String>(FileList.create(raf, RAFSerializer.STRING, raf.getFilePointer()));\r
// Entries stay file-backed behind CachingList (10000 is presumably the cache\r
// capacity - TODO confirm against CachingList.create).\r
entries = CachingList.create(FileList.create(raf, ENTRY_SERIALIZER, raf\r
.getFilePointer()), 10000);\r
- languageDatas[0] = new LanguageData(this, raf, Entry.LANG1);\r
- languageDatas[1] = new LanguageData(this, raf, Entry.LANG2);\r
+ languageDatas[0] = new LanguageData(this, raf, SimpleEntry.LANG1);\r
+ languageDatas[1] = new LanguageData(this, raf, SimpleEntry.LANG2);\r
+ final String version = raf.readUTF();\r
+ if (!VERSION_CODE.equals(version)) {\r
+ throw new IOException("Invalid dictionary version, found " + version + ", expected: " + VERSION_CODE);\r
+ }\r
}\r
\r
// Serializes this dictionary to raf. Section order: info string, sources list,\r
// entries list, LanguageData 0, LanguageData 1, then VERSION_CODE as a trailer.\r
// The RandomAccessFile constructor reads the sections back in this same order.\r
public void write(RandomAccessFile raf) throws IOException {\r
raf.writeUTF(dictionaryInfo);\r
+ FileList.write(raf, sources, RAFSerializer.STRING);\r
FileList.write(raf, entries, ENTRY_SERIALIZER);\r
languageDatas[0].write(raf);\r
languageDatas[1].write(raf);\r
+ raf.writeUTF(VERSION_CODE);\r
}\r
\r
final class LanguageData implements RAFSerializable<LanguageData> {\r
FileList.write(raf, sortedIndex, INDEX_ENTRY_SERIALIZER);\r
}\r
\r
// Returns the text for a row. A token row yields the word of the index entry at\r
// row.getIndex(); any other row yields the raw text of the dictionary entry at\r
// row.getIndex(), with onlyFirstSubentry passed straight through to getRawText.\r
- String rowToString(final Row row) {\r
+ String rowToString(final Row row, final boolean onlyFirstSubentry) {\r
return row.isToken() ? sortedIndex.get(row.getIndex()).word : entries\r
- .get(row.getIndex()).toString();\r
+ .get(row.getIndex()).getRawText(onlyFirstSubentry);\r
}\r
\r
int lookup(String word, final AtomicBoolean interrupted) {\r
return mid;\r
}\r
final IndexEntry midEntry = sortedIndex.get(mid);\r
+ if (midEntry.word.equals("pre-print")) {\r
+ System.out.println();\r
+ }\r
\r
final int comp = language.sortComparator.compare(word, midEntry.word.toLowerCase());\r
if (comp == 0) {\r
}\r
return result;\r
} else if (comp < 0) {\r
+// Log.d("THAD", "Upper bound: " + midEntry);\r
end = mid;\r
} else {\r
+// Log.d("THAD", "Lower bound: " + midEntry);\r
start = mid + 1;\r
}\r
}\r
return Math.min(sortedIndex.size() - 1, start);\r
}\r
+ \r
+ /**\r
+  * Returns the start row of the index entry that owns {@code rowIndex}. When\r
+  * {@code rowIndex} already is that start row and an earlier entry exists in\r
+  * {@code sortedIndex}, the start row of that earlier entry is returned instead.\r
+  *\r
+  * @param rowIndex row whose owning index entry is looked up\r
+  * @return the row index described above\r
+  */\r
+ public int getPrevTokenRow(final int rowIndex) {\r
+   final IndexEntry owner = getIndexEntryForRow(rowIndex);\r
+   final int ownerStartRow = owner.startRow;\r
+   final Row headerRow = rows.get(ownerStartRow);\r
+   assert headerRow.isToken();\r
+   final int precedingTokenIndex = headerRow.getIndex() - 1;\r
+   // Only step back when the caller is already sitting on the entry's first row.\r
+   final boolean onHeaderRow = (ownerStartRow == rowIndex);\r
+   if (!onHeaderRow || precedingTokenIndex < 0) {\r
+     return ownerStartRow;\r
+   }\r
+   return sortedIndex.get(precedingTokenIndex).startRow;\r
+ }\r
+\r
+ /**\r
+  * Returns the start row of the index entry that follows the one owning\r
+  * {@code rowIndex}. When the owning entry is the last one in\r
+  * {@code sortedIndex}, the index of the last row is returned.\r
+  *\r
+  * @param rowIndex row whose owning index entry is looked up\r
+  * @return the row index described above\r
+  */\r
+ public int getNextTokenRow(final int rowIndex) {\r
+   final IndexEntry owner = getIndexEntryForRow(rowIndex);\r
+   final Row headerRow = rows.get(owner.startRow);\r
+   assert headerRow.isToken();\r
+   final int followingTokenIndex = headerRow.getIndex() + 1;\r
+   final boolean ownerIsLast = followingTokenIndex >= sortedIndex.size();\r
+   return ownerIsLast ? rows.size() - 1 : sortedIndex.get(followingTokenIndex).startRow;\r
+ }\r
\r
public IndexEntry getIndexEntryForRow(final int rowIndex) {\r
// TODO: this lookup is clumsy; find a cleaner way to map a row to its index entry.\r
for (; r <= rowIndex; ++r) {\r
rows.get(r).indexEntry = indexEntry;\r
}\r
- assert false && rows.get(indexEntry.startRow).isToken();\r
+ assert rows.get(indexEntry.startRow).isToken();\r
return indexEntry;\r
}\r
}\r