import java.io.IOException;\r
import java.io.RandomAccessFile;\r
import java.util.ArrayList;\r
+import java.util.Comparator;\r
import java.util.List;\r
+import java.util.concurrent.atomic.AtomicBoolean;\r
\r
import com.hughes.util.CachingList;\r
import com.hughes.util.raf.FileList;\r
import com.hughes.util.raf.RAFSerializable;\r
import com.hughes.util.raf.RAFSerializableSerializer;\r
import com.hughes.util.raf.RAFSerializer;\r
+import com.hughes.util.raf.UniformFileList;\r
\r
public final class Dictionary implements RAFSerializable<Dictionary> {\r
- \r
- static final RAFSerializer<Entry> ENTRY_SERIALIZER = new RAFSerializableSerializer<Entry>(Entry.RAF_FACTORY);\r
- static final RAFSerializer<Row> ROW_SERIALIZER = new RAFSerializableSerializer<Row>(Row.RAF_FACTORY);\r
- static final RAFSerializer<IndexEntry> INDEX_ENTRY_SERIALIZER = new RAFSerializableSerializer<IndexEntry>(IndexEntry.RAF_FACTORY);\r
+\r
+ static final RAFSerializer<Entry> ENTRY_SERIALIZER = new RAFSerializableSerializer<Entry>(\r
+ Entry.RAF_FACTORY);\r
+ static final RAFSerializer<Row> ROW_SERIALIZER = new RAFSerializableSerializer<Row>(\r
+ Row.RAF_FACTORY);\r
+ static final RAFSerializer<IndexEntry> INDEX_ENTRY_SERIALIZER = new RAFSerializableSerializer<IndexEntry>(\r
+ IndexEntry.RAF_FACTORY);\r
\r
final List<Entry> entries;\r
final Language[] languages = new Language[2];\r
- \r
- Language activeLanguage = null;\r
\r
public Dictionary(final String lang0, final String lang1) {\r
- languages[0] = new Language(lang0);\r
- languages[1] = new Language(lang1);\r
+ languages[0] = new Language(lang0, Entry.LANG1);\r
+ languages[1] = new Language(lang1, Entry.LANG2);\r
entries = new ArrayList<Entry>();\r
}\r
- \r
+\r
public Dictionary(final RandomAccessFile raf) throws IOException {\r
- entries = CachingList.create(FileList.create(raf, ENTRY_SERIALIZER, raf.getFilePointer()), 10000);\r
- languages[0] = new Language(raf);\r
- languages[1] = new Language(raf);\r
+ entries = CachingList.create(FileList.create(raf, ENTRY_SERIALIZER, raf\r
+ .getFilePointer()), 10000);\r
+ languages[0] = new Language(raf, Entry.LANG1);\r
+ languages[1] = new Language(raf, Entry.LANG2);\r
}\r
+\r
public void write(RandomAccessFile raf) throws IOException {\r
FileList.write(raf, entries, ENTRY_SERIALIZER);\r
languages[0].write(raf);\r
languages[1].write(raf);\r
}\r
- \r
- static final class Language implements RAFSerializable<Language> {\r
+\r
+ final class Language implements RAFSerializable<Language> {\r
+ final byte lang;\r
final String symbol;\r
final List<Row> rows;\r
final List<IndexEntry> sortedIndex;\r
- \r
- public Language(final String symbol) {\r
+ final Comparator<String> comparator = EntryFactory.entryFactory\r
+ .getEntryComparator();\r
+\r
+ Language(final String symbol, final byte lang) {\r
+ this.lang = lang;\r
this.symbol = symbol;\r
rows = new ArrayList<Row>();\r
sortedIndex = new ArrayList<IndexEntry>();\r
}\r
\r
- public Language(final RandomAccessFile raf) throws IOException {\r
+ Language(final RandomAccessFile raf, final byte lang) throws IOException {\r
+ this.lang = lang;\r
symbol = raf.readUTF();\r
- rows = CachingList.create(FileList.create(raf, ROW_SERIALIZER, raf.getFilePointer()), 10000);\r
- sortedIndex = CachingList.create(FileList.create(raf, INDEX_ENTRY_SERIALIZER, raf.getFilePointer()), 10000);\r
+ rows = CachingList.create(UniformFileList.create(raf, ROW_SERIALIZER, raf\r
+ .getFilePointer()), 10000);\r
+ sortedIndex = CachingList.create(FileList.create(raf,\r
+ INDEX_ENTRY_SERIALIZER, raf.getFilePointer()), 10000);\r
}\r
+\r
public void write(final RandomAccessFile raf) throws IOException {\r
raf.writeUTF(symbol);\r
- FileList.write(raf, rows, ROW_SERIALIZER);\r
+ UniformFileList.write(raf, rows, ROW_SERIALIZER, 4);\r
FileList.write(raf, sortedIndex, INDEX_ENTRY_SERIALIZER);\r
}\r
+\r
+ /**\r
+  * Renders a row as display text.\r
+  *\r
+  * A token row resolves to the word of the index entry it points at; any other row\r
+  * resolves to the string form of the dictionary entry it points at.\r
+  *\r
+  * @param row the row to render\r
+  * @return the index word for token rows, otherwise the entry's toString() value\r
+  */\r
+ String rowToString(final Row row) {\r
+   if (row.isToken()) {\r
+     final IndexEntry tokenEntry = sortedIndex.get(row.getIndex());\r
+     return tokenEntry.word;\r
+   }\r
+   final Entry pointedEntry = entries.get(row.getIndex());\r
+   return pointedEntry.toString();\r
+ }\r
+\r
+ /**\r
+  * Binary-searches sortedIndex for the given word, lower-casing both sides before comparing.\r
+  *\r
+  * @param word the word to look up\r
+  * @param interrupted polled on every probe and on every backward step; once it reads\r
+  *        true the search stops and returns the position reached so far\r
+  * @return the position of the first index entry that compares equal to word; when no\r
+  *         entry matches, the position where the search range collapsed (may equal\r
+  *         sortedIndex.size()); when interrupted, the position being examined at that\r
+  *         moment, which is not guaranteed to be either of the above\r
+  */\r
+ int lookup(String word, final AtomicBoolean interrupted) {\r
+   // NOTE(review): toLowerCase() uses the default locale; presumably the index was\r
+   // built the same way -- confirm before switching to an explicit locale.\r
+   word = word.toLowerCase();\r
+\r
+   int start = 0;\r
+   int end = sortedIndex.size();\r
+   while (start < end) {\r
+     // Unsigned shift keeps the midpoint correct even if start + end overflows int.\r
+     final int mid = (start + end) >>> 1;\r
+     if (interrupted.get()) {\r
+       return mid;\r
+     }\r
+     final IndexEntry midEntry = sortedIndex.get(mid);\r
+\r
+     final int comp = comparator.compare(word, midEntry.word.toLowerCase());\r
+     if (comp == 0) {\r
+       // Several adjacent entries may compare equal; walk back to the first of them.\r
+       int result = mid;\r
+       while (result > 0 && comparator.compare(word, sortedIndex.get(result - 1).word.toLowerCase()) == 0) {\r
+         --result;\r
+         if (interrupted.get()) {\r
+           return result;\r
+         }\r
+       }\r
+       return result;\r
+     } else if (comp < 0) {\r
+       end = mid;\r
+     } else {\r
+       start = mid + 1;\r
+     }\r
+   }\r
+   return start;\r
+ }\r
}\r
- \r
+\r
public static final class Row implements RAFSerializable<Row> {\r
final int index;\r
\r
- public Row(int index) {\r
+ public Row(final int index) {\r
this.index = index;\r
}\r
\r
static final RAFFactory<Row> RAF_FACTORY = new RAFFactory<Row>() {\r
public Row create(RandomAccessFile raf) throws IOException {\r
return new Row(raf.readInt());\r
- }};\r
+ }\r
+ };\r
+\r
public void write(RandomAccessFile raf) throws IOException {\r
raf.writeInt(index);\r
}\r
+\r
+ /**\r
+  * Tells whether this row was stored with a negative index value.\r
+  *\r
+  * @return true when the raw index is below zero\r
+  */\r
+ boolean isToken() {\r
+   // For an int, "at most -1" is the same test as "less than 0".\r
+   return index <= -1;\r
+ }\r
+\r
+ /**\r
+  * Decodes the stored index into a non-negative list position.\r
+  *\r
+  * @return the raw index when it is zero or positive, otherwise -index - 1\r
+  */\r
+ public int getIndex() {\r
+   // Negative raw values map back via -index - 1, so -1 becomes 0, -2 becomes 1, ...\r
+   return index < 0 ? -index - 1 : index;\r
+ }\r
}\r
\r
public static final class IndexEntry implements RAFSerializable<IndexEntry> {\r
final String word;\r
final int startRow;\r
- \r
+\r
public IndexEntry(final String word, final int startRow) {\r
this.word = word;\r
this.startRow = startRow;\r
final String word = raf.readUTF();\r
final int startRow = raf.readInt();\r
return new IndexEntry(word, startRow);\r
- }};\r
+ }\r
+ };\r
+\r
public void write(final RandomAccessFile raf) throws IOException {\r
raf.writeUTF(word);\r
raf.writeInt(startRow);\r
public String toString() {\r
return word + "@" + startRow;\r
}\r
- \r
- \r
+\r
}\r
\r
}\r