X-Git-Url: http://gitweb.fperrin.net/?a=blobdiff_plain;f=src%2Fcom%2Fhughes%2Fandroid%2Fdictionary%2Fengine%2FIndex.java;h=5b7b43d5aadd20a39a3de5e047b3ca348cdd7b83;hb=5f7b259669237dad4cbfdec8536537815846979b;hp=30c18e2eb3bce1acd67ae9cac22d4ad102bb7637;hpb=83d9dc7cd871082a82c2dd0dbb7a0ceabd7c83a0;p=Dictionary.git diff --git a/src/com/hughes/android/dictionary/engine/Index.java b/src/com/hughes/android/dictionary/engine/Index.java index 30c18e2..5b7b43d 100644 --- a/src/com/hughes/android/dictionary/engine/Index.java +++ b/src/com/hughes/android/dictionary/engine/Index.java @@ -14,40 +14,38 @@ package com.hughes.android.dictionary.engine; -import com.hughes.android.dictionary.DictionaryInfo; -import com.hughes.android.dictionary.DictionaryInfo.IndexInfo; -import com.hughes.android.dictionary.engine.RowBase.RowKey; -import com.hughes.util.CachingList; -import com.hughes.util.StringUtil; -import com.hughes.util.TransformingList; -import com.hughes.util.raf.RAFList; -import com.hughes.util.raf.RAFSerializable; -import com.hughes.util.raf.RAFSerializer; -import com.hughes.util.raf.SerializableSerializer; -import com.hughes.util.raf.UniformRAFList; -import com.ibm.icu.text.Transliterator; - import java.io.DataInput; import java.io.DataOutput; import java.io.IOException; import java.io.PrintStream; import java.io.RandomAccessFile; -import java.nio.channels.FileChannel; import java.util.AbstractList; import java.util.ArrayList; import java.util.Collection; import java.util.Collections; import java.util.Comparator; import java.util.EnumMap; -import java.util.HashSet; import java.util.HashMap; +import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Set; import java.util.concurrent.atomic.AtomicBoolean; import java.util.regex.Pattern; -public final class Index implements RAFSerializable { +import com.hughes.android.dictionary.DictionaryInfo; +import com.hughes.android.dictionary.DictionaryInfo.IndexInfo; +import com.hughes.android.dictionary.engine.RowBase.RowKey; +import com.hughes.util.CachingList; +import com.hughes.util.DataInputBuffer; +import com.hughes.util.StringUtil; +import com.hughes.util.TransformingList; +import com.hughes.util.raf.RAFList; +import com.hughes.util.raf.RAFSerializer; +import com.hughes.util.raf.UniformRAFList; +import com.ibm.icu.text.Transliterator; + +public final class Index { private static final int CACHE_SIZE = 5000; @@ -58,7 +56,7 @@ public final class Index implements RAFSerializable { // persisted: tells how the entries are sorted. public final Language sortLanguage; - private final String normalizerRules; + public final String normalizerRules; // Built from the two above. private Transliterator normalizer; @@ -67,7 +65,7 @@ public final class Index implements RAFSerializable { public final List sortedIndexEntries; // persisted. - private final Set stoplist; + public final Set stoplist; // One big list! // Various sub-types. @@ -117,7 +115,7 @@ public final class Index implements RAFSerializable { return new NormalizeComparator(normalizer(), sortLanguage.getCollator(), dict.dictFileVersion); } - public Index(final Dictionary dict, final FileChannel inp, final DataInput raf) throws IOException { + public Index(final Dictionary dict, final DataInputBuffer raf) throws IOException { this.dict = dict; shortName = raf.readUTF(); longName = raf.readUTF(); @@ -125,14 +123,11 @@ public final class Index implements RAFSerializable { sortLanguage = Language.lookup(languageCode); normalizerRules = raf.readUTF(); swapPairEntries = raf.readBoolean(); - if (sortLanguage == null) { - throw new IOException("Unsupported language: " + languageCode); - } if (dict.dictFileVersion >= 2) { mainTokenCount = raf.readInt(); } sortedIndexEntries = CachingList.create( - RAFList.create(inp, new IndexEntrySerializer(dict.dictFileVersion == 6 ? inp : null), inp.position(), + RAFList.create(raf, new IndexEntrySerializer(), dict.dictFileVersion, dict.dictInfo + " idx " + languageCode + ": "), CACHE_SIZE, true); if (dict.dictFileVersion >= 7) { int count = StringUtil.readVarInt(raf); @@ -141,16 +136,22 @@ public final class Index implements RAFSerializable { stoplist.add(raf.readUTF()); } } else if (dict.dictFileVersion >= 4) { - stoplist = new SerializableSerializer>().read(raf); + raf.readInt(); // length + raf.skipBytes(52); + stoplist = new HashSet<>(); + byte b; + while ((b = raf.readByte()) == 0x74) { + stoplist.add(raf.readUTF()); + } + if (b != 0x78) throw new IOException("Invalid data in dictionary stoplist!"); } else { stoplist = Collections.emptySet(); } rows = CachingList.create( - UniformRAFList.create(inp, new RowBase.Serializer(this), inp.position()), + UniformRAFList.create(raf, new RowBase.Serializer(this)), CACHE_SIZE, true); } - @Override public void write(final DataOutput out) throws IOException { RandomAccessFile raf = (RandomAccessFile)out; raf.writeUTF(shortName); @@ -158,10 +159,8 @@ public final class Index implements RAFSerializable { raf.writeUTF(sortLanguage.getIsoCode()); raf.writeUTF(normalizerRules); raf.writeBoolean(swapPairEntries); - if (dict.dictFileVersion >= 2) { - raf.writeInt(mainTokenCount); - } - RAFList.write(raf, sortedIndexEntries, new IndexEntrySerializer(null), 32, true); + raf.writeInt(mainTokenCount); + RAFList.write(raf, sortedIndexEntries, new IndexEntrySerializer(), 32, true); StringUtil.writeVarInt(raf, stoplist.size()); for (String i : stoplist) { raf.writeUTF(i); @@ -176,15 +175,9 @@ public final class Index implements RAFSerializable { } private final class IndexEntrySerializer implements RAFSerializer { - private final FileChannel ch; - - IndexEntrySerializer(FileChannel ch) { - this.ch = ch; - } - @Override public IndexEntry read(DataInput raf) throws IOException { - return new IndexEntry(Index.this, ch, raf); + return new IndexEntry(Index.this, raf); } @Override @@ -193,7 +186,7 @@ public final class Index implements RAFSerializable { } } - public static final class IndexEntry implements RAFSerializable { + public static final class IndexEntry { public final String token; private final String normalizedToken; public final int startRow; @@ -211,7 +204,7 @@ public final class Index implements RAFSerializable { this.htmlEntries = new ArrayList<>(); } - IndexEntry(final Index index, final FileChannel ch, final DataInput raf) throws IOException { + IndexEntry(final Index index, final DataInput raf) throws IOException { token = raf.readUTF(); if (index.dict.dictFileVersion >= 7) { startRow = StringUtil.readVarInt(raf); @@ -236,6 +229,7 @@ public final class Index implements RAFSerializable { public HtmlEntry get(int i) { return index.dict.htmlEntries.get(htmlEntryIndices[i]); } + @Override public int size() { return htmlEntryIndices.length; @@ -244,8 +238,8 @@ public final class Index implements RAFSerializable { } } else if (index.dict.dictFileVersion >= 6) { this.htmlEntries = CachingList.create( - RAFList.create(ch, index.dict.htmlEntryIndexSerializer, - ch.position(), index.dict.dictFileVersion, + RAFList.create((DataInputBuffer)raf, index.dict.htmlEntryIndexSerializer, + index.dict.dictFileVersion, index.dict.dictInfo + " htmlEntries: "), 1, false); } else { this.htmlEntries = Collections.emptyList(); @@ -378,7 +372,7 @@ public final class Index implements RAFSerializable { return result; } - private final int windBackCase(final String token, int result, final AtomicBoolean interrupted) { + private int windBackCase(final String token, int result, final AtomicBoolean interrupted) { while (result > 0 && sortedIndexEntries.get(result - 1).normalizedToken().equals(token)) { --result; if (interrupted.get()) { @@ -396,8 +390,8 @@ public final class Index implements RAFSerializable { private final Map prefixToNumRows = new HashMap<>(); - private synchronized final int getUpperBoundOnRowsStartingWith(final String normalizedPrefix, - final int maxRows, final AtomicBoolean interrupted) { + private synchronized int getUpperBoundOnRowsStartingWith(final String normalizedPrefix, + final int maxRows, final AtomicBoolean interrupted) { final Integer numRows = prefixToNumRows.get(normalizedPrefix); if (numRows != null) { return numRows; @@ -410,7 +404,8 @@ public final class Index implements RAFSerializable { return -1; } final IndexEntry indexEntry = sortedIndexEntries.get(index); - if (!indexEntry.normalizedToken.startsWith(normalizedPrefix)) { + if (!indexEntry.normalizedToken.startsWith(normalizedPrefix) && + !NormalizeComparator.withoutDash(indexEntry.normalizedToken).startsWith(normalizedPrefix)) { break; } rowCount += indexEntry.numRows + indexEntry.htmlEntries.size(); @@ -500,7 +495,8 @@ public final class Index implements RAFSerializable { return null; } final IndexEntry indexEntry = sortedIndexEntries.get(index); - if (!indexEntry.normalizedToken.startsWith(searchToken)) { + if (!indexEntry.normalizedToken.startsWith(searchToken) && + !NormalizeComparator.withoutDash(indexEntry.normalizedToken).startsWith(searchToken)) { break; } @@ -534,7 +530,7 @@ public final class Index implements RAFSerializable { swapPairEntries); for (final Collection rows : matches.values()) { final List ordered = new ArrayList<>(rows); - Collections.sort(ordered, lengthComparator); + ordered.sort(lengthComparator); result.addAll(ordered); }