X-Git-Url: http://gitweb.fperrin.net/?a=blobdiff_plain;f=src%2Fcom%2Fhughes%2Fandroid%2Fdictionary%2Fengine%2FDictionary.java;h=cde3eb9ca04fd521f1575a2f1a5294fecb928401;hb=a82db103035d624c4f6d333a0143d54495d66ba2;hp=e74876da9f42daf85166c2bf917240927eb7271f;hpb=ca8920678fca725851fb18fdb1bd01752391e38a;p=Dictionary.git diff --git a/src/com/hughes/android/dictionary/engine/Dictionary.java b/src/com/hughes/android/dictionary/engine/Dictionary.java index e74876d..cde3eb9 100644 --- a/src/com/hughes/android/dictionary/engine/Dictionary.java +++ b/src/com/hughes/android/dictionary/engine/Dictionary.java @@ -14,7 +14,6 @@ package com.hughes.android.dictionary.engine; -import java.io.ByteArrayOutputStream; import java.io.DataInput; import java.io.DataInputStream; import java.io.DataOutput; @@ -29,7 +28,6 @@ import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.Collections; import java.util.List; -import java.util.zip.GZIPOutputStream; import com.hughes.android.dictionary.DictionaryInfo; import com.hughes.util.CachingList; @@ -46,7 +44,7 @@ public class Dictionary implements RAFSerializable { // persisted final int dictFileVersion; - private final long creationMillis; + public final long creationMillis; public final String dictInfo; public final List pairEntries; public final List textEntries; @@ -142,243 +140,6 @@ public class Dictionary implements RAFSerializable { raf.writeUTF(END_OF_DICTIONARY); } - private void writev6Sources(RandomAccessFile out) throws IOException { - out.writeInt(sources.size()); - long tocPos = out.getFilePointer(); - out.seek(tocPos + sources.size() * 8 + 8); - for (EntrySource s : sources) { - long dataPos = out.getFilePointer(); - out.seek(tocPos); - out.writeLong(dataPos); - tocPos += 8; - out.seek(dataPos); - out.writeUTF(s.getName()); - out.writeInt(s.getNumEntries()); - } - long dataPos = out.getFilePointer(); - out.seek(tocPos); - out.writeLong(dataPos); - out.seek(dataPos); - } - - private void writev6PairEntries(RandomAccessFile out) throws IOException { - out.writeInt(pairEntries.size()); - long tocPos = out.getFilePointer(); - out.seek(tocPos + pairEntries.size() * 8 + 8); - for (PairEntry pe : pairEntries) { - long dataPos = out.getFilePointer(); - out.seek(tocPos); - out.writeLong(dataPos); - tocPos += 8; - out.seek(dataPos); - out.writeShort(pe.entrySource.index()); - out.writeInt(pe.pairs.size()); - for (PairEntry.Pair p : pe.pairs) { - out.writeUTF(p.lang1); - out.writeUTF(p.lang2); - } - } - long dataPos = out.getFilePointer(); - out.seek(tocPos); - out.writeLong(dataPos); - out.seek(dataPos); - } - - private void writev6TextEntries(RandomAccessFile out) throws IOException { - out.writeInt(textEntries.size()); - long tocPos = out.getFilePointer(); - out.seek(tocPos + textEntries.size() * 8 + 8); - for (TextEntry t : textEntries) { - long dataPos = out.getFilePointer(); - out.seek(tocPos); - out.writeLong(dataPos); - tocPos += 8; - out.seek(dataPos); - out.writeShort(t.entrySource.index()); - out.writeUTF(t.text); - } - long dataPos = out.getFilePointer(); - out.seek(tocPos); - out.writeLong(dataPos); - out.seek(dataPos); - } - - private void writev6EmptyList(RandomAccessFile out) throws IOException { - out.writeInt(0); - out.writeLong(out.getFilePointer() + 8); - } - - private void writev6HtmlEntries(RandomAccessFile out) throws IOException { - out.writeInt(htmlEntries.size()); - long tocPos = out.getFilePointer(); - out.seek(tocPos + htmlEntries.size() * 8 + 8); - for (HtmlEntry h : htmlEntries) { - long dataPos = out.getFilePointer(); - out.seek(tocPos); - out.writeLong(dataPos); - tocPos += 8; - out.seek(dataPos); - out.writeShort(h.entrySource.index()); - out.writeUTF(h.title); - byte[] data = h.getHtml().getBytes(StandardCharsets.UTF_8); - out.writeInt(data.length); - ByteArrayOutputStream baos = new ByteArrayOutputStream(); - GZIPOutputStream gzout = new GZIPOutputStream(baos); - gzout.write(data); - gzout.close(); - out.writeInt(baos.size()); - out.write(baos.toByteArray()); - } - long dataPos = out.getFilePointer(); - out.seek(tocPos); - out.writeLong(dataPos); - out.seek(dataPos); - } - - private void writev6HtmlIndices(RandomAccessFile out, List entries) throws IOException { - out.writeInt(entries.size()); - long tocPos = out.getFilePointer(); - out.seek(tocPos + entries.size() * 8 + 8); - for (HtmlEntry e : entries) { - long dataPos = out.getFilePointer(); - out.seek(tocPos); - out.writeLong(dataPos); - tocPos += 8; - out.seek(dataPos); - out.writeInt(e.index()); - } - long dataPos = out.getFilePointer(); - out.seek(tocPos); - out.writeLong(dataPos); - out.seek(dataPos); - } - - private void writev6IndexEntries(RandomAccessFile out, List entries, int[] prunedRowIdx) throws IOException { - out.writeInt(entries.size()); - long tocPos = out.getFilePointer(); - out.seek(tocPos + entries.size() * 8 + 8); - for (Index.IndexEntry e : entries) { - long dataPos = out.getFilePointer(); - out.seek(tocPos); - out.writeLong(dataPos); - tocPos += 8; - out.seek(dataPos); - out.writeUTF(e.token); - - int startRow = e.startRow; - int numRows = e.numRows; - if (prunedRowIdx != null) { - // note: the start row will always be a TokenRow - // and thus never be pruned - int newNumRows = 1; - for (int i = 1; i < numRows; i++) { - if (prunedRowIdx[startRow + i] >= 0) newNumRows++; - } - startRow = prunedRowIdx[startRow]; - numRows = newNumRows; - } - - out.writeInt(startRow); - out.writeInt(numRows); - final boolean hasNormalizedForm = !e.token.equals(e.normalizedToken()); - out.writeBoolean(hasNormalizedForm); - if (hasNormalizedForm) out.writeUTF(e.normalizedToken()); - writev6HtmlIndices(out, prunedRowIdx == null ? e.htmlEntries : Collections.emptyList()); - } - long dataPos = out.getFilePointer(); - out.seek(tocPos); - out.writeLong(dataPos); - out.seek(dataPos); - } - - private void writev6Index(RandomAccessFile out, boolean skipHtml) throws IOException { - out.writeInt(indices.size()); - long tocPos = out.getFilePointer(); - out.seek(tocPos + indices.size() * 8 + 8); - for (Index idx : indices) { - // create pruned index for skipHtml feature - int[] prunedRowIdx = null; - int prunedSize = 0; - if (skipHtml) { - prunedRowIdx = new int[idx.rows.size()]; - for (int i = 0; i < idx.rows.size(); i++) { - final RowBase r = idx.rows.get(i); - // prune Html entries - boolean pruned = r instanceof HtmlEntry.Row; - prunedRowIdx[i] = pruned ? -1 : prunedSize; - if (!pruned) prunedSize++; - } - } - - long dataPos = out.getFilePointer(); - out.seek(tocPos); - out.writeLong(dataPos); - tocPos += 8; - out.seek(dataPos); - out.writeUTF(idx.shortName); - out.writeUTF(idx.longName); - out.writeUTF(idx.sortLanguage.getIsoCode()); - out.writeUTF(idx.normalizerRules); - out.writeBoolean(idx.swapPairEntries); - out.writeInt(idx.mainTokenCount); - writev6IndexEntries(out, idx.sortedIndexEntries, prunedRowIdx); - - // write stoplist, serializing the whole Set *shudder* - final ByteArrayOutputStream baos = new ByteArrayOutputStream(); - final ObjectOutputStream oos = new ObjectOutputStream(baos); - oos.writeObject(idx.stoplist); - oos.close(); - final byte[] bytes = baos.toByteArray(); - out.writeInt(bytes.length); - out.write(bytes); - - out.writeInt(skipHtml ? prunedSize : idx.rows.size()); - out.writeInt(5); - for (RowBase r : idx.rows) { - int type = 0; - if (r instanceof PairEntry.Row) { - type = 0; - } else if (r instanceof TokenRow) { - final TokenRow tokenRow = (TokenRow)r; - type = tokenRow.hasMainEntry ? 1 : 3; - } else if (r instanceof TextEntry.Row) { - type = 2; - } else if (r instanceof HtmlEntry.Row) { - type = 4; - if (skipHtml) continue; - } else { - throw new RuntimeException("Row type not supported for v6"); - } - out.writeByte(type); - out.writeInt(r.referenceIndex); - } - } - long dataPos = out.getFilePointer(); - out.seek(tocPos); - out.writeLong(dataPos); - out.seek(dataPos); - } - - public void writev6(DataOutput out, boolean skipHtml) throws IOException { - RandomAccessFile raf = (RandomAccessFile)out; - raf.writeInt(6); - raf.writeLong(creationMillis); - raf.writeUTF(dictInfo); - System.out.println("sources start: " + raf.getFilePointer()); - writev6Sources(raf); - System.out.println("pair start: " + raf.getFilePointer()); - writev6PairEntries(raf); - System.out.println("text start: " + raf.getFilePointer()); - writev6TextEntries(raf); - System.out.println("html index start: " + raf.getFilePointer()); - if (skipHtml) writev6EmptyList(raf); - else writev6HtmlEntries(raf); - System.out.println("indices start: " + raf.getFilePointer()); - writev6Index(raf, skipHtml); - System.out.println("end: " + raf.getFilePointer()); - raf.writeUTF(END_OF_DICTIONARY); - } - private final class IndexSerializer implements RAFListSerializer { private final FileChannel ch;