X-Git-Url: http://gitweb.fperrin.net/?a=blobdiff_plain;f=src%2Fcom%2Fhughes%2Fandroid%2Fdictionary%2Fengine%2FHtmlEntry.java;h=f279a3d8f8fa14071dd89bd2ba29d86c8ba15a23;hb=e79165503392ed6a7cb7a8eadc15eaae0cda9443;hp=c7e06282e52aadec67c29d9f897ecc2e543601d2;hpb=9c501f32b5275b07f2104d65d696c0b4735b021c;p=Dictionary.git diff --git a/src/com/hughes/android/dictionary/engine/HtmlEntry.java b/src/com/hughes/android/dictionary/engine/HtmlEntry.java index c7e0628..f279a3d 100644 --- a/src/com/hughes/android/dictionary/engine/HtmlEntry.java +++ b/src/com/hughes/android/dictionary/engine/HtmlEntry.java @@ -1,238 +1,292 @@ -package com.hughes.android.dictionary.engine; -import android.content.Intent; -import android.net.Uri; -import android.util.Log; +package com.hughes.android.dictionary.engine; -import com.hughes.android.dictionary.C; import com.hughes.util.StringUtil; import com.hughes.util.raf.RAFListSerializer; -import com.hughes.util.raf.RAFSerializable; +import com.hughes.util.raf.RAFListSerializerSkippable; import com.ibm.icu.text.Transliterator; +import java.io.DataInput; +import java.io.DataOutput; import java.io.IOException; import java.io.PrintStream; -import java.io.RandomAccessFile; import java.io.UnsupportedEncodingException; import java.lang.ref.SoftReference; -import java.net.URLEncoder; +import java.nio.channels.FileChannel; +import java.nio.charset.StandardCharsets; import java.util.List; import java.util.regex.Pattern; -public class HtmlEntry extends AbstractEntry implements RAFSerializable, Comparable { - - // Title is not HTML escaped. - public final String title; - public final LazyHtmlLoader lazyHtmlLoader; - public String html; - - public HtmlEntry(final EntrySource entrySource, String title) { - super(entrySource); - this.title = title; - lazyHtmlLoader = null; - } - - public HtmlEntry(Dictionary dictionary, RandomAccessFile raf, final int index) throws IOException { - super(dictionary, raf, index); - title = raf.readUTF(); - lazyHtmlLoader = new LazyHtmlLoader(raf); - html = null; - } - - @Override - public void write(RandomAccessFile raf) throws IOException { - super.write(raf); - raf.writeUTF(title); - - final byte[] bytes = getHtml().getBytes("UTF-8"); - final byte[] zipBytes = StringUtil.zipBytes(bytes); - raf.writeInt(bytes.length); - raf.writeInt(zipBytes.length); - raf.write(zipBytes); - } - - String getHtml() { - return html != null ? html : lazyHtmlLoader.getHtml(); - } - - @Override - public void addToDictionary(Dictionary dictionary) { - assert index == -1; - dictionary.htmlEntries.add(this); - index = dictionary.htmlEntries.size() - 1; - } - - @Override - public RowBase CreateRow(int rowIndex, Index dictionaryIndex) { - return new Row(this.index, rowIndex, dictionaryIndex); - } - - static final class Serializer implements RAFListSerializer { - - final Dictionary dictionary; - - Serializer(Dictionary dictionary) { - this.dictionary = dictionary; +public class HtmlEntry extends AbstractEntry implements Comparable { + + // Title is not HTML escaped. + public final String title; + private final LazyHtmlLoader lazyHtmlLoader; + @SuppressWarnings("WeakerAccess") + public String html; + + public HtmlEntry(final EntrySource entrySource, String title) { + super(entrySource); + this.title = title; + lazyHtmlLoader = null; } - @Override - public HtmlEntry read(RandomAccessFile raf, final int index) throws IOException { - return new HtmlEntry(dictionary, raf, index); + public HtmlEntry(Dictionary dictionary, FileChannel ch, DataInput raf, final int index) + throws IOException { + super(dictionary, raf, index); + title = raf.readUTF(); + lazyHtmlLoader = new LazyHtmlLoader(ch, raf, dictionary.htmlData, index); + html = null; + } + + private void writeBase(DataOutput raf) throws IOException { + super.write(raf); + raf.writeUTF(title); + } + + private void writeData(DataOutput raf) throws IOException { + final byte[] bytes = getHtml().getBytes(StandardCharsets.UTF_8); + StringUtil.writeVarInt(raf, bytes.length); + raf.write(bytes); + } + + private static byte[] readData(DataInput raf) throws IOException { + int len = StringUtil.readVarInt(raf); + final byte[] bytes = new byte[Math.min(len, 20 * 1024 * 1024)]; + raf.readFully(bytes); + return bytes; + } + + String getHtml() { + return html != null ? html : lazyHtmlLoader.getHtml(); } @Override - public void write(RandomAccessFile raf, HtmlEntry t) throws IOException { - t.write(raf); - } - }; - - public String getRawText(final boolean compact) { - return title + ":\n" + getHtml(); - } - - - @Override - public int compareTo(HtmlEntry another) { - if (title.compareTo(another.title) != 0) { - return title.compareTo(another.title); - } - return getHtml().compareTo(another.getHtml()); - } - - @Override - public String toString() { - return getRawText(false); - } - - // -------------------------------------------------------------------- - - - public static class Row extends RowBase { - - boolean isExpanded = false; - - Row(final RandomAccessFile raf, final int thisRowIndex, - final Index index) throws IOException { - super(raf, thisRowIndex, index); - } - - Row(final int referenceIndex, final int thisRowIndex, - final Index index) { - super(referenceIndex, thisRowIndex, index); - } - + public void addToDictionary(Dictionary dictionary) { + assert index == -1; + dictionary.htmlEntries.add(this); + index = dictionary.htmlEntries.size() - 1; + } + @Override - public String toString() { - return getRawText(false); + public RowBase CreateRow(int rowIndex, Index dictionaryIndex) { + return new Row(this.index, rowIndex, dictionaryIndex); + } + + static final class Serializer implements RAFListSerializerSkippable { + + final Dictionary dictionary; + final FileChannel ch; + + Serializer(Dictionary dictionary, FileChannel ch) { + this.dictionary = dictionary; + this.ch = ch; + } + + @Override + public HtmlEntry read(DataInput raf, final int index) throws IOException { + return new HtmlEntry(dictionary, ch, raf, index); + } + + @Override + public void skip(DataInput raf, final int index) throws IOException { + if (dictionary.dictFileVersion >= 7) + { + StringUtil.readVarInt(raf); + } + else + { + raf.skipBytes(2); + } + int l = raf.readUnsignedShort(); + raf.skipBytes(l); + } + + @Override + public void write(DataOutput raf, HtmlEntry t) throws IOException { + t.writeBase(raf); + } } - public HtmlEntry getEntry() { - return index.dict.htmlEntries.get(referenceIndex); + static final class DataSerializer implements RAFListSerializer { + @Override + public HtmlEntry read(DataInput raf, final int index) { + assert false; + return null; + } + + @Override + public void write(DataOutput raf, HtmlEntry t) throws IOException { + t.writeData(raf); + } } - - @Override - public void print(PrintStream out) { - final HtmlEntry entry = getEntry(); - out.println("See also HtmlEntry:" + entry.title); + + static final class DataDeserializer implements RAFListSerializer { + @Override + public byte[] read(DataInput raf, final int index) throws IOException { + return HtmlEntry.readData(raf); + } + + @Override + public void write(DataOutput raf, byte[] t) { + assert false; + } + } + + private String getRawText(final boolean compact) { + return title + ":\n" + getHtml(); } @Override - public String getRawText(boolean compact) { - final HtmlEntry entry = getEntry(); - return entry.getRawText(compact); + public int compareTo(/*@NonNull*/ HtmlEntry another) { + if (title.compareTo(another.title) != 0) { + return title.compareTo(another.title); + } + return getHtml().compareTo(another.getHtml()); } @Override - public RowMatchType matches(final List searchTokens, final Pattern orderedMatchPattern, final Transliterator normalizer, final boolean swapPairEntries) { - final String text = normalizer.transform(getRawText(false)); - if (orderedMatchPattern.matcher(text).find()) { - return RowMatchType.ORDERED_MATCH; - } - for (int i = searchTokens.size() - 1; i >= 0; --i) { - final String searchToken = searchTokens.get(i); - if (!text.contains(searchToken)) { - return RowMatchType.NO_MATCH; + public String toString() { + return getRawText(false); + } + + // -------------------------------------------------------------------- + + public static class Row extends RowBase { + + Row(final DataInput raf, final int thisRowIndex, + final Index index, int extra) throws IOException { + super(raf, thisRowIndex, index, extra); + } + + Row(final int referenceIndex, final int thisRowIndex, + final Index index) { + super(referenceIndex, thisRowIndex, index); + } + + @Override + public String toString() { + return getRawText(false); + } + + public HtmlEntry getEntry() { + return index.dict.htmlEntries.get(referenceIndex); + } + + @Override + public void print(PrintStream out) { + final HtmlEntry entry = getEntry(); + out.println("See also HtmlEntry:" + entry.title); + } + + @Override + public String getRawText(boolean compact) { + final HtmlEntry entry = getEntry(); + return entry.getRawText(compact); + } + + @Override + public RowMatchType matches(final List searchTokens, + final Pattern orderedMatchPattern, final Transliterator normalizer, + final boolean swapPairEntries) { + final String text = normalizer.transform(getRawText(false)); + if (orderedMatchPattern.matcher(text).find()) { + return RowMatchType.ORDERED_MATCH; + } + for (int i = searchTokens.size() - 1; i >= 0; --i) { + final String searchToken = searchTokens.get(i); + if (!text.contains(searchToken)) { + return RowMatchType.NO_MATCH; + } + } + return RowMatchType.BAG_OF_WORDS_MATCH; } - } - return RowMatchType.BAG_OF_WORDS_MATCH; } - } public static String htmlBody(final List htmlEntries, final String indexShortName) { final StringBuilder result = new StringBuilder(); for (final HtmlEntry htmlEntry : htmlEntries) { - final String titleEscaped = StringUtil.escapeToPureHtmlUnicode(htmlEntry.title); - result.append(String.format("

%s

\n

%s\n", - formatQuickdicUrl(indexShortName, titleEscaped), titleEscaped, - htmlEntry.getHtml())); + final String titleEscaped = StringUtil.escapeUnicodeToPureHtml(htmlEntry.title); + result.append(String.format("

%s

\n

%s\n", + formatQuickdicUrl(indexShortName, htmlEntry.title), titleEscaped, + htmlEntry.getHtml())); } return result.toString(); } - + + @SuppressWarnings("WeakerAccess") public static String formatQuickdicUrl(final String indexShortName, final String text) { assert !indexShortName.contains(":"); assert text.length() > 0; - try { - return String.format("qd:%s:%s", indexShortName, URLEncoder.encode(text, "UTF-8")); - } catch (UnsupportedEncodingException e) { - throw new RuntimeException(e); - } + return String.format("q://d?%s&%s", indexShortName, StringUtil.encodeForUrl(text)); } public static boolean isQuickdicUrl(String url) { - return url.startsWith("qd:"); - } - - public static void quickdicUrlToIntent(final String url, final Intent intent) { - int firstColon = url.indexOf(":"); - if (firstColon == -1) return; - int secondColon = url.indexOf(":", firstColon + 1); - if (secondColon == -1) return; - intent.putExtra(C.SEARCH_TOKEN, Uri.decode(url.substring(secondColon + 1))); + return url.startsWith("q://d?"); } - + // -------------------------------------------------------------------- - + + @SuppressWarnings("WeakerAccess") public static final class LazyHtmlLoader { - final RandomAccessFile raf; + final DataInput raf; + final FileChannel ch; final long offset; final int numBytes; final int numZipBytes; - + final List data; + final int index; + // Not sure this volatile is right, but oh well. - volatile SoftReference htmlRef = new SoftReference(null); - - private LazyHtmlLoader(final RandomAccessFile raf) throws IOException { - this.raf = raf; - numBytes = raf.readInt(); - numZipBytes = raf.readInt(); - offset = raf.getFilePointer(); + volatile SoftReference htmlRef = new SoftReference<>(null); + + private LazyHtmlLoader(FileChannel ch, final DataInput inp, List data, int index) throws IOException { + this.data = data; + this.index = index; + if (data != null) { + this.raf = null; + this.ch = null; + this.offset = 0; + this.numBytes = -1; + this.numZipBytes = -1; + return; + } + raf = inp; + this.ch = ch; + numBytes = Math.min(raf.readInt(), 20 * 1024 * 1024); + numZipBytes = Math.min(raf.readInt(), 20 * 1024 * 1024); + offset = ch.position(); raf.skipBytes(numZipBytes); } - - public String getHtml() { + + String getHtml() { String html = htmlRef.get(); if (html != null) { return html; } - System.out.println("Loading Html: numBytes=" + numBytes + ", numZipBytes=" + numZipBytes); - final byte[] bytes = new byte[numBytes]; + if (data != null) { + html = new String(data.get(index), StandardCharsets.UTF_8); + htmlRef = new SoftReference<>(html); + return html; + } + System.out.println("Loading Html: numBytes=" + numBytes + ", numZipBytes=" + + numZipBytes); final byte[] zipBytes = new byte[numZipBytes]; - synchronized (raf) { + synchronized (ch) { try { - raf.seek(offset); - raf.read(zipBytes); + ch.position(offset); + raf.readFully(zipBytes); } catch (IOException e) { - throw new RuntimeException(e); + throw new RuntimeException("Failed to read HTML data from dictionary", e); } } try { - StringUtil.unzipFully(zipBytes, bytes); - html = new String(bytes, "UTF-8"); + final byte[] bytes = StringUtil.unzipFully(zipBytes, numBytes); + html = new String(bytes, StandardCharsets.UTF_8); } catch (IOException e) { - throw new RuntimeException(e); + throw new RuntimeException("Dictionary HTML data corrupted", e); } - htmlRef = new SoftReference(html); + htmlRef = new SoftReference<>(html); return html; } }