2 package com.hughes.android.dictionary.engine;
4 import android.support.annotation.NonNull;
6 import com.hughes.util.StringUtil;
7 import com.hughes.util.raf.RAFListSerializer;
8 import com.hughes.util.raf.RAFListSerializerSkippable;
9 import com.ibm.icu.text.Transliterator;
11 import java.io.DataInput;
12 import java.io.DataOutput;
13 import java.io.IOException;
14 import java.io.PrintStream;
15 import java.io.UnsupportedEncodingException;
16 import java.lang.ref.SoftReference;
17 import java.nio.channels.FileChannel;
18 import java.util.List;
19 import java.util.regex.Pattern;
/**
 * A dictionary entry made of a plain-text title and an HTML body.
 *
 * <p>Entries order by title first and by HTML text second (see compareTo).
 * The HTML is obtained through getHtml(), which falls back to a LazyHtmlLoader
 * when the in-memory html value is null.
 */
21 public class HtmlEntry extends AbstractEntry implements Comparable<HtmlEntry> {
23 // Title is not HTML escaped.
24 public final String title;
// Source of the HTML when it is not held in memory; null for entries created
// through the (EntrySource, String) constructor.
25 private final LazyHtmlLoader lazyHtmlLoader;
// NOTE(review): the declaration this annotation applies to is not visible in this view.
26 @SuppressWarnings("WeakerAccess")
/**
 * Creates an entry that has no lazy loader attached.
 *
 * @param entrySource source of this entry; presumably forwarded to the superclass
 *                    (that call is not visible in this view, TODO confirm)
 * @param title       entry title, not HTML escaped
 */
29 public HtmlEntry(final EntrySource entrySource, String title) {
// getHtml() dereferences the loader whenever html is null, so html has to be
// set before getHtml() is called on an entry built this way.
32 lazyHtmlLoader = null;
/**
 * Reads an entry from serialized dictionary data.
 *
 * @param dictionary dictionary being loaded; its htmlData list is handed to the loader
 * @param ch         file channel passed through to the LazyHtmlLoader
 * @param raf        input positioned at this entry's serialized form
 * @param index      index of this entry, passed to the superclass and to the loader
 */
35 public HtmlEntry(Dictionary dictionary, FileChannel ch, DataInput raf, final int index)
37 super(dictionary, raf, index);
// The title is stored in the DataInput.readUTF format (modified UTF-8).
38 title = raf.readUTF();
// The HTML text is not decoded here; LazyHtmlLoader.getHtml() produces it later.
39 lazyHtmlLoader = new LazyHtmlLoader(ch, raf, dictionary.htmlData, index);
// Presumably writes the non-HTML part of this entry to raf.
// NOTE(review): the method body is not visible in this view; confirm what is written.
43 private void writeBase(DataOutput raf) throws IOException {
/**
 * Serializes the HTML body of this entry.
 *
 * <p>The HTML is encoded as UTF-8 and its byte count is written as a var-int.
 * NOTE(review): the write of the bytes themselves is not visible in this view; confirm.
 *
 * @param raf destination output
 * @throws IOException if writing to raf fails
 */
48 private void writeData(DataOutput raf) throws IOException {
49 final byte[] bytes = getHtml().getBytes("UTF-8");
50 StringUtil.writeVarInt(raf, bytes.length);
/**
 * Reads one length-prefixed byte block, the counterpart of writeData.
 *
 * @param raf input positioned at a var-int length
 * @throws IOException if reading from raf fails
 */
54 private static byte[] readData(DataInput raf) throws IOException {
55 int len = StringUtil.readVarInt(raf);
// The buffer is capped at 20 MiB, presumably to guard against a corrupt length.
// NOTE(review): only the upper bound is checked; a negative len would make the
// array allocation throw NegativeArraySizeException.
56 final byte[] bytes = new byte[Math.min(len, 20 * 1024 * 1024)];
// Prefer the in-memory html value; only when it is null ask the lazy loader.
// lazyHtmlLoader may itself be null (see the (EntrySource, String) constructor).
62 return html != null ? html : lazyHtmlLoader.getHtml();
/**
 * Appends this entry to the dictionary's htmlEntries list and records its position.
 *
 * @param dictionary dictionary whose htmlEntries list receives this entry
 */
66 public void addToDictionary(Dictionary dictionary) {
68 dictionary.htmlEntries.add(this);
// index becomes the position the entry was just stored at.
69 index = dictionary.htmlEntries.size() - 1;
/**
 * Builds an HtmlEntry.Row that refers to this entry through its index.
 *
 * @param rowIndex        position of the new row, passed to the Row constructor
 * @param dictionaryIndex index the new row belongs to
 * @return a new Row whose reference index is this entry's index
 */
73 public RowBase CreateRow(int rowIndex, Index dictionaryIndex) {
74 return new Row(this.index, rowIndex, dictionaryIndex);
/**
 * List serializer for HtmlEntry records that can also step over a record
 * without building an entry.
 */
77 static final class Serializer implements RAFListSerializerSkippable<HtmlEntry> {
// Passed to every entry created by read(); its dictFileVersion is consulted in skip().
79 final Dictionary dictionary;
82 Serializer(Dictionary dictionary, FileChannel ch) {
83 this.dictionary = dictionary;
// Materializes the entry found at the current input position.
88 public HtmlEntry read(DataInput raf, final int index) throws IOException {
89 return new HtmlEntry(dictionary, ch, raf, index);
// Advances past one record. The visible lines show a var-int read that depends on
// dictFileVersion >= 7 and, further down, an unsigned-short read.
// NOTE(review): the lines in between are not visible in this view; the short is
// presumably the length prefix of the title written by writeUTF, TODO confirm.
93 public void skip(DataInput raf, final int index) throws IOException {
94 if (dictionary.dictFileVersion >= 7)
96 StringUtil.readVarInt(raf);
102 int l = raf.readUnsignedShort();
// NOTE(review): the body of write() is not visible in this view.
107 public void write(DataOutput raf, HtmlEntry t) throws IOException {
// Presumably the serializer for the HTML data part of an entry.
// NOTE(review): neither method body is visible in this view. read() declares no
// IOException, unlike write().
112 static final class DataSerializer implements RAFListSerializer<HtmlEntry> {
114 public HtmlEntry read(DataInput raf, final int index) {
120 public void write(DataOutput raf, HtmlEntry t) throws IOException {
/**
 * Serializer that yields the raw HTML bytes of an entry instead of an HtmlEntry.
 */
125 static final class DataDeserializer implements RAFListSerializer<byte[]> {
// Delegates to HtmlEntry.readData, which reads one length-prefixed byte block.
127 public byte[] read(DataInput raf, final int index) throws IOException {
128 return HtmlEntry.readData(raf);
// NOTE(review): the body of write() is not visible in this view.
132 public void write(DataOutput raf, byte[] t) {
/**
 * Returns the title, a colon and a newline, followed by the HTML of this entry.
 *
 * @param compact not used by the visible code
 */
137 private String getRawText(final boolean compact) {
138 return title + ":\n" + getHtml();
/**
 * Orders entries by title and, for equal titles, by HTML text.
 *
 * @param another entry to compare against
 * @return the result of String.compareTo on the titles if they differ,
 *         otherwise on the HTML of both entries
 */
142 public int compareTo(@NonNull HtmlEntry another) {
143 if (title.compareTo(another.title) != 0) {
144 return title.compareTo(another.title);
// Titles are equal: fall back to the HTML, which calls getHtml() on both entries.
146 return getHtml().compareTo(another.getHtml());
// Same text as getRawText(false): the title followed by the HTML.
150 public String toString() {
151 return getRawText(false);
154 // --------------------------------------------------------------------
/**
 * Row that points at an HtmlEntry by its position in the dictionary's htmlEntries list.
 */
156 public static class Row extends RowBase {
// Deserializing constructor: all arguments are forwarded to RowBase.
158 Row(final DataInput raf, final int thisRowIndex,
159 final Index index, int extra) throws IOException {
160 super(raf, thisRowIndex, index, extra);
// Constructor for a row whose referenced entry index is already known
// (used by HtmlEntry.CreateRow).
163 Row(final int referenceIndex, final int thisRowIndex,
165 super(referenceIndex, thisRowIndex, index);
// Same text as getRawText(false).
169 public String toString() {
170 return getRawText(false);
// Looks up the referenced entry in the htmlEntries list of the index's dictionary.
173 public HtmlEntry getEntry() {
174 return index.dict.htmlEntries.get(referenceIndex);
// Prints a one-line "See also" pointer that carries the entry title.
178 public void print(PrintStream out) {
179 final HtmlEntry entry = getEntry();
180 out.println("See also HtmlEntry:" + entry.title);
// Delegates to the referenced entry's getRawText.
184 public String getRawText(boolean compact) {
185 final HtmlEntry entry = getEntry();
186 return entry.getRawText(compact);
/**
 * Classifies this row against a search.
 *
 * <p>The raw text is passed through the normalizer first. A hit of
 * orderedMatchPattern yields ORDERED_MATCH. Otherwise every search token must
 * occur as a substring of the normalized text for BAG_OF_WORDS_MATCH; the first
 * missing token yields NO_MATCH. swapPairEntries is not used in the visible lines.
 * NOTE(review): presumably the tokens are already normalized by the caller; confirm.
 */
190 public RowMatchType matches(final List<String> searchTokens,
191 final Pattern orderedMatchPattern, final Transliterator normalizer,
192 final boolean swapPairEntries) {
193 final String text = normalizer.transform(getRawText(false));
194 if (orderedMatchPattern.matcher(text).find()) {
195 return RowMatchType.ORDERED_MATCH;
// Tokens are checked from last to first.
197 for (int i = searchTokens.size() - 1; i >= 0; --i) {
198 final String searchToken = searchTokens.get(i);
199 if (!text.contains(searchToken)) {
200 return RowMatchType.NO_MATCH;
203 return RowMatchType.BAG_OF_WORDS_MATCH;
/**
 * Renders a list of entries as one HTML fragment.
 *
 * <p>Each entry contributes an h1 heading whose link is built by formatQuickdicUrl,
 * followed by a paragraph holding the entry's HTML.
 *
 * @param htmlEntries    entries to render, in order
 * @param indexShortName index name placed into each generated link
 * @return the concatenated fragment; empty when htmlEntries is empty
 */
207 public static String htmlBody(final List<HtmlEntry> htmlEntries, final String indexShortName) {
208 final StringBuilder result = new StringBuilder();
209 for (final HtmlEntry htmlEntry : htmlEntries) {
// The escaped title is used for the visible heading text only; the link is built
// from the raw title, and the entry HTML is inserted as it is.
210 final String titleEscaped = StringUtil.escapeUnicodeToPureHtml(htmlEntry.title);
211 result.append(String.format("<h1><a href=\"%s\">%s</a></h1>\n<p>%s\n",
212 formatQuickdicUrl(indexShortName, htmlEntry.title), titleEscaped,
213 htmlEntry.getHtml()));
215 return result.toString();
/**
 * Builds an internal link of the form {@code q://d?<indexShortName>&<encoded text>}.
 *
 * @param indexShortName index name; must not contain ':' (checked by an assert only)
 * @param text           lookup text; must be non-empty (checked by an assert only),
 *                       passed through StringUtil.encodeForUrl
 * @return the formatted URL string
 */
218 @SuppressWarnings("WeakerAccess")
219 public static String formatQuickdicUrl(final String indexShortName, final String text) {
// These asserts are only evaluated when assertions are enabled at runtime.
220 assert !indexShortName.contains(":");
221 assert text.length() > 0;
222 return String.format("q://d?%s&%s", indexShortName, StringUtil.encodeForUrl(text));
/**
 * Tells whether a URL starts with the {@code q://d?} prefix that formatQuickdicUrl emits.
 *
 * @param url URL to test; a null value would throw NullPointerException
 * @return true if url starts with "q://d?"
 */
225 public static boolean isQuickdicUrl(String url) {
226 return url.startsWith("q://d?");
229 // --------------------------------------------------------------------
/**
 * Produces the HTML text of one entry on demand and keeps it behind a SoftReference.
 *
 * <p>Two sources are visible below: a list of pre-read UTF-8 byte arrays (data), and a
 * zipped region of the dictionary file whose sizes are read in the constructor.
 * NOTE(review): several declarations and branch conditions are not visible in this
 * view; confirm against the full file which source is used when.
 */
231 @SuppressWarnings("WeakerAccess")
232 public static final class LazyHtmlLoader {
234 final FileChannel ch;
// Byte count of the zipped HTML as read from the file; set to -1 on the other
// constructor path shown below.
237 final int numZipBytes;
// Per-entry HTML bytes; looked up with data.get(index) when decoding.
238 final List<byte[]> data;
241 // NOTE(review): volatile only makes the most recently stored SoftReference visible to other threads. No locking is visible here, so concurrent callers may presumably each load the HTML; confirm that this is acceptable.
242 volatile SoftReference<String> htmlRef = new SoftReference<>(null);
// Records the sizes and location of the HTML without decoding it.
244 private LazyHtmlLoader(FileChannel ch, final DataInput inp, List<byte[]> data, int index) throws IOException {
252 this.numZipBytes = -1;
// Both sizes come from the input and are capped at 20 MiB each.
257 numBytes = Math.min(raf.readInt(), 20 * 1024 * 1024);
258 numZipBytes = Math.min(raf.readInt(), 20 * 1024 * 1024);
// Remember the channel position, then step over numZipBytes bytes of input.
259 offset = ch.position();
260 raf.skipBytes(numZipBytes);
// Start from whatever the SoftReference still holds; it yields null once cleared.
264 String html = htmlRef.get();
// Decode the pre-read bytes of this entry as UTF-8 and cache the result.
270 html = new String(data.get(index), "UTF-8");
271 } catch (UnsupportedEncodingException e) {
272 throw new RuntimeException("Dictionary HTML data corrupted", e);
274 htmlRef = new SoftReference<>(html);
// File path: trace the sizes to stdout, then read numZipBytes bytes of zipped data.
277 System.out.println("Loading Html: numBytes=" + numBytes + ", numZipBytes="
279 final byte[] zipBytes = new byte[numZipBytes];
283 raf.readFully(zipBytes);
284 } catch (IOException e) {
285 throw new RuntimeException("Failed to read HTML data from dictionary", e);
// Presumably inflates the zipped bytes to numBytes bytes; the result is decoded as
// UTF-8 and cached.
289 final byte[] bytes = StringUtil.unzipFully(zipBytes, numBytes);
290 html = new String(bytes, "UTF-8");
291 } catch (IOException e) {
292 throw new RuntimeException("Dictionary HTML data corrupted", e);
294 htmlRef = new SoftReference<>(html);