2 package com.hughes.android.dictionary.engine;
4 import java.io.DataInput;
5 import java.io.DataOutput;
6 import java.io.IOException;
7 import java.io.PrintStream;
8 import java.lang.ref.SoftReference;
9 import java.nio.ByteBuffer;
10 import java.nio.channels.FileChannel;
11 import java.nio.charset.StandardCharsets;
12 import java.util.List;
13 import java.util.regex.Pattern;
15 import com.hughes.util.DataInputBuffer;
16 import com.hughes.util.StringUtil;
17 import com.hughes.util.raf.RAFListSerializer;
18 import com.hughes.util.raf.RAFListSerializerSkippable;
19 import com.ibm.icu.text.Transliterator;
21 public class HtmlEntry extends AbstractEntry implements Comparable<HtmlEntry> {
23 // Title is not HTML escaped.
24 public final String title;
25 private final LazyHtmlLoader lazyHtmlLoader;
26 @SuppressWarnings("WeakerAccess")
// Creates an entry from an entry source and a title.
// No lazy loader is attached on this path (lazyHtmlLoader is null), so
// getHtml() can only succeed for such an entry while the html field is non-null.
29 public HtmlEntry(final EntrySource entrySource, String title) {
32 lazyHtmlLoader = null;
// Reads an entry from serialized input.
// The base-class constructor consumes its part of the stream first; then the
// title is read with readUTF() and a LazyHtmlLoader is created from the same
// input, the dictionary's htmlData list and this entry's index.
35 public HtmlEntry(Dictionary dictionary, DataInput raf, final int index)
37 super(dictionary, raf, index);
38 title = raf.readUTF();
39 lazyHtmlLoader = new LazyHtmlLoader(raf, dictionary.htmlData, index);
43 private void writeBase(DataOutput raf) throws IOException {
// Serializes this entry's HTML payload.
// The text returned by getHtml() is encoded as UTF-8 and the resulting byte
// count is written as a var-int via StringUtil.writeVarInt.
48 private void writeData(DataOutput raf) throws IOException {
49 final byte[] bytes = getHtml().getBytes(StandardCharsets.UTF_8);
50 StringUtil.writeVarInt(raf, bytes.length);
// Reads one length-prefixed payload: a var-int length followed by that many
// bytes, returned as a slice of the input rather than a decoded string.
// The input is cast to DataInputBuffer without a check, so any other
// DataInput implementation fails here with a ClassCastException.
54 private static DataInputBuffer readData(DataInput raf) throws IOException {
55 int len = StringUtil.readVarInt(raf);
56 return ((DataInputBuffer)raf).slice(len);
// Prefer the html field when it is set; otherwise defer to the lazy loader.
// Entries built with the (EntrySource, String) constructor have a null
// lazyHtmlLoader, so for those a null html field leads to a NullPointerException.
60 return html != null ? html : lazyHtmlLoader.getHtml();
// Appends this entry to the dictionary's htmlEntries list and records the
// position it was stored at (the last index after the add) in this.index.
64 public void addToDictionary(Dictionary dictionary) {
66 dictionary.htmlEntries.add(this);
67 index = dictionary.htmlEntries.size() - 1;
// Creates a Row that refers back to this entry by its index.
// rowIndex and dictionaryIndex are passed through to the Row constructor unchanged.
71 public RowBase CreateRow(int rowIndex, Index dictionaryIndex) {
72 return new Row(this.index, rowIndex, dictionaryIndex);
// Skippable list serializer for HtmlEntry, bound to one Dictionary.
75 static final class Serializer implements RAFListSerializerSkippable<HtmlEntry> {
// Dictionary handed to every HtmlEntry that read() constructs; its
// dictFileVersion is also consulted by skip().
77 final Dictionary dictionary;
79 Serializer(Dictionary dictionary) {
80 this.dictionary = dictionary;
// Deserializes one entry by delegating to the HtmlEntry(Dictionary, DataInput, int) constructor.
84 public HtmlEntry read(DataInput raf, final int index) throws IOException {
85 return new HtmlEntry(dictionary, raf, index);
// Advances past one serialized entry without constructing it.
// For dictFileVersion >= 7 a var-int is read and its value discarded.
89 public void skip(DataInput raf, final int index) throws IOException {
90 if (dictionary.dictFileVersion >= 7)
92 StringUtil.readVarInt(raf);
// NOTE(review): l is presumably the byte length of a UTF string to skip — confirm.
98 int l = raf.readUnsignedShort();
// Writes one entry to the output.
103 public void write(DataOutput raf, HtmlEntry t) throws IOException {
// RAFListSerializer for HtmlEntry values.
// NOTE(review): presumably the write side for the HTML payload (see writeData) — confirm.
108 static final class DataSerializer implements RAFListSerializer<HtmlEntry> {
// Unlike Serializer.read, this read declares no IOException.
110 public HtmlEntry read(DataInput raf, final int index) {
116 public void write(DataOutput raf, HtmlEntry t) throws IOException {
// RAFListSerializer whose element type is the raw DataInputBuffer slice of an
// entry's payload rather than a decoded HtmlEntry.
121 static final class DataDeserializer implements RAFListSerializer<DataInputBuffer> {
// Delegates to HtmlEntry.readData: var-int length, then a slice of that size.
// The index argument is not used on this path.
123 public DataInputBuffer read(DataInput raf, final int index) throws IOException {
124 return HtmlEntry.readData(raf);
// This write declares no IOException.
128 public void write(DataOutput raf, DataInputBuffer t) {
// Returns the title, a colon and newline, then the entry's HTML.
// The compact flag does not affect the returned text.
133 private String getRawText(final boolean compact) {
134 return title + ":\n" + getHtml();
// Orders entries by title first (String.compareTo), and by HTML content when
// the titles compare equal. Comparing HTML calls getHtml() on both entries.
138 public int compareTo(/*@NonNull*/ HtmlEntry another) {
139 if (title.compareTo(another.title) != 0) {
140 return title.compareTo(another.title);
142 return getHtml().compareTo(another.getHtml());
// String form is the non-compact raw text: title, ":\n", then the HTML.
146 public String toString() {
147 return getRawText(false);
150 // --------------------------------------------------------------------
// Row type that points at an HtmlEntry through referenceIndex.
152 public static class Row extends RowBase {
// Deserializing constructor: all arguments are forwarded to RowBase.
154 Row(final DataInput raf, final int thisRowIndex,
155 final Index index, int extra) throws IOException {
156 super(raf, thisRowIndex, index, extra);
// Direct constructor used when the referenced entry index is already known
// (see HtmlEntry.CreateRow).
159 Row(final int referenceIndex, final int thisRowIndex,
161 super(referenceIndex, thisRowIndex, index);
// String form is the referenced entry's non-compact raw text.
165 public String toString() {
166 return getRawText(false);
// Resolves the referenced entry from the owning dictionary's htmlEntries list.
169 public HtmlEntry getEntry() {
170 return index.dict.htmlEntries.get(referenceIndex);
// Prints a one-line pointer to the entry (its title only, not its HTML).
174 public void print(PrintStream out) {
175 final HtmlEntry entry = getEntry();
176 out.println("See also HtmlEntry:" + entry.title);
// Delegates to the referenced entry's getRawText.
180 public String getRawText(boolean compact) {
181 final HtmlEntry entry = getEntry();
182 return entry.getRawText(compact);
// Classifies how this row matches a search.
// The row's raw text (title plus HTML) is passed through the normalizer first.
// If orderedMatchPattern is found in it the result is ORDERED_MATCH.
// Otherwise every search token must occur as a substring (checked from the
// last token to the first): a missing token gives NO_MATCH, all present gives
// BAG_OF_WORDS_MATCH.
// swapPairEntries is not consulted in this method.
// NOTE(review): searchTokens are compared against normalized text, so they are
// presumably already normalized by the caller — confirm.
186 public RowMatchType matches(final List<String> searchTokens,
187 final Pattern orderedMatchPattern, final Transliterator normalizer,
188 final boolean swapPairEntries) {
189 final String text = normalizer.transform(getRawText(false));
190 if (orderedMatchPattern.matcher(text).find()) {
191 return RowMatchType.ORDERED_MATCH;
193 for (int i = searchTokens.size() - 1; i >= 0; --i) {
194 final String searchToken = searchTokens.get(i);
195 if (!text.contains(searchToken)) {
196 return RowMatchType.NO_MATCH;
199 return RowMatchType.BAG_OF_WORDS_MATCH;
// Concatenates the given entries into one HTML fragment.
// Each entry contributes an <h1> heading whose link target is the quickdic URL
// for (indexShortName, title), followed by a <p> holding the entry's HTML.
// The title is escaped with escapeUnicodeToPureHtml for the visible link text,
// while the URL is built from the raw title; the entry's HTML is inserted as-is.
// Returns an empty string for an empty list.
203 public static String htmlBody(final List<HtmlEntry> htmlEntries, final String indexShortName) {
204 final StringBuilder result = new StringBuilder();
205 for (final HtmlEntry htmlEntry : htmlEntries) {
206 final String titleEscaped = StringUtil.escapeUnicodeToPureHtml(htmlEntry.title);
207 result.append(String.format("<h1><a href=\"%s\">%s</a></h1>\n<p>%s\n",
208 formatQuickdicUrl(indexShortName, htmlEntry.title), titleEscaped,
209 htmlEntry.getHtml()));
211 return result.toString();
// Builds an internal link of the form "q://d?<indexShortName>&<encoded text>".
// The text is passed through StringUtil.encodeForUrl; indexShortName is
// inserted unchanged.
// The two preconditions (no ':' in the index name, non-empty text) are plain
// Java asserts, so they are only enforced when assertions are enabled.
214 @SuppressWarnings("WeakerAccess")
215 public static String formatQuickdicUrl(final String indexShortName, final String text) {
216 assert !indexShortName.contains(":");
217 assert text.length() > 0;
218 return String.format("q://d?%s&%s", indexShortName, StringUtil.encodeForUrl(text));
// Returns true when the URL starts with the "q://d?" prefix produced by
// formatQuickdicUrl. A null url throws NullPointerException.
221 public static boolean isQuickdicUrl(String url) {
222 return url.startsWith("q://d?");
225 // --------------------------------------------------------------------
// Loads an entry's HTML on demand and keeps the decoded string behind a
// SoftReference, so the garbage collector may drop it and force a reload.
227 @SuppressWarnings("WeakerAccess")
228 public static final class LazyHtmlLoader {
// Slice of the input that holds the zipped HTML bytes (set in the constructor).
229 final DataInputBuffer buf;
// List of per-entry buffers; getHtml() decodes data.get(index) from it.
231 final List<DataInputBuffer> data;
// Not sure this volatile is right, but oh well.
// volatile makes a newly assigned reference visible to other threads; it does
// not by itself prevent two threads from decoding the same HTML concurrently.
235 volatile SoftReference<String> htmlRef = new SoftReference<>(null);
// Reads the loader's header from the input and captures the zipped payload.
// The input must actually be a DataInputBuffer: it is cast without a check.
237 private LazyHtmlLoader(final DataInput inp, List<DataInputBuffer> data, int index) throws IOException {
// Both sizes are capped at 20 MiB (20 * 1024 * 1024). Math.min only bounds
// them from above, so a negative value read from the stream passes through.
245 numBytes = Math.min(inp.readInt(), 20 * 1024 * 1024);
246 int numZipBytes = Math.min(inp.readInt(), 20 * 1024 * 1024);
247 DataInputBuffer b = (DataInputBuffer)inp;
248 buf = b.slice(numZipBytes);
// Start from the cached value; get() returns null if nothing is cached or the
// reference was cleared.
252 String html = htmlRef.get();
// Load from the per-entry data list and cache the decoded string.
257 html = data.get(index).asString();
258 htmlRef = new SoftReference<>(html);
// Diagnostic output goes straight to stdout.
261 System.out.println("Loading Html: numBytes=" + numBytes + ", numZipBytes="
// Copy the zipped bytes out of buf, inflate them to numBytes bytes and decode
// the result as UTF-8.
263 final byte[] zipBytes = new byte[buf.limit()];
265 buf.readFully(zipBytes);
267 final byte[] bytes = StringUtil.unzipFully(zipBytes, numBytes);
268 html = new String(bytes, StandardCharsets.UTF_8);
// An I/O failure is rethrown unchecked, with the IOException kept as the cause.
269 } catch (IOException e) {
270 throw new RuntimeException("Dictionary HTML data corrupted", e);
// Cache the freshly decoded HTML for later calls.
272 htmlRef = new SoftReference<>(html);