]> gitweb.fperrin.net Git - Dictionary.git/blob - src/com/hughes/android/dictionary/engine/HtmlEntry.java
1a602a017ad8d8a0c59a511a5656efe4fb5ff958
[Dictionary.git] / src / com / hughes / android / dictionary / engine / HtmlEntry.java
1
2 package com.hughes.android.dictionary.engine;
3
4 import android.support.annotation.NonNull;
5
6 import com.hughes.util.StringUtil;
7 import com.hughes.util.raf.RAFListSerializer;
8 import com.hughes.util.raf.RAFListSerializerSkippable;
9 import com.ibm.icu.text.Transliterator;
10
11 import java.io.DataInput;
12 import java.io.DataOutput;
13 import java.io.IOException;
14 import java.io.PrintStream;
15 import java.io.UnsupportedEncodingException;
16 import java.lang.ref.SoftReference;
17 import java.nio.channels.FileChannel;
18 import java.util.List;
19 import java.util.regex.Pattern;
20
21 public class HtmlEntry extends AbstractEntry implements Comparable<HtmlEntry> {
22
23     // Title is not HTML escaped.
24     public final String title;
25     private final LazyHtmlLoader lazyHtmlLoader;
26     @SuppressWarnings("WeakerAccess")
27     public String html;
28
29     public HtmlEntry(final EntrySource entrySource, String title) {
30         super(entrySource);
31         this.title = title;
32         lazyHtmlLoader = null;
33     }
34
35     public HtmlEntry(Dictionary dictionary, FileChannel ch, DataInput raf, final int index)
36     throws IOException {
37         super(dictionary, raf, index);
38         title = raf.readUTF();
39         lazyHtmlLoader = new LazyHtmlLoader(ch, raf, dictionary.htmlData, index);
40         html = null;
41     }
42
43     private void writeBase(DataOutput raf) throws IOException {
44         super.write(raf);
45         raf.writeUTF(title);
46     }
47
48     private void writeData(DataOutput raf) throws IOException {
49         final byte[] bytes = getHtml().getBytes("UTF-8");
50         StringUtil.writeVarInt(raf, bytes.length);
51         raf.write(bytes);
52     }
53
54     private static byte[] readData(DataInput raf) throws IOException {
55         int len = StringUtil.readVarInt(raf);
56         final byte[] bytes = new byte[Math.min(len, 20 * 1024 * 1024)];
57         raf.readFully(bytes);
58         return bytes;
59     }
60
61     String getHtml() {
62         return html != null ? html : lazyHtmlLoader.getHtml();
63     }
64
65     @Override
66     public void addToDictionary(Dictionary dictionary) {
67         assert index == -1;
68         dictionary.htmlEntries.add(this);
69         index = dictionary.htmlEntries.size() - 1;
70     }
71
72     @Override
73     public RowBase CreateRow(int rowIndex, Index dictionaryIndex) {
74         return new Row(this.index, rowIndex, dictionaryIndex);
75     }
76
77     static final class Serializer implements RAFListSerializerSkippable<HtmlEntry> {
78
79         final Dictionary dictionary;
80         final FileChannel ch;
81
82         Serializer(Dictionary dictionary, FileChannel ch) {
83             this.dictionary = dictionary;
84             this.ch = ch;
85         }
86
87         @Override
88         public HtmlEntry read(DataInput raf, final int index) throws IOException {
89             return new HtmlEntry(dictionary, ch, raf, index);
90         }
91
92         @Override
93         public void skip(DataInput raf, final int index) throws IOException {
94             if (dictionary.dictFileVersion >= 7)
95             {
96                 StringUtil.readVarInt(raf);
97             }
98             else
99             {
100                 raf.skipBytes(2);
101             }
102             int l = raf.readUnsignedShort();
103             raf.skipBytes(l);
104         }
105
106         @Override
107         public void write(DataOutput raf, HtmlEntry t) throws IOException {
108             t.writeBase(raf);
109         }
110     }
111
112     static final class DataSerializer implements RAFListSerializer<HtmlEntry> {
113         @Override
114         public HtmlEntry read(DataInput raf, final int index) {
115             assert false;
116             return null;
117         }
118
119         @Override
120         public void write(DataOutput raf, HtmlEntry t) throws IOException {
121             t.writeData(raf);
122         }
123     }
124
125     static final class DataDeserializer implements RAFListSerializer<byte[]> {
126         @Override
127         public byte[] read(DataInput raf, final int index) throws IOException {
128             return HtmlEntry.readData(raf);
129         }
130
131         @Override
132         public void write(DataOutput raf, byte[] t) {
133             assert false;
134         }
135     }
136
137     private String getRawText(final boolean compact) {
138         return title + ":\n" + getHtml();
139     }
140
141     @Override
142     public int compareTo(@NonNull HtmlEntry another) {
143         if (title.compareTo(another.title) != 0) {
144             return title.compareTo(another.title);
145         }
146         return getHtml().compareTo(another.getHtml());
147     }
148
149     @Override
150     public String toString() {
151         return getRawText(false);
152     }
153
154     // --------------------------------------------------------------------
155
156     public static class Row extends RowBase {
157
158         Row(final DataInput raf, final int thisRowIndex,
159             final Index index, int extra) throws IOException {
160             super(raf, thisRowIndex, index, extra);
161         }
162
163         Row(final int referenceIndex, final int thisRowIndex,
164             final Index index) {
165             super(referenceIndex, thisRowIndex, index);
166         }
167
168         @Override
169         public String toString() {
170             return getRawText(false);
171         }
172
173         public HtmlEntry getEntry() {
174             return index.dict.htmlEntries.get(referenceIndex);
175         }
176
177         @Override
178         public void print(PrintStream out) {
179             final HtmlEntry entry = getEntry();
180             out.println("See also HtmlEntry:" + entry.title);
181         }
182
183         @Override
184         public String getRawText(boolean compact) {
185             final HtmlEntry entry = getEntry();
186             return entry.getRawText(compact);
187         }
188
189         @Override
190         public RowMatchType matches(final List<String> searchTokens,
191                                     final Pattern orderedMatchPattern, final Transliterator normalizer,
192                                     final boolean swapPairEntries) {
193             final String text = normalizer.transform(getRawText(false));
194             if (orderedMatchPattern.matcher(text).find()) {
195                 return RowMatchType.ORDERED_MATCH;
196             }
197             for (int i = searchTokens.size() - 1; i >= 0; --i) {
198                 final String searchToken = searchTokens.get(i);
199                 if (!text.contains(searchToken)) {
200                     return RowMatchType.NO_MATCH;
201                 }
202             }
203             return RowMatchType.BAG_OF_WORDS_MATCH;
204         }
205     }
206
207     public static String htmlBody(final List<HtmlEntry> htmlEntries, final String indexShortName) {
208         final StringBuilder result = new StringBuilder();
209         for (final HtmlEntry htmlEntry : htmlEntries) {
210             final String titleEscaped = StringUtil.escapeUnicodeToPureHtml(htmlEntry.title);
211             result.append(String.format("<h1><a href=\"%s\">%s</a></h1>\n<p>%s\n",
212                                         formatQuickdicUrl(indexShortName, htmlEntry.title), titleEscaped,
213                                         htmlEntry.getHtml()));
214         }
215         return result.toString();
216     }
217
218     @SuppressWarnings("WeakerAccess")
219     public static String formatQuickdicUrl(final String indexShortName, final String text) {
220         assert !indexShortName.contains(":");
221         assert text.length() > 0;
222         return String.format("q://d?%s&%s", indexShortName, StringUtil.encodeForUrl(text));
223     }
224
225     public static boolean isQuickdicUrl(String url) {
226         return url.startsWith("q://d?");
227     }
228
229     // --------------------------------------------------------------------
230
231     @SuppressWarnings("WeakerAccess")
232     public static final class LazyHtmlLoader {
233         final DataInput raf;
234         final FileChannel ch;
235         final long offset;
236         final int numBytes;
237         final int numZipBytes;
238         final List<byte[]> data;
239         final int index;
240
241         // Not sure this volatile is right, but oh well.
242         volatile SoftReference<String> htmlRef = new SoftReference<>(null);
243
244         private LazyHtmlLoader(FileChannel ch, final DataInput inp, List<byte[]> data, int index) throws IOException {
245             this.data = data;
246             this.index = index;
247             if (data != null) {
248                 this.raf = null;
249                 this.ch = null;
250                 this.offset = 0;
251                 this.numBytes = -1;
252                 this.numZipBytes = -1;
253                 return;
254             }
255             raf = inp;
256             this.ch = ch;
257             numBytes = Math.min(raf.readInt(), 20 * 1024 * 1024);
258             numZipBytes = Math.min(raf.readInt(), 20 * 1024 * 1024);
259             offset = ch.position();
260             raf.skipBytes(numZipBytes);
261         }
262
263         String getHtml() {
264             String html = htmlRef.get();
265             if (html != null) {
266                 return html;
267             }
268             if (data != null) {
269                 try {
270                     html = new String(data.get(index), "UTF-8");
271                 } catch (UnsupportedEncodingException e) {
272                     throw new RuntimeException("Dictionary HTML data corrupted", e);
273                 }
274                 htmlRef = new SoftReference<>(html);
275                 return html;
276             }
277             System.out.println("Loading Html: numBytes=" + numBytes + ", numZipBytes="
278                                + numZipBytes);
279             final byte[] zipBytes = new byte[numZipBytes];
280             synchronized (ch) {
281                 try {
282                     ch.position(offset);
283                     raf.readFully(zipBytes);
284                 } catch (IOException e) {
285                     throw new RuntimeException("Failed to read HTML data from dictionary", e);
286                 }
287             }
288             try {
289                 final byte[] bytes = StringUtil.unzipFully(zipBytes, numBytes);
290                 html = new String(bytes, "UTF-8");
291             } catch (IOException e) {
292                 throw new RuntimeException("Dictionary HTML data corrupted", e);
293             }
294             htmlRef = new SoftReference<>(html);
295             return html;
296         }
297     }
298
299 }