]> gitweb.fperrin.net Git - Dictionary.git/blob - src/com/hughes/android/dictionary/engine/HtmlEntry.java
8dcf905175f5f5e01b983a7fd288f2a80a998ae1
[Dictionary.git] / src / com / hughes / android / dictionary / engine / HtmlEntry.java
1
2 package com.hughes.android.dictionary.engine;
3
4 import com.hughes.util.StringUtil;
5 import com.hughes.util.raf.RAFListSerializer;
6 import com.hughes.util.raf.RAFListSerializerSkippable;
7 import com.ibm.icu.text.Transliterator;
8
9 import java.io.DataInput;
10 import java.io.DataOutput;
11 import java.io.IOException;
12 import java.io.PrintStream;
13 import java.io.UnsupportedEncodingException;
14 import java.lang.ref.SoftReference;
15 import java.nio.channels.FileChannel;
16 import java.util.List;
17 import java.util.regex.Pattern;
18
19 public class HtmlEntry extends AbstractEntry implements Comparable<HtmlEntry> {
20
21     // Title is not HTML escaped.
22     public final String title;
23     private final LazyHtmlLoader lazyHtmlLoader;
24     @SuppressWarnings("WeakerAccess")
25     public String html;
26
27     public HtmlEntry(final EntrySource entrySource, String title) {
28         super(entrySource);
29         this.title = title;
30         lazyHtmlLoader = null;
31     }
32
33     public HtmlEntry(Dictionary dictionary, FileChannel ch, DataInput raf, final int index)
34     throws IOException {
35         super(dictionary, raf, index);
36         title = raf.readUTF();
37         lazyHtmlLoader = new LazyHtmlLoader(ch, raf, dictionary.htmlData, index);
38         html = null;
39     }
40
41     private void writeBase(DataOutput raf) throws IOException {
42         super.write(raf);
43         raf.writeUTF(title);
44     }
45
46     private void writeData(DataOutput raf) throws IOException {
47         final byte[] bytes = getHtml().getBytes("UTF-8");
48         StringUtil.writeVarInt(raf, bytes.length);
49         raf.write(bytes);
50     }
51
52     private static byte[] readData(DataInput raf) throws IOException {
53         int len = StringUtil.readVarInt(raf);
54         final byte[] bytes = new byte[Math.min(len, 20 * 1024 * 1024)];
55         raf.readFully(bytes);
56         return bytes;
57     }
58
59     String getHtml() {
60         return html != null ? html : lazyHtmlLoader.getHtml();
61     }
62
63     @Override
64     public void addToDictionary(Dictionary dictionary) {
65         assert index == -1;
66         dictionary.htmlEntries.add(this);
67         index = dictionary.htmlEntries.size() - 1;
68     }
69
70     @Override
71     public RowBase CreateRow(int rowIndex, Index dictionaryIndex) {
72         return new Row(this.index, rowIndex, dictionaryIndex);
73     }
74
75     static final class Serializer implements RAFListSerializerSkippable<HtmlEntry> {
76
77         final Dictionary dictionary;
78         final FileChannel ch;
79
80         Serializer(Dictionary dictionary, FileChannel ch) {
81             this.dictionary = dictionary;
82             this.ch = ch;
83         }
84
85         @Override
86         public HtmlEntry read(DataInput raf, final int index) throws IOException {
87             return new HtmlEntry(dictionary, ch, raf, index);
88         }
89
90         @Override
91         public void skip(DataInput raf, final int index) throws IOException {
92             if (dictionary.dictFileVersion >= 7)
93             {
94                 StringUtil.readVarInt(raf);
95             }
96             else
97             {
98                 raf.skipBytes(2);
99             }
100             int l = raf.readUnsignedShort();
101             raf.skipBytes(l);
102         }
103
104         @Override
105         public void write(DataOutput raf, HtmlEntry t) throws IOException {
106             t.writeBase(raf);
107         }
108     }
109
110     static final class DataSerializer implements RAFListSerializer<HtmlEntry> {
111         @Override
112         public HtmlEntry read(DataInput raf, final int index) {
113             assert false;
114             return null;
115         }
116
117         @Override
118         public void write(DataOutput raf, HtmlEntry t) throws IOException {
119             t.writeData(raf);
120         }
121     }
122
123     static final class DataDeserializer implements RAFListSerializer<byte[]> {
124         @Override
125         public byte[] read(DataInput raf, final int index) throws IOException {
126             return HtmlEntry.readData(raf);
127         }
128
129         @Override
130         public void write(DataOutput raf, byte[] t) {
131             assert false;
132         }
133     }
134
135     private String getRawText(final boolean compact) {
136         return title + ":\n" + getHtml();
137     }
138
139     @Override
140     public int compareTo(/*@NonNull*/ HtmlEntry another) {
141         if (title.compareTo(another.title) != 0) {
142             return title.compareTo(another.title);
143         }
144         return getHtml().compareTo(another.getHtml());
145     }
146
147     @Override
148     public String toString() {
149         return getRawText(false);
150     }
151
152     // --------------------------------------------------------------------
153
154     public static class Row extends RowBase {
155
156         Row(final DataInput raf, final int thisRowIndex,
157             final Index index, int extra) throws IOException {
158             super(raf, thisRowIndex, index, extra);
159         }
160
161         Row(final int referenceIndex, final int thisRowIndex,
162             final Index index) {
163             super(referenceIndex, thisRowIndex, index);
164         }
165
166         @Override
167         public String toString() {
168             return getRawText(false);
169         }
170
171         public HtmlEntry getEntry() {
172             return index.dict.htmlEntries.get(referenceIndex);
173         }
174
175         @Override
176         public void print(PrintStream out) {
177             final HtmlEntry entry = getEntry();
178             out.println("See also HtmlEntry:" + entry.title);
179         }
180
181         @Override
182         public String getRawText(boolean compact) {
183             final HtmlEntry entry = getEntry();
184             return entry.getRawText(compact);
185         }
186
187         @Override
188         public RowMatchType matches(final List<String> searchTokens,
189                                     final Pattern orderedMatchPattern, final Transliterator normalizer,
190                                     final boolean swapPairEntries) {
191             final String text = normalizer.transform(getRawText(false));
192             if (orderedMatchPattern.matcher(text).find()) {
193                 return RowMatchType.ORDERED_MATCH;
194             }
195             for (int i = searchTokens.size() - 1; i >= 0; --i) {
196                 final String searchToken = searchTokens.get(i);
197                 if (!text.contains(searchToken)) {
198                     return RowMatchType.NO_MATCH;
199                 }
200             }
201             return RowMatchType.BAG_OF_WORDS_MATCH;
202         }
203     }
204
205     public static String htmlBody(final List<HtmlEntry> htmlEntries, final String indexShortName) {
206         final StringBuilder result = new StringBuilder();
207         for (final HtmlEntry htmlEntry : htmlEntries) {
208             final String titleEscaped = StringUtil.escapeUnicodeToPureHtml(htmlEntry.title);
209             result.append(String.format("<h1><a href=\"%s\">%s</a></h1>\n<p>%s\n",
210                                         formatQuickdicUrl(indexShortName, htmlEntry.title), titleEscaped,
211                                         htmlEntry.getHtml()));
212         }
213         return result.toString();
214     }
215
216     @SuppressWarnings("WeakerAccess")
217     public static String formatQuickdicUrl(final String indexShortName, final String text) {
218         assert !indexShortName.contains(":");
219         assert text.length() > 0;
220         return String.format("q://d?%s&%s", indexShortName, StringUtil.encodeForUrl(text));
221     }
222
223     public static boolean isQuickdicUrl(String url) {
224         return url.startsWith("q://d?");
225     }
226
227     // --------------------------------------------------------------------
228
229     @SuppressWarnings("WeakerAccess")
230     public static final class LazyHtmlLoader {
231         final DataInput raf;
232         final FileChannel ch;
233         final long offset;
234         final int numBytes;
235         final int numZipBytes;
236         final List<byte[]> data;
237         final int index;
238
239         // Not sure this volatile is right, but oh well.
240         volatile SoftReference<String> htmlRef = new SoftReference<>(null);
241
242         private LazyHtmlLoader(FileChannel ch, final DataInput inp, List<byte[]> data, int index) throws IOException {
243             this.data = data;
244             this.index = index;
245             if (data != null) {
246                 this.raf = null;
247                 this.ch = null;
248                 this.offset = 0;
249                 this.numBytes = -1;
250                 this.numZipBytes = -1;
251                 return;
252             }
253             raf = inp;
254             this.ch = ch;
255             numBytes = Math.min(raf.readInt(), 20 * 1024 * 1024);
256             numZipBytes = Math.min(raf.readInt(), 20 * 1024 * 1024);
257             offset = ch.position();
258             raf.skipBytes(numZipBytes);
259         }
260
261         String getHtml() {
262             String html = htmlRef.get();
263             if (html != null) {
264                 return html;
265             }
266             if (data != null) {
267                 try {
268                     html = new String(data.get(index), "UTF-8");
269                 } catch (UnsupportedEncodingException e) {
270                     throw new RuntimeException("Dictionary HTML data corrupted", e);
271                 }
272                 htmlRef = new SoftReference<>(html);
273                 return html;
274             }
275             System.out.println("Loading Html: numBytes=" + numBytes + ", numZipBytes="
276                                + numZipBytes);
277             final byte[] zipBytes = new byte[numZipBytes];
278             synchronized (ch) {
279                 try {
280                     ch.position(offset);
281                     raf.readFully(zipBytes);
282                 } catch (IOException e) {
283                     throw new RuntimeException("Failed to read HTML data from dictionary", e);
284                 }
285             }
286             try {
287                 final byte[] bytes = StringUtil.unzipFully(zipBytes, numBytes);
288                 html = new String(bytes, "UTF-8");
289             } catch (IOException e) {
290                 throw new RuntimeException("Dictionary HTML data corrupted", e);
291             }
292             htmlRef = new SoftReference<>(html);
293             return html;
294         }
295     }
296
297 }