]> gitweb.fperrin.net Git - Dictionary.git/blob - src/com/hughes/android/dictionary/engine/HtmlEntry.java
1b191a93ac4b80000672876b4f453cfe5c1c097e
[Dictionary.git] / src / com / hughes / android / dictionary / engine / HtmlEntry.java
1
2 package com.hughes.android.dictionary.engine;
3
4 import java.io.DataInput;
5 import java.io.DataOutput;
6 import java.io.IOException;
7 import java.io.PrintStream;
8 import java.lang.ref.SoftReference;
9 import java.nio.ByteBuffer;
10 import java.nio.channels.FileChannel;
11 import java.nio.charset.StandardCharsets;
12 import java.util.List;
13 import java.util.regex.Pattern;
14
15 import com.hughes.util.DataInputBuffer;
16 import com.hughes.util.StringUtil;
17 import com.hughes.util.raf.RAFListSerializer;
18 import com.hughes.util.raf.RAFListSerializerSkippable;
19 import com.ibm.icu.text.Transliterator;
20
21 public class HtmlEntry extends AbstractEntry implements Comparable<HtmlEntry> {
22
23     // Title is not HTML escaped.
24     public final String title;
25     private final LazyHtmlLoader lazyHtmlLoader;
26     @SuppressWarnings("WeakerAccess")
27     public String html;
28
29     public HtmlEntry(final EntrySource entrySource, String title) {
30         super(entrySource);
31         this.title = title;
32         lazyHtmlLoader = null;
33     }
34
35     public HtmlEntry(Dictionary dictionary, DataInput raf, final int index)
36     throws IOException {
37         super(dictionary, raf, index);
38         title = raf.readUTF();
39         lazyHtmlLoader = new LazyHtmlLoader(raf, dictionary.htmlData, index);
40         html = null;
41     }
42
43     private void writeBase(DataOutput raf) throws IOException {
44         super.write(raf);
45         raf.writeUTF(title);
46     }
47
48     private void writeData(DataOutput raf) throws IOException {
49         final byte[] bytes = getHtml().getBytes(StandardCharsets.UTF_8);
50         StringUtil.writeVarInt(raf, bytes.length);
51         raf.write(bytes);
52     }
53
54     private static DataInputBuffer readData(DataInput raf) throws IOException {
55         int len = StringUtil.readVarInt(raf);
56         return ((DataInputBuffer)raf).slice(len);
57     }
58
59     String getHtml() {
60         return html != null ? html : lazyHtmlLoader.getHtml();
61     }
62
63     @Override
64     public void addToDictionary(Dictionary dictionary) {
65         assert index == -1;
66         dictionary.htmlEntries.add(this);
67         index = dictionary.htmlEntries.size() - 1;
68     }
69
70     @Override
71     public RowBase CreateRow(int rowIndex, Index dictionaryIndex) {
72         return new Row(this.index, rowIndex, dictionaryIndex);
73     }
74
75     static final class Serializer implements RAFListSerializerSkippable<HtmlEntry> {
76
77         final Dictionary dictionary;
78
79         Serializer(Dictionary dictionary) {
80             this.dictionary = dictionary;
81         }
82
83         @Override
84         public HtmlEntry read(DataInput raf, final int index) throws IOException {
85             return new HtmlEntry(dictionary, raf, index);
86         }
87
88         @Override
89         public void skip(DataInput raf, final int index) throws IOException {
90             if (dictionary.dictFileVersion >= 7)
91             {
92                 StringUtil.readVarInt(raf);
93             }
94             else
95             {
96                 raf.skipBytes(2);
97             }
98             int l = raf.readUnsignedShort();
99             raf.skipBytes(l);
100         }
101
102         @Override
103         public void write(DataOutput raf, HtmlEntry t) throws IOException {
104             t.writeBase(raf);
105         }
106     }
107
108     static final class DataSerializer implements RAFListSerializer<HtmlEntry> {
109         @Override
110         public HtmlEntry read(DataInput raf, final int index) {
111             assert false;
112             return null;
113         }
114
115         @Override
116         public void write(DataOutput raf, HtmlEntry t) throws IOException {
117             t.writeData(raf);
118         }
119     }
120
121     static final class DataDeserializer implements RAFListSerializer<DataInputBuffer> {
122         @Override
123         public DataInputBuffer read(DataInput raf, final int index) throws IOException {
124             return HtmlEntry.readData(raf);
125         }
126
127         @Override
128         public void write(DataOutput raf, DataInputBuffer t) {
129             assert false;
130         }
131     }
132
133     private String getRawText(final boolean compact) {
134         return title + ":\n" + getHtml();
135     }
136
137     @Override
138     public int compareTo(/*@NonNull*/ HtmlEntry another) {
139         if (title.compareTo(another.title) != 0) {
140             return title.compareTo(another.title);
141         }
142         return getHtml().compareTo(another.getHtml());
143     }
144
145     @Override
146     public String toString() {
147         return getRawText(false);
148     }
149
150     // --------------------------------------------------------------------
151
152     public static class Row extends RowBase {
153
154         Row(final DataInput raf, final int thisRowIndex,
155             final Index index, int extra) throws IOException {
156             super(raf, thisRowIndex, index, extra);
157         }
158
159         Row(final int referenceIndex, final int thisRowIndex,
160             final Index index) {
161             super(referenceIndex, thisRowIndex, index);
162         }
163
164         @Override
165         public String toString() {
166             return getRawText(false);
167         }
168
169         public HtmlEntry getEntry() {
170             return index.dict.htmlEntries.get(referenceIndex);
171         }
172
173         @Override
174         public void print(PrintStream out) {
175             final HtmlEntry entry = getEntry();
176             out.println("See also HtmlEntry:" + entry.title);
177         }
178
179         @Override
180         public String getRawText(boolean compact) {
181             final HtmlEntry entry = getEntry();
182             return entry.getRawText(compact);
183         }
184
185         @Override
186         public RowMatchType matches(final List<String> searchTokens,
187                                     final Pattern orderedMatchPattern, final Transliterator normalizer,
188                                     final boolean swapPairEntries) {
189             final String text = normalizer.transform(getRawText(false));
190             if (orderedMatchPattern.matcher(text).find()) {
191                 return RowMatchType.ORDERED_MATCH;
192             }
193             for (int i = searchTokens.size() - 1; i >= 0; --i) {
194                 final String searchToken = searchTokens.get(i);
195                 if (!text.contains(searchToken)) {
196                     return RowMatchType.NO_MATCH;
197                 }
198             }
199             return RowMatchType.BAG_OF_WORDS_MATCH;
200         }
201     }
202
203     public static String htmlBody(final List<HtmlEntry> htmlEntries, final String indexShortName) {
204         final StringBuilder result = new StringBuilder();
205         for (final HtmlEntry htmlEntry : htmlEntries) {
206             final String titleEscaped = StringUtil.escapeUnicodeToPureHtml(htmlEntry.title);
207             result.append(String.format("<h1><a href=\"%s\">%s</a></h1>\n<p>%s\n",
208                                         formatQuickdicUrl(indexShortName, htmlEntry.title), titleEscaped,
209                                         htmlEntry.getHtml()));
210         }
211         return result.toString();
212     }
213
214     @SuppressWarnings("WeakerAccess")
215     public static String formatQuickdicUrl(final String indexShortName, final String text) {
216         assert !indexShortName.contains(":");
217         assert text.length() > 0;
218         return String.format("q://d?%s&%s", indexShortName, StringUtil.encodeForUrl(text));
219     }
220
221     public static boolean isQuickdicUrl(String url) {
222         return url.startsWith("q://d?");
223     }
224
225     // --------------------------------------------------------------------
226
227     @SuppressWarnings("WeakerAccess")
228     public static final class LazyHtmlLoader {
229         final DataInputBuffer buf;
230         final int numBytes;
231         final List<DataInputBuffer> data;
232         final int index;
233
234         // Not sure this volatile is right, but oh well.
235         volatile SoftReference<String> htmlRef = new SoftReference<>(null);
236
237         private LazyHtmlLoader(final DataInput inp, List<DataInputBuffer> data, int index) throws IOException {
238             this.data = data;
239             this.index = index;
240             if (data != null) {
241                 buf = null;
242                 this.numBytes = -1;
243                 return;
244             }
245             numBytes = Math.min(inp.readInt(), 20 * 1024 * 1024);
246             int numZipBytes = Math.min(inp.readInt(), 20 * 1024 * 1024);
247             DataInputBuffer b = (DataInputBuffer)inp;
248             buf = b.slice(numZipBytes);
249         }
250
251         String getHtml() {
252             String html = htmlRef.get();
253             if (html != null) {
254                 return html;
255             }
256             if (data != null) {
257                 html = data.get(index).asString();
258                 htmlRef = new SoftReference<>(html);
259                 return html;
260             }
261             System.out.println("Loading Html: numBytes=" + numBytes + ", numZipBytes="
262                                + buf.limit());
263             final byte[] zipBytes = new byte[buf.limit()];
264             buf.rewind();
265             buf.readFully(zipBytes);
266             try {
267                 final byte[] bytes = StringUtil.unzipFully(zipBytes, numBytes);
268                 html = new String(bytes, StandardCharsets.UTF_8);
269             } catch (IOException e) {
270                 throw new RuntimeException("Dictionary HTML data corrupted", e);
271             }
272             htmlRef = new SoftReference<>(html);
273             return html;
274         }
275     }
276
277 }