gitweb.fperrin.net Git - Dictionary.git/blob - src/com/hughes/android/dictionary/engine/HtmlEntry.java
Vastly improve HtmlEntry compression.
[Dictionary.git] / src / com / hughes / android / dictionary / engine / HtmlEntry.java
1
2 package com.hughes.android.dictionary.engine;
3
4 import com.hughes.util.StringUtil;
5 import com.hughes.util.raf.RAFListSerializer;
6 import com.hughes.util.raf.RAFSerializable;
7 import com.ibm.icu.text.Transliterator;
8
9 import java.io.DataInput;
10 import java.io.DataOutput;
11 import java.io.IOException;
12 import java.io.PrintStream;
13 import java.io.RandomAccessFile;
14 import java.io.UnsupportedEncodingException;
15 import java.lang.ref.SoftReference;
16 import java.util.List;
17 import java.util.regex.Pattern;
18
19 public class HtmlEntry extends AbstractEntry implements RAFSerializable<HtmlEntry>,
20         Comparable<HtmlEntry> {
21
22     // Title is not HTML escaped.
23     public final String title;
24     public final LazyHtmlLoader lazyHtmlLoader;
25     public String html;
26
27     public HtmlEntry(final EntrySource entrySource, String title) {
28         super(entrySource);
29         this.title = title;
30         lazyHtmlLoader = null;
31     }
32
33     public HtmlEntry(Dictionary dictionary, DataInput raf, final int index)
34             throws IOException {
35         super(dictionary, raf, index);
36         title = raf.readUTF();
37         lazyHtmlLoader = new LazyHtmlLoader(raf, dictionary.htmlData, index);
38         html = null;
39     }
40
41     @Override
42     public void write(DataOutput raf) throws IOException {
43         super.write(raf);
44         raf.writeUTF(title);
45
46         final byte[] bytes = getHtml().getBytes("UTF-8");
47         final byte[] zipBytes = StringUtil.zipBytes(bytes);
48         StringUtil.writeVarInt(raf, zipBytes.length);
49         raf.write(zipBytes);
50     }
51
52     public void writeBase(DataOutput raf) throws IOException {
53         super.write(raf);
54         raf.writeUTF(title);
55     }
56
57     public void writeData(DataOutput raf) throws IOException {
58         final byte[] bytes = getHtml().getBytes("UTF-8");
59         StringUtil.writeVarInt(raf, bytes.length);
60         raf.write(bytes);
61     }
62
63     public static byte[] readData(DataInput raf) throws IOException {
64         int len = StringUtil.readVarInt(raf);
65         final byte[] bytes = new byte[len];
66         raf.readFully(bytes);
67         return bytes;
68     }
69
70     String getHtml() {
71         return html != null ? html : lazyHtmlLoader.getHtml();
72     }
73
74     @Override
75     public void addToDictionary(Dictionary dictionary) {
76         assert index == -1;
77         dictionary.htmlEntries.add(this);
78         index = dictionary.htmlEntries.size() - 1;
79     }
80
81     @Override
82     public RowBase CreateRow(int rowIndex, Index dictionaryIndex) {
83         return new Row(this.index, rowIndex, dictionaryIndex);
84     }
85
86     static final class Serializer implements RAFListSerializer<HtmlEntry> {
87
88         final Dictionary dictionary;
89
90         Serializer(Dictionary dictionary) {
91             this.dictionary = dictionary;
92         }
93
94         @Override
95         public HtmlEntry read(DataInput raf, final int index) throws IOException {
96             return new HtmlEntry(dictionary, raf, index);
97         }
98
99         @Override
100         public void write(DataOutput raf, HtmlEntry t) throws IOException {
101             t.writeBase(raf);
102         }
103     }
104
105     static final class DataSerializer implements RAFListSerializer<HtmlEntry> {
106         @Override
107         public HtmlEntry read(DataInput raf, final int index) throws IOException {
108             assert false;
109             return null;
110         }
111
112         @Override
113         public void write(DataOutput raf, HtmlEntry t) throws IOException {
114             t.writeData(raf);
115         }
116     }
117
118     static final class DataDeserializer implements RAFListSerializer<byte[]> {
119         @Override
120         public byte[] read(DataInput raf, final int index) throws IOException {
121             return HtmlEntry.readData(raf);
122         }
123
124         @Override
125         public void write(DataOutput raf, byte[] t) throws IOException {
126             assert false;
127         }
128     }
129
130     public String getRawText(final boolean compact) {
131         return title + ":\n" + getHtml();
132     }
133
134     @Override
135     public int compareTo(HtmlEntry another) {
136         if (title.compareTo(another.title) != 0) {
137             return title.compareTo(another.title);
138         }
139         return getHtml().compareTo(another.getHtml());
140     }
141
142     @Override
143     public String toString() {
144         return getRawText(false);
145     }
146
147     // --------------------------------------------------------------------
148
149     public static class Row extends RowBase {
150
151         boolean isExpanded = false;
152
153         Row(final DataInput raf, final int thisRowIndex,
154                 final Index index, int extra) throws IOException {
155             super(raf, thisRowIndex, index, extra);
156         }
157
158         Row(final int referenceIndex, final int thisRowIndex,
159                 final Index index) {
160             super(referenceIndex, thisRowIndex, index);
161         }
162
163         @Override
164         public String toString() {
165             return getRawText(false);
166         }
167
168         public HtmlEntry getEntry() {
169             return index.dict.htmlEntries.get(referenceIndex);
170         }
171
172         @Override
173         public void print(PrintStream out) {
174             final HtmlEntry entry = getEntry();
175             out.println("See also HtmlEntry:" + entry.title);
176         }
177
178         @Override
179         public String getRawText(boolean compact) {
180             final HtmlEntry entry = getEntry();
181             return entry.getRawText(compact);
182         }
183
184         @Override
185         public RowMatchType matches(final List<String> searchTokens,
186                 final Pattern orderedMatchPattern, final Transliterator normalizer,
187                 final boolean swapPairEntries) {
188             final String text = normalizer.transform(getRawText(false));
189             if (orderedMatchPattern.matcher(text).find()) {
190                 return RowMatchType.ORDERED_MATCH;
191             }
192             for (int i = searchTokens.size() - 1; i >= 0; --i) {
193                 final String searchToken = searchTokens.get(i);
194                 if (!text.contains(searchToken)) {
195                     return RowMatchType.NO_MATCH;
196                 }
197             }
198             return RowMatchType.BAG_OF_WORDS_MATCH;
199         }
200     }
201
202     public static String htmlBody(final List<HtmlEntry> htmlEntries, final String indexShortName) {
203         final StringBuilder result = new StringBuilder();
204         for (final HtmlEntry htmlEntry : htmlEntries) {
205             final String titleEscaped = StringUtil.escapeUnicodeToPureHtml(htmlEntry.title);
206             result.append(String.format("<h1><a href=\"%s\">%s</a></h1>\n<p>%s\n",
207                     formatQuickdicUrl(indexShortName, htmlEntry.title), titleEscaped,
208                     htmlEntry.getHtml()));
209         }
210         return result.toString();
211     }
212
213     public static String formatQuickdicUrl(final String indexShortName, final String text) {
214         assert !indexShortName.contains(":");
215         assert text.length() > 0;
216         return String.format("q://d?%s&%s", indexShortName, StringUtil.encodeForUrl(text));
217     }
218
219     public static boolean isQuickdicUrl(String url) {
220         return url.startsWith("q://d?");
221     }
222
223     // --------------------------------------------------------------------
224
225     public static final class LazyHtmlLoader {
226         final RandomAccessFile raf;
227         final long offset;
228         final int numBytes;
229         final int numZipBytes;
230         final List<byte[]> data;
231         final int index;
232
233         // Not sure this volatile is right, but oh well.
234         volatile SoftReference<String> htmlRef = new SoftReference<String>(null);
235
236         private LazyHtmlLoader(final DataInput inp, List<byte[]> data, int index) throws IOException {
237             this.data = data;
238             this.index = index;
239             if (data != null) {
240                 this.raf = null;
241                 this.offset = 0;
242                 this.numBytes = -1;
243                 this.numZipBytes = -1;
244                 return;
245             }
246             raf = (RandomAccessFile)inp;
247             numBytes = raf.readInt();
248             numZipBytes = raf.readInt();
249             offset = raf.getFilePointer();
250             raf.skipBytes(numZipBytes);
251         }
252
253         public String getHtml() {
254             String html = htmlRef.get();
255             if (html != null) {
256                 return html;
257             }
258             if (data != null) {
259                 try {
260                     html = new String(data.get(index), "UTF-8");
261                 } catch (UnsupportedEncodingException e) {
262                     throw new RuntimeException(e);
263                 }
264                 htmlRef = new SoftReference<String>(html);
265                 return html;
266             }
267             System.out.println("Loading Html: numBytes=" + numBytes + ", numZipBytes="
268                     + numZipBytes);
269             final byte[] zipBytes = new byte[numZipBytes];
270             synchronized (raf) {
271                 try {
272                     raf.seek(offset);
273                     raf.read(zipBytes);
274                 } catch (IOException e) {
275                     throw new RuntimeException(e);
276                 }
277             }
278             try {
279                 final byte[] bytes = StringUtil.unzipFully(zipBytes, numBytes);
280                 html = new String(bytes, "UTF-8");
281             } catch (IOException e) {
282                 throw new RuntimeException(e);
283             }
284             htmlRef = new SoftReference<String>(html);
285             return html;
286         }
287     }
288
289 }