]> gitweb.fperrin.net Git - Dictionary.git/blob - src/com/hughes/android/dictionary/engine/HtmlEntry.java
ca153a70126af7638ffcbed677a65402227e6eb8
[Dictionary.git] / src / com / hughes / android / dictionary / engine / HtmlEntry.java
1
2 package com.hughes.android.dictionary.engine;
3
4 import com.hughes.util.StringUtil;
5 import com.hughes.util.raf.RAFListSerializer;
6 import com.ibm.icu.text.Transliterator;
7
8 import java.io.DataInput;
9 import java.io.DataOutput;
10 import java.io.IOException;
11 import java.io.PrintStream;
12 import java.io.RandomAccessFile;
13 import java.io.UnsupportedEncodingException;
14 import java.lang.ref.SoftReference;
15 import java.util.List;
16 import java.util.regex.Pattern;
17
18 public class HtmlEntry extends AbstractEntry implements Comparable<HtmlEntry> {
19
20     // Title is not HTML escaped.
21     public final String title;
22     public final LazyHtmlLoader lazyHtmlLoader;
23     public String html;
24
25     public HtmlEntry(final EntrySource entrySource, String title) {
26         super(entrySource);
27         this.title = title;
28         lazyHtmlLoader = null;
29     }
30
31     public HtmlEntry(Dictionary dictionary, DataInput raf, final int index)
32     throws IOException {
33         super(dictionary, raf, index);
34         title = raf.readUTF();
35         lazyHtmlLoader = new LazyHtmlLoader(raf, dictionary.htmlData, index);
36         html = null;
37     }
38
39     public void writeBase(DataOutput raf) throws IOException {
40         super.write(raf);
41         raf.writeUTF(title);
42     }
43
44     public void writeData(DataOutput raf) throws IOException {
45         final byte[] bytes = getHtml().getBytes("UTF-8");
46         StringUtil.writeVarInt(raf, bytes.length);
47         raf.write(bytes);
48     }
49
50     public static byte[] readData(DataInput raf) throws IOException {
51         int len = StringUtil.readVarInt(raf);
52         final byte[] bytes = new byte[len];
53         raf.readFully(bytes);
54         return bytes;
55     }
56
57     String getHtml() {
58         return html != null ? html : lazyHtmlLoader.getHtml();
59     }
60
61     @Override
62     public void addToDictionary(Dictionary dictionary) {
63         assert index == -1;
64         dictionary.htmlEntries.add(this);
65         index = dictionary.htmlEntries.size() - 1;
66     }
67
68     @Override
69     public RowBase CreateRow(int rowIndex, Index dictionaryIndex) {
70         return new Row(this.index, rowIndex, dictionaryIndex);
71     }
72
73     static final class Serializer implements RAFListSerializer<HtmlEntry> {
74
75         final Dictionary dictionary;
76
77         Serializer(Dictionary dictionary) {
78             this.dictionary = dictionary;
79         }
80
81         @Override
82         public HtmlEntry read(DataInput raf, final int index) throws IOException {
83             return new HtmlEntry(dictionary, raf, index);
84         }
85
86         @Override
87         public void write(DataOutput raf, HtmlEntry t) throws IOException {
88             t.writeBase(raf);
89         }
90     }
91
92     static final class DataSerializer implements RAFListSerializer<HtmlEntry> {
93         @Override
94         public HtmlEntry read(DataInput raf, final int index) throws IOException {
95             assert false;
96             return null;
97         }
98
99         @Override
100         public void write(DataOutput raf, HtmlEntry t) throws IOException {
101             t.writeData(raf);
102         }
103     }
104
105     static final class DataDeserializer implements RAFListSerializer<byte[]> {
106         @Override
107         public byte[] read(DataInput raf, final int index) throws IOException {
108             return HtmlEntry.readData(raf);
109         }
110
111         @Override
112         public void write(DataOutput raf, byte[] t) throws IOException {
113             assert false;
114         }
115     }
116
117     public String getRawText(final boolean compact) {
118         return title + ":\n" + getHtml();
119     }
120
121     @Override
122     public int compareTo(HtmlEntry another) {
123         if (title.compareTo(another.title) != 0) {
124             return title.compareTo(another.title);
125         }
126         return getHtml().compareTo(another.getHtml());
127     }
128
129     @Override
130     public String toString() {
131         return getRawText(false);
132     }
133
134     // --------------------------------------------------------------------
135
136     public static class Row extends RowBase {
137
138         boolean isExpanded = false;
139
140         Row(final DataInput raf, final int thisRowIndex,
141             final Index index, int extra) throws IOException {
142             super(raf, thisRowIndex, index, extra);
143         }
144
145         Row(final int referenceIndex, final int thisRowIndex,
146             final Index index) {
147             super(referenceIndex, thisRowIndex, index);
148         }
149
150         @Override
151         public String toString() {
152             return getRawText(false);
153         }
154
155         public HtmlEntry getEntry() {
156             return index.dict.htmlEntries.get(referenceIndex);
157         }
158
159         @Override
160         public void print(PrintStream out) {
161             final HtmlEntry entry = getEntry();
162             out.println("See also HtmlEntry:" + entry.title);
163         }
164
165         @Override
166         public String getRawText(boolean compact) {
167             final HtmlEntry entry = getEntry();
168             return entry.getRawText(compact);
169         }
170
171         @Override
172         public RowMatchType matches(final List<String> searchTokens,
173                                     final Pattern orderedMatchPattern, final Transliterator normalizer,
174                                     final boolean swapPairEntries) {
175             final String text = normalizer.transform(getRawText(false));
176             if (orderedMatchPattern.matcher(text).find()) {
177                 return RowMatchType.ORDERED_MATCH;
178             }
179             for (int i = searchTokens.size() - 1; i >= 0; --i) {
180                 final String searchToken = searchTokens.get(i);
181                 if (!text.contains(searchToken)) {
182                     return RowMatchType.NO_MATCH;
183                 }
184             }
185             return RowMatchType.BAG_OF_WORDS_MATCH;
186         }
187     }
188
189     public static String htmlBody(final List<HtmlEntry> htmlEntries, final String indexShortName) {
190         final StringBuilder result = new StringBuilder();
191         for (final HtmlEntry htmlEntry : htmlEntries) {
192             final String titleEscaped = StringUtil.escapeUnicodeToPureHtml(htmlEntry.title);
193             result.append(String.format("<h1><a href=\"%s\">%s</a></h1>\n<p>%s\n",
194                                         formatQuickdicUrl(indexShortName, htmlEntry.title), titleEscaped,
195                                         htmlEntry.getHtml()));
196         }
197         return result.toString();
198     }
199
200     public static String formatQuickdicUrl(final String indexShortName, final String text) {
201         assert !indexShortName.contains(":");
202         assert text.length() > 0;
203         return String.format("q://d?%s&%s", indexShortName, StringUtil.encodeForUrl(text));
204     }
205
206     public static boolean isQuickdicUrl(String url) {
207         return url.startsWith("q://d?");
208     }
209
210     // --------------------------------------------------------------------
211
212     public static final class LazyHtmlLoader {
213         final RandomAccessFile raf;
214         final long offset;
215         final int numBytes;
216         final int numZipBytes;
217         final List<byte[]> data;
218         final int index;
219
220         // Not sure this volatile is right, but oh well.
221         volatile SoftReference<String> htmlRef = new SoftReference<String>(null);
222
223         private LazyHtmlLoader(final DataInput inp, List<byte[]> data, int index) throws IOException {
224             this.data = data;
225             this.index = index;
226             if (data != null) {
227                 this.raf = null;
228                 this.offset = 0;
229                 this.numBytes = -1;
230                 this.numZipBytes = -1;
231                 return;
232             }
233             raf = (RandomAccessFile)inp;
234             numBytes = raf.readInt();
235             numZipBytes = raf.readInt();
236             offset = raf.getFilePointer();
237             raf.skipBytes(numZipBytes);
238         }
239
240         public String getHtml() {
241             String html = htmlRef.get();
242             if (html != null) {
243                 return html;
244             }
245             if (data != null) {
246                 try {
247                     html = new String(data.get(index), "UTF-8");
248                 } catch (UnsupportedEncodingException e) {
249                     throw new RuntimeException("Dictionary HTML data corrupted", e);
250                 }
251                 htmlRef = new SoftReference<String>(html);
252                 return html;
253             }
254             System.out.println("Loading Html: numBytes=" + numBytes + ", numZipBytes="
255                                + numZipBytes);
256             final byte[] zipBytes = new byte[numZipBytes];
257             synchronized (raf) {
258                 try {
259                     raf.seek(offset);
260                     raf.read(zipBytes);
261                 } catch (IOException e) {
262                     throw new RuntimeException("Failed to read HTML data from dictionary", e);
263                 }
264             }
265             try {
266                 final byte[] bytes = StringUtil.unzipFully(zipBytes, numBytes);
267                 html = new String(bytes, "UTF-8");
268             } catch (IOException e) {
269                 throw new RuntimeException("Dictionary HTML data corrupted", e);
270             }
271             htmlRef = new SoftReference<String>(html);
272             return html;
273         }
274     }
275
276 }