]> gitweb.fperrin.net Git - Dictionary.git/blob - src/com/hughes/android/dictionary/engine/HtmlEntry.java
ce57e11a980cc2011bb57af85cb60f20896d5bf9
[Dictionary.git] / src / com / hughes / android / dictionary / engine / HtmlEntry.java
1
2 package com.hughes.android.dictionary.engine;
3
4 import com.hughes.util.StringUtil;
5 import com.hughes.util.raf.RAFListSerializer;
6 import com.ibm.icu.text.Transliterator;
7
8 import java.io.DataInput;
9 import java.io.DataOutput;
10 import java.io.IOException;
11 import java.io.PrintStream;
12 import java.io.RandomAccessFile;
13 import java.io.UnsupportedEncodingException;
14 import java.lang.ref.SoftReference;
15 import java.nio.channels.FileChannel;
16 import java.util.List;
17 import java.util.regex.Pattern;
18
19 public class HtmlEntry extends AbstractEntry implements Comparable<HtmlEntry> {
20
21     // Title is not HTML escaped.
22     public final String title;
23     public final LazyHtmlLoader lazyHtmlLoader;
24     public String html;
25
26     public HtmlEntry(final EntrySource entrySource, String title) {
27         super(entrySource);
28         this.title = title;
29         lazyHtmlLoader = null;
30     }
31
32     public HtmlEntry(Dictionary dictionary, FileChannel ch, DataInput raf, final int index)
33     throws IOException {
34         super(dictionary, raf, index);
35         title = raf.readUTF();
36         lazyHtmlLoader = new LazyHtmlLoader(ch, raf, dictionary.htmlData, index);
37         html = null;
38     }
39
40     public void writeBase(DataOutput raf) throws IOException {
41         super.write(raf);
42         raf.writeUTF(title);
43     }
44
45     public void writeData(DataOutput raf) throws IOException {
46         final byte[] bytes = getHtml().getBytes("UTF-8");
47         StringUtil.writeVarInt(raf, bytes.length);
48         raf.write(bytes);
49     }
50
51     public static byte[] readData(DataInput raf) throws IOException {
52         int len = StringUtil.readVarInt(raf);
53         final byte[] bytes = new byte[len];
54         raf.readFully(bytes);
55         return bytes;
56     }
57
58     String getHtml() {
59         return html != null ? html : lazyHtmlLoader.getHtml();
60     }
61
62     @Override
63     public void addToDictionary(Dictionary dictionary) {
64         assert index == -1;
65         dictionary.htmlEntries.add(this);
66         index = dictionary.htmlEntries.size() - 1;
67     }
68
69     @Override
70     public RowBase CreateRow(int rowIndex, Index dictionaryIndex) {
71         return new Row(this.index, rowIndex, dictionaryIndex);
72     }
73
74     static final class Serializer implements RAFListSerializer<HtmlEntry> {
75
76         final Dictionary dictionary;
77         final FileChannel ch;
78
79         Serializer(Dictionary dictionary, FileChannel ch) {
80             this.dictionary = dictionary;
81             this.ch = ch;
82         }
83
84         @Override
85         public HtmlEntry read(DataInput raf, final int index) throws IOException {
86             return new HtmlEntry(dictionary, ch, raf, index);
87         }
88
89         @Override
90         public void write(DataOutput raf, HtmlEntry t) throws IOException {
91             t.writeBase(raf);
92         }
93     }
94
95     static final class DataSerializer implements RAFListSerializer<HtmlEntry> {
96         @Override
97         public HtmlEntry read(DataInput raf, final int index) throws IOException {
98             assert false;
99             return null;
100         }
101
102         @Override
103         public void write(DataOutput raf, HtmlEntry t) throws IOException {
104             t.writeData(raf);
105         }
106     }
107
108     static final class DataDeserializer implements RAFListSerializer<byte[]> {
109         @Override
110         public byte[] read(DataInput raf, final int index) throws IOException {
111             return HtmlEntry.readData(raf);
112         }
113
114         @Override
115         public void write(DataOutput raf, byte[] t) throws IOException {
116             assert false;
117         }
118     }
119
120     public String getRawText(final boolean compact) {
121         return title + ":\n" + getHtml();
122     }
123
124     @Override
125     public int compareTo(HtmlEntry another) {
126         if (title.compareTo(another.title) != 0) {
127             return title.compareTo(another.title);
128         }
129         return getHtml().compareTo(another.getHtml());
130     }
131
132     @Override
133     public String toString() {
134         return getRawText(false);
135     }
136
137     // --------------------------------------------------------------------
138
139     public static class Row extends RowBase {
140
141         boolean isExpanded = false;
142
143         Row(final DataInput raf, final int thisRowIndex,
144             final Index index, int extra) throws IOException {
145             super(raf, thisRowIndex, index, extra);
146         }
147
148         Row(final int referenceIndex, final int thisRowIndex,
149             final Index index) {
150             super(referenceIndex, thisRowIndex, index);
151         }
152
153         @Override
154         public String toString() {
155             return getRawText(false);
156         }
157
158         public HtmlEntry getEntry() {
159             return index.dict.htmlEntries.get(referenceIndex);
160         }
161
162         @Override
163         public void print(PrintStream out) {
164             final HtmlEntry entry = getEntry();
165             out.println("See also HtmlEntry:" + entry.title);
166         }
167
168         @Override
169         public String getRawText(boolean compact) {
170             final HtmlEntry entry = getEntry();
171             return entry.getRawText(compact);
172         }
173
174         @Override
175         public RowMatchType matches(final List<String> searchTokens,
176                                     final Pattern orderedMatchPattern, final Transliterator normalizer,
177                                     final boolean swapPairEntries) {
178             final String text = normalizer.transform(getRawText(false));
179             if (orderedMatchPattern.matcher(text).find()) {
180                 return RowMatchType.ORDERED_MATCH;
181             }
182             for (int i = searchTokens.size() - 1; i >= 0; --i) {
183                 final String searchToken = searchTokens.get(i);
184                 if (!text.contains(searchToken)) {
185                     return RowMatchType.NO_MATCH;
186                 }
187             }
188             return RowMatchType.BAG_OF_WORDS_MATCH;
189         }
190     }
191
192     public static String htmlBody(final List<HtmlEntry> htmlEntries, final String indexShortName) {
193         final StringBuilder result = new StringBuilder();
194         for (final HtmlEntry htmlEntry : htmlEntries) {
195             final String titleEscaped = StringUtil.escapeUnicodeToPureHtml(htmlEntry.title);
196             result.append(String.format("<h1><a href=\"%s\">%s</a></h1>\n<p>%s\n",
197                                         formatQuickdicUrl(indexShortName, htmlEntry.title), titleEscaped,
198                                         htmlEntry.getHtml()));
199         }
200         return result.toString();
201     }
202
203     public static String formatQuickdicUrl(final String indexShortName, final String text) {
204         assert !indexShortName.contains(":");
205         assert text.length() > 0;
206         return String.format("q://d?%s&%s", indexShortName, StringUtil.encodeForUrl(text));
207     }
208
209     public static boolean isQuickdicUrl(String url) {
210         return url.startsWith("q://d?");
211     }
212
213     // --------------------------------------------------------------------
214
215     public static final class LazyHtmlLoader {
216         final DataInput raf;
217         final FileChannel ch;
218         final long offset;
219         final int numBytes;
220         final int numZipBytes;
221         final List<byte[]> data;
222         final int index;
223
224         // Not sure this volatile is right, but oh well.
225         volatile SoftReference<String> htmlRef = new SoftReference<String>(null);
226
227         private LazyHtmlLoader(FileChannel ch, final DataInput inp, List<byte[]> data, int index) throws IOException {
228             this.data = data;
229             this.index = index;
230             if (data != null) {
231                 this.raf = null;
232                 this.ch = null;
233                 this.offset = 0;
234                 this.numBytes = -1;
235                 this.numZipBytes = -1;
236                 return;
237             }
238             raf = inp;
239             this.ch = ch;
240             numBytes = raf.readInt();
241             numZipBytes = raf.readInt();
242             offset = ch.position();
243             raf.skipBytes(numZipBytes);
244         }
245
246         public String getHtml() {
247             String html = htmlRef.get();
248             if (html != null) {
249                 return html;
250             }
251             if (data != null) {
252                 try {
253                     html = new String(data.get(index), "UTF-8");
254                 } catch (UnsupportedEncodingException e) {
255                     throw new RuntimeException("Dictionary HTML data corrupted", e);
256                 }
257                 htmlRef = new SoftReference<String>(html);
258                 return html;
259             }
260             System.out.println("Loading Html: numBytes=" + numBytes + ", numZipBytes="
261                                + numZipBytes);
262             final byte[] zipBytes = new byte[numZipBytes];
263             synchronized (ch) {
264                 try {
265                     ch.position(offset);
266                     raf.readFully(zipBytes);
267                 } catch (IOException e) {
268                     throw new RuntimeException("Failed to read HTML data from dictionary", e);
269                 }
270             }
271             try {
272                 final byte[] bytes = StringUtil.unzipFully(zipBytes, numBytes);
273                 html = new String(bytes, "UTF-8");
274             } catch (IOException e) {
275                 throw new RuntimeException("Dictionary HTML data corrupted", e);
276             }
277             htmlRef = new SoftReference<String>(html);
278             return html;
279         }
280     }
281
282 }