]> gitweb.fperrin.net Git - Dictionary.git/blob - src/com/hughes/android/dictionary/engine/HtmlEntry.java
Switch from RandomAccessFile to DataInput/DataOutput.
[Dictionary.git] / src / com / hughes / android / dictionary / engine / HtmlEntry.java
1
2 package com.hughes.android.dictionary.engine;
3
4 import com.hughes.util.StringUtil;
5 import com.hughes.util.raf.RAFListSerializer;
6 import com.hughes.util.raf.RAFSerializable;
7 import com.ibm.icu.text.Transliterator;
8
9 import java.io.DataInput;
10 import java.io.DataOutput;
11 import java.io.IOException;
12 import java.io.PrintStream;
13 import java.io.RandomAccessFile;
14 import java.lang.ref.SoftReference;
15 import java.util.List;
16 import java.util.regex.Pattern;
17
18 public class HtmlEntry extends AbstractEntry implements RAFSerializable<HtmlEntry>,
19         Comparable<HtmlEntry> {
20
21     // Title is not HTML escaped.
22     public final String title;
23     public final LazyHtmlLoader lazyHtmlLoader;
24     public String html;
25
26     public HtmlEntry(final EntrySource entrySource, String title) {
27         super(entrySource);
28         this.title = title;
29         lazyHtmlLoader = null;
30     }
31
32     public HtmlEntry(Dictionary dictionary, DataInput raf, final int index)
33             throws IOException {
34         super(dictionary, raf, index);
35         title = raf.readUTF();
36         lazyHtmlLoader = new LazyHtmlLoader(raf);
37         html = null;
38     }
39
40     @Override
41     public void write(DataOutput raf) throws IOException {
42         super.write(raf);
43         raf.writeUTF(title);
44
45         final byte[] bytes = getHtml().getBytes("UTF-8");
46         final byte[] zipBytes = StringUtil.zipBytes(bytes);
47         raf.writeInt(bytes.length);
48         raf.writeInt(zipBytes.length);
49         raf.write(zipBytes);
50     }
51
52     String getHtml() {
53         return html != null ? html : lazyHtmlLoader.getHtml();
54     }
55
56     @Override
57     public void addToDictionary(Dictionary dictionary) {
58         assert index == -1;
59         dictionary.htmlEntries.add(this);
60         index = dictionary.htmlEntries.size() - 1;
61     }
62
63     @Override
64     public RowBase CreateRow(int rowIndex, Index dictionaryIndex) {
65         return new Row(this.index, rowIndex, dictionaryIndex);
66     }
67
68     static final class Serializer implements RAFListSerializer<HtmlEntry> {
69
70         final Dictionary dictionary;
71
72         Serializer(Dictionary dictionary) {
73             this.dictionary = dictionary;
74         }
75
76         @Override
77         public HtmlEntry read(DataInput raf, final int index) throws IOException {
78             return new HtmlEntry(dictionary, raf, index);
79         }
80
81         @Override
82         public void write(DataOutput raf, HtmlEntry t) throws IOException {
83             t.write(raf);
84         }
85     }
86
87     public String getRawText(final boolean compact) {
88         return title + ":\n" + getHtml();
89     }
90
91     @Override
92     public int compareTo(HtmlEntry another) {
93         if (title.compareTo(another.title) != 0) {
94             return title.compareTo(another.title);
95         }
96         return getHtml().compareTo(another.getHtml());
97     }
98
99     @Override
100     public String toString() {
101         return getRawText(false);
102     }
103
104     // --------------------------------------------------------------------
105
106     public static class Row extends RowBase {
107
108         boolean isExpanded = false;
109
110         Row(final DataInput raf, final int thisRowIndex,
111                 final Index index) throws IOException {
112             super(raf, thisRowIndex, index);
113         }
114
115         Row(final int referenceIndex, final int thisRowIndex,
116                 final Index index) {
117             super(referenceIndex, thisRowIndex, index);
118         }
119
120         @Override
121         public String toString() {
122             return getRawText(false);
123         }
124
125         public HtmlEntry getEntry() {
126             return index.dict.htmlEntries.get(referenceIndex);
127         }
128
129         @Override
130         public void print(PrintStream out) {
131             final HtmlEntry entry = getEntry();
132             out.println("See also HtmlEntry:" + entry.title);
133         }
134
135         @Override
136         public String getRawText(boolean compact) {
137             final HtmlEntry entry = getEntry();
138             return entry.getRawText(compact);
139         }
140
141         @Override
142         public RowMatchType matches(final List<String> searchTokens,
143                 final Pattern orderedMatchPattern, final Transliterator normalizer,
144                 final boolean swapPairEntries) {
145             final String text = normalizer.transform(getRawText(false));
146             if (orderedMatchPattern.matcher(text).find()) {
147                 return RowMatchType.ORDERED_MATCH;
148             }
149             for (int i = searchTokens.size() - 1; i >= 0; --i) {
150                 final String searchToken = searchTokens.get(i);
151                 if (!text.contains(searchToken)) {
152                     return RowMatchType.NO_MATCH;
153                 }
154             }
155             return RowMatchType.BAG_OF_WORDS_MATCH;
156         }
157     }
158
159     public static String htmlBody(final List<HtmlEntry> htmlEntries, final String indexShortName) {
160         final StringBuilder result = new StringBuilder();
161         for (final HtmlEntry htmlEntry : htmlEntries) {
162             final String titleEscaped = StringUtil.escapeUnicodeToPureHtml(htmlEntry.title);
163             result.append(String.format("<h1><a href=\"%s\">%s</a></h1>\n<p>%s\n",
164                     formatQuickdicUrl(indexShortName, htmlEntry.title), titleEscaped,
165                     htmlEntry.getHtml()));
166         }
167         return result.toString();
168     }
169
170     public static String formatQuickdicUrl(final String indexShortName, final String text) {
171         assert !indexShortName.contains(":");
172         assert text.length() > 0;
173         return String.format("q://d?%s&%s", indexShortName, StringUtil.encodeForUrl(text));
174     }
175
176     public static boolean isQuickdicUrl(String url) {
177         return url.startsWith("q://d?");
178     }
179
180     // --------------------------------------------------------------------
181
182     public static final class LazyHtmlLoader {
183         final RandomAccessFile raf;
184         final long offset;
185         final int numBytes;
186         final int numZipBytes;
187
188         // Not sure this volatile is right, but oh well.
189         volatile SoftReference<String> htmlRef = new SoftReference<String>(null);
190
191         private LazyHtmlLoader(final DataInput inp) throws IOException {
192             raf = (RandomAccessFile)inp;
193             numBytes = raf.readInt();
194             numZipBytes = raf.readInt();
195             offset = raf.getFilePointer();
196             raf.skipBytes(numZipBytes);
197         }
198
199         public String getHtml() {
200             String html = htmlRef.get();
201             if (html != null) {
202                 return html;
203             }
204             System.out.println("Loading Html: numBytes=" + numBytes + ", numZipBytes="
205                     + numZipBytes);
206             final byte[] bytes = new byte[numBytes];
207             final byte[] zipBytes = new byte[numZipBytes];
208             synchronized (raf) {
209                 try {
210                     raf.seek(offset);
211                     raf.read(zipBytes);
212                 } catch (IOException e) {
213                     throw new RuntimeException(e);
214                 }
215             }
216             try {
217                 StringUtil.unzipFully(zipBytes, bytes);
218                 html = new String(bytes, "UTF-8");
219             } catch (IOException e) {
220                 throw new RuntimeException(e);
221             }
222             htmlRef = new SoftReference<String>(html);
223             return html;
224         }
225     }
226
227 }