]> gitweb.fperrin.net Git - Dictionary.git/blob - src/com/hughes/android/dictionary/engine/HtmlEntry.java
Use StringBuilder instead of String.format.
[Dictionary.git] / src / com / hughes / android / dictionary / engine / HtmlEntry.java
1
2 package com.hughes.android.dictionary.engine;
3
4 import java.io.DataInput;
5 import java.io.DataOutput;
6 import java.io.IOException;
7 import java.io.PrintStream;
8 import java.lang.ref.SoftReference;
9 import java.nio.charset.StandardCharsets;
10 import java.util.List;
11 import java.util.regex.Pattern;
12
13 import com.hughes.util.DataInputBuffer;
14 import com.hughes.util.StringUtil;
15 import com.hughes.util.raf.RAFListSerializer;
16 import com.hughes.util.raf.RAFListSerializerSkippable;
17 import com.ibm.icu.text.Transliterator;
18
19 public class HtmlEntry extends AbstractEntry implements Comparable<HtmlEntry> {
20
21     // Title is not HTML escaped.
22     public final String title;
23     private final LazyHtmlLoader lazyHtmlLoader;
24     @SuppressWarnings("WeakerAccess")
25     public String html;
26
27     public HtmlEntry(final EntrySource entrySource, String title) {
28         super(entrySource);
29         this.title = title;
30         lazyHtmlLoader = null;
31     }
32
33     public HtmlEntry(Dictionary dictionary, DataInput raf, final int index)
34     throws IOException {
35         super(dictionary, raf, index);
36         title = raf.readUTF();
37         lazyHtmlLoader = new LazyHtmlLoader(raf, dictionary.htmlData, index);
38         html = null;
39     }
40
41     private void writeBase(DataOutput raf) throws IOException {
42         super.write(raf);
43         raf.writeUTF(title);
44     }
45
46     private void writeData(DataOutput raf) throws IOException {
47         final byte[] bytes = getHtml().getBytes(StandardCharsets.UTF_8);
48         StringUtil.writeVarInt(raf, bytes.length);
49         raf.write(bytes);
50     }
51
52     private static DataInputBuffer readData(DataInput raf) throws IOException {
53         int len = StringUtil.readVarInt(raf);
54         return ((DataInputBuffer)raf).slice(len);
55     }
56
57     String getHtml() {
58         return html != null ? html : lazyHtmlLoader.getHtml();
59     }
60
61     @Override
62     public void addToDictionary(Dictionary dictionary) {
63         assert index == -1;
64         dictionary.htmlEntries.add(this);
65         index = dictionary.htmlEntries.size() - 1;
66     }
67
68     @Override
69     public RowBase CreateRow(int rowIndex, Index dictionaryIndex) {
70         return new Row(this.index, rowIndex, dictionaryIndex);
71     }
72
73     static final class Serializer implements RAFListSerializerSkippable<HtmlEntry> {
74
75         final Dictionary dictionary;
76
77         Serializer(Dictionary dictionary) {
78             this.dictionary = dictionary;
79         }
80
81         @Override
82         public HtmlEntry read(DataInput raf, final int index) throws IOException {
83             return new HtmlEntry(dictionary, raf, index);
84         }
85
86         @Override
87         public void skip(DataInput raf, final int index) throws IOException {
88             if (dictionary.dictFileVersion >= 7)
89             {
90                 StringUtil.readVarInt(raf);
91             }
92             else
93             {
94                 raf.skipBytes(2);
95             }
96             int l = raf.readUnsignedShort();
97             raf.skipBytes(l);
98         }
99
100         @Override
101         public void write(DataOutput raf, HtmlEntry t) throws IOException {
102             t.writeBase(raf);
103         }
104     }
105
106     static final class DataSerializer implements RAFListSerializer<HtmlEntry> {
107         @Override
108         public HtmlEntry read(DataInput raf, final int index) {
109             assert false;
110             return null;
111         }
112
113         @Override
114         public void write(DataOutput raf, HtmlEntry t) throws IOException {
115             t.writeData(raf);
116         }
117     }
118
119     static final class DataDeserializer implements RAFListSerializer<DataInputBuffer> {
120         @Override
121         public DataInputBuffer read(DataInput raf, final int index) throws IOException {
122             return readData(raf);
123         }
124
125         @Override
126         public void write(DataOutput raf, DataInputBuffer t) {
127             assert false;
128         }
129     }
130
131     private String getRawText(final boolean compact) {
132         return title + ":\n" + getHtml();
133     }
134
135     @Override
136     public int compareTo(/*@NonNull*/ HtmlEntry another) {
137         if (title.compareTo(another.title) != 0) {
138             return title.compareTo(another.title);
139         }
140         return getHtml().compareTo(another.getHtml());
141     }
142
143     @Override
144     public String toString() {
145         return getRawText(false);
146     }
147
148     // --------------------------------------------------------------------
149
150     public static class Row extends RowBase {
151
152         Row(final DataInput raf, final int thisRowIndex,
153             final Index index, int extra) throws IOException {
154             super(raf, thisRowIndex, index, extra);
155         }
156
157         Row(final int referenceIndex, final int thisRowIndex,
158             final Index index) {
159             super(referenceIndex, thisRowIndex, index);
160         }
161
162         @Override
163         public String toString() {
164             return getRawText(false);
165         }
166
167         public HtmlEntry getEntry() {
168             return index.dict.htmlEntries.get(referenceIndex);
169         }
170
171         @Override
172         public void print(PrintStream out) {
173             final HtmlEntry entry = getEntry();
174             out.println("See also HtmlEntry:" + entry.title);
175         }
176
177         @Override
178         public String getRawText(boolean compact) {
179             final HtmlEntry entry = getEntry();
180             return entry.getRawText(compact);
181         }
182
183         @Override
184         public RowMatchType matches(final List<String> searchTokens,
185                                     final Pattern orderedMatchPattern, final Transliterator normalizer,
186                                     final boolean swapPairEntries) {
187             final String text = normalizer.transform(getRawText(false));
188             if (orderedMatchPattern.matcher(text).find()) {
189                 return RowMatchType.ORDERED_MATCH;
190             }
191             for (int i = searchTokens.size() - 1; i >= 0; --i) {
192                 final String searchToken = searchTokens.get(i);
193                 if (!text.contains(searchToken)) {
194                     return RowMatchType.NO_MATCH;
195                 }
196             }
197             return RowMatchType.BAG_OF_WORDS_MATCH;
198         }
199     }
200
201     public static String htmlBody(final List<HtmlEntry> htmlEntries, final String indexShortName) {
202         final StringBuilder result = new StringBuilder();
203         for (final HtmlEntry htmlEntry : htmlEntries) {
204             final String titleEscaped = StringUtil.escapeUnicodeToPureHtml(htmlEntry.title);
205             result.append(String.format("<h1><a href=\"%s\">%s</a></h1>\n<p>%s\n",
206                                         formatQuickdicUrl(indexShortName, htmlEntry.title), titleEscaped,
207                                         htmlEntry.getHtml()));
208         }
209         return result.toString();
210     }
211
212     @SuppressWarnings("WeakerAccess")
213     public static String formatQuickdicUrl(final String indexShortName, final String text) {
214         assert !indexShortName.contains(":");
215         assert text.length() > 0;
216         StringBuilder s = new StringBuilder("q://d?");
217         s.append(indexShortName);
218         s.append("&");
219         s.append(StringUtil.encodeForUrl(text));
220         return s.toString();
221     }
222
223     public static boolean isQuickdicUrl(String url) {
224         return url.startsWith("q://d?");
225     }
226
227     // --------------------------------------------------------------------
228
229     @SuppressWarnings("WeakerAccess")
230     public static final class LazyHtmlLoader {
231         final DataInputBuffer buf;
232         final int numBytes;
233         final List<DataInputBuffer> data;
234         final int index;
235
236         // Not sure this volatile is right, but oh well.
237         volatile SoftReference<String> htmlRef = new SoftReference<>(null);
238
239         private LazyHtmlLoader(final DataInput inp, List<DataInputBuffer> data, int index) throws IOException {
240             this.data = data;
241             this.index = index;
242             if (data != null) {
243                 buf = null;
244                 this.numBytes = -1;
245                 return;
246             }
247             numBytes = Math.min(inp.readInt(), 20 * 1024 * 1024);
248             int numZipBytes = Math.min(inp.readInt(), 20 * 1024 * 1024);
249             DataInputBuffer b = (DataInputBuffer)inp;
250             buf = b.slice(numZipBytes);
251         }
252
253         String getHtml() {
254             String html = htmlRef.get();
255             if (html != null) {
256                 return html;
257             }
258             if (data != null) {
259                 html = data.get(index).asString();
260                 htmlRef = new SoftReference<>(html);
261                 return html;
262             }
263             System.out.println("Loading Html: numBytes=" + numBytes + ", numZipBytes="
264                                + buf.limit());
265             final byte[] zipBytes = new byte[buf.limit()];
266             buf.rewind();
267             buf.readFully(zipBytes);
268             try {
269                 final byte[] bytes = StringUtil.unzipFully(zipBytes, numBytes);
270                 html = new String(bytes, StandardCharsets.UTF_8);
271             } catch (IOException e) {
272                 throw new RuntimeException("Dictionary HTML data corrupted", e);
273             }
274             htmlRef = new SoftReference<>(html);
275             return html;
276         }
277     }
278
279 }