2 package com.hughes.android.dictionary.engine;
4 import com.hughes.util.StringUtil;
5 import com.hughes.util.raf.RAFListSerializer;
6 import com.ibm.icu.text.Transliterator;
8 import java.io.DataInput;
9 import java.io.DataOutput;
10 import java.io.IOException;
11 import java.io.PrintStream;
12 import java.io.RandomAccessFile;
13 import java.io.UnsupportedEncodingException;
14 import java.lang.ref.SoftReference;
15 import java.nio.channels.FileChannel;
16 import java.util.List;
17 import java.util.regex.Pattern;
19 public class HtmlEntry extends AbstractEntry implements Comparable<HtmlEntry> {
21 // Title is not HTML escaped.
22 public final String title;
23 public final LazyHtmlLoader lazyHtmlLoader;
26 public HtmlEntry(final EntrySource entrySource, String title) {
29 lazyHtmlLoader = null;
32 public HtmlEntry(Dictionary dictionary, FileChannel ch, DataInput raf, final int index)
34 super(dictionary, raf, index);
35 title = raf.readUTF();
36 lazyHtmlLoader = new LazyHtmlLoader(ch, raf, dictionary.htmlData, index);
40 public void writeBase(DataOutput raf) throws IOException {
45 public void writeData(DataOutput raf) throws IOException {
46 final byte[] bytes = getHtml().getBytes("UTF-8");
47 StringUtil.writeVarInt(raf, bytes.length);
51 public static byte[] readData(DataInput raf) throws IOException {
52 int len = StringUtil.readVarInt(raf);
53 final byte[] bytes = new byte[Math.min(len, 20 * 1024 * 1024)];
59 return html != null ? html : lazyHtmlLoader.getHtml();
63 public void addToDictionary(Dictionary dictionary) {
65 dictionary.htmlEntries.add(this);
66 index = dictionary.htmlEntries.size() - 1;
70 public RowBase CreateRow(int rowIndex, Index dictionaryIndex) {
71 return new Row(this.index, rowIndex, dictionaryIndex);
74 static final class Serializer implements RAFListSerializer<HtmlEntry> {
76 final Dictionary dictionary;
79 Serializer(Dictionary dictionary, FileChannel ch) {
80 this.dictionary = dictionary;
85 public HtmlEntry read(DataInput raf, final int index) throws IOException {
86 return new HtmlEntry(dictionary, ch, raf, index);
90 public void write(DataOutput raf, HtmlEntry t) throws IOException {
95 static final class DataSerializer implements RAFListSerializer<HtmlEntry> {
97 public HtmlEntry read(DataInput raf, final int index) throws IOException {
103 public void write(DataOutput raf, HtmlEntry t) throws IOException {
108 static final class DataDeserializer implements RAFListSerializer<byte[]> {
110 public byte[] read(DataInput raf, final int index) throws IOException {
111 return HtmlEntry.readData(raf);
115 public void write(DataOutput raf, byte[] t) throws IOException {
120 public String getRawText(final boolean compact) {
121 return title + ":\n" + getHtml();
125 public int compareTo(HtmlEntry another) {
126 if (title.compareTo(another.title) != 0) {
127 return title.compareTo(another.title);
129 return getHtml().compareTo(another.getHtml());
133 public String toString() {
134 return getRawText(false);
137 // --------------------------------------------------------------------
139 public static class Row extends RowBase {
141 boolean isExpanded = false;
143 Row(final DataInput raf, final int thisRowIndex,
144 final Index index, int extra) throws IOException {
145 super(raf, thisRowIndex, index, extra);
148 Row(final int referenceIndex, final int thisRowIndex,
150 super(referenceIndex, thisRowIndex, index);
154 public String toString() {
155 return getRawText(false);
158 public HtmlEntry getEntry() {
159 return index.dict.htmlEntries.get(referenceIndex);
163 public void print(PrintStream out) {
164 final HtmlEntry entry = getEntry();
165 out.println("See also HtmlEntry:" + entry.title);
169 public String getRawText(boolean compact) {
170 final HtmlEntry entry = getEntry();
171 return entry.getRawText(compact);
175 public RowMatchType matches(final List<String> searchTokens,
176 final Pattern orderedMatchPattern, final Transliterator normalizer,
177 final boolean swapPairEntries) {
178 final String text = normalizer.transform(getRawText(false));
179 if (orderedMatchPattern.matcher(text).find()) {
180 return RowMatchType.ORDERED_MATCH;
182 for (int i = searchTokens.size() - 1; i >= 0; --i) {
183 final String searchToken = searchTokens.get(i);
184 if (!text.contains(searchToken)) {
185 return RowMatchType.NO_MATCH;
188 return RowMatchType.BAG_OF_WORDS_MATCH;
192 public static String htmlBody(final List<HtmlEntry> htmlEntries, final String indexShortName) {
193 final StringBuilder result = new StringBuilder();
194 for (final HtmlEntry htmlEntry : htmlEntries) {
195 final String titleEscaped = StringUtil.escapeUnicodeToPureHtml(htmlEntry.title);
196 result.append(String.format("<h1><a href=\"%s\">%s</a></h1>\n<p>%s\n",
197 formatQuickdicUrl(indexShortName, htmlEntry.title), titleEscaped,
198 htmlEntry.getHtml()));
200 return result.toString();
203 public static String formatQuickdicUrl(final String indexShortName, final String text) {
204 assert !indexShortName.contains(":");
205 assert text.length() > 0;
206 return String.format("q://d?%s&%s", indexShortName, StringUtil.encodeForUrl(text));
209 public static boolean isQuickdicUrl(String url) {
210 return url.startsWith("q://d?");
213 // --------------------------------------------------------------------
215 public static final class LazyHtmlLoader {
217 final FileChannel ch;
220 final int numZipBytes;
221 final List<byte[]> data;
224 // Not sure this volatile is right, but oh well.
225 volatile SoftReference<String> htmlRef = new SoftReference<String>(null);
227 private LazyHtmlLoader(FileChannel ch, final DataInput inp, List<byte[]> data, int index) throws IOException {
235 this.numZipBytes = -1;
240 numBytes = Math.min(raf.readInt(), 20 * 1024 * 1024);
241 numZipBytes = Math.min(raf.readInt(), 20 * 1024 * 1024);
242 offset = ch.position();
243 raf.skipBytes(numZipBytes);
246 public String getHtml() {
247 String html = htmlRef.get();
253 html = new String(data.get(index), "UTF-8");
254 } catch (UnsupportedEncodingException e) {
255 throw new RuntimeException("Dictionary HTML data corrupted", e);
257 htmlRef = new SoftReference<String>(html);
260 System.out.println("Loading Html: numBytes=" + numBytes + ", numZipBytes="
262 final byte[] zipBytes = new byte[numZipBytes];
266 raf.readFully(zipBytes);
267 } catch (IOException e) {
268 throw new RuntimeException("Failed to read HTML data from dictionary", e);
272 final byte[] bytes = StringUtil.unzipFully(zipBytes, numBytes);
273 html = new String(bytes, "UTF-8");
274 } catch (IOException e) {
275 throw new RuntimeException("Dictionary HTML data corrupted", e);
277 htmlRef = new SoftReference<String>(html);