2 package com.hughes.android.dictionary.engine;
4 import com.hughes.util.StringUtil;
5 import com.hughes.util.raf.RAFListSerializer;
6 import com.hughes.util.raf.RAFSerializable;
7 import com.ibm.icu.text.Transliterator;
9 import java.io.DataInput;
10 import java.io.DataOutput;
11 import java.io.IOException;
12 import java.io.PrintStream;
13 import java.io.RandomAccessFile;
14 import java.io.UnsupportedEncodingException;
15 import java.lang.ref.SoftReference;
16 import java.util.List;
17 import java.util.regex.Pattern;
19 public class HtmlEntry extends AbstractEntry implements RAFSerializable<HtmlEntry>,
20 Comparable<HtmlEntry> {
22 // Title is not HTML escaped.
23 public final String title;
// Fallback source of the HTML text: getHtml() defers to this loader when no
// in-memory html value is present. One constructor sets it to null, the other
// builds it from the input being read.
24 public final LazyHtmlLoader lazyHtmlLoader;
// Creates an entry from an entry source and a title. No lazy loader is attached
// on this path (lazyHtmlLoader is null).
// NOTE(review): the other statements of this constructor are not visible in this view.
27 public HtmlEntry(final EntrySource entrySource, String title) {
30 lazyHtmlLoader = null;
// Creates an entry by reading it from raf. The superclass constructor is invoked
// first with the same arguments, then the title is read with readUTF(), then a
// LazyHtmlLoader is built from the same input, dictionary.htmlData and the index.
33 public HtmlEntry(Dictionary dictionary, DataInput raf, final int index)
35 super(dictionary, raf, index);
36 title = raf.readUTF();
37 lazyHtmlLoader = new LazyHtmlLoader(raf, dictionary.htmlData, index);
// Serializes this entry to raf. The visible part encodes the HTML as UTF-8,
// passes the bytes through StringUtil.zipBytes (presumably compression - confirm)
// and writes the length of the result via StringUtil.writeVarInt.
// NOTE(review): the remaining writes of this method are not visible in this view.
42 public void write(DataOutput raf) throws IOException {
46 final byte[] bytes = getHtml().getBytes("UTF-8");
47 final byte[] zipBytes = StringUtil.zipBytes(bytes);
48 StringUtil.writeVarInt(raf, zipBytes.length);
// NOTE(review): only the signature is visible in this view; presumably writes the
// non-HTML part of the entry - confirm against the body.
52 public void writeBase(DataOutput raf) throws IOException {
// Writes the HTML payload of this entry to raf. The HTML is encoded as UTF-8 and
// the byte count is written first via StringUtil.writeVarInt.
// NOTE(review): the write of the bytes themselves is not visible in this view.
57 public void writeData(DataOutput raf) throws IOException {
58 final byte[] bytes = getHtml().getBytes("UTF-8");
59 StringUtil.writeVarInt(raf, bytes.length);
// Reads one data record from raf: a length is read via StringUtil.readVarInt and a
// buffer of exactly that size is allocated.
// NOTE(review): filling the buffer and the return statement are not visible here.
63 public static byte[] readData(DataInput raf) throws IOException {
64 int len = StringUtil.readVarInt(raf);
65 final byte[] bytes = new byte[len];
// Prefer the html value already held in memory; otherwise defer to the lazy loader.
// NOTE(review): lazyHtmlLoader is null for entries built via the
// (EntrySource, String) constructor, so html must be non-null for those - confirm.
71 return html != null ? html : lazyHtmlLoader.getHtml();
// Appends this entry to dictionary.htmlEntries and stores its position in that
// list (the last slot after the add) as this entry's index.
75 public void addToDictionary(Dictionary dictionary) {
77 dictionary.htmlEntries.add(this);
78 index = dictionary.htmlEntries.size() - 1;
// Builds a Row that refers back to this entry through this.index, placed at
// rowIndex within the given dictionary index.
82 public RowBase CreateRow(int rowIndex, Index dictionaryIndex) {
83 return new Row(this.index, rowIndex, dictionaryIndex);
// List serializer for HtmlEntry that is bound to one Dictionary. read() constructs
// entries through the (Dictionary, DataInput, int) constructor.
// NOTE(review): the body of write() is not visible in this view.
86 static final class Serializer implements RAFListSerializer<HtmlEntry> {
88 final Dictionary dictionary;
90 Serializer(Dictionary dictionary) {
91 this.dictionary = dictionary;
95 public HtmlEntry read(DataInput raf, final int index) throws IOException {
96 return new HtmlEntry(dictionary, raf, index);
100 public void write(DataOutput raf, HtmlEntry t) throws IOException {
// Second RAFListSerializer for HtmlEntry.
// NOTE(review): neither method body is visible in this view; presumably write()
// emits only the HTML payload (see writeData) - confirm against the bodies.
105 static final class DataSerializer implements RAFListSerializer<HtmlEntry> {
107 public HtmlEntry read(DataInput raf, final int index) throws IOException {
113 public void write(DataOutput raf, HtmlEntry t) throws IOException {
// RAFListSerializer over raw byte arrays. read() delegates to HtmlEntry.readData,
// so each element is one length-prefixed record; the index argument is not used
// by the visible read line.
// NOTE(review): the body of write() is not visible in this view.
118 static final class DataDeserializer implements RAFListSerializer<byte[]> {
120 public byte[] read(DataInput raf, final int index) throws IOException {
121 return HtmlEntry.readData(raf);
125 public void write(DataOutput raf, byte[] t) throws IOException {
// Returns the title, a colon and newline, then the HTML text. The compact flag is
// not consulted by the visible return statement.
130 public String getRawText(final boolean compact) {
131 return title + ":\n" + getHtml();
// Orders entries by title first; when the titles compare equal, the HTML text is
// the tie-breaker. The tie-break calls getHtml() on both entries, which may go
// through the lazy loader.
135 public int compareTo(HtmlEntry another) {
136 if (title.compareTo(another.title) != 0) {
137 return title.compareTo(another.title);
139 return getHtml().compareTo(another.getHtml());
// String form is the raw text requested with compact == false.
143 public String toString() {
144 return getRawText(false);
147 // --------------------------------------------------------------------
// Row type that points at an HtmlEntry through referenceIndex into
// index.dict.htmlEntries.
// NOTE(review): several lines of this class are not visible in this view.
149 public static class Row extends RowBase {
151 boolean isExpanded = false;
// Constructor that reads the row from raf; all arguments are forwarded to RowBase.
153 Row(final DataInput raf, final int thisRowIndex,
154 final Index index, int extra) throws IOException {
155 super(raf, thisRowIndex, index, extra);
// Constructor for a row created in memory with a known reference index.
158 Row(final int referenceIndex, final int thisRowIndex,
160 super(referenceIndex, thisRowIndex, index);
// String form is the raw text requested with compact == false.
164 public String toString() {
165 return getRawText(false);
// Looks up the referenced entry in the owning dictionary's htmlEntries list.
168 public HtmlEntry getEntry() {
169 return index.dict.htmlEntries.get(referenceIndex);
// Prints a one-line "See also" pointer carrying the entry title.
173 public void print(PrintStream out) {
174 final HtmlEntry entry = getEntry();
175 out.println("See also HtmlEntry:" + entry.title);
// Delegates to the referenced entry's getRawText.
179 public String getRawText(boolean compact) {
180 final HtmlEntry entry = getEntry();
181 return entry.getRawText(compact);
// Classifies this row against a search. The raw text is normalized with
// normalizer.transform first. If orderedMatchPattern is found in it the result is
// ORDERED_MATCH; otherwise every search token must be contained in the text
// (checked from the last token to the first), and the first missing token yields
// NO_MATCH. If all tokens are present the result is BAG_OF_WORDS_MATCH.
// swapPairEntries is not consulted by the visible lines.
185 public RowMatchType matches(final List<String> searchTokens,
186 final Pattern orderedMatchPattern, final Transliterator normalizer,
187 final boolean swapPairEntries) {
188 final String text = normalizer.transform(getRawText(false));
189 if (orderedMatchPattern.matcher(text).find()) {
190 return RowMatchType.ORDERED_MATCH;
192 for (int i = searchTokens.size() - 1; i >= 0; --i) {
193 final String searchToken = searchTokens.get(i);
194 if (!text.contains(searchToken)) {
195 return RowMatchType.NO_MATCH;
198 return RowMatchType.BAG_OF_WORDS_MATCH;
// Concatenates the given entries into one HTML fragment. Each entry contributes an
// <h1> heading whose link target is the quickdic URL for the (unescaped) title and
// whose link text is the title passed through StringUtil.escapeUnicodeToPureHtml,
// followed by a <p> and the entry's HTML inserted as-is.
202 public static String htmlBody(final List<HtmlEntry> htmlEntries, final String indexShortName) {
203 final StringBuilder result = new StringBuilder();
204 for (final HtmlEntry htmlEntry : htmlEntries) {
205 final String titleEscaped = StringUtil.escapeUnicodeToPureHtml(htmlEntry.title);
206 result.append(String.format("<h1><a href=\"%s\">%s</a></h1>\n<p>%s\n",
207 formatQuickdicUrl(indexShortName, htmlEntry.title), titleEscaped,
208 htmlEntry.getHtml()));
210 return result.toString();
// Builds an internal link of the form q://d?<indexShortName>&<text>, with text
// passed through StringUtil.encodeForUrl. The preconditions (no ':' in the index
// name, non-empty text) are plain Java asserts, so they are only checked when
// assertions are enabled.
213 public static String formatQuickdicUrl(final String indexShortName, final String text) {
214 assert !indexShortName.contains(":");
215 assert text.length() > 0;
216 return String.format("q://d?%s&%s", indexShortName, StringUtil.encodeForUrl(text));
// True when url begins with the "q://d?" prefix that formatQuickdicUrl produces.
219 public static boolean isQuickdicUrl(String url) {
220 return url.startsWith("q://d?");
223 // --------------------------------------------------------------------
// Loads an entry's HTML text on demand and caches it behind a SoftReference.
// Two sources appear in the visible code: an in-memory list of byte arrays (data)
// and a span of bytes in a RandomAccessFile that goes through StringUtil.unzipFully.
// NOTE(review): many lines of this class, including some field declarations and
// the conditions that select between the two sources, are not visible in this view.
225 public static final class LazyHtmlLoader {
226 final RandomAccessFile raf;
229 final int numZipBytes;
230 final List<byte[]> data;
233 // Not sure this volatile is right, but oh well.
234 volatile SoftReference<String> htmlRef = new SoftReference<String>(null);
// Sets up the loader from the input, the shared data list and the entry index.
236 private LazyHtmlLoader(final DataInput inp, List<byte[]> data, int index) throws IOException {
// One path records numZipBytes as -1.
// NOTE(review): presumably the path where HTML comes from the data list - confirm.
243 this.numZipBytes = -1;
// Other path: the input must actually be a RandomAccessFile (unchecked cast). Two
// ints are read (numBytes, then numZipBytes), the current file position is kept
// as the payload offset, and numZipBytes bytes are skipped.
246 raf = (RandomAccessFile)inp;
247 numBytes = raf.readInt();
248 numZipBytes = raf.readInt();
249 offset = raf.getFilePointer();
250 raf.skipBytes(numZipBytes);
// Returns the HTML text, starting from the value held by the soft reference.
253 public String getHtml() {
254 String html = htmlRef.get();
// In-memory path: decode the byte array at this entry's index as UTF-8 and cache it.
260 html = new String(data.get(index), "UTF-8");
261 } catch (UnsupportedEncodingException e) {
262 throw new RuntimeException(e);
264 htmlRef = new SoftReference<String>(html);
// File path: logs to stdout, fills a buffer of numZipBytes, then passes it to
// StringUtil.unzipFully with numBytes and decodes the result as UTF-8.
// IOExceptions are rethrown wrapped in RuntimeException.
267 System.out.println("Loading Html: numBytes=" + numBytes + ", numZipBytes="
269 final byte[] zipBytes = new byte[numZipBytes];
274 } catch (IOException e) {
275 throw new RuntimeException(e);
279 final byte[] bytes = StringUtil.unzipFully(zipBytes, numBytes);
280 html = new String(bytes, "UTF-8");
281 } catch (IOException e) {
282 throw new RuntimeException(e);
// Cache the decoded text so later calls can reuse it while the reference survives.
284 htmlRef = new SoftReference<String>(html);