-package com.hughes.android.dictionary;\r
-\r
-import java.io.IOException;\r
-import java.io.RandomAccessFile;\r
-import java.util.ArrayList;\r
-import java.util.Arrays;\r
-import java.util.LinkedHashMap;\r
-import java.util.LinkedHashSet;\r
-import java.util.List;\r
-import java.util.Map;\r
-import java.util.Set;\r
-import java.util.regex.Pattern;\r
-\r
-import com.hughes.util.raf.RAFFactory;\r
-import com.hughes.util.raf.RAFSerializable;\r
-\r
-/**
- * A dictionary entry holding parallel rows of text in two languages.
- *
- * Row {@code i} of {@link #lang1} corresponds to row {@code i} of
- * {@link #lang2}. Entries are parsed from text lines of the form
- * {@code lang1 :: lang2} (see {@link #parseFromLine}) and can be written to
- * and read back from a {@link RandomAccessFile}.
- */
-public final class Entry implements RAFSerializable<Entry> {
-
-  static final byte LANG1 = 0;
-  static final byte LANG2 = 1;
-
-  /** Separates the two languages on a line: whitespace, "::", whitespace. */
-  static final Pattern lineSplitPattern = Pattern.compile("\\s::\\s");
-  /** Separates sub-entries within one language: whitespace, "|", whitespace. */
-  static final Pattern sublineSplitPattern = Pattern.compile("\\s\\|\\s");
-
-  final String[] lang1;
-  final String[] lang2;
-
-  /**
-   * Creates an entry from parallel rows; the arrays are stored as-is, not copied.
-   *
-   * @param lang1 rows of text in the first language
-   * @param lang2 rows of text in the second language
-   */
-  Entry(final String[] lang1, final String[] lang2) {
-    this.lang1 = lang1;
-    this.lang2 = lang2;
-  }
-
-  /**
-   * Reads an entry in the layout produced by {@link #write}: one byte holding
-   * the row count, followed by a (lang1, lang2) pair of UTF strings per row.
-   */
-  public static final RAFFactory<Entry> RAF_FACTORY = new RAFFactory<Entry>() {
-    public Entry create(RandomAccessFile raf) throws IOException {
-      final int rows = raf.readByte();
-      // readByte() is signed; a negative count can only come from corrupt data
-      // and would otherwise surface as a NegativeArraySizeException.
-      if (rows < 0) {
-        throw new IOException("Invalid row count: " + rows);
-      }
-      final String[] lang1 = new String[rows];
-      final String[] lang2 = new String[rows];
-      for (int i = 0; i < rows; ++i) {
-        lang1[i] = raf.readUTF();
-        lang2[i] = raf.readUTF();
-      }
-      return new Entry(lang1, lang2);
-    }
-  };
-
-  /**
-   * Writes the row count as a single byte, then each row's lang1 and lang2
-   * text as UTF strings.
-   *
-   * @param raf the file to write to, at its current position
-   * @throws IOException if writing fails
-   */
-  public void write(RandomAccessFile raf) throws IOException {
-    // The count is stored in one signed byte, so it has to survive the cast.
-    assert lang1.length == (byte) lang1.length;
-    raf.writeByte(lang1.length);
-    for (int i = 0; i < lang1.length; ++i) {
-      raf.writeUTF(lang1[i]);
-      raf.writeUTF(lang2[i]);
-    }
-  }
-
-  @Override
-  public boolean equals(Object o) {
-    if (!(o instanceof Entry)) {
-      return false;
-    }
-    final Entry that = (Entry) o;
-    return Arrays.deepEquals(this.lang1, that.lang1) && Arrays.deepEquals(this.lang2, that.lang2);
-  }
-
-  @Override
-  public int hashCode() {
-    return Arrays.deepHashCode(lang1) + Arrays.deepHashCode(lang2);
-  }
-
-  @Override
-  public String toString() {
-    return getRawText();
-  }
-
-  /** Returns the number of rows; both languages are expected to have the same count. */
-  public int getRowCount() {
-    assert lang1.length == lang2.length;
-    return lang1.length;
-  }
-
-  /**
-   * Returns the rows for the requested language.
-   *
-   * @param lang {@link #LANG1} or {@link #LANG2}
-   * @return the backing array (not a copy)
-   */
-  String[] getAllText(final byte lang) {
-    if (lang == LANG1) {
-      return lang1;
-    }
-    assert lang == LANG2;
-    return lang2;
-  }
-
-  /**
-   * Returns the lang1 rows joined with " | ", a tab, then the lang2 rows
-   * joined with " | ".
-   */
-  String getRawText() {
-    final StringBuilder result = new StringBuilder();
-    appendJoined(result, lang1);
-    result.append("\t");
-    appendJoined(result, lang2);
-    return result.toString();
-  }
-
-  /** Appends the rows to {@code out}, separated by " | ". */
-  private static void appendJoined(final StringBuilder out, final String[] rows) {
-    for (int i = 0; i < rows.length; ++i) {
-      out.append(i == 0 ? "" : " | ").append(rows[i]);
-    }
-  }
-
-  /** Returns {@link #LANG2} for {@link #LANG1} and vice versa. */
-  static byte otherLang(final byte lang) {
-    assert lang == LANG1 || lang == LANG2;
-    return lang == LANG1 ? LANG2 : LANG1;
-  }
-
-  /**
-   * Parses one line of the form {@code lang1 :: lang2}.
-   *
-   * @param line the raw input line
-   * @param hasMultipleSubentries if true, each side is further split on " | "
-   *        into parallel rows; otherwise each side becomes a single row
-   * @return the parsed entry, or {@code null} (after logging to stderr) if the
-   *         line does not split into exactly two parts or the two sides have
-   *         different numbers of sub-entries
-   */
-  static Entry parseFromLine(String line, final boolean hasMultipleSubentries) {
-    // Decode the angle-bracket entities. The previous calls replaced "<" with
-    // "<" and ">" with ">", which did nothing. Literal replace: no regex needed.
-    line = line.replace("&lt;", "<");
-    line = line.replace("&gt;", ">");
-    final String[] parts = lineSplitPattern.split(line);
-    if (parts.length != 2) {
-      System.err.println("Entry:" + "Invalid line: " + line);
-      return null;
-    }
-    if (!hasMultipleSubentries) {
-      return new Entry(new String[] {parts[0].trim()}, new String[] {parts[1].trim()});
-    }
-
-    // Pad with spaces so a "|" at the very start or end still has the
-    // surrounding whitespace the split pattern requires.
-    final String[] lang1 = sublineSplitPattern.split(" " + parts[0].trim() + " ");
-    final String[] lang2 = sublineSplitPattern.split(" " + parts[1].trim() + " ");
-    if (lang1.length != lang2.length) {
-      System.err.println("Entry:" + "Invalid subline: " + line);
-      return null;
-    }
-    for (int i = 0; i < lang1.length; ++i) {
-      lang1[i] = lang1[i].trim();
-      lang2[i] = lang2[i].trim();
-    }
-    return new Entry(lang1, lang2);
-  }
-
-  /** Maps an opening delimiter to its matching closing delimiter. */
-  static final Map<String, String> bracketToClose = new LinkedHashMap<String, String>();
-  static {
-    bracketToClose.put("\"", "\"");
-    bracketToClose.put(" '", "' ");
-  }
-
-  static final Pattern WHITESPACE = Pattern.compile("\\s+");
-
-  /**
-   * Extracts the set of tokens to index for one language.
-   *
-   * All rows are concatenated, punctuation and other non-token characters are
-   * replaced by spaces, and the text is split on whitespace twice: once with
-   * hyphens kept inside tokens and once with hyphens treated as separators.
-   * Only tokens accepted by {@link #isIndexable} are returned.
-   *
-   * @param lang {@link #LANG1} or {@link #LANG2}
-   * @return the tokens in first-seen order, without duplicates
-   */
-  public Set<String> getIndexableTokens(final byte lang) {
-    final StringBuilder joined = new StringBuilder(" ");
-    for (final String subentry : getAllText(lang)) {
-      joined.append(subentry).append(" ");
-    }
-    String text = joined.toString();
-
-    text = text.replaceAll("fig\\.", " ");
-    text = text.replaceAll("\\{[^\\}]+}", " ");
-    text = text.replaceAll("\"-", "-");
-    text = text.replaceAll("-\"", "-");
-    text = text.replaceAll("[\"/\\()<>\\[\\],;?!.]", " ");
-    text = text.replaceAll("[:] ", " ");
-    text = text.replaceAll(" [:]", " ");
-
-    // Now be really conservative about what we allow inside a token:
-    // See: http://unicode.org/Public/UNIDATA/UCD.html#General_Category_Values
-    text = text.replaceAll("[^-:\\p{Lu}\\p{Ll}\\p{Lt}\\p{Lm}\\p{Lo}\\p{Nd}\\p{Nl}\\p{No}]", " ");
-
-    final Set<String> candidates = new LinkedHashSet<String>();
-    // First pass: hyphenated words stay whole.
-    candidates.addAll(Arrays.asList(WHITESPACE.split(text)));
-
-    // Second pass: the parts of hyphenated words become tokens too.
-    text = text.replaceAll("[-]", " ");
-    candidates.addAll(Arrays.asList(WHITESPACE.split(text)));
-
-    final Set<String> result = new LinkedHashSet<String>();
-    for (final String token : candidates) {
-      if (isIndexable(token)) {
-        result.add(token);
-      }
-    }
-    return result;
-  }
-
-  /** Returns true if the text contains at least one character matched by the regex class {@code \w}. */
-  static boolean isIndexable(final String text) {
-    // Does it have an alpha-numeric anywhere?
-    return text.matches(".*\\w.*");
-  }
-
-  /**
-   * Collects every substring enclosed between an occurrence of {@code start}
-   * and the next occurrence of {@code end}, scanning left to right.
-   *
-   * @param text the text to scan
-   * @param start the opening delimiter
-   * @param end the closing delimiter
-   * @return the enclosed substrings in order of appearance; scanning stops at
-   *         the first {@code start} with no matching {@code end}. Empty if
-   *         either delimiter is empty.
-   */
-  static List<String> getTextInside(final String text, final String start, final String end) {
-    final List<String> result = new ArrayList<String>();
-    if (start.length() == 0 || end.length() == 0) {
-      // An empty delimiter matches at every position and would never advance.
-      return result;
-    }
-    int searchFrom = 0;
-    while (true) {
-      // Resume after the previous match; searching from 0 each time would
-      // find the same occurrence forever.
-      final int startPos = text.indexOf(start, searchFrom);
-      if (startPos == -1) {
-        break;
-      }
-      final int contentPos = startPos + start.length();
-      final int endPos = text.indexOf(end, contentPos);
-      if (endPos == -1) {
-        // Unterminated opening delimiter: nothing more to extract.
-        break;
-      }
-      result.add(text.substring(contentPos, endPos));
-      searchFrom = endPos + end.length();
-    }
-    return result;
-  }
-
-}
\ No newline at end of file
+package com.hughes.android.dictionary;
+
+import java.io.IOException;
+import java.io.RandomAccessFile;
+
+import com.hughes.util.raf.RAFFactory;
+import com.hughes.util.raf.RAFSerializable;
+
+/**
+ * Base type for dictionary entries that can be stored in a
+ * {@link RandomAccessFile}.
+ */
+public abstract class Entry implements RAFSerializable<Entry> {
+
+  /**
+   * Reads one entry: a leading type byte selects the concrete factory that
+   * reads the rest. Type 0 is delegated to {@code SimpleEntry.RAF_FACTORY};
+   * any other value is rejected with a {@link RuntimeException}.
+   */
+  public static final RAFFactory<Entry> RAF_FACTORY = new RAFFactory<Entry>() {
+    public Entry create(RandomAccessFile raf) throws IOException {
+      final byte typeTag = raf.readByte();
+      if (typeTag == 0) {
+        return SimpleEntry.RAF_FACTORY.create(raf);
+      }
+      throw new RuntimeException("Invalid entry type: " + typeTag);
+    }
+  };
+
+}