gitweb.fperrin.net Git - Dictionary.git/commitdiff
go
author thadh <thadh@THADH-MTV.ad.corp.google.com>
Thu, 9 Apr 2009 00:38:58 +0000 (17:38 -0700)
committer thadh <thadh@THADH-MTV.ad.corp.google.com>
Thu, 9 Apr 2009 00:38:58 +0000 (17:38 -0700)
src/com/hughes/android/dictionary/Language.java [new file with mode: 0755]
src/com/hughes/android/dictionary/ZIndex.java [new file with mode: 0755]
src/com/hughes/android/dictionary/ZMemoryIndex.java [new file with mode: 0755]

diff --git a/src/com/hughes/android/dictionary/Language.java b/src/com/hughes/android/dictionary/Language.java
new file mode 100755 (executable)
index 0000000..947eba1
--- /dev/null
@@ -0,0 +1,71 @@
+package com.hughes.android.dictionary;\r
+\r
+import java.util.Comparator;\r
+import java.util.LinkedHashMap;\r
+import java.util.Locale;\r
+import java.util.Map;\r
+import java.util.regex.Pattern;\r
+\r
+import com.hughes.util.StringUtil;\r
+\r
+/**\r
+ * A language known to the dictionary, identified by a short symbol such as\r
+ * "EN" or "DE".\r
+ *\r
+ * <p>Each language defines how a token is normalized for sorting (see\r
+ * {@link #normalizeTokenForSort(String)}); {@link #tokenComparator} orders\r
+ * tokens using that normalization.\r
+ */\r
+public abstract class Language {\r
+\r
+  /** Matches every character that is not an ASCII letter or digit. */\r
+  private static final Pattern NON_ALPHANUMERIC = Pattern.compile("[^A-Za-z0-9]");\r
+\r
+  /** Symbol this language is registered under, e.g. "EN". */\r
+  final String symbol;\r
+\r
+  /**\r
+   * Orders tokens by their normalized form. Tokens whose normalized forms are\r
+   * equal are ordered by comparing the results of {@code StringUtil.reverse}\r
+   * applied to the raw tokens.\r
+   */\r
+  final Comparator<String> tokenComparator;\r
+\r
+  /**\r
+   * Creates a language and its token comparator.\r
+   *\r
+   * @param symbol the symbol returned by {@link #toString()}\r
+   */\r
+  public Language(final String symbol) {\r
+    this.symbol = symbol;\r
+    this.tokenComparator = new Comparator<String>() {\r
+      public int compare(final String s1, final String s2) {\r
+        final int byNormalizedForm =\r
+            normalizeTokenForSort(s1).compareTo(normalizeTokenForSort(s2));\r
+        if (byNormalizedForm != 0) {\r
+          return byNormalizedForm;\r
+        }\r
+        // Same normalized form: fall back to the raw tokens for the tie-break.\r
+        return StringUtil.reverse(s1).compareTo(StringUtil.reverse(s2));\r
+      }\r
+    };\r
+  }\r
+\r
+  /** Returns this language's symbol. */\r
+  @Override\r
+  public String toString() {\r
+    return symbol;\r
+  }\r
+\r
+  /**\r
+   * Maps a token to the key used for the primary sort order.\r
+   *\r
+   * @param token the raw token; must not be {@code null}\r
+   * @return the normalized sort key\r
+   */\r
+  abstract String normalizeTokenForSort(final String token);\r
+\r
+  /** Removes every character outside {@code [A-Za-z0-9]} from {@code text}. */\r
+  private static String stripNonAlphanumeric(final String text) {\r
+    return NON_ALPHANUMERIC.matcher(text).replaceAll("");\r
+  }\r
+\r
+  // ----------------------------------------------------------------\r
+\r
+  /**\r
+   * English: lower-cases the token, rewrites ß as "ss" and ä, ö, ü as a, o, u,\r
+   * then drops everything that is not an ASCII letter or digit.\r
+   */\r
+  public static final Language EN = new Language("EN") {\r
+    @Override\r
+    String normalizeTokenForSort(final String token) {\r
+      // Fixed locale: with the default-locale overload a Turkish locale would\r
+      // map 'I' to dotless 'ı', which the ASCII filter below then deletes.\r
+      final String folded = token.toLowerCase(Locale.ENGLISH)\r
+          .replace("ß", "ss")\r
+          .replace("ä", "a")\r
+          .replace("ö", "o")\r
+          .replace("ü", "u");\r
+      return stripNonAlphanumeric(folded);\r
+    }\r
+  };\r
+\r
+  /**\r
+   * German: like {@link #EN}, and additionally collapses the digraphs "ae",\r
+   * "oe" and "ue" to a, o and u, so that both spellings of an umlaut produce\r
+   * the same key.\r
+   */\r
+  public static final Language DE = new Language("DE") {\r
+    @Override\r
+    String normalizeTokenForSort(final String token) {\r
+      // Fixed locale for the same reason as in EN. The replacement order is\r
+      // kept exactly as it was: each umlaut first, then its digraph.\r
+      final String folded = token.toLowerCase(Locale.ENGLISH)\r
+          .replace("ß", "ss")\r
+          .replace("ä", "a")\r
+          .replace("ae", "a")\r
+          .replace("ö", "o")\r
+          .replace("oe", "o")\r
+          .replace("ü", "u")\r
+          .replace("ue", "u");\r
+      return stripNonAlphanumeric(folded);\r
+    }\r
+  };\r
+\r
+  // ----------------------------------------------------------------\r
+\r
+  /** The built-in languages keyed by symbol, in registration order. */\r
+  private static final Map<String, Language> symbolToLanguage = new LinkedHashMap<String, Language>();\r
+\r
+  static {\r
+    symbolToLanguage.put(EN.symbol, EN);\r
+    symbolToLanguage.put(DE.symbol, DE);\r
+  }\r
+\r
+  /**\r
+   * Looks up a built-in language by symbol. The match is exact, so "en" does\r
+   * not find {@link #EN}.\r
+   *\r
+   * @param symbol the symbol to look up, e.g. "EN"\r
+   * @return the registered language, or {@code null} if no language is\r
+   *         registered under {@code symbol}\r
+   */\r
+  static Language lookup(final String symbol) {\r
+    return symbolToLanguage.get(symbol);\r
+  }\r
+\r
+}\r
diff --git a/src/com/hughes/android/dictionary/ZIndex.java b/src/com/hughes/android/dictionary/ZIndex.java
new file mode 100755 (executable)
index 0000000..fa3dc7b
--- /dev/null
@@ -0,0 +1,189 @@
+package com.hughes.android.dictionary;\r
+//package com.hughes.android.dictionary;\r
+//\r
+//import java.io.IOException;\r
+//import java.io.RandomAccessFile;\r
+//import java.util.Map;\r
+//import java.util.Set;\r
+//import java.util.TreeMap;\r
+//\r
+//import com.hughes.util.LRUCacheMap;\r
+//\r
+//\r
+//public final class Index {\r
+//  \r
+//  final String filename;\r
+//  final RandomAccessFile file;\r
+//  \r
+//  final Node root;\r
+//  final Map<Integer,Node> indexOffsetToNode = new LRUCacheMap<Integer,Node>(5000);\r
+//  \r
+//  \r
+//  public Index(final String filename) throws IOException {\r
+//    this.filename = filename;\r
+//    file = new RandomAccessFile(filename, "r");\r
+//    root = getNode(new NodeHandle("", 0));\r
+//  }\r
+//  \r
+//  public Node lookup(final String normalizedToken) throws IOException {\r
+//    return lookup(normalizedToken, 0, root);\r
+//  }\r
+//  \r
+//  private Node lookup(final String normalizedToken, final int pos, final Node node) throws IOException {\r
+//    if (pos == normalizedToken.length()) {\r
+//      return node;\r
+//    }\r
+//    \r
+//    // Check whether any prefix of the token is a child.\r
+//    for (int i = pos + 1; i <= normalizedToken.length(); ++i) {\r
+//      final NodeHandle childHandle = node.children.get(normalizedToken.substring(pos, i));\r
+//      if (childHandle != null) {\r
+//        return lookup(normalizedToken, i, childHandle.getNode());\r
+//      }\r
+//    }\r
+//    \r
+//    // Check whether any child starts with what's left of text.\r
+//    final String remainder = normalizedToken.substring(pos);\r
+//    for (final Map.Entry<String, NodeHandle> childHandle : node.children.entrySet()) {\r
+//      if (childHandle.getKey().startsWith(remainder)) {\r
+//        return getNode(childHandle.getValue());\r
+//      }\r
+//    }\r
+//    \r
+//    return node;\r
+//  }\r
+//  \r
+//  private Node getNode(final NodeHandle nodeHandle) throws IOException {\r
+//    Node node = indexOffsetToNode.get(nodeHandle.indexOffset);\r
+//    if (node == null) {\r
+//      node = new Node(nodeHandle);\r
+//      indexOffsetToNode.put(nodeHandle.indexOffset, node);\r
+//    }\r
+//    return node;\r
+//  }\r
+//  \r
+//  final class NodeHandle {\r
+//    final String normalizedToken;\r
+//    final int indexOffset;\r
+//\r
+//    NodeHandle(final String normalizedToken, final int indexOffset) throws IOException {\r
+//      this.normalizedToken = normalizedToken;\r
+//      this.indexOffset = indexOffset;\r
+//    }\r
+//    \r
+//    Node getNode() throws IOException {\r
+//      return Index.this.getNode(this);\r
+//    }\r
+//  }\r
+//\r
+//  final class Node {\r
+//    final NodeHandle nodeHandle;\r
+//    final TreeMap<String,NodeHandle> children;\r
+//    final Map<String,int[]> tokenToOffsets = new TreeMap<String, int[]>(EntryFactory.entryFactory.getTokenComparator());\r
+//    final int descendantTokenCount;\r
+//    final int descendantEntryCount;\r
+//    \r
+//    Node(final NodeHandle nodeHandle) throws IOException {\r
+//      this.nodeHandle = nodeHandle;\r
+//      \r
+//      file.seek(nodeHandle.indexOffset);\r
+//      \r
+//      // Read children to offset.\r
+//      final int numChildren = file.readInt();\r
+//      children = new TreeMap<String, NodeHandle>();\r
+//      for (int i = 0; i < numChildren; ++i) {\r
+//        final String chunk = file.readUTF().intern();\r
+//        if (chunk.length() == 0) {\r
+//          throw new IOException("Empty string chunk.");\r
+//        }\r
+//        children.put(chunk, new NodeHandle(nodeHandle.normalizedToken + chunk, file.readInt()));\r
+//      }\r
+//    \r
+//      // Read tokens.\r
+//      final int numTokens = file.readInt();\r
+//      for (int i = 0; i < numTokens; ++i) {\r
+//        final String token = file.readUTF();\r
+////        assert EntryFactory.entryFactory.normalizeToken(token).equals(nodeHandle.normalizedToken);\r
+//        final int[] offsets = new int[file.readInt()];\r
+//        for (int j = 0; j < offsets.length; ++j) {\r
+//          offsets[j]= file.readInt();\r
+//        }\r
+//        tokenToOffsets.put(token, offsets);\r
+//      }\r
+//      \r
+//      // TODO: move this up, and defer the loading of the other stuff until it's needed.\r
+//      descendantTokenCount = file.readInt();\r
+//      descendantEntryCount = file.readInt();\r
+//    }\r
+//    \r
+//    @Override\r
+//    public String toString() {\r
+//      return String.format("%s(%d,%d)", nodeHandle.normalizedToken, getThisCount(), getDescendantCount());\r
+//    }\r
+//    \r
+//    public int getDescendantCount() {\r
+//      return descendantEntryCount + descendantTokenCount;\r
+//    }\r
+//    \r
+//    public int getThisCount() {\r
+//      int count = tokenToOffsets.size();\r
+//      for (final int[] offsets : tokenToOffsets.values()) {\r
+//        count += offsets.length;\r
+//      }\r
+//      return count;\r
+//    }\r
+//\r
+//    public Object getDescendant(int position) throws IOException {\r
+//      assert position < getDescendantCount(); \r
+//\r
+////      System.out.println("getD: " + this + ", " + position);\r
+//      if (position < getThisCount()) {\r
+//        for (final Map.Entry<String, int[]> tokenEntry : tokenToOffsets.entrySet()) {\r
+//          if (position == 0) {\r
+//            return tokenEntry.getKey();\r
+//          }\r
+//          --position;\r
+//          if (position < tokenEntry.getValue().length) {\r
+//            return tokenEntry.getValue()[position];\r
+//          }\r
+//          position -= tokenEntry.getValue().length;\r
+//        }\r
+//        assert false;\r
+//      }\r
+//      position -= getThisCount();\r
+//      \r
+//      \r
+//      for (final Map.Entry<String,NodeHandle> childEntry : children.entrySet()) {\r
+//        final Node child = childEntry.getValue().getNode();\r
+//        if (position < child.getDescendantCount()) {\r
+//          return child.getDescendant(position);\r
+//        }\r
+//        position -= child.getDescendantCount();\r
+//      }\r
+//      assert false;\r
+//      return null;\r
+//    }\r
+//\r
+//    public void getDescendantEntryOffsets(final Set<Integer> entryOffsets, int maxSize) throws IOException {\r
+//      for (final int[] offsets : tokenToOffsets.values()) {\r
+//        for (final int offset : offsets) {\r
+//          if (entryOffsets.size() >= maxSize) {\r
+//            return;\r
+//          }\r
+//          entryOffsets.add(offset);\r
+//        }\r
+//      }\r
+//      if (entryOffsets.size() >= maxSize) {\r
+//        return;\r
+//      }\r
+//      for (final Map.Entry<String, NodeHandle> childEntry : children.entrySet()) {\r
+//        final Node child = childEntry.getValue().getNode();\r
+//        child.getDescendantEntryOffsets(entryOffsets, maxSize);\r
+//        if (entryOffsets.size() >= maxSize) {\r
+//          return;\r
+//        }\r
+//      }\r
+//    }\r
+//  }\r
+//  \r
+//}\r
diff --git a/src/com/hughes/android/dictionary/ZMemoryIndex.java b/src/com/hughes/android/dictionary/ZMemoryIndex.java
new file mode 100755 (executable)
index 0000000..af65321
--- /dev/null
@@ -0,0 +1,63 @@
+//package com.hughes.android.dictionary;\r
+//\r
+//import java.io.DataInputStream;\r
+//import java.io.DataOutputStream;\r
+//import java.io.IOException;\r
+//import java.io.Serializable;\r
+//\r
+//\r
+//public final class ZMemoryIndex {\r
+//  \r
+//  private static final long serialVersionUID = 3375180767865334065L;\r
+//\r
+//  static final class Node implements Serializable {\r
+//\r
+//    private static final long serialVersionUID = 8824115665859184225L;\r
+//\r
+//    final String[] chars;\r
+//    final Node[] children;\r
+//    final int[] offsets;\r
+//    \r
+//    Node(final int numChildren, final int numOffsets) {\r
+//      chars = new String[numChildren];\r
+//      children = new Node[numChildren];\r
+//      offsets = new int[numOffsets];\r
+//    }\r
+//    \r
+//    int descendantCount() {\r
+//      int total = 1;\r
+//      for (final Node child : children) {\r
+//        total += child.descendantCount();\r
+//      }\r
+//      return total;\r
+//    }\r
+//    \r
+//    Node(final DataInputStream is) throws IOException {\r
+//      final int numChildren = is.readInt();\r
+//      chars = new String[numChildren];\r
+//      children = new Node[numChildren];\r
+//      for (int i = 0; i < numChildren; ++i) {\r
+//        chars[i] = is.readUTF().intern();\r
+//        children[i] = new Node(is);\r
+//      }\r
+//      final int numOffsets = is.readInt();\r
+//      offsets = new int[numOffsets];\r
+//      for (int i = 0; i < numOffsets; ++i) {\r
+//        offsets[i] = is.readInt();\r
+//      }\r
+//    }\r
+//    \r
+//    void write(final DataOutputStream os) throws IOException {\r
+//      os.writeInt(chars.length);\r
+//      for (int i = 0; i < chars.length; i++) {\r
+//        os.writeUTF(chars[i]);\r
+//        children[i].write(os);\r
+//      }\r
+//      os.writeInt(offsets.length);\r
+//      for (int i = 0; i < offsets.length; i++) {\r
+//        os.writeInt(offsets[i]);\r
+//      }\r
+//    }\r
+//  }\r
+//  \r
+//}\r