gitweb.fperrin.net Git - DictionaryPC.git/blobdiff - src/com/hughes/android/dictionary/ZIndexBuilder.java
go
[DictionaryPC.git] / src / com / hughes / android / dictionary / ZIndexBuilder.java
diff --git a/src/com/hughes/android/dictionary/ZIndexBuilder.java b/src/com/hughes/android/dictionary/ZIndexBuilder.java
new file mode 100755 (executable)
index 0000000..5d4b26a
--- /dev/null
@@ -0,0 +1,117 @@
+package com.hughes.android.dictionary;\r
+\r
+\r
+public class ZIndexBuilder {\r
+  /* Every member below is commented out, so this class is currently an empty shell; the disabled code names types (Pattern, Node, Entry, FileUtil, ...) that this file does not import. */\r
+//  static final Pattern WHITESPACE = Pattern.compile("\\s+");\r
+//  static final Pattern NONALPHA = Pattern.compile("[^A-Za-z]+");\r
+//\r
+//  public static void main(String[] args) throws IOException,\r
+//      ClassNotFoundException {\r
+//    if (args.length != 1) {\r
+//      System.err.println("No input file.");\r
+//      return;\r
+//    }\r
+//    final String dictionaryFileName = args[0];\r
+//    createIndex(dictionaryFileName, Entry.LANG1);\r
+//    createIndex(dictionaryFileName, Entry.LANG2);\r
+//  }\r
+//\r
+//  private static void createIndex(final String dictionaryFileName,\r
+//      final byte lang) throws IOException, FileNotFoundException,\r
+//      ClassNotFoundException {\r
+//    Node rootBuilder;\r
+//    rootBuilder = processDictionaryLines(dictionaryFileName, lang);\r
+//    FileUtil.write(rootBuilder, String.format("%s_builder_%d.serialized", dictionaryFileName, lang));\r
+//    rootBuilder = (Node) FileUtil.read(String.format("%s_builder_%d.serialized", dictionaryFileName, lang));\r
+//\r
+//    rootBuilder.forEachNode(new Function<Node>() {\r
+//      @Override\r
+//      public void invoke(final Node node) {\r
+//        for (final List<EntryDescriptor> entryDescriptors : node.entryDescriptorsMap.values()) {\r
+//          Collections.sort(entryDescriptors);\r
+//        }\r
+//      }});\r
+//    \r
+//    // Dump twice to get accurate file locations.\r
+//    for (int i = 0; i < 2; ++i) {\r
+//      final RandomAccessFile raf = new RandomAccessFile(String.format(Dictionary.INDEX_FORMAT, dictionaryFileName, lang), "rw"); \r
+//      rootBuilder.dump(raf);\r
+//      raf.close();\r
+//    }\r
+//  }\r
+//\r
+//  // ----------------------------------------------------------------\r
+//  NOTE(review): EntryDescriptor.equals casts its argument without a type check, and compareTo orders by numTokens while equals/hashCode use offset, so the ordering is not consistent with equals.\r
+//  static final class EntryDescriptor implements Comparable<EntryDescriptor>, Serializable {\r
+//    final int offset;\r
+//    final int numTokens;\r
+//    public EntryDescriptor(int offset, int numTokens) {\r
+//      this.offset = offset;\r
+//      this.numTokens = numTokens;\r
+//    }\r
+//    @Override\r
+//    public boolean equals(Object obj) {\r
+//      final EntryDescriptor that = (EntryDescriptor) obj;\r
+//      return this.offset == that.offset;\r
+//    }\r
+//    @Override\r
+//    public int hashCode() {\r
+//      return offset;\r
+//    }\r
+//    @Override\r
+//    public int compareTo(EntryDescriptor o) {\r
+//      return this.numTokens < o.numTokens ? -1 : this.numTokens == o.numTokens ? 0 : 1;\r
+//    }\r
+//  }\r
+//\r
+//\r
+//  // ----------------------------------------------------------------\r
+//  NOTE(review): processDictionaryLines accepts a lang parameter but never reads it; the indexable text below is always taken from Entry.LANG1 -- confirm before re-enabling.\r
+//  static Node processDictionaryLines(final String dictionaryFileName, final byte lang) throws IOException {\r
+//    final Node root = new Node("");\r
+//    final RandomAccessFile dictionaryFile = new RandomAccessFile(dictionaryFileName, "r");\r
+//    String line;\r
+//    final Entry entry = new Entry();\r
+//    int lineCount = 0;\r
+//    long fileLocation = 0;\r
+//    while ((line = dictionaryFile.readLine()) != null) {\r
+//      assert ((int) fileLocation) == fileLocation;\r
+//\r
+//      line = line.trim();\r
+//      if (line.isEmpty() || line.startsWith("#") || !entry.parseFromLine(line)) {\r
+//        continue;\r
+//      }\r
+//      final String text = entry.getIndexableText(Entry.LANG1);\r
+//      final String[] tokens = WHITESPACE.split(text);\r
+//      final Map<String,String> tokenToNormalizedMap = new LinkedHashMap<String,String>();\r
+//      for (String token : tokens) {\r
+//        if (token.length() <= 1 || !Character.isLetter(token.charAt(0))) {\r
+//          continue;\r
+//        }\r
+//        tokenToNormalizedMap.put(token, EntryFactory.entryFactory.normalizeToken(token));\r
+//      }\r
+//      for (final Map.Entry<String, String> tokenToNormalized : tokenToNormalizedMap.entrySet()) {\r
+//        final String normalizedToken = tokenToNormalized.getValue();\r
+//        final Node node = root.getNode(normalizedToken, 0, true);\r
+//        node.addToken(tokenToNormalized.getKey(), new EntryDescriptor((int) fileLocation, tokens.length));\r
+//        assert node == root.getNode(normalizedToken, 0, false);\r
+//        assert normalizedToken\r
+//            .equals(root.getNode(normalizedToken, 0, false).normalizedToken);\r
+//      }\r
+//\r
+//      if (lineCount % 10000 == 0) {\r
+//        System.out.println("IndexBuilder: " + "lineCount=" + lineCount);\r
+//      }\r
+//      \r
+//      lineCount++;\r
+//      fileLocation = dictionaryFile.getFilePointer();\r
+//    }\r
+//    dictionaryFile.close();\r
+//    \r
+//    root.recursiveSetDescendantCounts();\r
+//    \r
+//    return root;\r
+//  }\r
+\r
+}\r