+ /**
+ * Opens the index file read-only and loads the root node, which is
+ * addressed by the empty token at file offset 0.
+ *
+ * @param filename path of the index file; opened via {@link RandomAccessFile} in "r" mode
+ * @throws IOException if the file cannot be opened or reading the root node fails
+ */
public Index(final String filename) throws IOException {
this.filename = filename;
file = new RandomAccessFile(filename, "r");
- root = getNode("", 0);
+ root = getNode(new NodeHandle("", 0));
}
\r
+ /**
+ * Looks up a token in the index, starting at the root node.
+ *
+ * @param normalizedToken token to search for; presumably already normalized
+ *        by the caller (NOTE(review): confirm, nothing here normalizes it)
+ * @return the node reached by the recursive lookup: the exact node when the
+ *         whole token is consumed, otherwise a child whose chunk starts with
+ *         the unmatched remainder, otherwise the deepest node reached
+ * @throws IOException if a node has to be read from the index file and that fails
+ */
- public Node lookup(final String text) throws IOException {
- return lookup(text, 0, root);
+ public Node lookup(final String normalizedToken) throws IOException {
+ return lookup(normalizedToken, 0, root);
}
\r
+ /**
+ * Recursive step of the lookup.
+ *
+ * @param normalizedToken the full token being searched for
+ * @param pos index of the first character of the token not yet matched;
+ *        the recursion advances it past each child chunk it follows
+ * @param node node to continue the search from
+ * @return {@code node} itself when {@code pos} has reached the end of the
+ *         token; otherwise the result of descending into a matching child,
+ *         a child whose chunk starts with the remainder, or {@code node}
+ *         when no child matches at all
+ * @throws IOException if loading a child node from the index file fails
+ */
- private Node lookup(final String text, final int pos, final Node node) throws IOException {
- if (pos == text.length()) {
+ private Node lookup(final String normalizedToken, final int pos, final Node node) throws IOException {
+ if (pos == normalizedToken.length()) {
return node;
}

- // Check whether any prefix of text is a child.
- for (int i = pos + 1; i <= text.length(); ++i) {
- final Integer child = node.children.get(text.substring(pos, i));
- if (child != null) {
- return lookup(text, i, getNode(text.substring(0, i), child));
+ // Check whether any prefix of the token is a child.
+ // Prefixes are tried shortest first and the first hit is followed
+ // unconditionally (the return below means there is no backtracking).
+ for (int i = pos + 1; i <= normalizedToken.length(); ++i) {
+ final NodeHandle childHandle = node.children.get(normalizedToken.substring(pos, i));
+ if (childHandle != null) {
+ return lookup(normalizedToken, i, childHandle.getNode());
}
}

- // Check whether any child starts with what's left of text.
+ // Check whether any child starts with what's left of the token, i.e. the
+ // token ends part-way through a child's chunk. The first such child in
+ // the map's iteration order wins.
- final String remainder = text.substring(pos);
- for (final Map.Entry<String, Integer> childEntry : node.children.entrySet()) {
- if (childEntry.getKey().startsWith(remainder)) {
- return getNode(node.text + childEntry.getKey(), childEntry.getValue());
+ final String remainder = normalizedToken.substring(pos);
+ // Note: despite its name, childHandle here is the map entry; its value is the handle.
+ for (final Map.Entry<String, NodeHandle> childHandle : node.children.entrySet()) {
+ if (childHandle.getKey().startsWith(remainder)) {
+ return getNode(childHandle.getValue());
}
}

+ // Nothing matched: fall back to the deepest node reached so far.
return node;
}
\r
+ /**
+ * Returns the node a handle refers to, reading it from the index file only
+ * the first time. Nodes are cached in {@code indexOffsetToNode}, keyed by
+ * the handle's file offset.
+ *
+ * @param nodeHandle handle carrying the node's token and file offset
+ * @return the cached node for that offset, or a newly read one
+ * @throws IOException if constructing the node (which reads the file) fails
+ */
- private Node getNode(final String text, final int indexOffset) throws IOException {
- Node node = indexOffsetToNode.get(indexOffset);
+ private Node getNode(final NodeHandle nodeHandle) throws IOException {
+ Node node = indexOffsetToNode.get(nodeHandle.indexOffset);
if (node == null) {
- node = new Node(text, indexOffset);
- indexOffsetToNode.put(indexOffset, node);
+ node = new Node(nodeHandle);
+ indexOffsetToNode.put(nodeHandle.indexOffset, node);
}
return node;
}
+ \r
+ /**
+ * Lightweight reference to a node: the token that leads to it plus the
+ * node's offset in the index file. Holding a handle does not read the node;
+ * {@link #getNode()} resolves it through the enclosing index.
+ */
+ final class NodeHandle {
+ // Token spelled out by the path from the root to the referenced node.
+ final String normalizedToken;
+ // Offset of the node's record in the index file.
+ final int indexOffset;
+
+ // NOTE(review): nothing in this constructor performs I/O, so the
+ // "throws IOException" clause looks unnecessary; confirm before removing.
+ NodeHandle(final String normalizedToken, final int indexOffset) throws IOException {
+ this.normalizedToken = normalizedToken;
+ this.indexOffset = indexOffset;
+ }
+ 
+ /**
+ * Resolves this handle via the enclosing index's {@code getNode}.
+ *
+ * @return the node this handle refers to
+ * @throws IOException if the node has to be read and that fails
+ */
+ Node getNode() throws IOException {
+ return Index.this.getNode(this);
+ }
+ }
\r
+ /**
+ * One node of the on-disk index, read eagerly from the index file at the
+ * offset given by its handle.
+ *
+ * Record layout as read by the constructor:
+ * int numChildren, then per child: UTF chunk, int child offset;
+ * int numTokens, then per token: UTF token, int count, count x int offset;
+ * int descendantTokenCount; int descendantEntryCount.
+ */
final class Node {
- final String text;
- final int indexOffset;
- final TreeMap<String,Integer> children;
- final int[] offsets;
+ // Handle (token + file offset) this node was loaded from.
+ final NodeHandle nodeHandle;
+ // Child chunk -> handle of the child reached by appending that chunk.
+ final TreeMap<String,NodeHandle> children;
+ // Tokens stored at this node -> their entry offsets, ordered by the entry comparator.
+ final Map<String,int[]> tokenToOffsets = new TreeMap<String, int[]>(EntryFactory.entryFactory.getEntryComparator());
+ // Counts stored in the file after the token list.
+ final int descendantTokenCount;
+ final int descendantEntryCount;

- Node(final String text, final int indexOffset) throws IOException {
- this.text = text;
- this.indexOffset = indexOffset;
+ /**
+ * Reads the node record located at {@code nodeHandle.indexOffset}.
+ *
+ * @param nodeHandle token and file offset of the node to read
+ * @throws IOException if reading fails or a child chunk is empty
+ */
+ Node(final NodeHandle nodeHandle) throws IOException {
+ this.nodeHandle = nodeHandle;
+
+ file.seek(nodeHandle.indexOffset);

- file.seek(indexOffset);
+ // Read the child chunk -> child offset table.
final int numChildren = file.readInt();
- children = new TreeMap<String, Integer>();
+ children = new TreeMap<String, NodeHandle>();
for (int i = 0; i < numChildren; ++i) {
final String chunk = file.readUTF().intern();
if (chunk.length() == 0) {
throw new IOException("Empty string chunk.");
}
- children.put(chunk, file.readInt());
+ // A child's token is this node's token extended by the chunk.
+ children.put(chunk, new NodeHandle(nodeHandle.normalizedToken + chunk, file.readInt()));
+ }
+
+ // Read tokens.
+ final int numTokens = file.readInt();
+ for (int i = 0; i < numTokens; ++i) {
+ final String token = file.readUTF();
+ assert EntryFactory.entryFactory.normalizeToken(token).equals(nodeHandle.normalizedToken);
+ final int[] offsets = new int[file.readInt()];
+ for (int j = 0; j < offsets.length; ++j) {
+ offsets[j]= file.readInt();
+ }
+ tokenToOffsets.put(token, offsets);
}

- final int numOffsets = file.readInt();
- offsets = new int[numOffsets];
- for (int i = 0; i < offsets.length; ++i) {
- offsets[i] = file.readInt();
+ descendantTokenCount = file.readInt();
+ descendantEntryCount = file.readInt();
+ }
+
+ /**
+ * @return the sum of the stored descendant entry and token counts
+ */
+ public int getDescendantCount() {
+ return descendantEntryCount + descendantTokenCount;
+ }
+
+ /**
+ * Finds the descendant at a flattened position by walking the children
+ * in key order and descending into the child whose count range contains
+ * the position.
+ *
+ * NOTE(review): whether the stored counts include this node's own
+ * tokens/entries is not visible here, so positions that no child
+ * accounts for are not resolved and yield {@code null}. Confirm the
+ * intended layout and the concrete return type before relying on this.
+ *
+ * @param position zero-based position among this node's descendants
+ * @return the descendant found by the recursive walk, or {@code null}
+ *         when the position is out of range or no child accounts for it
+ * @throws IOException if a child node has to be read and that fails
+ */
+ public Object getDescendant(final int position) throws IOException {
+ // Valid positions are 0 .. count-1; everything else has no descendant.
+ if (position < 0 || position >= getDescendantCount()) {
+ return null;
+ }
+ // The parameter is final, so track the position relative to the
+ // current child in a local cursor.
+ int remaining = position;
+ for (final Map.Entry<String,NodeHandle> childEntry : children.entrySet()) {
+ final Node child = childEntry.getValue().getNode();
+ final int childCount = child.getDescendantCount();
+ if (remaining < childCount) {
+ return child.getDescendant(remaining);
+ }
+ // Not in this child: skip over everything it accounts for.
+ remaining -= childCount;
}
+ return null;
}

+ /**
+ * Adds entry offsets to {@code entryOffsets}, this node's own tokens
+ * first and then each child's subtree in key order, stopping as soon as
+ * the set holds {@code maxSize} elements.
+ *
+ * @param entryOffsets set that receives the offsets
+ * @param maxSize size of {@code entryOffsets} at which collection stops
+ * @throws IOException if a child node has to be read and that fails
+ */
public void getDescendantEntryOffsets(final Set<Integer> entryOffsets, int maxSize) throws IOException {
- for (int i = 0; i < offsets.length; ++i) {
- if (entryOffsets.size() >= maxSize) {
- return;
+ for (final int[] offsets : tokenToOffsets.values()) {
+ for (final int offset : offsets) {
+ if (entryOffsets.size() >= maxSize) {
+ return;
+ }
+ entryOffsets.add(offset);
}
- entryOffsets.add(offsets[i]);
}
if (entryOffsets.size() >= maxSize) {
return;
}
- for (final Map.Entry<String, Integer> childEntry : children.entrySet()) {
- final Node child = getNode(text + childEntry.getKey(), childEntry.getValue());
+ for (final Map.Entry<String, NodeHandle> childEntry : children.entrySet()) {
+ final Node child = childEntry.getValue().getNode();
child.getDescendantEntryOffsets(entryOffsets, maxSize);
if (entryOffsets.size() >= maxSize) {
return;
}
}
}
-
+
}