- /**
-  * One node of a prefix tree over normalized tokens. A child's token is this
-  * node's token followed by the key the child is stored under in
-  * {@link #children}; the node built from the empty string is the root.
-  */
- static final class Node implements Serializable {
-     /** The complete normalized token this node stands for (interned). */
-     final String normalizedToken;
-
-     /** Child nodes, keyed by the characters each child appends to this node's token. */
-     final TreeMap<String, Node> children = new TreeMap<String, Node>();
-
-     /** Entry descriptors filed on this node, grouped by the token given to {@link #addToken}. */
-     final TreeMap<String, List<EntryDescriptor>> entryDescriptorsMap =
-             new TreeMap<String, List<EntryDescriptor>>();
-
-     /** File offset recorded by {@link #dump}, or -1 while the node has not been dumped. */
-     int indexFileLocation = -1;
-
-     /** Tokens filed on this node and below it; set by {@link #recursiveSetDescendantCounts}. */
-     private int descendantTokenCount;
-
-     /** Entry descriptors filed on this node and below it; set by {@link #recursiveSetDescendantCounts}. */
-     private int descendantEntryCount = 0;
-
-     /**
-      * Creates a node for the given token and interns the token string.
-      * Announces the root on standard output when the token is empty.
-      *
-      * @param normalizedToken complete normalized token for this node; must not be null
-      */
-     public Node(final String normalizedToken) {
-         if (normalizedToken.length() == 0) {
-             System.out.println("Created root.");
-         }
-         this.normalizedToken = normalizedToken.intern();
-     }
-
-     /**
-      * Finds, and optionally creates, the node at or below this one whose
-      * normalized token is {@code nToken}.
-      *
-      * @param nToken complete normalized token being looked up
-      * @param pos number of leading characters of {@code nToken} already matched;
-      *        this node's token must equal {@code nToken.substring(0, pos)} (asserted)
-      * @param create whether missing nodes may be added, splitting an existing
-      *        child key where only part of it is shared
-      * @return the node for {@code nToken}, or {@code null} when it does not
-      *         exist and {@code create} is {@code false}
-      */
-     public Node getNode(final String nToken, final int pos,
-             final boolean create) {
-         assert this.normalizedToken.equals(nToken.substring(0, pos));
-
-         if (pos == nToken.length()) {
-             assert normalizedToken.equals(nToken);
-             return this;
-         }
-
-         final String rest = nToken.substring(pos);
-         assert rest.length() > 0;
-
-         // Only the nearest keys on either side of `rest` are examined; keep
-         // whichever of the two shares more with `rest`.
-         // NOTE(review): StringUtil.longestCommonSubstring is defined elsewhere;
-         // the code below uses its result as a shared *prefix* of both arguments
-         // (length comparison against the key, substring(lcs.length())) -- confirm
-         // that this is what it returns.
-         final Map.Entry<String, Node> lcsEntry;
-         final String lcs;
-         {
-             final Map.Entry<String, Node> floorEntry = children.floorEntry(rest);
-             final Map.Entry<String, Node> ceilingEntry = children.ceilingEntry(rest);
-             final String floorLcs = floorEntry == null ? ""
-                     : StringUtil.longestCommonSubstring(rest, floorEntry.getKey());
-             final String ceilingLcs = ceilingEntry == null ? ""
-                     : StringUtil.longestCommonSubstring(rest, ceilingEntry.getKey());
-             if (floorLcs.length() > ceilingLcs.length()) {
-                 lcsEntry = floorEntry;
-                 lcs = floorLcs;
-             } else {
-                 lcsEntry = ceilingEntry;
-                 lcs = ceilingLcs;
-             }
-         }
-
-         // No LCS, have to add everything.
-         if (lcs.length() == 0) {
-             if (!create) {
-                 return null;
-             }
-             final Node result = new Node(nToken);
-             final Object old = children.put(rest.intern(), result);
-             assert old == null;
-             return result;
-         }
-
-         assert lcsEntry != null;
-
-         // The map already contained the LCS: continue the search in that child.
-         if (lcs.length() == lcsEntry.getKey().length()) {
-             assert lcs.equals(lcsEntry.getKey());
-             final Node result = lcsEntry.getValue().getNode(nToken,
-                     pos + lcs.length(), create);
-             // A lookup-only descent legitimately comes back null; the old
-             // unconditional dereference threw NullPointerException under -ea.
-             assert result != null || !create;
-             assert result == null || result.normalizedToken.equals(nToken);
-             return result;
-         }
-
-         if (!create) {
-             return null;
-         }
-
-         // Have to split, inserting the LCS: a new intermediate node takes over
-         // the shared part and the existing child moves below it, keyed by
-         // what is left of its old key.
-         final Node newChild = new Node(nToken.substring(0, pos + lcs.length()));
-         final Object old = children.put(lcs.intern(), newChild);
-         assert old == null;
-         children.remove(lcsEntry.getKey());
-         newChild.children.put(
-                 lcsEntry.getKey().substring(lcs.length()).intern(),
-                 lcsEntry.getValue());
-
-         if (lcs.equals(rest)) {
-             return newChild;
-         }
-         final Node result = new Node(nToken);
-         final Object old2 = newChild.children.put(
-                 rest.substring(lcs.length()).intern(), result);
-         assert old2 == null;
-
-         return result;
-     }
-
-     /**
-      * Invokes {@code f} on this node and then on every node below it,
-      * visiting each node before its children and children in key order.
-      *
-      * @param f callback applied to each visited node
-      */
-     void forEachNode(final Function<Node> f) {
-         f.invoke(this);
-         for (final Node child : children.values()) {
-             child.forEachNode(f);
-         }
-     }
-
-     /**
-      * Counts the nodes in the subtree rooted here.
-      *
-      * @return 1 for this node plus the count of every child subtree
-      */
-     int descendantCount() {
-         int count = 1;
-         for (final Node child : children.values()) {
-             count += child.descendantCount();
-         }
-         return count;
-     }
-
-     /**
-      * Recomputes the token and entry counts for this node and every node
-      * below it. Each count covers the node's own {@link #entryDescriptorsMap}
-      * plus the totals of all of its children.
-      */
-     void recursiveSetDescendantCounts() {
-         descendantTokenCount = entryDescriptorsMap.size();
-         descendantEntryCount = 0;
-
-         for (final Node child : children.values()) {
-             child.recursiveSetDescendantCounts();
-             descendantTokenCount += child.descendantTokenCount;
-             descendantEntryCount += child.descendantEntryCount;
-         }
-
-         for (final List<EntryDescriptor> entryDescriptors : entryDescriptorsMap.values()) {
-             descendantEntryCount += entryDescriptors.size();
-         }
-     }
-
-     @Override
-     public String toString() {
-         return normalizedToken;
-     }
-
-     /**
-      * Writes this node, then each child subtree in key order, at the file's
-      * current position. Per node the layout is: child count; for each child
-      * its key (UTF) and its {@link #indexFileLocation}; token count; for each
-      * token its key (UTF), its descriptor count and each descriptor's offset;
-      * then the descendant token count and descendant entry count.
-      *
-      * <p>The first dump of a node records its offset. Children are written
-      * after their parent, so on that first dump the child locations emitted
-      * are still -1; a later dump asserts that every node lands on its recorded
-      * offset and emits the recorded child locations. Presumably callers dump
-      * twice for that reason -- verify against the caller.
-      *
-      * @param file destination, positioned where this node should start
-      * @throws IOException if writing fails, or if this node's offset does not
-      *         fit in the 4-byte int used to store it
-      */
-     void dump(final RandomAccessFile file) throws IOException {
-         if (indexFileLocation == -1) {
-             final long position = file.getFilePointer();
-             // Offsets are stored as ints; refuse to truncate one silently.
-             if (position > Integer.MAX_VALUE) {
-                 throw new IOException("Node offset " + position
-                         + " does not fit in an int");
-             }
-             indexFileLocation = (int) position;
-         } else {
-             assert indexFileLocation == file.getFilePointer();
-         }
-
-         // Children to location.
-         file.writeInt(children.size());
-         for (final Map.Entry<String, Node> child : children.entrySet()) {
-             file.writeUTF(child.getKey());
-             file.writeInt(child.getValue().indexFileLocation);
-         }
-
-         // Entries.
-         file.writeInt(entryDescriptorsMap.size());
-         for (final Map.Entry<String, List<EntryDescriptor>> entry : entryDescriptorsMap.entrySet()) {
-             final List<EntryDescriptor> entryDescriptors = entry.getValue();
-             file.writeUTF(entry.getKey());
-             file.writeInt(entryDescriptors.size());
-             for (final EntryDescriptor entryDescriptor : entryDescriptors) {
-                 file.writeInt(entryDescriptor.offset);
-             }
-         }
-
-         // Dump counts.
-         file.writeInt(descendantTokenCount);
-         file.writeInt(descendantEntryCount);
-
-         // Dump children.
-         for (final Node child : children.values()) {
-             child.dump(file);
-         }
-     }
-
-     /**
-      * Files an entry descriptor on this node under the given token, creating
-      * the token's list on first use.
-      *
-      * @param token key under which the descriptor is grouped
-      * @param entryDescriptor descriptor to append to that token's list
-      */
-     public void addToken(final String token, final EntryDescriptor entryDescriptor) {
-         List<EntryDescriptor> entryDescriptors = this.entryDescriptorsMap.get(token);
-         if (entryDescriptors == null) {
-             entryDescriptors = new ArrayList<EntryDescriptor>();
-             this.entryDescriptorsMap.put(token, entryDescriptors);
-         }
-         entryDescriptors.add(entryDescriptor);
-     }
- }