-\r
- static final class Node implements Serializable {\r
- private static final long serialVersionUID = -5423134653901704956L;\r
- \r
- final TreeMap<String, Node> childrenMap = new TreeMap<String, Node>();\r
- final List<Integer> offsetsList = new ArrayList<Integer>();\r
- final String sequence;\r
- int descendantOffsetCount = 0;\r
-\r
- public Node(String sequence) {\r
- if (sequence.length() == 0) {\r
- System.out.println("Created root.");\r
- }\r
- this.sequence = sequence.intern();\r
- }\r
-\r
- public Node getIndexNode(final String word, final int pos,\r
- final boolean create) {\r
- assert this.sequence.equals(word.substring(0, pos));\r
-\r
- if (pos == word.length()) {\r
- assert sequence.equals(word);\r
- return this;\r
- }\r
-\r
- final String rest = word.substring(pos);\r
- assert rest.length() > 0;\r
-\r
- final Map.Entry<String, Node> lcsEntry;\r
- final String lcs;\r
- {\r
- final Map.Entry<String, Node> floorEntry = childrenMap.floorEntry(rest);\r
- final Map.Entry<String, Node> ceilingEntry = childrenMap\r
- .ceilingEntry(rest);\r
- final String floorLcs = floorEntry == null ? "" : StringUtil\r
- .longestCommonSubstring(rest, floorEntry.getKey());\r
- final String ceilingLcs = ceilingEntry == null ? "" : StringUtil\r
- .longestCommonSubstring(rest, ceilingEntry.getKey());\r
- if (floorLcs.length() > ceilingLcs.length()) {\r
- lcsEntry = floorEntry;\r
- lcs = floorLcs;\r
- } else {\r
- lcsEntry = ceilingEntry;\r
- lcs = ceilingLcs;\r
- }\r
- }\r
-\r
- // No LCS, have to add everything.\r
- if (lcs.length() == 0) {\r
- if (!create) {\r
- return null;\r
- }\r
- final Node result = new Node(word);\r
- final Object old = childrenMap.put(rest.intern(), result);\r
- assert old == null;\r
- // System.out.println(" Adding final chunk: " + rest);\r
- return result;\r
- }\r
-\r
- assert lcsEntry != null;\r
-\r
- // The map already contained the LCS.\r
- if (lcs.length() == lcsEntry.getKey().length()) {\r
- assert lcs.equals(lcsEntry.getKey());\r
- final Node result = lcsEntry.getValue().getIndexNode(word,\r
- pos + lcs.length(), create);\r
- assert result.sequence.equals(word);\r
- return result;\r
- }\r
-\r
- if (!create) {\r
- return null;\r
- }\r
-\r
- // Have to split, inserting the LCS.\r
- // System.out.println(" Splitting " + lcsEntry + "/" + word + " @ " +\r
- // lcs);\r
- final Node newChild = new Node(word.substring(0, pos + lcs.length()));\r
- final Object old = childrenMap.put(lcs.intern(), newChild);\r
- assert old == null;\r
- childrenMap.remove(lcsEntry.getKey());\r
- newChild.childrenMap.put(lcsEntry.getKey().substring(lcs.length())\r
- .intern(), lcsEntry.getValue());\r
-\r
- if (lcs.equals(rest)) {\r
- return newChild;\r
- }\r
- final Node result = new Node(word);\r
- final Object old2 = newChild.childrenMap.put(rest.substring(lcs.length())\r
- .intern(), result);\r
- assert old2 == null;\r
- // System.out.println(" newChildrenMap=" + newChild.childrenMap);\r
-\r
- return result;\r
- }\r
-\r
- Index.Node toIndexNode() {\r
- final Index.Node result = new Index.Node(childrenMap.size(), offsetsList\r
- .size());\r
- int i = 0;\r
- for (final Map.Entry<String, Node> entry : childrenMap.entrySet()) {\r
- result.chars[i] = entry.getKey();\r
- result.children[i] = entry.getValue().toIndexNode();\r
- i++;\r
- }\r
- return result;\r
- }\r
-\r
- void forEachNode(final Function<Node> f) {\r
- f.invoke(this);\r
- for (final Node child : childrenMap.values()) {\r
- child.forEachNode(f);\r
- }\r
- }\r
-\r
- int descendantCount() {\r
- int count = 1;\r
- for (final Node child : childrenMap.values()) {\r
- count += child.descendantCount();\r
- }\r
- return count;\r
- }\r
-\r
- void recursiveSetDescendantOffsetCount() {\r
- descendantOffsetCount = offsetsList.size();\r
- for (final Node child : childrenMap.values()) {\r
- child.recursiveSetDescendantOffsetCount();\r
- descendantOffsetCount += child.descendantOffsetCount;\r
- }\r