2 * ********************************************************************************
3 * Copyright (C) 2007-2011, International Business Machines Corporation and others.
5 * ********************************************************************************
7 package com.ibm.icu.impl;
9 import java.util.Iterator;
10 import java.util.LinkedList;
11 import java.util.List;
12 import java.util.ListIterator;
14 import com.ibm.icu.lang.UCharacter;
17 * TextTrieMap is a trie implementation for supporting
18 * fast prefix match for the key.
20 public class TextTrieMap<V> {
22 private Node _root = new Node();
26 * Constructs a TextTrieMap object.
28 * @param ignoreCase true to use simple case insensitive match
30 public TextTrieMap(boolean ignoreCase) {
31 _ignoreCase = ignoreCase;
35 * Adds the text key and its associated object in this object.
37 * @param text The text.
38 * @param val The value object associated with the text.
40 public TextTrieMap<V> put(CharSequence text, V val) {
41 CharIterator chitr = new CharIterator(text, 0, _ignoreCase);
42 _root.add(chitr, val);
47 * Gets an iterator of the objects associated with the
48 * longest prefix matching string key.
50 * @param text The text to be matched with prefixes.
51 * @return An iterator of the objects associated with
52 * the longest prefix matching matching key, or null
53 * if no matching entry is found.
55 public Iterator<V> get(String text) {
60 * Gets an iterator of the objects associated with the
61 * longest prefix matching string key starting at the
64 * @param text The text to be matched with prefixes.
65 * @param start The start index of of the text
66 * @return An iterator of the objects associated with the
67 * longest prefix matching matching key, or null if no
68 * matching entry is found.
70 public Iterator<V> get(CharSequence text, int start) {
71 return get(text, start, null);
74 public Iterator<V> get(CharSequence text, int start, int[] matchLen) {
75 LongestMatchHandler<V> handler = new LongestMatchHandler<V>();
76 find(text, start, handler);
77 if (matchLen != null && matchLen.length > 0) {
78 matchLen[0] = handler.getMatchLength();
80 return handler.getMatches();
83 public void find(CharSequence text, ResultHandler<V> handler) {
84 find(text, 0, handler);
87 public void find(CharSequence text, int offset, ResultHandler<V> handler) {
88 CharIterator chitr = new CharIterator(text, offset, _ignoreCase);
89 find(_root, chitr, handler);
92 private synchronized void find(Node node, CharIterator chitr, ResultHandler<V> handler) {
93 Iterator<V> values = node.values();
95 if (!handler.handlePrefixMatch(chitr.processedLength(), values)) {
100 Node nextMatch = node.findMatch(chitr);
101 if (nextMatch != null) {
102 find(nextMatch, chitr, handler);
106 public static class CharIterator implements Iterator<Character> {
107 private boolean _ignoreCase;
108 private CharSequence _text;
109 private int _nextIdx;
110 private int _startIdx;
112 private Character _remainingChar;
114 CharIterator(CharSequence text, int offset, boolean ignoreCase) {
116 _nextIdx = _startIdx = offset;
117 _ignoreCase = ignoreCase;
121 * @see java.util.Iterator#hasNext()
123 public boolean hasNext() {
124 if (_nextIdx == _text.length() && _remainingChar == null) {
131 * @see java.util.Iterator#next()
133 public Character next() {
134 if (_nextIdx == _text.length() && _remainingChar == null) {
138 if (_remainingChar != null) {
139 next = _remainingChar;
140 _remainingChar = null;
143 int cp = UCharacter.foldCase(Character.codePointAt(_text, _nextIdx), true);
144 _nextIdx = _nextIdx + Character.charCount(cp);
146 char[] chars = Character.toChars(cp);
148 if (chars.length == 2) {
149 _remainingChar = chars[1];
152 next = _text.charAt(_nextIdx);
160 * @see java.util.Iterator#remove()
162 public void remove() {
163 throw new UnsupportedOperationException("remove() not supproted");
166 public int nextIndex() {
170 public int processedLength() {
171 if (_remainingChar != null) {
172 throw new IllegalStateException("In the middle of surrogate pair");
174 return _nextIdx - _startIdx;
179 * Callback handler for processing prefix matches used by
182 public interface ResultHandler<V> {
184 * Handles a prefix key match
186 * @param matchLength Matched key's length
187 * @param values An iterator of the objects associated with the matched key
188 * @return Return true to continue the search in the trie, false to quit.
190 public boolean handlePrefixMatch(int matchLength, Iterator<V> values);
193 private static class LongestMatchHandler<V> implements ResultHandler<V> {
194 private Iterator<V> matches = null;
195 private int length = 0;
197 public boolean handlePrefixMatch(int matchLength, Iterator<V> values) {
198 if (matchLength > length) {
199 length = matchLength;
205 public Iterator<V> getMatches() {
209 public int getMatchLength() {
215 * Inner class representing a text node in the trie.
218 private char[] _text;
219 private List<V> _values;
220 private List<Node> _children;
225 private Node(char[] text, List<V> values, List<Node> children) {
228 _children = children;
231 public Iterator<V> values() {
232 if (_values == null) {
235 return _values.iterator();
238 public void add(CharIterator chitr, V value) {
239 StringBuilder buf = new StringBuilder();
240 while (chitr.hasNext()) {
241 buf.append(chitr.next());
243 add(toCharArray(buf), 0, value);
246 public Node findMatch(CharIterator chitr) {
247 if (_children == null) {
250 if (!chitr.hasNext()) {
254 Character ch = chitr.next();
255 for (Node child : _children) {
256 if (ch < child._text[0]) {
259 if (ch == child._text[0]) {
260 if (child.matchFollowing(chitr)) {
269 private void add(char[] text, int offset, V value) {
270 if (text.length == offset) {
271 _values = addValue(_values, value);
275 if (_children == null) {
276 _children = new LinkedList<Node>();
277 Node child = new Node(subArray(text, offset), addValue(null, value), null);
278 _children.add(child);
282 // walk through children
283 ListIterator<Node> litr = _children.listIterator();
284 while (litr.hasNext()) {
285 Node next = litr.next();
286 if (text[offset] < next._text[0]) {
290 if (text[offset] == next._text[0]) {
291 int matchLen = next.lenMatches(text, offset);
292 if (matchLen == next._text.length) {
294 next.add(text, offset + matchLen, value);
296 // partial match, create a branch
297 next.split(matchLen);
298 next.add(text, offset + matchLen, value);
303 // add a new child to this node
304 litr.add(new Node(subArray(text, offset), addValue(null, value), null));
307 private boolean matchFollowing(CharIterator chitr) {
308 boolean matched = true;
310 while (idx < _text.length) {
311 if(!chitr.hasNext()) {
315 Character ch = chitr.next();
316 if (ch != _text[idx]) {
325 private int lenMatches(char[] text, int offset) {
326 int textLen = text.length - offset;
327 int limit = _text.length < textLen ? _text.length : textLen;
329 while (len < limit) {
330 if (_text[len] != text[offset + len]) {
338 private void split(int offset) {
339 // split the current node at the offset
340 char[] childText = subArray(_text, offset);
341 _text = subArray(_text, 0, offset);
343 // add the Node representing after the offset as a child
344 Node child = new Node(childText, _values, _children);
347 _children = new LinkedList<Node>();
348 _children.add(child);
351 private List<V> addValue(List<V> list, V value) {
353 list = new LinkedList<V>();
360 private static char[] toCharArray(CharSequence text) {
361 char[] array = new char[text.length()];
362 for (int i = 0; i < array.length; i++) {
363 array[i] = text.charAt(i);
368 private static char[] subArray(char[] array, int start) {
372 char[] sub = new char[array.length - start];
373 System.arraycopy(array, start, sub, 0, sub.length);
377 private static char[] subArray(char[] array, int start, int limit) {
378 if (start == 0 && limit == array.length) {
381 char[] sub = new char[limit - start];
382 System.arraycopy(array, start, sub, 0, limit - start);