jars/icu4j-52_1/main/classes/core/src/com/ibm/icu/impl/TextTrieMap.java

   1 /*
   2  * ********************************************************************************
   3  * Copyright (C) 2007-2011, International Business Machines Corporation and others.
   4  * All Rights Reserved.
   5  * ********************************************************************************
   6  */
   7 package com.ibm.icu.impl;
   8
   9 import java.util.Iterator;
  10 import java.util.LinkedList;
  11 import java.util.List;
  12 import java.util.ListIterator;
  13
  14 import com.ibm.icu.lang.UCharacter;
  15
  16 /**
  17  * TextTrieMap is a trie implementation for supporting
  18  * fast prefix match for the key.
  19  */
  20 public class TextTrieMap<V> {
  21
  22     private Node _root = new Node();
  23     boolean _ignoreCase;
  24
  25     /**
  26      * Constructs a TextTrieMap object.
  27      *
  28      * @param ignoreCase true to use simple case insensitive match
  29      */
  30     public TextTrieMap(boolean ignoreCase) {
  31         _ignoreCase = ignoreCase;
  32     }
  33
  34     /**
  35      * Adds the text key and its associated object in this object.
  36      *
  37      * @param text The text.
  38      * @param val The value object associated with the text.
  39      */
  40     public TextTrieMap<V> put(CharSequence text, V val) {
  41         CharIterator chitr = new CharIterator(text, 0, _ignoreCase);
  42         _root.add(chitr, val);
  43         return this;
  44     }
  45
  46     /**
  47      * Gets an iterator of the objects associated with the
  48      * longest prefix matching string key.
  49      *
  50      * @param text The text to be matched with prefixes.
  51      * @return An iterator of the objects associated with
  52      * the longest prefix matching matching key, or null
  53      * if no matching entry is found.
  54      */
  55     public Iterator<V> get(String text) {
  56         return get(text, 0);
  57     }
  58
  59     /**
  60      * Gets an iterator of the objects associated with the
  61      * longest prefix matching string key starting at the
  62      * specified position.
  63      *
  64      * @param text The text to be matched with prefixes.
  65      * @param start The start index of of the text
  66      * @return An iterator of the objects associated with the
  67      * longest prefix matching matching key, or null if no
  68      * matching entry is found.
  69      */
  70     public Iterator<V> get(CharSequence text, int start) {
  71         return get(text, start, null);
  72     }
  73
  74     public Iterator<V> get(CharSequence text, int start, int[] matchLen) {
  75         LongestMatchHandler<V> handler = new LongestMatchHandler<V>();
  76         find(text, start, handler);
  77         if (matchLen != null && matchLen.length > 0) {
  78             matchLen[0] = handler.getMatchLength();
  79         }
  80         return handler.getMatches();
  81     }
  82
  83     public void find(CharSequence text, ResultHandler<V> handler) {
  84         find(text, 0, handler);
  85     }
  86
  87     public void find(CharSequence text, int offset, ResultHandler<V> handler) {
  88         CharIterator chitr = new CharIterator(text, offset, _ignoreCase);
  89         find(_root, chitr, handler);
  90     }
  91
  92     private synchronized void find(Node node, CharIterator chitr, ResultHandler<V> handler) {
  93         Iterator<V> values = node.values();
  94         if (values != null) {
  95             if (!handler.handlePrefixMatch(chitr.processedLength(), values)) {
  96                 return;
  97             }
  98         }
  99
 100         Node nextMatch = node.findMatch(chitr);
 101         if (nextMatch != null) {
 102             find(nextMatch, chitr, handler);
 103         }
 104     }
 105
 106     public static class CharIterator implements Iterator<Character> {
 107         private boolean _ignoreCase;
 108         private CharSequence _text;
 109         private int _nextIdx;
 110         private int _startIdx;
 111
 112         private Character _remainingChar;
 113
 114         CharIterator(CharSequence text, int offset, boolean ignoreCase) {
 115             _text = text;
 116             _nextIdx = _startIdx = offset;
 117             _ignoreCase = ignoreCase;
 118         }
 119
 120         /* (non-Javadoc)
 121          * @see java.util.Iterator#hasNext()
 122          */
 123         public boolean hasNext() {
 124             if (_nextIdx == _text.length() && _remainingChar == null) {
 125                 return false;
 126             }
 127             return true;
 128         }
 129
 130         /* (non-Javadoc)
 131          * @see java.util.Iterator#next()
 132          */
 133         public Character next() {
 134             if (_nextIdx == _text.length() && _remainingChar == null) {
 135                 return null;
 136             }
 137             Character next;
 138             if (_remainingChar != null) {
 139                 next = _remainingChar;
 140                 _remainingChar = null;
 141             } else {
 142                 if (_ignoreCase) {
 143                     int cp = UCharacter.foldCase(Character.codePointAt(_text, _nextIdx), true);
 144                     _nextIdx = _nextIdx + Character.charCount(cp);
 145
 146                     char[] chars = Character.toChars(cp);
 147                     next = chars[0];
 148                     if (chars.length == 2) {
 149                         _remainingChar = chars[1];
 150                     }
 151                 } else {
 152                     next = _text.charAt(_nextIdx);
 153                     _nextIdx++;
 154                 }
 155             }
 156             return next;
 157         }
 158
 159         /* (non-Javadoc)
 160          * @see java.util.Iterator#remove()
 161          */
 162         public void remove() {
 163             throw new UnsupportedOperationException("remove() not supproted");
 164         }
 165
 166         public int nextIndex() {
 167             return _nextIdx;
 168         }
 169
 170         public int processedLength() {
 171             if (_remainingChar != null) {
 172                 throw new IllegalStateException("In the middle of surrogate pair");
 173             }
 174             return _nextIdx - _startIdx;
 175         }
 176     }
 177
 178     /**
 179      * Callback handler for processing prefix matches used by
 180      * find method.
 181      */
 182     public interface ResultHandler<V> {
 183         /**
 184          * Handles a prefix key match
 185          *
 186          * @param matchLength Matched key's length
 187          * @param values An iterator of the objects associated with the matched key
 188          * @return Return true to continue the search in the trie, false to quit.
 189          */
 190         public boolean handlePrefixMatch(int matchLength, Iterator<V> values);
 191     }
 192
 193     private static class LongestMatchHandler<V> implements ResultHandler<V> {
 194         private Iterator<V> matches = null;
 195         private int length = 0;
 196
 197         public boolean handlePrefixMatch(int matchLength, Iterator<V> values) {
 198             if (matchLength > length) {
 199                 length = matchLength;
 200                 matches = values;
 201             }
 202             return true;
 203         }
 204
 205         public Iterator<V> getMatches() {
 206             return matches;
 207         }
 208
 209         public int getMatchLength() {
 210             return length;
 211         }
 212     }
 213
 214     /**
 215      * Inner class representing a text node in the trie.
 216      */
 217     private class Node {
 218         private char[] _text;
 219         private List<V> _values;
 220         private List<Node> _children;
 221
 222         private Node() {
 223         }
 224
 225         private Node(char[] text, List<V> values, List<Node> children) {
 226             _text = text;
 227             _values = values;
 228             _children = children;
 229         }
 230
 231         public Iterator<V> values() {
 232             if (_values == null) {
 233                 return null;
 234             }
 235             return _values.iterator();
 236         }
 237
 238         public void add(CharIterator chitr, V value) {
 239             StringBuilder buf = new StringBuilder();
 240             while (chitr.hasNext()) {
 241                 buf.append(chitr.next());
 242             }
 243             add(toCharArray(buf), 0, value);
 244         }
 245
 246         public Node findMatch(CharIterator chitr) {
 247             if (_children == null) {
 248                 return null;
 249             }
 250             if (!chitr.hasNext()) {
 251                 return null;
 252             }
 253             Node match = null;
 254             Character ch = chitr.next();
 255             for (Node child : _children) {
 256                 if (ch < child._text[0]) {
 257                     break;
 258                 }
 259                 if (ch == child._text[0]) {
 260                     if (child.matchFollowing(chitr)) {
 261                         match = child;
 262                     }
 263                     break;
 264                 }
 265             }
 266             return match;
 267         }
 268
 269         private void add(char[] text, int offset, V value) {
 270             if (text.length == offset) {
 271                 _values = addValue(_values, value);
 272                 return;
 273             }
 274
 275             if (_children == null) {
 276                 _children = new LinkedList<Node>();
 277                 Node child = new Node(subArray(text, offset), addValue(null, value), null);
 278                 _children.add(child);
 279                 return;
 280             }
 281
 282             // walk through children
 283             ListIterator<Node> litr = _children.listIterator();
 284             while (litr.hasNext()) {
 285                 Node next = litr.next();
 286                 if (text[offset] < next._text[0]) {
 287                     litr.previous();
 288                     break;
 289                 }
 290                 if (text[offset] == next._text[0]) {
 291                     int matchLen = next.lenMatches(text, offset);
 292                     if (matchLen == next._text.length) {
 293                         // full match
 294                         next.add(text, offset + matchLen, value);
 295                     } else {
 296                         // partial match, create a branch
 297                         next.split(matchLen);
 298                         next.add(text, offset + matchLen, value);
 299                     }
 300                     return;
 301                 }
 302             }
 303             // add a new child to this node
 304             litr.add(new Node(subArray(text, offset), addValue(null, value), null));
 305         }
 306
 307         private boolean matchFollowing(CharIterator chitr) {
 308             boolean matched = true;
 309             int idx = 1;
 310             while (idx < _text.length) {
 311                 if(!chitr.hasNext()) {
 312                     matched = false;
 313                     break;
 314                 }
 315                 Character ch = chitr.next();
 316                 if (ch != _text[idx]) {
 317                     matched = false;
 318                     break;
 319                 }
 320                 idx++;
 321             }
 322             return matched;
 323         }
 324
 325         private int lenMatches(char[] text, int offset) {
 326             int textLen = text.length - offset;
 327             int limit = _text.length < textLen ? _text.length : textLen;
 328             int len = 0;
 329             while (len < limit) {
 330                 if (_text[len] != text[offset + len]) {
 331                     break;
 332                 }
 333                 len++;
 334             }
 335             return len;
 336         }
 337
 338         private void split(int offset) {
 339             // split the current node at the offset
 340             char[] childText = subArray(_text, offset);
 341             _text = subArray(_text, 0, offset);
 342
 343             // add the Node representing after the offset as a child
 344             Node child = new Node(childText, _values, _children);
 345             _values = null;
 346
 347             _children = new LinkedList<Node>();
 348             _children.add(child);
 349         }
 350
 351         private List<V> addValue(List<V> list, V value) {
 352             if (list == null) {
 353                 list = new LinkedList<V>();
 354             }
 355             list.add(value);
 356             return list;
 357         }
 358     }
 359
 360     private static char[] toCharArray(CharSequence text) {
 361         char[] array = new char[text.length()];
 362         for (int i = 0; i < array.length; i++) {
 363             array[i] = text.charAt(i);
 364         }
 365         return array;
 366     }
 367
 368     private static char[] subArray(char[] array, int start) {
 369         if (start == 0) {
 370             return array;
 371         }
 372         char[] sub = new char[array.length - start];
 373         System.arraycopy(array, start, sub, 0, sub.length);
 374         return sub;
 375     }
 376
 377     private static char[] subArray(char[] array, int start, int limit) {
 378         if (start == 0 && limit == array.length) {
 379             return array;
 380         }
 381         char[] sub = new char[limit - start];
 382         System.arraycopy(array, start, sub, 0, limit - start);
 383         return sub;
 384     }
 385 }