2 *******************************************************************************
3 * Copyright (C) 2011-2012, International Business Machines
4 * Corporation and others. All Rights Reserved.
5 *******************************************************************************
6 * created on: 2011jan06
7 * created by: Markus W. Scherer
8 * ported from ICU4C ucharstrie.h/.cpp
11 package com.ibm.icu.util;
13 import java.io.IOException;
14 import java.util.ArrayList;
15 import java.util.NoSuchElementException;
17 import com.ibm.icu.text.UTF16;
18 import com.ibm.icu.util.BytesTrie.Result;
21 * Light-weight, non-const reader class for a CharsTrie.
22 * Traverses a char-serialized data structure with minimal state,
23 * for mapping strings (16-bit-unit sequences) to non-negative integer values.
25 * <p>This class is not intended for public subclassing.
28 * @author Markus W. Scherer
30 public final class CharsTrie implements Cloneable, Iterable<CharsTrie.Entry> {
32 * Constructs a CharsTrie reader instance.
34 * <p>The CharSequence must contain a copy of a char sequence from the CharsTrieBuilder,
35 * with the offset indicating the first char of that sequence.
36 * The CharsTrie object will not read more chars than
37 * the CharsTrieBuilder generated in the corresponding build() call.
39 * <p>The CharSequence is not copied/cloned and must not be modified while
40 * the CharsTrie object is in use.
42 * @param trieChars CharSequence that contains the serialized trie.
43 * @param offset Root offset of the trie in the CharSequence.
46 public CharsTrie(CharSequence trieChars, int offset) {
49 remainingMatchLength_=-1;
53 * Clones this trie reader object and its state,
54 * but not the char array which will be shared.
55 * @return A shallow clone of this trie.
59 public Object clone() throws CloneNotSupportedException {
60 return super.clone(); // A shallow copy is just what we need.
64 * Resets this trie to its initial state.
68 public CharsTrie reset() {
70 remainingMatchLength_=-1;
75 * CharsTrie state object, for saving a trie's current state
76 * and resetting the trie back to this state later.
79 public static final class State {
81 * Constructs an empty State.
85 private CharSequence chars;
88 private int remainingMatchLength;
92 * Saves the state of this trie.
93 * @param state The State object to hold the trie's state.
98 public CharsTrie saveState(State state) /*const*/ {
102 state.remainingMatchLength=remainingMatchLength_;
107 * Resets this trie to the saved state.
108 * @param state The State object which holds a saved trie state.
110 * @throws IllegalArgumentException if the state object contains no state,
111 * or the state of a different trie
116 public CharsTrie resetToState(State state) {
117 if(chars_==state.chars && chars_!=null && root_==state.root) {
119 remainingMatchLength_=state.remainingMatchLength;
121 throw new IllegalArgumentException("incompatible trie state");
127 * Determines whether the string so far matches, whether it has a value,
128 * and whether another input char can continue a matching string.
129 * @return The match/value Result.
132 public Result current() /*const*/ {
135 return Result.NO_MATCH;
138 return (remainingMatchLength_<0 && (node=chars_.charAt(pos))>=kMinValueLead) ?
139 valueResults_[node>>15] : Result.NO_VALUE;
144 * Traverses the trie from the initial state for this input char.
145 * Equivalent to reset().next(inUnit).
146 * @param inUnit Input char value. Values below 0 and above 0xffff will never match.
147 * @return The match/value Result.
150 public Result first(int inUnit) {
151 remainingMatchLength_=-1;
152 return nextImpl(root_, inUnit);
156 * Traverses the trie from the initial state for the
157 * one or two UTF-16 code units for this input code point.
158 * Equivalent to reset().nextForCodePoint(cp).
159 * @param cp A Unicode code point 0..0x10ffff.
160 * @return The match/value Result.
163 public Result firstForCodePoint(int cp) {
166 (first(UTF16.getLeadSurrogate(cp)).hasNext() ?
167 next(UTF16.getTrailSurrogate(cp)) :
172 * Traverses the trie from the current state for this input char.
173 * @param inUnit Input char value. Values below 0 and above 0xffff will never match.
174 * @return The match/value Result.
177 public Result next(int inUnit) {
180 return Result.NO_MATCH;
182 int length=remainingMatchLength_; // Actual remaining match length minus 1.
184 // Remaining part of a linear-match node.
185 if(inUnit==chars_.charAt(pos++)) {
186 remainingMatchLength_=--length;
189 return (length<0 && (node=chars_.charAt(pos))>=kMinValueLead) ?
190 valueResults_[node>>15] : Result.NO_VALUE;
193 return Result.NO_MATCH;
196 return nextImpl(pos, inUnit);
200 * Traverses the trie from the current state for the
201 * one or two UTF-16 code units for this input code point.
202 * @param cp A Unicode code point 0..0x10ffff.
203 * @return The match/value Result.
206 public Result nextForCodePoint(int cp) {
209 (next(UTF16.getLeadSurrogate(cp)).hasNext() ?
210 next(UTF16.getTrailSurrogate(cp)) :
215 * Traverses the trie from the current state for this string.
218 * Result result=current();
220 * if(!result.hasNext()) return Result.NO_MATCH;
224 * @param s Contains a string.
225 * @param sIndex The start index of the string in s.
226 * @param sLimit The (exclusive) end index of the string in s.
227 * @return The match/value Result.
230 public Result next(CharSequence s, int sIndex, int sLimit) {
237 return Result.NO_MATCH;
239 int length=remainingMatchLength_; // Actual remaining match length minus 1.
241 // Fetch the next input unit, if there is one.
242 // Continue a linear-match node.
246 remainingMatchLength_=length;
249 return (length<0 && (node=chars_.charAt(pos))>=kMinValueLead) ?
250 valueResults_[node>>15] : Result.NO_VALUE;
252 inUnit=s.charAt(sIndex++);
254 remainingMatchLength_=length;
257 if(inUnit!=chars_.charAt(pos)) {
259 return Result.NO_MATCH;
264 int node=chars_.charAt(pos++);
266 if(node<kMinLinearMatch) {
267 Result result=branchNext(pos, node, inUnit);
268 if(result==Result.NO_MATCH) {
269 return Result.NO_MATCH;
271 // Fetch the next input unit, if there is one.
275 if(result==Result.FINAL_VALUE) {
276 // No further matching units.
278 return Result.NO_MATCH;
280 inUnit=s.charAt(sIndex++);
281 pos=pos_; // branchNext() advanced pos and wrote it to pos_ .
282 node=chars_.charAt(pos++);
283 } else if(node<kMinValueLead) {
284 // Match length+1 units.
285 length=node-kMinLinearMatch; // Actual match length minus 1.
286 if(inUnit!=chars_.charAt(pos)) {
288 return Result.NO_MATCH;
293 } else if((node&kValueIsFinal)!=0) {
294 // No further matching units.
296 return Result.NO_MATCH;
298 // Skip intermediate value.
299 pos=skipNodeValue(pos, node);
307 * Returns a matching string's value if called immediately after
308 * current()/first()/next() returned Result.INTERMEDIATE_VALUE or Result.FINAL_VALUE.
309 * getValue() can be called multiple times.
311 * Do not call getValue() after Result.NO_MATCH or Result.NO_VALUE!
312 * @return The value for the string so far.
315 public int getValue() /*const*/ {
317 int leadUnit=chars_.charAt(pos++);
318 assert(leadUnit>=kMinValueLead);
319 return (leadUnit&kValueIsFinal)!=0 ?
320 readValue(chars_, pos, leadUnit&0x7fff) : readNodeValue(chars_, pos, leadUnit);
324 * Determines whether all strings reachable from the current state
325 * map to the same value, and if so, returns that value.
326 * @return The unique value in bits 32..1 with bit 0 set,
327 * if all strings reachable from the current state
328 * map to the same value; otherwise returns 0.
331 public long getUniqueValue() /*const*/ {
336 // Skip the rest of a pending linear-match node.
337 long uniqueValue=findUniqueValue(chars_, pos+remainingMatchLength_+1, 0);
338 // Ignore internally used bits 63..33; extend the actual value's sign bit from bit 32.
339 return (uniqueValue<<31)>>31;
343 * Finds each char which continues the string from the current state.
344 * That is, each char c for which it would be next(c)!=Result.NO_MATCH now.
345 * @param out Each next char is appended to this object.
346 * (Only uses the out.append(c) method.)
347 * @return The number of chars which continue the string from here.
350 public int getNextChars(Appendable out) /*const*/ {
355 if(remainingMatchLength_>=0) {
356 append(out, chars_.charAt(pos)); // Next unit of a pending linear-match node.
359 int node=chars_.charAt(pos++);
360 if(node>=kMinValueLead) {
361 if((node&kValueIsFinal)!=0) {
364 pos=skipNodeValue(pos, node);
368 if(node<kMinLinearMatch) {
370 node=chars_.charAt(pos++);
372 getNextBranchChars(chars_, pos, ++node, out);
375 // First unit of the linear-match node.
376 append(out, chars_.charAt(pos));
382 * Iterates from the current state of this trie.
383 * @return A new CharsTrie.Iterator.
386 public Iterator iterator() {
387 return new Iterator(chars_, pos_, remainingMatchLength_, 0);
391 * Iterates from the current state of this trie.
392 * @param maxStringLength If 0, the iterator returns full strings.
393 * Otherwise, the iterator returns strings with this maximum length.
394 * @return A new CharsTrie.Iterator.
397 public Iterator iterator(int maxStringLength) {
398 return new Iterator(chars_, pos_, remainingMatchLength_, maxStringLength);
402 * Iterates from the root of a char-serialized BytesTrie.
403 * @param trieChars CharSequence that contains the serialized trie.
404 * @param offset Root offset of the trie in the CharSequence.
405 * @param maxStringLength If 0, the iterator returns full strings.
406 * Otherwise, the iterator returns strings with this maximum length.
407 * @return A new CharsTrie.Iterator.
410 public static Iterator iterator(CharSequence trieChars, int offset, int maxStringLength) {
411 return new Iterator(trieChars, offset, -1, maxStringLength);
415 * Return value type for the Iterator.
418 public static final class Entry {
423 public CharSequence chars;
425 * The value associated with the string.
435 * Iterator for all of the (string, value) pairs in a CharsTrie.
438 public static final class Iterator implements java.util.Iterator<Entry> {
439 private Iterator(CharSequence trieChars, int offset, int remainingMatchLength, int maxStringLength) {
441 pos_=initialPos_=offset;
442 remainingMatchLength_=initialRemainingMatchLength_=remainingMatchLength;
443 maxLength_=maxStringLength;
444 int length=remainingMatchLength_; // Actual remaining match length minus 1.
446 // Pending linear-match node, append remaining bytes to str_.
448 if(maxLength_>0 && length>maxLength_) {
449 length=maxLength_; // This will leave remainingMatchLength>=0 as a signal.
451 str_.append(chars_, pos_, pos_+length);
453 remainingMatchLength_-=length;
458 * Resets this iterator to its initial state.
462 public Iterator reset() {
464 remainingMatchLength_=initialRemainingMatchLength_;
466 int length=remainingMatchLength_+1; // Remaining match length.
467 if(maxLength_>0 && length>maxLength_) {
470 str_.setLength(length);
472 remainingMatchLength_-=length;
478 * @return true if there are more elements.
481 public boolean hasNext() /*const*/ { return pos_>=0 || !stack_.isEmpty(); }
484 * Finds the next (string, value) pair if there is one.
486 * If the string is truncated to the maximum length and does not
487 * have a real value, then the value is set to -1.
488 * In this case, this "not a real value" is indistinguishable from
489 * a real value of -1.
490 * @return An Entry with the string and value of the next element.
491 * @throws NoSuchElementException - iteration has no more elements.
494 public Entry next() {
497 if(stack_.isEmpty()) {
498 throw new NoSuchElementException();
500 // Pop the state off the stack and continue with the next outbound edge of
502 long top=stack_.remove(stack_.size()-1);
505 str_.setLength(length&0xffff);
508 pos=branchNext(pos, length);
510 return entry_; // Reached a final value.
513 str_.append(chars_.charAt(pos++));
516 if(remainingMatchLength_>=0) {
517 // We only get here if we started in a pending linear-match node
518 // with more than maxLength remaining units.
519 return truncateAndStop();
522 int node=chars_.charAt(pos++);
523 if(node>=kMinValueLead) {
525 pos=skipNodeValue(pos, node);
529 // Deliver value for the string so far.
530 boolean isFinal=(node&kValueIsFinal)!=0;
532 entry_.value=readValue(chars_, pos, node&0x7fff);
534 entry_.value=readNodeValue(chars_, pos, node);
536 if(isFinal || (maxLength_>0 && str_.length()==maxLength_)) {
539 // We cannot skip the value right here because it shares its
540 // lead unit with a match node which we have to evaluate
542 // Instead, keep pos_ on the node lead unit itself.
550 if(maxLength_>0 && str_.length()==maxLength_) {
551 return truncateAndStop();
553 if(node<kMinLinearMatch) {
555 node=chars_.charAt(pos++);
557 pos=branchNext(pos, node+1);
559 return entry_; // Reached a final value.
562 // Linear-match node, append length units to str_.
563 int length=node-kMinLinearMatch+1;
564 if(maxLength_>0 && str_.length()+length>maxLength_) {
565 str_.append(chars_, pos, pos+maxLength_-str_.length());
566 return truncateAndStop();
568 str_.append(chars_, pos, pos+length);
575 * Iterator.remove() is not supported.
576 * @throws UnsupportedOperationException (always)
579 public void remove() {
580 throw new UnsupportedOperationException();
583 private Entry truncateAndStop() {
585 // We reset entry_.chars every time we return entry_
586 // just because the caller might have modified the Entry.
588 entry_.value=-1; // no real value for str
592 private int branchNext(int pos, int length) {
593 while(length>kMaxBranchLinearSubNodeLength) {
594 ++pos; // ignore the comparison unit
595 // Push state for the greater-or-equal edge.
596 stack_.add(((long)skipDelta(chars_, pos)<<32)|((length-(length>>1))<<16)|str_.length());
597 // Follow the less-than edge.
599 pos=jumpByDelta(chars_, pos);
601 // List of key-value pairs where values are either final values or jump deltas.
602 // Read the first (key, value) pair.
603 char trieUnit=chars_.charAt(pos++);
604 int node=chars_.charAt(pos++);
605 boolean isFinal=(node&kValueIsFinal)!=0;
606 int value=readValue(chars_, pos, node&=0x7fff);
607 pos=skipValue(pos, node);
608 stack_.add(((long)pos<<32)|((length-1)<<16)|str_.length());
609 str_.append(trieUnit);
620 private CharSequence chars_;
622 private int initialPos_;
623 private int remainingMatchLength_;
624 private int initialRemainingMatchLength_;
625 private boolean skipValue_; // Skip intermediate value which was already delivered.
627 private StringBuilder str_=new StringBuilder();
628 private int maxLength_;
629 private Entry entry_=new Entry();
631 // The stack stores longs for backtracking to another
632 // outbound edge of a branch node.
633 // Each long has the offset in chars_ in bits 62..32,
634 // the str_.length() from before the node in bits 15..0,
635 // and the remaining branch length in bits 31..16.
636 // (We could store the remaining branch length minus 1 in bits 30..16 and not use bit 31,
637 // but the code looks more confusing that way.)
638 private ArrayList<Long> stack_=new ArrayList<Long>();
641 private void stop() {
645 // Reads a compact 32-bit integer.
646 // pos is already after the leadUnit, and the lead unit has bit 15 reset.
647 private static int readValue(CharSequence chars, int pos, int leadUnit) {
649 if(leadUnit<kMinTwoUnitValueLead) {
651 } else if(leadUnit<kThreeUnitValueLead) {
652 value=((leadUnit-kMinTwoUnitValueLead)<<16)|chars.charAt(pos);
654 value=(chars.charAt(pos)<<16)|chars.charAt(pos+1);
658 private static int skipValue(int pos, int leadUnit) {
659 if(leadUnit>=kMinTwoUnitValueLead) {
660 if(leadUnit<kThreeUnitValueLead) {
668 private static int skipValue(CharSequence chars, int pos) {
669 int leadUnit=chars.charAt(pos++);
670 return skipValue(pos, leadUnit&0x7fff);
673 private static int readNodeValue(CharSequence chars, int pos, int leadUnit) {
674 assert(kMinValueLead<=leadUnit && leadUnit<kValueIsFinal);
676 if(leadUnit<kMinTwoUnitNodeValueLead) {
677 value=(leadUnit>>6)-1;
678 } else if(leadUnit<kThreeUnitNodeValueLead) {
679 value=(((leadUnit&0x7fc0)-kMinTwoUnitNodeValueLead)<<10)|chars.charAt(pos);
681 value=(chars.charAt(pos)<<16)|chars.charAt(pos+1);
685 private static int skipNodeValue(int pos, int leadUnit) {
686 assert(kMinValueLead<=leadUnit && leadUnit<kValueIsFinal);
687 if(leadUnit>=kMinTwoUnitNodeValueLead) {
688 if(leadUnit<kThreeUnitNodeValueLead) {
697 private static int jumpByDelta(CharSequence chars, int pos) {
698 int delta=chars.charAt(pos++);
699 if(delta>=kMinTwoUnitDeltaLead) {
700 if(delta==kThreeUnitDeltaLead) {
701 delta=(chars.charAt(pos)<<16)|chars.charAt(pos+1);
704 delta=((delta-kMinTwoUnitDeltaLead)<<16)|chars.charAt(pos++);
710 private static int skipDelta(CharSequence chars, int pos) {
711 int delta=chars.charAt(pos++);
712 if(delta>=kMinTwoUnitDeltaLead) {
713 if(delta==kThreeUnitDeltaLead) {
722 private static Result[] valueResults_={ Result.INTERMEDIATE_VALUE, Result.FINAL_VALUE };
724 // Handles a branch node for both next(unit) and next(string).
725 private Result branchNext(int pos, int length, int inUnit) {
726 // Branch according to the current unit.
728 length=chars_.charAt(pos++);
731 // The length of the branch is the number of units to select from.
732 // The data structure encodes a binary search.
733 while(length>kMaxBranchLinearSubNodeLength) {
734 if(inUnit<chars_.charAt(pos++)) {
736 pos=jumpByDelta(chars_, pos);
738 length=length-(length>>1);
739 pos=skipDelta(chars_, pos);
742 // Drop down to linear search for the last few units.
743 // length>=2 because the loop body above sees length>kMaxBranchLinearSubNodeLength>=3
744 // and divides length by 2.
746 if(inUnit==chars_.charAt(pos++)) {
748 int node=chars_.charAt(pos);
749 if((node&kValueIsFinal)!=0) {
750 // Leave the final value for getValue() to read.
751 result=Result.FINAL_VALUE;
753 // Use the non-final value as the jump delta.
755 // int delta=readValue(pos, node);
757 if(node<kMinTwoUnitValueLead) {
759 } else if(node<kThreeUnitValueLead) {
760 delta=((node-kMinTwoUnitValueLead)<<16)|chars_.charAt(pos++);
762 delta=(chars_.charAt(pos)<<16)|chars_.charAt(pos+1);
767 node=chars_.charAt(pos);
768 result= node>=kMinValueLead ? valueResults_[node>>15] : Result.NO_VALUE;
774 pos=skipValue(chars_, pos);
776 if(inUnit==chars_.charAt(pos++)) {
778 int node=chars_.charAt(pos);
779 return node>=kMinValueLead ? valueResults_[node>>15] : Result.NO_VALUE;
782 return Result.NO_MATCH;
786 // Requires remainingLength_<0.
787 private Result nextImpl(int pos, int inUnit) {
788 int node=chars_.charAt(pos++);
790 if(node<kMinLinearMatch) {
791 return branchNext(pos, node, inUnit);
792 } else if(node<kMinValueLead) {
793 // Match the first of length+1 units.
794 int length=node-kMinLinearMatch; // Actual match length minus 1.
795 if(inUnit==chars_.charAt(pos++)) {
796 remainingMatchLength_=--length;
798 return (length<0 && (node=chars_.charAt(pos))>=kMinValueLead) ?
799 valueResults_[node>>15] : Result.NO_VALUE;
804 } else if((node&kValueIsFinal)!=0) {
805 // No further matching units.
808 // Skip intermediate value.
809 pos=skipNodeValue(pos, node);
814 return Result.NO_MATCH;
817 // Helper functions for getUniqueValue().
818 // Recursively finds a unique value (or whether there is not a unique one)
820 // uniqueValue: On input, same as for getUniqueValue()/findUniqueValue().
821 // On return, if not 0, then bits 63..33 contain the updated non-negative pos.
822 private static long findUniqueValueFromBranch(CharSequence chars, int pos, int length,
824 while(length>kMaxBranchLinearSubNodeLength) {
825 ++pos; // ignore the comparison unit
826 uniqueValue=findUniqueValueFromBranch(chars, jumpByDelta(chars, pos), length>>1, uniqueValue);
830 length=length-(length>>1);
831 pos=skipDelta(chars, pos);
834 ++pos; // ignore a comparison unit
836 int node=chars.charAt(pos++);
837 boolean isFinal=(node&kValueIsFinal)!=0;
839 int value=readValue(chars, pos, node);
840 pos=skipValue(pos, node);
843 if(value!=(int)(uniqueValue>>1)) {
847 uniqueValue=((long)value<<1)|1;
850 uniqueValue=findUniqueValue(chars, pos+value, uniqueValue);
856 // ignore the last comparison byte
857 return ((long)(pos+1)<<33)|(uniqueValue&0x1ffffffffL);
859 // Recursively finds a unique value (or whether there is not a unique one)
860 // starting from a position on a node lead unit.
861 // uniqueValue: If there is one, then bits 32..1 contain the value and bit 0 is set.
862 // Otherwise, uniqueValue is 0. Bits 63..33 are ignored.
863 private static long findUniqueValue(CharSequence chars, int pos, long uniqueValue) {
864 int node=chars.charAt(pos++);
866 if(node<kMinLinearMatch) {
868 node=chars.charAt(pos++);
870 uniqueValue=findUniqueValueFromBranch(chars, pos, node+1, uniqueValue);
874 pos=(int)(uniqueValue>>>33);
875 node=chars.charAt(pos++);
876 } else if(node<kMinValueLead) {
878 pos+=node-kMinLinearMatch+1; // Ignore the match units.
879 node=chars.charAt(pos++);
881 boolean isFinal=(node&kValueIsFinal)!=0;
884 value=readValue(chars, pos, node&0x7fff);
886 value=readNodeValue(chars, pos, node);
889 if(value!=(int)(uniqueValue>>1)) {
893 uniqueValue=((long)value<<1)|1;
898 pos=skipNodeValue(pos, node);
904 // Helper functions for getNextChars().
905 // getNextChars() when pos is on a branch node.
906 private static void getNextBranchChars(CharSequence chars, int pos, int length, Appendable out) {
907 while(length>kMaxBranchLinearSubNodeLength) {
908 ++pos; // ignore the comparison unit
909 getNextBranchChars(chars, jumpByDelta(chars, pos), length>>1, out);
910 length=length-(length>>1);
911 pos=skipDelta(chars, pos);
914 append(out, chars.charAt(pos++));
915 pos=skipValue(chars, pos);
917 append(out, chars.charAt(pos));
919 private static void append(Appendable out, int c) {
922 } catch(IOException e) {
923 throw new RuntimeException(e);
927 // CharsTrie data structure
929 // The trie consists of a series of char-serialized nodes for incremental
930 // Unicode string/char sequence matching. (char=16-bit unsigned integer)
931 // The root node is at the beginning of the trie data.
933 // Types of nodes are distinguished by their node lead unit ranges.
934 // After each node, except a final-value node, another node follows to
935 // encode match values or continue matching further units.
938 // - Final-value node: Stores a 32-bit integer in a compact, variable-length format.
939 // The value is for the string/char sequence so far.
940 // - Match node, optionally with an intermediate value in a different compact format.
941 // The value, if present, is for the string/char sequence so far.
943 // Aside from the value, which uses the node lead unit's high bits:
945 // - Linear-match node: Matches a number of units.
946 // - Branch node: Branches to other nodes according to the current input unit.
947 // The node unit is the length of the branch (number of units to select from)
948 // minus 1. It is followed by a sub-node:
949 // - If the length is at most kMaxBranchLinearSubNodeLength, then
950 // there are length-1 (key, value) pairs and then one more comparison unit.
951 // If one of the key units matches, then the value is either a final value for
952 // the string so far, or a "jump" delta to the next node.
953 // If the last unit matches, then matching continues with the next node.
954 // (Values have the same encoding as final-value nodes.)
955 // - If the length is greater than kMaxBranchLinearSubNodeLength, then
956 // there is one unit and one "jump" delta.
957 // If the input unit is less than the sub-node unit, then "jump" by delta to
958 // the next sub-node which will have a length of length/2.
959 // (The delta has its own compact encoding.)
960 // Otherwise, skip the "jump" delta to the next sub-node
961 // which will have a length of length-length/2.
963 // Match-node lead unit values, after masking off intermediate-value bits:
965 // 0000..002f: Branch node. If node!=0 then the length is node+1, otherwise
966 // the length is one more than the next unit.
968 // For a branch sub-node with at most this many entries, we drop down
969 // to a linear search.
970 /*package*/ static final int kMaxBranchLinearSubNodeLength=5;
972 // 0030..003f: Linear-match node, match 1..16 units and continue reading the next node.
973 /*package*/ static final int kMinLinearMatch=0x30;
974 /*package*/ static final int kMaxLinearMatchLength=0x10;
976 // Match-node lead unit bits 14..6 for the optional intermediate value.
977 // If these bits are 0, then there is no intermediate value.
978 // Otherwise, see the *NodeValue* constants below.
979 /*package*/ static final int kMinValueLead=kMinLinearMatch+kMaxLinearMatchLength; // 0x0040
980 /*package*/ static final int kNodeTypeMask=kMinValueLead-1; // 0x003f
982 // A final-value node has bit 15 set.
983 /*package*/ static final int kValueIsFinal=0x8000;
985 // Compact value: After testing and masking off bit 15, use the following thresholds.
986 /*package*/ static final int kMaxOneUnitValue=0x3fff;
988 /*package*/ static final int kMinTwoUnitValueLead=kMaxOneUnitValue+1; // 0x4000
989 /*package*/ static final int kThreeUnitValueLead=0x7fff;
991 /*package*/ static final int kMaxTwoUnitValue=((kThreeUnitValueLead-kMinTwoUnitValueLead)<<16)-1; // 0x3ffeffff
993 // Compact intermediate-value integer, lead unit shared with a branch or linear-match node.
994 /*package*/ static final int kMaxOneUnitNodeValue=0xff;
995 /*package*/ static final int kMinTwoUnitNodeValueLead=kMinValueLead+((kMaxOneUnitNodeValue+1)<<6); // 0x4040
996 /*package*/ static final int kThreeUnitNodeValueLead=0x7fc0;
998 /*package*/ static final int kMaxTwoUnitNodeValue=
999 ((kThreeUnitNodeValueLead-kMinTwoUnitNodeValueLead)<<10)-1; // 0xfdffff
1001 // Compact delta integers.
1002 /*package*/ static final int kMaxOneUnitDelta=0xfbff;
1003 /*package*/ static final int kMinTwoUnitDeltaLead=kMaxOneUnitDelta+1; // 0xfc00
1004 /*package*/ static final int kThreeUnitDeltaLead=0xffff;
1006 /*package*/ static final int kMaxTwoUnitDelta=((kThreeUnitDeltaLead-kMinTwoUnitDeltaLead)<<16)-1; // 0x03feffff
1008 // Fixed value referencing the CharsTrie words.
1009 private CharSequence chars_;
1012 // Iterator variables.
1014 // Pointer to next trie unit to read. NULL if no more matches.
1016 // Remaining length of a linear-match node, minus 1. Negative if not in such a node.
1017 private int remainingMatchLength_;