2 *******************************************************************************
\r
3 * Copyright (C) 1996-2010, International Business Machines Corporation and *
\r
4 * others. All Rights Reserved. *
\r
5 *******************************************************************************
\r
7 package com.ibm.icu.dev.test.util;
\r
9 import java.util.Iterator;
\r
10 import java.util.Set;
\r
12 import com.ibm.icu.text.UTF16;
\r
15 * UnicodeMapIterator iterates over the contents of a UnicodeMap. It
\r
16 * iterates over either code points or code point ranges. After all
\r
17 * code points or ranges have been returned, it returns the
\r
18 * multicharacter strings of the UnicodeMap, if any.
\r
20 * <p>To iterate over code points, use a loop like this:
\r
22 * UnicodeMapIterator it = new UnicodeMapIterator(set);
\r
23 * while (it.next()) {
\r
24 * if (it.codepoint != UnicodeMapIterator.IS_STRING) {
\r
25 * processCodepoint(it.codepoint);
\r
27 * processString(it.string);
\r
32 * <p>To iterate over code point ranges, use a loop like this:
\r
34 * UnicodeMapIterator it = new UnicodeMapIterator(set);
\r
35 * while (it.nextRange()) {
\r
36 * if (it.codepoint != UnicodeMapIterator.IS_STRING) {
\r
37 * processCodepointRange(it.codepoint, it.codepointEnd);
\r
39 * processString(it.string);
\r
45 public class UnicodeMapIterator<T> {
\r
48 * Value of <tt>codepoint</tt> if the iterator points to a string.
\r
49 * If <tt>codepoint == IS_STRING</tt>, then examine
\r
50 * <tt>string</tt> for the current iteration result.
\r
52 public static int IS_STRING = -1;
\r
55 * Current code point, or the special value <tt>IS_STRING</tt>, if
\r
56 * the iterator points to a string.
\r
58 public int codepoint;
\r
61 * When iterating over ranges using <tt>nextRange()</tt>,
\r
62 * <tt>codepointEnd</tt> contains the inclusive end of the
\r
63 * iteration range, if <tt>codepoint != IS_STRING</tt>. If
\r
64 * iterating over code points using <tt>next()</tt>, or if
\r
65 * <tt>codepoint == IS_STRING</tt>, then the value of
\r
66 * <tt>codepointEnd</tt> is undefined.
\r
68 public int codepointEnd;
\r
71 * If <tt>codepoint == IS_STRING</tt>, then <tt>string</tt> points
\r
72 * to the current string. If <tt>codepoint != IS_STRING</tt>, the
\r
73 * value of <tt>string</tt> is undefined.
\r
75 public String string;
\r
78 * The value associated with this element or range.
\r
83 * Create an iterator over the given set.
\r
84 * @param set set to iterate over
\r
86 public UnicodeMapIterator(UnicodeMap set) {
\r
91 * Create an iterator over nothing. <tt>next()</tt> and
\r
92 * <tt>nextRange()</tt> return false. This is a convenience
\r
93 * constructor allowing the target to be set later.
\r
95 public UnicodeMapIterator() {
\r
96 reset(new UnicodeMap());
\r
100 * Returns the next element in the set, either a single code point
\r
101 * or a string. If there are no more elements in the set, return
\r
102 * false. If <tt>codepoint == IS_STRING</tt>, the value is a
\r
103 * string in the <tt>string</tt> field. Otherwise the value is a
\r
104 * single code point in the <tt>codepoint</tt> field.
\r
106 * <p>The order of iteration is all code points in sorted order,
\r
107 * followed by all strings in sorted order. <tt>codepointEnd</tt> is
\r
108 * undefined after calling this method. <tt>string</tt> is
\r
109 * undefined unless <tt>codepoint == IS_STRING</tt>. Do not mix
\r
110 * calls to <tt>next()</tt> and <tt>nextRange()</tt> without
\r
111 * calling <tt>reset()</tt> between them. The results of doing so are undefined.
\r
114 * @return true if there was another element in the set and this
\r
115 * object contains the element.
\r
117 public boolean next() {
\r
118 if (nextElement <= endElement) {
\r
119 codepoint = codepointEnd = nextElement++;
\r
122 while (range < endRange) {
\r
123 if (loadRange(++range) == null) {
\r
126 codepoint = codepointEnd = nextElement++;
\r
130 // stringIterator == null iff there are no string elements remaining
\r
132 if (stringIterator == null) return false;
\r
133 codepoint = IS_STRING; // signal that value is actually a string
\r
134 string = (String)stringIterator.next();
\r
135 if (!stringIterator.hasNext()) stringIterator = null;
\r
140 * Returns the next element in the set, either a code point range
\r
141 * or a string. If there are no more elements in the set, return
\r
142 * false. If <tt>codepoint == IS_STRING</tt>, the value is a
\r
143 * string in the <tt>string</tt> field. Otherwise the value is a
\r
144 * range of one or more code points from <tt>codepoint</tt> to
\r
145 * <tt>codepointEnd</tt> inclusive.
\r
147 * <p>The order of iteration is all code point ranges in sorted
\r
148 * order, followed by all strings in sorted order. Ranges are
\r
149 * disjoint and non-contiguous. <tt>string</tt> is undefined
\r
150 * unless <tt>codepoint == IS_STRING</tt>. Do not mix calls to
\r
151 * <tt>next()</tt> and <tt>nextRange()</tt> without calling
\r
152 * <tt>reset()</tt> between them. The results of doing so are undefined.
\r
155 * @return true if there was another element in the set and this
\r
156 * object contains the element.
\r
158 public boolean nextRange() {
\r
159 if (nextElement <= endElement) {
\r
160 codepointEnd = endElement;
\r
161 codepoint = nextElement;
\r
162 nextElement = endElement+1;
\r
165 while (range < endRange) {
\r
166 if (loadRange(++range) == null) {
\r
169 codepointEnd = endElement;
\r
170 codepoint = nextElement;
\r
171 nextElement = endElement+1;
\r
175 // stringIterator == null iff there are no string elements remaining
\r
177 if (stringIterator == null) return false;
\r
178 codepoint = IS_STRING; // signal that value is actually a string
\r
179 string = (String)stringIterator.next();
\r
180 if (!stringIterator.hasNext()) stringIterator = null;
\r
185 * Sets this iterator to visit the elements of the given set and
\r
186 * resets it to the start of that set. The iterator is valid only
\r
187 * so long as <tt>set</tt> is valid.
\r
188 * @param set the set to iterate over.
\r
190 public void reset(UnicodeMap set) {
\r
196 * Resets this iterator to the start of the set.
\r
199 public UnicodeMapIterator<T> reset() {
\r
200 endRange = map.getRangeCount() - 1;
\r
201 // both next*() methods will test: if (nextElement <= endElement)
\r
202 // we set them to fail this test, which will cause them to load the first range
\r
207 stringIterator = null;
\r
208 Set<String> strings = map.getNonRangeStrings();
\r
209 if (strings != null) {
\r
210 stringIterator = strings.iterator();
\r
211 if (!stringIterator.hasNext()) stringIterator = null;
\r
218 * Gets the current string from the iterator. Only use after calling next(), not nextRange().
\r
220 public String getString() {
\r
221 if (codepoint != IS_STRING) {
\r
222 return UTF16.valueOf(codepoint);
\r
227 // ======================= PRIVATES ===========================
\r
229 private UnicodeMap<T> map;
\r
230 private int endRange = 0;
\r
231 private int range = 0;
\r
232 private Iterator<String> stringIterator = null;
\r
233 protected int endElement;
\r
234 protected int nextElement;
\r
237 * Invariant: stringIterator is null when there are no (more) strings remaining
\r
240 protected T loadRange(int range) {
\r
241 nextElement = map.getRangeStart(range);
\r
242 endElement = map.getRangeEnd(range);
\r
243 value = map.getRangeValue(range);
\r