/*
 *******************************************************************************
 * Copyright (C) 1996-2010, International Business Machines Corporation and    *
 * others. All Rights Reserved.                                                *
 *******************************************************************************
 */
\r
7 package com.ibm.icu.text;
\r
9 import java.util.Iterator;
\r
12 * UnicodeSetIterator iterates over the contents of a UnicodeSet. It
\r
13 * iterates over either code points or code point ranges. After all
\r
14 * code points or ranges have been returned, it returns the
\r
15 * multicharacter strings of the UnicodSet, if any.
\r
17 * <p>To iterate over code points and multicharacter strings,
\r
18 * use a loop like this:
\r
20 * for (UnicodeSetIterator it = new UnicodeSetIterator(set); it.next();) {
\r
21 * processString(it.getString());
\r
25 * <p>To iterate over code point ranges, use a loop like this:
\r
27 * for (UnicodeSetIterator it = new UnicodeSetIterator(set); it.nextRange();) {
\r
28 * if (it.codepoint != UnicodeSetIterator.IS_STRING) {
\r
29 * processCodepointRange(it.codepoint, it.codepointEnd);
\r
31 * processString(it.getString());
\r
38 public class UnicodeSetIterator {
\r
41 * Value of <tt>codepoint</tt> if the iterator points to a string.
\r
42 * If <tt>codepoint == IS_STRING</tt>, then examine
\r
43 * <tt>string</tt> for the current iteration result.
\r
46 public static int IS_STRING = -1;
\r
49 * Current code point, or the special value <tt>IS_STRING</tt>, if
\r
50 * the iterator points to a string.
\r
53 public int codepoint;
\r
56 * When iterating over ranges using <tt>nextRange()</tt>,
\r
57 * <tt>codepointEnd</tt> contains the inclusive end of the
\r
58 * iteration range, if <tt>codepoint != IS_STRING</tt>. If
\r
59 * iterating over code points using <tt>next()</tt>, or if
\r
60 * <tt>codepoint == IS_STRING</tt>, then the value of
\r
61 * <tt>codepointEnd</tt> is undefined.
\r
64 public int codepointEnd;
\r
67 * If <tt>codepoint == IS_STRING</tt>, then <tt>string</tt> points
\r
68 * to the current string. If <tt>codepoint != IS_STRING</tt>, the
\r
69 * value of <tt>string</tt> is undefined.
\r
72 public String string;
\r
75 * Create an iterator over the given set.
\r
76 * @param set set to iterate over
\r
79 public UnicodeSetIterator(UnicodeSet set) {
\r
84 * Create an iterator over nothing. <tt>next()</tt> and
\r
85 * <tt>nextRange()</tt> return false. This is a convenience
\r
86 * constructor allowing the target to be set later.
\r
89 public UnicodeSetIterator() {
\r
90 reset(new UnicodeSet());
\r
94 * Returns the next element in the set, either a single code point
\r
95 * or a string. If there are no more elements in the set, return
\r
96 * false. If <tt>codepoint == IS_STRING</tt>, the value is a
\r
97 * string in the <tt>string</tt> field. Otherwise the value is a
\r
98 * single code point in the <tt>codepoint</tt> field.
\r
100 * <p>The order of iteration is all code points in sorted order,
\r
101 * followed by all strings sorted order. <tt>codepointEnd</tt> is
\r
102 * undefined after calling this method. <tt>string</tt> is
\r
103 * undefined unless <tt>codepoint == IS_STRING</tt>. Do not mix
\r
104 * calls to <tt>next()</tt> and <tt>nextRange()</tt> without
\r
105 * calling <tt>reset()</tt> between them. The results of doing so
\r
108 * @return true if there was another element in the set and this
\r
109 * object contains the element.
\r
112 public boolean next() {
\r
113 if (nextElement <= endElement) {
\r
114 codepoint = codepointEnd = nextElement++;
\r
117 if (range < endRange) {
\r
118 loadRange(++range);
\r
119 codepoint = codepointEnd = nextElement++;
\r
123 // stringIterator == null iff there are no string elements remaining
\r
125 if (stringIterator == null) {
\r
128 codepoint = IS_STRING; // signal that value is actually a string
\r
129 string = stringIterator.next();
\r
130 if (!stringIterator.hasNext()) {
\r
131 stringIterator = null;
\r
137 * Returns the next element in the set, either a code point range
\r
138 * or a string. If there are no more elements in the set, return
\r
139 * false. If <tt>codepoint == IS_STRING</tt>, the value is a
\r
140 * string in the <tt>string</tt> field. Otherwise the value is a
\r
141 * range of one or more code points from <tt>codepoint</tt> to
\r
142 * <tt>codepointeEnd</tt> inclusive.
\r
144 * <p>The order of iteration is all code points ranges in sorted
\r
145 * order, followed by all strings sorted order. Ranges are
\r
146 * disjoint and non-contiguous. <tt>string</tt> is undefined
\r
147 * unless <tt>codepoint == IS_STRING</tt>. Do not mix calls to
\r
148 * <tt>next()</tt> and <tt>nextRange()</tt> without calling
\r
149 * <tt>reset()</tt> between them. The results of doing so are
\r
152 * @return true if there was another element in the set and this
\r
153 * object contains the element.
\r
156 public boolean nextRange() {
\r
157 if (nextElement <= endElement) {
\r
158 codepointEnd = endElement;
\r
159 codepoint = nextElement;
\r
160 nextElement = endElement+1;
\r
163 if (range < endRange) {
\r
164 loadRange(++range);
\r
165 codepointEnd = endElement;
\r
166 codepoint = nextElement;
\r
167 nextElement = endElement+1;
\r
171 // stringIterator == null iff there are no string elements remaining
\r
173 if (stringIterator == null) {
\r
176 codepoint = IS_STRING; // signal that value is actually a string
\r
177 string = stringIterator.next();
\r
178 if (!stringIterator.hasNext()) {
\r
179 stringIterator = null;
\r
185 * Sets this iterator to visit the elements of the given set and
\r
186 * resets it to the start of that set. The iterator is valid only
\r
187 * so long as <tt>set</tt> is valid.
\r
188 * @param uset the set to iterate over.
\r
191 public void reset(UnicodeSet uset) {
\r
197 * Resets this iterator to the start of the set.
\r
200 public void reset() {
\r
201 endRange = set.getRangeCount() - 1;
\r
205 if (endRange >= 0) {
\r
208 stringIterator = null;
\r
209 if (set.strings != null) {
\r
210 stringIterator = set.strings.iterator();
\r
211 if (!stringIterator.hasNext()) {
\r
212 stringIterator = null;
\r
218 * Gets the current string from the iterator. Only use after calling next(), not nextRange().
\r
221 public String getString() {
\r
222 if (codepoint != IS_STRING) {
\r
223 return UTF16.valueOf(codepoint);
\r
228 // ======================= PRIVATES ===========================
\r
230 private UnicodeSet set;
\r
231 private int endRange = 0;
\r
232 private int range = 0;
\r
236 * @deprecated This API is ICU internal only.
\r
238 public UnicodeSet getSet() {
\r
244 * @deprecated This API is ICU internal only.
\r
246 protected int endElement;
\r
249 * @deprecated This API is ICU internal only.
\r
251 protected int nextElement;
\r
252 private Iterator<String> stringIterator = null;
\r
255 * Invariant: stringIterator is null when there are no (more) strings remaining
\r
260 * @deprecated This API is ICU internal only.
\r
262 protected void loadRange(int aRange) {
\r
263 nextElement = set.getRangeStart(aRange);
\r
264 endElement = set.getRangeEnd(aRange);
\r