2 *******************************************************************************
3 * Copyright (C) 1996-2010, International Business Machines Corporation and *
4 * others. All Rights Reserved. *
5 *******************************************************************************
7 package com.ibm.icu.text;
9 import java.util.Iterator;
/**
 * UnicodeSetIterator iterates over the contents of a UnicodeSet. It
 * iterates over either code points or code point ranges. After all
 * code points or ranges have been returned, it returns the
 * multicharacter strings of the UnicodeSet, if any.
 *
 * <p>To iterate over code points and multicharacter strings,
 * use a loop like this:
 * <pre>
 * for (UnicodeSetIterator it = new UnicodeSetIterator(set); it.next();) {
 *   processString(it.getString());
 * }
 * </pre>
 *
 * <p>To iterate over code point ranges, use a loop like this:
 * <pre>
 * for (UnicodeSetIterator it = new UnicodeSetIterator(set); it.nextRange();) {
 *   if (it.codepoint != UnicodeSetIterator.IS_STRING) {
 *     processCodepointRange(it.codepoint, it.codepointEnd);
 *   } else {
 *     processString(it.getString());
 *   }
 * }
 * </pre>
 */
38 public class UnicodeSetIterator {
41 * Value of <tt>codepoint</tt> if the iterator points to a string.
42 * If <tt>codepoint == IS_STRING</tt>, then examine
43 * <tt>string</tt> for the current iteration result.
46 public static int IS_STRING = -1;
49 * Current code point, or the special value <tt>IS_STRING</tt>, if
50 * the iterator points to a string.
56 * When iterating over ranges using <tt>nextRange()</tt>,
57 * <tt>codepointEnd</tt> contains the inclusive end of the
58 * iteration range, if <tt>codepoint != IS_STRING</tt>. If
59 * iterating over code points using <tt>next()</tt>, or if
60 * <tt>codepoint == IS_STRING</tt>, then the value of
61 * <tt>codepointEnd</tt> is undefined.
64 public int codepointEnd;
67 * If <tt>codepoint == IS_STRING</tt>, then <tt>string</tt> points
68 * to the current string. If <tt>codepoint != IS_STRING</tt>, the
69 * value of <tt>string</tt> is undefined.
/**
 * Create an iterator over the given set.
 * @param set set to iterate over
 */
public UnicodeSetIterator(UnicodeSet set) {
    // Same initialisation path as the no-argument constructor, but
    // bound to the caller's set.
    reset(set);
}
/**
 * Create an iterator over nothing. <tt>next()</tt> and
 * <tt>nextRange()</tt> return false. This is a convenience
 * constructor allowing the target to be set later.
 */
public UnicodeSetIterator() {
    // Iterate over a freshly constructed set until reset(UnicodeSet)
    // supplies a real target.
    reset(new UnicodeSet());
}
94 * Returns the next element in the set, either a single code point
95 * or a string. If there are no more elements in the set, return
96 * false. If <tt>codepoint == IS_STRING</tt>, the value is a
97 * string in the <tt>string</tt> field. Otherwise the value is a
98 * single code point in the <tt>codepoint</tt> field.
100 * <p>The order of iteration is all code points in sorted order,
101 * followed by all strings sorted order. <tt>codepointEnd</tt> is
102 * undefined after calling this method. <tt>string</tt> is
103 * undefined unless <tt>codepoint == IS_STRING</tt>. Do not mix
104 * calls to <tt>next()</tt> and <tt>nextRange()</tt> without
105 * calling <tt>reset()</tt> between them. The results of doing so
108 * @return true if there was another element in the set and this
109 * object contains the element.
112 public boolean next() {
113 if (nextElement <= endElement) {
114 codepoint = codepointEnd = nextElement++;
117 if (range < endRange) {
119 codepoint = codepointEnd = nextElement++;
123 // stringIterator == null iff there are no string elements remaining
125 if (stringIterator == null) {
128 codepoint = IS_STRING; // signal that value is actually a string
129 string = stringIterator.next();
130 if (!stringIterator.hasNext()) {
131 stringIterator = null;
137 * Returns the next element in the set, either a code point range
138 * or a string. If there are no more elements in the set, return
139 * false. If <tt>codepoint == IS_STRING</tt>, the value is a
140 * string in the <tt>string</tt> field. Otherwise the value is a
141 * range of one or more code points from <tt>codepoint</tt> to
142 * <tt>codepointeEnd</tt> inclusive.
144 * <p>The order of iteration is all code points ranges in sorted
145 * order, followed by all strings sorted order. Ranges are
146 * disjoint and non-contiguous. <tt>string</tt> is undefined
147 * unless <tt>codepoint == IS_STRING</tt>. Do not mix calls to
148 * <tt>next()</tt> and <tt>nextRange()</tt> without calling
149 * <tt>reset()</tt> between them. The results of doing so are
152 * @return true if there was another element in the set and this
153 * object contains the element.
156 public boolean nextRange() {
157 if (nextElement <= endElement) {
158 codepointEnd = endElement;
159 codepoint = nextElement;
160 nextElement = endElement+1;
163 if (range < endRange) {
165 codepointEnd = endElement;
166 codepoint = nextElement;
167 nextElement = endElement+1;
171 // stringIterator == null iff there are no string elements remaining
173 if (stringIterator == null) {
176 codepoint = IS_STRING; // signal that value is actually a string
177 string = stringIterator.next();
178 if (!stringIterator.hasNext()) {
179 stringIterator = null;
185 * Sets this iterator to visit the elements of the given set and
186 * resets it to the start of that set. The iterator is valid only
187 * so long as <tt>set</tt> is valid.
188 * @param uset the set to iterate over.
191 public void reset(UnicodeSet uset) {
197 * Resets this iterator to the start of the set.
200 public void reset() {
201 endRange = set.getRangeCount() - 1;
208 stringIterator = null;
209 if (set.strings != null) {
210 stringIterator = set.strings.iterator();
211 if (!stringIterator.hasNext()) {
212 stringIterator = null;
218 * Gets the current string from the iterator. Only use after calling next(), not nextRange().
221 public String getString() {
222 if (codepoint != IS_STRING) {
223 return UTF16.valueOf(codepoint);
228 // ======================= PRIVATES ===========================
230 private UnicodeSet set;
231 private int endRange = 0;
232 private int range = 0;
236 * @deprecated This API is ICU internal only.
238 public UnicodeSet getSet() {
244 * @deprecated This API is ICU internal only.
246 protected int endElement;
249 * @deprecated This API is ICU internal only.
251 protected int nextElement;
252 private Iterator<String> stringIterator = null;
255 * Invariant: stringIterator is null when there are no (more) strings remaining
260 * @deprecated This API is ICU internal only.
262 protected void loadRange(int aRange) {
263 nextElement = set.getRangeStart(aRange);
264 endElement = set.getRangeEnd(aRange);