2 ******************************************************************************
\r
3 * Copyright (C) 1996-2010, International Business Machines Corporation and *
\r
4 * others. All Rights Reserved. *
\r
5 ******************************************************************************
\r
8 package com.ibm.icu.lang;
\r
10 import com.ibm.icu.impl.UCharacterName;
\r
11 import com.ibm.icu.impl.UCharacterNameChoice;
\r
12 import com.ibm.icu.util.ValueIterator;
\r
15 * <p>Class enabling iteration of the codepoints and their names.</p>
\r
16 * <p>Result of each iteration contains a valid codepoint that has valid
\r
18 * <p>See UCharacter.getNameIterator() for an example of use.</p>
\r
20 * @since release 2.1, March 5 2002
\r
22 class UCharacterNameIterator implements ValueIterator
\r
24 // public methods ----------------------------------------------------
\r
27 * <p>Gets the next result for this iteration and returns
\r
28 * true if we are not at the end of the iteration, false otherwise.</p>
\r
29 * <p>If the returned boolean is false, the contents of element will not
\r
31 * @param element for storing the result codepoint and name
\r
32 * @return true if we are not at the end of the iteration, false otherwise.
\r
33 * @see ValueIterator.Element
\r
35 public boolean next(ValueIterator.Element element)
\r
37 if (m_current_ >= m_limit_) {
\r
41 if (m_choice_ == UCharacterNameChoice.UNICODE_CHAR_NAME ||
\r
42 m_choice_ == UCharacterNameChoice.EXTENDED_CHAR_NAME
\r
44 int length = m_name_.getAlgorithmLength();
\r
45 if (m_algorithmIndex_ < length) {
\r
46 while (m_algorithmIndex_ < length) {
\r
47 // find the algorithm range that could contain m_current_
\r
48 if (m_algorithmIndex_ < 0 ||
\r
49 m_name_.getAlgorithmEnd(m_algorithmIndex_) <
\r
51 m_algorithmIndex_ ++;
\r
58 if (m_algorithmIndex_ < length) {
\r
59 // interleave the data-driven ones with the algorithmic ones
\r
60 // iterate over all algorithmic ranges; assume that they are
\r
61 // in ascending order
\r
62 int start = m_name_.getAlgorithmStart(m_algorithmIndex_);
\r
63 if (m_current_ < start) {
\r
64 // this should get rid of those codepoints that are not
\r
65 // in the algorithmic range
\r
67 if (m_limit_ <= start) {
\r
70 if (!iterateGroup(element, end)) {
\r
76 // "if (m_current_ >= m_limit_)" would not return true
\r
77 // because it can never be reached due to:
\r
78 // 1) It has already been checked earlier
\r
79 // 2) When m_current_ is updated earlier, it returns true
\r
80 // 3) No updates on m_limit_*/
\r
81 if (m_current_ >= m_limit_) {
\r
82 // after iterateGroup fails, current codepoint may be
\r
83 // greater than limit
\r
87 element.integer = m_current_;
\r
88 element.value = m_name_.getAlgorithmName(m_algorithmIndex_,
\r
90 // reset the group index if we are in the algorithmic names
\r
97 // enumerate the character names after the last algorithmic range
\r
98 if (!iterateGroup(element, m_limit_)) {
\r
102 else if (m_choice_ == UCharacterNameChoice.EXTENDED_CHAR_NAME) {
\r
103 if (!iterateExtended(element, m_limit_)) {
\r
113 * <p>Resets the iterator to start iterating from the integer index
\r
114 * UCharacter.MIN_VALUE or X if a setRange(X, Y) has been called previously.
\r
117 public void reset()
\r
119 m_current_ = m_start_;
\r
120 m_groupIndex_ = -1;
\r
121 m_algorithmIndex_ = -1;
\r
125 * <p>Restricts the range of integers to iterate and resets the iteration
\r
126 * to begin at the index argument start.</p>
\r
127 * <p>If setRange(start, end) is not performed before next(element) is
\r
128 * called, the iteration will start from the integer index
\r
129 * UCharacter.MIN_VALUE and end at UCharacter.MAX_VALUE.</p>
\r
131 * If this range is set outside the range of UCharacter.MIN_VALUE and
\r
132 * UCharacter.MAX_VALUE, next(element) will always return false.
\r
134 * @param start first integer in range to iterate
\r
135 * @param limit 1 integer after the last integer in range
\r
136 * @exception IllegalArgumentException thrown when attempting to set an
\r
137 * illegal range, e.g. limit <= start
\r
139 public void setRange(int start, int limit)
\r
141 if (start >= limit) {
\r
142 throw new IllegalArgumentException(
\r
143 "start or limit has to be valid Unicode codepoints and start < limit");
\r
145 if (start < UCharacter.MIN_VALUE) {
\r
146 m_start_ = UCharacter.MIN_VALUE;
\r
152 if (limit > UCharacter.MAX_VALUE + 1) {
\r
153 m_limit_ = UCharacter.MAX_VALUE + 1;
\r
158 m_current_ = m_start_;
\r
161 // protected constructor ---------------------------------------------
\r
165 * @param name name data
\r
166 * @param choice name choice from the class
\r
167 * com.ibm.icu.lang.UCharacterNameChoice
\r
169 protected UCharacterNameIterator(UCharacterName name, int choice)
\r
172 throw new IllegalArgumentException("UCharacterName name argument cannot be null. Missing unames.icu?");
\r
175 // no explicit choice in UCharacter so no checks on choice
\r
176 m_choice_ = choice;
\r
177 m_start_ = UCharacter.MIN_VALUE;
\r
178 m_limit_ = UCharacter.MAX_VALUE + 1;
\r
179 m_current_ = m_start_;
\r
182 // private data members ---------------------------------------------
\r
187 private UCharacterName m_name_;
\r
191 private int m_choice_;
\r
193 * Start iteration range
\r
195 private int m_start_;
\r
197 * End + 1 iteration range
\r
199 private int m_limit_;
\r
201 * Current codepoint
\r
203 private int m_current_;
\r
207 private int m_groupIndex_ = -1;
\r
211 private int m_algorithmIndex_ = -1;
\r
215 private static char GROUP_OFFSETS_[] =
\r
216 new char[UCharacterName.LINES_PER_GROUP_ + 1];
\r
217 private static char GROUP_LENGTHS_[] =
\r
218 new char[UCharacterName.LINES_PER_GROUP_ + 1];
\r
220 // private methods --------------------------------------------------
\r
223 * Group name iteration: iterates over the names in the current 32-group and
\r
224 * returns the first codepoint that has a valid name.
\r
225 * @param result stores the result codepoint and name
\r
226 * @param limit last codepoint + 1 in range to search
\r
227 * @return false if a codepoint with a name is found in group and we can
\r
228 * bail from further iteration, true to continue on with the
\r
231 private boolean iterateSingleGroup(ValueIterator.Element result, int limit)
\r
233 synchronized(GROUP_OFFSETS_) {
\r
234 synchronized(GROUP_LENGTHS_) {
\r
235 int index = m_name_.getGroupLengths(m_groupIndex_, GROUP_OFFSETS_,
\r
237 while (m_current_ < limit) {
\r
238 int offset = UCharacterName.getGroupOffset(m_current_);
\r
239 String name = m_name_.getGroupName(
\r
240 index + GROUP_OFFSETS_[offset],
\r
241 GROUP_LENGTHS_[offset], m_choice_);
\r
242 if ((name == null || name.length() == 0) &&
\r
243 m_choice_ == UCharacterNameChoice.EXTENDED_CHAR_NAME) {
\r
244 name = m_name_.getExtendedName(m_current_);
\r
246 if (name != null && name.length() > 0) {
\r
247 result.integer = m_current_;
\r
248 result.value = name;
\r
259 * Group name iteration: iterates over the groups starting at the current one and
\r
260 * returns the first codepoint that has a valid name.
\r
261 * @param result stores the result codepoint and name
\r
262 * @param limit last codepoint + 1 in range to search
\r
263 * @return false if a codepoint with a name is found in group and we can
\r
264 * bail from further iteration, true to continue on with the
\r
267 private boolean iterateGroup(ValueIterator.Element result, int limit)
\r
269 if (m_groupIndex_ < 0) {
\r
270 m_groupIndex_ = m_name_.getGroup(m_current_);
\r
273 while (m_groupIndex_ < m_name_.m_groupcount_ &&
\r
274 m_current_ < limit) {
\r
275 // iterate till the last group or the last codepoint
\r
276 int startMSB = UCharacterName.getCodepointMSB(m_current_);
\r
277 int gMSB = m_name_.getGroupMSB(m_groupIndex_); // can be -1
\r
278 if (startMSB == gMSB) {
\r
279 if (startMSB == UCharacterName.getCodepointMSB(limit - 1)) {
\r
280 // if start and limit - 1 are in the same group, then enumerate
\r
281 // only in that one
\r
282 return iterateSingleGroup(result, limit);
\r
284 // enumerate characters in the partial start group
\r
285 // if (m_name_.getGroupOffset(m_current_) != 0) {
\r
286 if (!iterateSingleGroup(result,
\r
287 UCharacterName.getGroupLimit(gMSB))) {
\r
290 ++ m_groupIndex_; // continue with the next group
\r
292 else if (startMSB > gMSB) {
\r
293 // make sure that we start enumerating with the first group
\r
298 int gMIN = UCharacterName.getGroupMin(gMSB);
\r
299 if (gMIN > limit) {
\r
302 if (m_choice_ == UCharacterNameChoice.EXTENDED_CHAR_NAME) {
\r
303 if (!iterateExtended(result, gMIN)) {
\r
315 * Iterate extended names.
\r
316 * @param result stores the result codepoint and name
\r
317 * @param limit last codepoint + 1 in range to search
\r
318 * @return false if a codepoint with a name is found and we can
\r
319 * bail from further iteration, true to continue on with the
\r
320 * iteration (this will always be false for valid codepoints)
\r
322 private boolean iterateExtended(ValueIterator.Element result,
\r
325 while (m_current_ < limit) {
\r
326 String name = m_name_.getExtendedOr10Name(m_current_);
\r
327 if (name != null && name.length() > 0) {
\r
328 result.integer = m_current_;
\r
329 result.value = name;
\r