2 *******************************************************************************
3 * Copyright (C) 1996-2009, International Business Machines Corporation and *
4 * others. All Rights Reserved. *
5 *******************************************************************************
7 package com.ibm.icu.text;
10 import java.text.CharacterIterator;
12 import com.ibm.icu.impl.CharacterIteratorWrapper;
13 import com.ibm.icu.impl.ReplaceableUCharacterIterator;
14 import com.ibm.icu.impl.UCharArrayIterator;
15 import com.ibm.icu.impl.UCharacterIteratorWrapper;
16 import com.ibm.icu.impl.UCharacterProperty;
/**
 * Abstract class that defines an API for iteration on text objects. This is an
 * interface for forward and backward iteration and random access into a text
 * object. Forward iteration is done with post-increment and backward iteration
 * is done with pre-decrement semantics, while the
 * <code>java.text.CharacterIterator</code> interface methods provide forward
 * iteration with "pre-increment" and backward iteration with pre-decrement
 * semantics. This API is more efficient for forward iteration over code points.
 * The other major difference is that this API can do both code unit and code point
 * iteration, whereas <code>java.text.CharacterIterator</code> can only iterate over
 * code units and is limited to the BMP (0 - 0xFFFF).
 */
33 public abstract class UCharacterIterator
34 implements Cloneable,UForwardCharacterIterator {
/**
 * Protected default constructor for the subclasses. It does no work of
 * its own; all iteration state lives in the concrete subclass.
 */
protected UCharacterIterator() {
}
43 // static final methods ----------------------------------------------------
46 * Returns a <code>UCharacterIterator</code> object given a
47 * <code>Replaceable</code> object.
48 * @param source a valid source as a <code>Replaceable</code> object
49 * @return UCharacterIterator object
50 * @exception IllegalArgumentException if the argument is null
53 public static final UCharacterIterator getInstance(Replaceable source){
54 return new ReplaceableUCharacterIterator(source);
58 * Returns a <code>UCharacterIterator</code> object given a
60 * @param source a string
61 * @return UCharacterIterator object
62 * @exception IllegalArgumentException if the argument is null
65 public static final UCharacterIterator getInstance(String source){
66 return new ReplaceableUCharacterIterator(source);
70 * Returns a <code>UCharacterIterator</code> object given a
71 * source character array.
72 * @param source an array of UTF-16 code units
73 * @return UCharacterIterator object
74 * @exception IllegalArgumentException if the argument is null
77 public static final UCharacterIterator getInstance(char[] source){
78 return getInstance(source,0,source.length);
82 * Returns a <code>UCharacterIterator</code> object given a
83 * source character array.
84 * @param source an array of UTF-16 code units
85 * @return UCharacterIterator object
86 * @exception IllegalArgumentException if the argument is null
89 public static final UCharacterIterator getInstance(char[] source, int start, int limit){
90 return new UCharArrayIterator(source,start,limit);
93 * Returns a <code>UCharacterIterator</code> object given a
94 * source StringBuffer.
95 * @param source an string buffer of UTF-16 code units
96 * @return UCharacterIterator object
97 * @exception IllegalArgumentException if the argument is null
100 public static final UCharacterIterator getInstance(StringBuffer source){
101 return new ReplaceableUCharacterIterator(source);
105 * Returns a <code>UCharacterIterator</code> object given a
107 * @param source a valid CharacterIterator object.
108 * @return UCharacterIterator object
109 * @exception IllegalArgumentException if the argument is null
112 public static final UCharacterIterator getInstance(CharacterIterator source){
113 return new CharacterIteratorWrapper(source);
116 // public methods ----------------------------------------------------------
118 * Returns a <code>java.text.CharacterIterator</code> object for
119 * the underlying text of this iterator. The returned iterator is
120 * independent of this iterator.
121 * @return java.text.CharacterIterator object
124 public CharacterIterator getCharacterIterator(){
125 return new UCharacterIteratorWrapper(this);
129 * Returns the code unit at the current index. If index is out
130 * of range, returns DONE. Index is not changed.
131 * @return current code unit
134 public abstract int current();
137 * Returns the codepoint at the current index.
138 * If the current index is invalid, DONE is returned.
139 * If the current index points to a lead surrogate, and there is a following
140 * trail surrogate, then the code point is returned. Otherwise, the code
141 * unit at index is returned. Index is not changed.
142 * @return current codepoint
145 public int currentCodePoint(){
147 if(UTF16.isLeadSurrogate((char)ch)){
148 // advance the index to get the
151 // due to post increment semantics
152 // current() after next() actually
153 // returns the char we want
155 // current should never change
156 // the current index so back off
159 if(UTF16.isTrailSurrogate((char)ch2)){
160 // we found a surrogate pair
161 // return the codepoint
162 return UCharacterProperty.getRawSupplementary(
171 * Returns the length of the text
172 * @return length of the text
175 public abstract int getLength();
179 * Gets the current index in text.
180 * @return current index in text.
183 public abstract int getIndex();
187 * Returns the UTF16 code unit at index, and increments to the next
188 * code unit (post-increment semantics). If index is out of
189 * range, DONE is returned, and the iterator is reset to the limit
191 * @return the next UTF16 code unit, or DONE if the index is at the limit
195 public abstract int next();
198 * Returns the code point at index, and increments to the next code
199 * point (post-increment semantics). If index does not point to a
200 * valid surrogate pair, the behavior is the same as
201 * <code>next()<code>. Otherwise the iterator is incremented past
202 * the surrogate pair, and the code point represented by the pair
204 * @return the next codepoint in text, or DONE if the index is at
205 * the limit of the text.
208 public int nextCodePoint(){
210 if(UTF16.isLeadSurrogate((char)ch1)){
212 if(UTF16.isTrailSurrogate((char)ch2)){
213 return UCharacterProperty.getRawSupplementary((char)ch1,
215 }else if (ch2 != DONE) {
216 // unmatched surrogate so back out
224 * Decrement to the position of the previous code unit in the
225 * text, and return it (pre-decrement semantics). If the
226 * resulting index is less than 0, the index is reset to 0 and
228 * @return the previous code unit in the text, or DONE if the new
229 * index is before the start of the text.
232 public abstract int previous();
236 * Retreat to the start of the previous code point in the text,
237 * and return it (pre-decrement semantics). If the index is not
238 * preceeded by a valid surrogate pair, the behavior is the same
239 * as <code>previous()</code>. Otherwise the iterator is
240 * decremented to the start of the surrogate pair, and the code
241 * point represented by the pair is returned.
242 * @return the previous code point in the text, or DONE if the new
243 * index is before the start of the text.
246 public int previousCodePoint(){
247 int ch1 = previous();
248 if(UTF16.isTrailSurrogate((char)ch1)){
249 int ch2 = previous();
250 if(UTF16.isLeadSurrogate((char)ch2)){
251 return UCharacterProperty.getRawSupplementary((char)ch2,
253 }else if (ch2 != DONE) {
254 //unmatched trail surrogate so back out
262 * Sets the index to the specified index in the text.
263 * @param index the index within the text.
264 * @exception IndexOutOfBoundsException is thrown if an invalid index is
268 public abstract void setIndex(int index);
271 * Sets the current index to the limit.
274 public void setToLimit() {
275 setIndex(getLength());
279 * Sets the current index to the start.
282 public void setToStart() {
287 * Fills the buffer with the underlying text storage of the iterator
288 * If the buffer capacity is not enough a exception is thrown. The capacity
289 * of the fill in buffer should at least be equal to length of text in the
290 * iterator obtained by calling <code>getLength()</code).
295 * UChacterIterator iter = new UCharacterIterator.getInstance(text);
296 * char[] buf = new char[iter.getLength()];
300 * char[] buf= new char[1];
304 * len = iter.getText(buf);
306 * }catch(IndexOutOfBoundsException e){
307 * buf = new char[iter.getLength()];
313 * @param fillIn an array of chars to fill with the underlying UTF-16 code
315 * @param offset the position within the array to start putting the data.
316 * @return the number of code units added to fillIn, as a convenience
317 * @exception IndexOutOfBoundsException exception if there is not enough
318 * room after offset in the array, or if offset < 0.
321 public abstract int getText(char[] fillIn, int offset);
324 * Convenience override for <code>getText(char[], int)>/code> that provides
326 * @param fillIn an array of chars to fill with the underlying UTF-16 code
328 * @return the number of code units added to fillIn, as a convenience
329 * @exception IndexOutOfBoundsException exception if there is not enough
333 public final int getText(char[] fillIn) {
334 return getText(fillIn, 0);
338 * Convenience method for returning the underlying text storage as as string
339 * @return the underlying text storage in the iterator as a string
342 public String getText() {
343 char[] text = new char[getLength()];
345 return new String(text);
349 * Moves the current position by the number of code units
350 * specified, either forward or backward depending on the sign
351 * of delta (positive or negative respectively). If the resulting
352 * index would be less than zero, the index is set to zero, and if
353 * the resulting index would be greater than limit, the index is
356 * @param delta the number of code units to move the current
358 * @return the new index.
359 * @exception IndexOutOfBoundsException is thrown if an invalid index is
364 public int moveIndex(int delta) {
365 int x = Math.max(0, Math.min(getIndex() + delta, getLength()));
371 * Moves the current position by the number of code points
372 * specified, either forward or backward depending on the sign of
373 * delta (positive or negative respectively). If the current index
374 * is at a trail surrogate then the first adjustment is by code
375 * unit, and the remaining adjustments are by code points. If the
376 * resulting index would be less than zero, the index is set to
377 * zero, and if the resulting index would be greater than limit,
378 * the index is set to limit.
379 * @param delta the number of code units to move the current index.
380 * @return the new index
381 * @exception IndexOutOfBoundsException is thrown if an invalid delta is
385 public int moveCodePointIndex(int delta){
387 while(delta>0 && nextCodePoint() != DONE){delta--;}
389 while(delta<0 && previousCodePoint() != DONE){delta++;}
392 throw new IndexOutOfBoundsException();
399 * Creates a copy of this iterator, independent from other iterators.
400 * If it is not possible to clone the iterator, returns null.
401 * @return copy of this iterator
404 public Object clone() throws CloneNotSupportedException{
405 return super.clone();