2 *******************************************************************************
\r
3 * Copyright (C) 1996-2009, International Business Machines Corporation and *
\r
4 * others. All Rights Reserved. *
\r
5 *******************************************************************************
\r
7 package com.ibm.icu.text;
\r
10 import java.text.CharacterIterator;
\r
12 import com.ibm.icu.impl.CharacterIteratorWrapper;
\r
13 import com.ibm.icu.impl.ReplaceableUCharacterIterator;
\r
14 import com.ibm.icu.impl.UCharArrayIterator;
\r
15 import com.ibm.icu.impl.UCharacterIteratorWrapper;
\r
16 import com.ibm.icu.impl.UCharacterProperty;
\r
20 * Abstract class that defines an API for iteration on text objects. This is an
\r
21 * interface for forward and backward iteration and random access into a text
\r
22 * object. Forward iteration is done with post-increment and backward iteration
\r
23 * is done with pre-decrement semantics, while the
\r
24 * <code>java.text.CharacterIterator</code> interface methods provided forward
\r
25 * iteration with "pre-increment" and backward iteration with pre-decrement
\r
26 * semantics. This API is more efficient for forward iteration over code points.
\r
27 * The other major difference is that this API can do both code unit and code point
\r
28 * iteration, <code>java.text.CharacterIterator</code> can only iterate over
\r
29 * code units and is limited to BMP (0 - 0xFFFF)
\r
33 public abstract class UCharacterIterator
\r
34 implements Cloneable,UForwardCharacterIterator {
\r
37 * Protected default constructor for the subclasses
\r
40 protected UCharacterIterator(){
\r
43 // static final methods ----------------------------------------------------
\r
46 * Returns a <code>UCharacterIterator</code> object given a
\r
47 * <code>Replaceable</code> object.
\r
48 * @param source a valid source as a <code>Replaceable</code> object
\r
49 * @return UCharacterIterator object
\r
50 * @exception IllegalArgumentException if the argument is null
\r
53 public static final UCharacterIterator getInstance(Replaceable source){
\r
54 return new ReplaceableUCharacterIterator(source);
\r
58 * Returns a <code>UCharacterIterator</code> object given a
\r
60 * @param source a string
\r
61 * @return UCharacterIterator object
\r
62 * @exception IllegalArgumentException if the argument is null
\r
65 public static final UCharacterIterator getInstance(String source){
\r
66 return new ReplaceableUCharacterIterator(source);
\r
70 * Returns a <code>UCharacterIterator</code> object given a
\r
71 * source character array.
\r
72 * @param source an array of UTF-16 code units
\r
73 * @return UCharacterIterator object
\r
74 * @exception IllegalArgumentException if the argument is null
\r
77 public static final UCharacterIterator getInstance(char[] source){
\r
78 return getInstance(source,0,source.length);
\r
82 * Returns a <code>UCharacterIterator</code> object given a
\r
83 * source character array.
\r
84 * @param source an array of UTF-16 code units
\r
85 * @return UCharacterIterator object
\r
86 * @exception IllegalArgumentException if the argument is null
\r
89 public static final UCharacterIterator getInstance(char[] source, int start, int limit){
\r
90 return new UCharArrayIterator(source,start,limit);
\r
93 * Returns a <code>UCharacterIterator</code> object given a
\r
94 * source StringBuffer.
\r
95 * @param source a string buffer of UTF-16 code units
\r
96 * @return UCharacterIterator object
\r
97 * @exception IllegalArgumentException if the argument is null
\r
100 public static final UCharacterIterator getInstance(StringBuffer source){
\r
101 return new ReplaceableUCharacterIterator(source);
\r
105 * Returns a <code>UCharacterIterator</code> object given a
\r
106 * CharacterIterator.
\r
107 * @param source a valid CharacterIterator object.
\r
108 * @return UCharacterIterator object
\r
109 * @exception IllegalArgumentException if the argument is null
\r
112 public static final UCharacterIterator getInstance(CharacterIterator source){
\r
113 return new CharacterIteratorWrapper(source);
\r
116 // public methods ----------------------------------------------------------
\r
118 * Returns a <code>java.text.CharacterIterator</code> object for
\r
119 * the underlying text of this iterator. The returned iterator is
\r
120 * independent of this iterator.
\r
121 * @return java.text.CharacterIterator object
\r
124 public CharacterIterator getCharacterIterator(){
\r
125 return new UCharacterIteratorWrapper(this);
\r
129 * Returns the code unit at the current index. If index is out
\r
130 * of range, returns DONE. Index is not changed.
\r
131 * @return current code unit
\r
134 public abstract int current();
\r
137 * Returns the codepoint at the current index.
\r
138 * If the current index is invalid, DONE is returned.
\r
139 * If the current index points to a lead surrogate, and there is a following
\r
140 * trail surrogate, then the code point is returned. Otherwise, the code
\r
141 * unit at index is returned. Index is not changed.
\r
142 * @return current codepoint
\r
145 public int currentCodePoint(){
\r
146 int ch = current();
\r
147 if(UTF16.isLeadSurrogate((char)ch)){
\r
148 // advance the index to get the
\r
151 // due to post increment semantics
\r
152 // current() after next() actually
\r
153 // returns the char we want
\r
154 int ch2 = current();
\r
155 // current should never change
\r
156 // the current index so back off
\r
159 if(UTF16.isTrailSurrogate((char)ch2)){
\r
160 // we found a surrogate pair
\r
161 // return the codepoint
\r
162 return UCharacterProperty.getRawSupplementary(
\r
171 * Returns the length of the text
\r
172 * @return length of the text
\r
175 public abstract int getLength();
\r
179 * Gets the current index in text.
\r
180 * @return current index in text.
\r
183 public abstract int getIndex();
\r
187 * Returns the UTF16 code unit at index, and increments to the next
\r
188 * code unit (post-increment semantics). If index is out of
\r
189 * range, DONE is returned, and the iterator is reset to the limit
\r
191 * @return the next UTF16 code unit, or DONE if the index is at the limit
\r
195 public abstract int next();
\r
198 * Returns the code point at index, and increments to the next code
\r
199 * point (post-increment semantics). If index does not point to a
\r
200 * valid surrogate pair, the behavior is the same as
\r
201 * <code>next()</code>. Otherwise the iterator is incremented past
\r
202 * the surrogate pair, and the code point represented by the pair
\r
204 * @return the next codepoint in text, or DONE if the index is at
\r
205 * the limit of the text.
\r
208 public int nextCodePoint(){
\r
210 if(UTF16.isLeadSurrogate((char)ch1)){
\r
212 if(UTF16.isTrailSurrogate((char)ch2)){
\r
213 return UCharacterProperty.getRawSupplementary((char)ch1,
\r
215 }else if (ch2 != DONE) {
\r
216 // unmatched surrogate so back out
\r
224 * Decrement to the position of the previous code unit in the
\r
225 * text, and return it (pre-decrement semantics). If the
\r
226 * resulting index is less than 0, the index is reset to 0 and
\r
227 * DONE is returned.
\r
228 * @return the previous code unit in the text, or DONE if the new
\r
229 * index is before the start of the text.
\r
232 public abstract int previous();
\r
236 * Retreat to the start of the previous code point in the text,
\r
237 * and return it (pre-decrement semantics). If the index is not
\r
238 * preceded by a valid surrogate pair, the behavior is the same
\r
239 * as <code>previous()</code>. Otherwise the iterator is
\r
240 * decremented to the start of the surrogate pair, and the code
\r
241 * point represented by the pair is returned.
\r
242 * @return the previous code point in the text, or DONE if the new
\r
243 * index is before the start of the text.
\r
246 public int previousCodePoint(){
\r
247 int ch1 = previous();
\r
248 if(UTF16.isTrailSurrogate((char)ch1)){
\r
249 int ch2 = previous();
\r
250 if(UTF16.isLeadSurrogate((char)ch2)){
\r
251 return UCharacterProperty.getRawSupplementary((char)ch2,
\r
253 }else if (ch2 != DONE) {
\r
254 //unmatched trail surrogate so back out
\r
262 * Sets the index to the specified index in the text.
\r
263 * @param index the index within the text.
\r
264 * @exception IndexOutOfBoundsException is thrown if an invalid index is
\r
268 public abstract void setIndex(int index);
\r
271 * Sets the current index to the limit.
\r
274 public void setToLimit() {
\r
275 setIndex(getLength());
\r
279 * Sets the current index to the start.
\r
282 public void setToStart() {
\r
287 * Fills the buffer with the underlying text storage of the iterator
\r
288 * . If the buffer capacity is not enough an exception is thrown. The capacity
\r
289 * of the fill in buffer should at least be equal to length of text in the
\r
290 * iterator obtained by calling <code>getLength()</code>.
\r
295 * UCharacterIterator iter = UCharacterIterator.getInstance(text);
\r
296 * char[] buf = new char[iter.getLength()];
\r
297 * iter.getText(buf);
\r
300 * char[] buf= new char[1];
\r
304 * len = iter.getText(buf);
\r
306 * }catch(IndexOutOfBoundsException e){
\r
307 * buf = new char[iter.getLength()];
\r
313 * @param fillIn an array of chars to fill with the underlying UTF-16 code
\r
315 * @param offset the position within the array to start putting the data.
\r
316 * @return the number of code units added to fillIn, as a convenience
\r
317 * @exception IndexOutOfBoundsException exception if there is not enough
\r
318 * room after offset in the array, or if offset < 0.
\r
321 public abstract int getText(char[] fillIn, int offset);
\r
324 * Convenience override for <code>getText(char[], int)</code> that provides
\r
326 * @param fillIn an array of chars to fill with the underlying UTF-16 code
\r
328 * @return the number of code units added to fillIn, as a convenience
\r
329 * @exception IndexOutOfBoundsException exception if there is not enough
\r
330 * room in the array.
\r
333 public final int getText(char[] fillIn) {
\r
334 return getText(fillIn, 0);
\r
338 * Convenience method for returning the underlying text storage as a string
\r
339 * @return the underlying text storage in the iterator as a string
\r
342 public String getText() {
\r
343 char[] text = new char[getLength()];
\r
345 return new String(text);
\r
349 * Moves the current position by the number of code units
\r
350 * specified, either forward or backward depending on the sign
\r
351 * of delta (positive or negative respectively). If the resulting
\r
352 * index would be less than zero, the index is set to zero, and if
\r
353 * the resulting index would be greater than limit, the index is
\r
356 * @param delta the number of code units to move the current
\r
358 * @return the new index.
\r
359 * @exception IndexOutOfBoundsException is thrown if an invalid index is
\r
364 public int moveIndex(int delta) {
\r
365 int x = Math.max(0, Math.min(getIndex() + delta, getLength()));
\r
371 * Moves the current position by the number of code points
\r
372 * specified, either forward or backward depending on the sign of
\r
373 * delta (positive or negative respectively). If the current index
\r
374 * is at a trail surrogate then the first adjustment is by code
\r
375 * unit, and the remaining adjustments are by code points. If the
\r
376 * resulting index would be less than zero, the index is set to
\r
377 * zero, and if the resulting index would be greater than limit,
\r
378 * the index is set to limit.
\r
379 * @param delta the number of code points to move the current index.
\r
380 * @return the new index
\r
381 * @exception IndexOutOfBoundsException is thrown if an invalid delta is
\r
385 public int moveCodePointIndex(int delta){
\r
387 while(delta>0 && nextCodePoint() != DONE){delta--;}
\r
389 while(delta<0 && previousCodePoint() != DONE){delta++;}
\r
392 throw new IndexOutOfBoundsException();
\r
399 * Creates a copy of this iterator, independent from other iterators.
\r
400 * If it is not possible to clone the iterator, a CloneNotSupportedException is thrown.
\r
401 * @return copy of this iterator
\r
404 public Object clone() throws CloneNotSupportedException{
\r
405 return super.clone();
\r