2 *******************************************************************************
3 * Copyright (C) 2013, International Business Machines Corporation and *
4 * others. All Rights Reserved. *
5 *******************************************************************************
7 package com.ibm.icu.impl;
9 import java.text.CharacterIterator;
11 import com.ibm.icu.text.UTF16;
13 public final class CharacterIteration {
14 // disallow instantiation
15 private CharacterIteration() { }
17 // 32-bit char value returned when an iterator has run out of range.
18 // Positive value so fast case (not end, not surrogate) can be checked
19 // with a single test.
20 public static final int DONE32 = 0x7fffffff;
23 * Move the iterator forward to the next code point, and return that code point,
24 * leaving the iterator positioned at char returned.
25 * For Supplementary chars, the iterator is left positioned at the lead surrogate.
26 * @param ci The character iterator
27 * @return The next code point.
29 public static int next32(CharacterIterator ci) {
30 // If the current position is at a surrogate pair, move to the trail surrogate
31 // which leaves it in position for underlying iterator's next() to work.
33 if (c >= UTF16.LEAD_SURROGATE_MIN_VALUE && c<=UTF16.LEAD_SURROGATE_MAX_VALUE) {
35 if (c<UTF16.TRAIL_SURROGATE_MIN_VALUE || c>UTF16.TRAIL_SURROGATE_MAX_VALUE) {
40 // For BMP chars, this next() is the real deal.
43 // If we might have a lead surrogate, we need to peek ahead to get the trail
44 // even though we don't want to really be positioned there.
45 if (c >= UTF16.LEAD_SURROGATE_MIN_VALUE) {
46 c = nextTrail32(ci, c);
49 if (c >= UTF16.SUPPLEMENTARY_MIN_VALUE && c != DONE32) {
50 // We got a supplementary char. Back the iterator up to the position
51 // of the lead surrogate.
58 // Out-of-line portion of the in-line next32 code.
59 // The call site does an initial ci.next() and calls this function
60 // if the 16 bit value it gets is >= LEAD_SURROGATE_MIN_VALUE.
61 // NOTE: we leave the underlying char iterator positioned in the
62 // middle of a surrogate pair. ci.next() will work correctly
63 // from there, but the ci.getIndex() will be wrong, and needs
65 public static int nextTrail32(CharacterIterator ci, int lead) {
66 if (lead == CharacterIterator.DONE && ci.getIndex() >= ci.getEndIndex()) {
70 if (lead <= UTF16.LEAD_SURROGATE_MAX_VALUE) {
71 char cTrail = ci.next();
72 if (UTF16.isTrailSurrogate(cTrail)) {
73 retVal = ((lead - UTF16.LEAD_SURROGATE_MIN_VALUE) << 10) +
74 (cTrail - UTF16.TRAIL_SURROGATE_MIN_VALUE) +
75 UTF16.SUPPLEMENTARY_MIN_VALUE;
83 public static int previous32(CharacterIterator ci) {
84 if (ci.getIndex() <= ci.getBeginIndex()) {
87 char trail = ci.previous();
89 if (UTF16.isTrailSurrogate(trail) && ci.getIndex()>ci.getBeginIndex()) {
90 char lead = ci.previous();
91 if (UTF16.isLeadSurrogate(lead)) {
92 retVal = (((int)lead - UTF16.LEAD_SURROGATE_MIN_VALUE) << 10) +
93 ((int)trail - UTF16.TRAIL_SURROGATE_MIN_VALUE) +
94 UTF16.SUPPLEMENTARY_MIN_VALUE;
102 public static int current32(CharacterIterator ci) {
103 char lead = ci.current();
105 if (retVal < UTF16.LEAD_SURROGATE_MIN_VALUE) {
108 if (UTF16.isLeadSurrogate(lead)) {
109 int trail = (int)ci.next();
111 if (UTF16.isTrailSurrogate((char)trail)) {
112 retVal = ((lead - UTF16.LEAD_SURROGATE_MIN_VALUE) << 10) +
113 (trail - UTF16.TRAIL_SURROGATE_MIN_VALUE) +
114 UTF16.SUPPLEMENTARY_MIN_VALUE;
117 if (lead == CharacterIterator.DONE) {
118 if (ci.getIndex() >= ci.getEndIndex()) {