/*
 ********************************************************************************
 * Copyright (C) 2010-2011, Google, International Business Machines Corporation *
 * and others. All Rights Reserved.                                             *
 ********************************************************************************
 */
package com.ibm.icu.lang;
/**
 * A number of utilities for dealing with CharSequences and related classes.
 * For accessing codepoints with a CharSequence, also see
 * <ul>
 * <li>{@link java.lang.Character#codePointAt(CharSequence, int)}</li>
 * <li>{@link java.lang.Character#codePointBefore(CharSequence, int)}</li>
 * <li>{@link java.lang.Character#codePointCount(CharSequence, int, int)}</li>
 * <li>{@link java.lang.Character#charCount(int)}</li>
 * <li>{@link java.lang.Character#offsetByCodePoints(CharSequence, int, int)}</li>
 * <li>{@link java.lang.Character#toChars(int, char[], int)}</li>
 * <li>{@link java.lang.Character#toCodePoint(char, char)}</li>
 * </ul>
 *
 * @internal
 * @deprecated This API is ICU internal only.
 */
@Deprecated
public class CharSequences {
    // TODO: candidate additions, none of which are implemented in this class yet.
    // compareToIgnoreCase(a, b)
    // contentEquals(a, b)
    // contentEqualsIgnoreCase(a, b)
    //
    // contains(a, b) => indexOf >= 0
    //
    // lastIndexOf(a, b, fromIndex)
    // indexOf(a, ch, fromIndex)
    // lastIndexOf(a, ch, fromIndex);
    //
    // s.trim() => UnicodeSet.trim(CharSequence s); return a subsequence starting with the first character not in the set to the last character not in the set.
    // add UnicodeSet.split(CharSequence s);

    /**
     * Find the longest n such that a[aIndex,n] = b[bIndex,n], and n is on a character boundary.
     * <p>
     * The count is backed up by one only when the first non-matching position falls inside
     * a surrogate pair in <em>both</em> sequences.
     *
     * @param a first sequence
     * @param b second sequence
     * @param aIndex char index in {@code a} at which matching starts
     * @param bIndex char index in {@code b} at which matching starts
     * @return the number of matching chars (UTF-16 code units), possibly 0
     * @internal
     * @deprecated This API is ICU internal only.
     */
    @Deprecated
    public static int matchAfter(CharSequence a, CharSequence b, int aIndex, int bIndex) {
        int i = aIndex;
        int j = bIndex;
        final int alen = a.length();
        final int blen = b.length();
        for (; i < alen && j < blen; ++i, ++j) {
            if (a.charAt(i) != b.charAt(j)) {
                break;
            }
        }
        // if we failed a match make sure that we didn't match half a character
        int result = i - aIndex;
        if (result != 0 && !onCharacterBoundary(a, i) && !onCharacterBoundary(b, j)) {
            --result; // back up over the lead surrogate that matched on its own
        }
        return result;
    }

    /**
     * Count the code point length. Unpaired surrogates count as 1.
     * <p>
     * This method is static: the class has no accessible constructor, so an instance
     * method could never be invoked.
     *
     * @param s the sequence to measure
     * @return the number of code points in {@code s}
     * @internal
     * @deprecated This API is ICU internal only.
     */
    @Deprecated
    public static int codePointLength(CharSequence s) {
        return Character.codePointCount(s, 0, s.length());
    }

    /**
     * Utility function for comparing codepoint to string without generating new
     * string.
     *
     * @param codepoint the code point to look for
     * @param other the sequence to compare against; {@code null} never matches
     * @return true if {@code other} consists of exactly {@code codepoint}
     * @internal
     * @deprecated This API is ICU internal only.
     */
    @Deprecated
    public static final boolean equals(int codepoint, CharSequence other) {
        if (other == null) {
            return false;
        }
        switch (other.length()) {
        case 1: return codepoint == other.charAt(0);
        // Two chars can only equal a single code point if that code point is supplementary.
        case 2: return codepoint > 0xFFFF && codepoint == Character.codePointAt(other, 0);
        default: return false;
        }
    }

    /**
     * Same as {@link #equals(int, CharSequence)}, with the arguments swapped.
     *
     * @param other the sequence to compare against
     * @param codepoint the code point to look for
     * @return true if {@code other} consists of exactly {@code codepoint}
     * @internal
     * @deprecated This API is ICU internal only.
     */
    @Deprecated
    public static final boolean equals(CharSequence other, int codepoint) {
        return equals(codepoint, other);
    }

    /**
     * Utility to compare a string to a code point.
     * Same ordering as turning the code point into a string (with the [ugly] new StringBuilder().appendCodePoint(codepoint).toString())
     * and comparing, but much faster (no object creation).
     * Note that this (=String) order is UTF-16 order -- *not* code point order.
     * <p>
     * Note: earlier documentation stated that a null compares as less. This implementation
     * does not special-case null; a null {@code string} results in a NullPointerException.
     *
     * @param string the sequence to compare
     * @param codePoint the code point to compare against
     * @return a negative value, zero, or a positive value as {@code string} sorts before,
     *         equal to, or after the code point
     * @throws IllegalArgumentException if {@code codePoint} is not a valid Unicode code point
     * @internal
     * @deprecated This API is ICU internal only.
     */
    @Deprecated
    public static int compare(CharSequence string, int codePoint) {
        if (codePoint < Character.MIN_CODE_POINT || codePoint > Character.MAX_CODE_POINT) {
            throw new IllegalArgumentException("Invalid code point: " + codePoint);
        }
        int stringLength = string.length();
        if (stringLength == 0) {
            return -1; // the empty string sorts before everything
        }
        char firstChar = string.charAt(0);
        int offset = codePoint - Character.MIN_SUPPLEMENTARY_CODE_POINT;

        if (offset < 0) { // BMP codePoint
            int result = firstChar - codePoint;
            if (result != 0) {
                return result;
            }
            // First char matches: equal only if there is nothing after it.
            return stringLength - 1;
        }

        // Supplementary code point: compare against its lead, then its trail surrogate.
        char lead = (char)((offset >>> 10) + Character.MIN_HIGH_SURROGATE);
        int result = firstChar - lead;
        if (result != 0) {
            return result;
        }
        if (stringLength > 1) {
            char trail = (char)((offset & 0x3ff) + Character.MIN_LOW_SURROGATE);
            result = string.charAt(1) - trail;
            if (result != 0) {
                return result;
            }
        }
        // Both surrogates match (or the string is a bare lead): decide by length.
        return stringLength - 2;
    }

    /**
     * Utility to compare a string to a code point.
     * Same ordering as turning the code point into a string and comparing, but much faster (no object creation).
     * Note that this (=String) order is UTF-16 order -- *not* code point order.
     * <p>
     * Returns the negation of {@link #compare(CharSequence, int)}; a null {@code a}
     * results in a NullPointerException.
     *
     * @param codepoint the code point to compare
     * @param a the sequence to compare against
     * @return a negative value, zero, or a positive value as the code point sorts before,
     *         equal to, or after {@code a}
     * @throws IllegalArgumentException if {@code codepoint} is not a valid Unicode code point
     * @internal
     * @deprecated This API is ICU internal only.
     */
    @Deprecated
    public static int compare(int codepoint, CharSequence a) {
        return -compare(a, codepoint);
    }

    /**
     * Return the value of the first code point, if the string is exactly one code point. Otherwise return Integer.MAX_VALUE.
     *
     * @param s the sequence to inspect
     * @return the single code point, or {@code Integer.MAX_VALUE}
     * @internal
     * @deprecated This API is ICU internal only.
     */
    @Deprecated
    public static int getSingleCodePoint(CharSequence s) {
        int length = s.length();
        if (length < 1 || length > 2) {
            return Integer.MAX_VALUE;
        }
        int result = Character.codePointAt(s, 0);
        // One char must be a BMP code point; two chars must be one supplementary code point.
        return (result < 0x10000) == (length == 1) ? result : Integer.MAX_VALUE;
    }

    /**
     * Utility function for comparing objects that may be null
     *
     * @param a first object, may be null
     * @param b second object, may be null
     * @return true if both are null, or if {@code a.equals(b)}
     * @internal
     * @deprecated This API is ICU internal only.
     */
    @Deprecated
    public static final <T extends Object> boolean equals(T a, T b) {
        if (a == null) {
            return b == null;
        }
        return b != null && a.equals(b);
    }

    /**
     * Utility for comparing the contents of CharSequences
     * (char by char, i.e. in UTF-16 order).
     *
     * @param a first sequence
     * @param b second sequence
     * @return the difference of the first differing chars, or the difference in length
     *         if one sequence is a prefix of the other
     * @internal
     * @deprecated This API is ICU internal only.
     */
    @Deprecated
    public static int compare(CharSequence a, CharSequence b) {
        int alength = a.length();
        int blength = b.length();
        int min = alength <= blength ? alength : blength;
        for (int i = 0; i < min; ++i) {
            int diff = a.charAt(i) - b.charAt(i);
            if (diff != 0) {
                return diff;
            }
        }
        return alength - blength;
    }

    /**
     * Utility for comparing the contents of CharSequences
     *
     * @param a first sequence
     * @param b second sequence
     * @return true if both sequences contain the same chars in the same order
     * @internal
     * @deprecated This API is ICU internal only.
     */
    @Deprecated
    public static boolean equalsChars(CharSequence a, CharSequence b) {
        // do length test first for fast path
        return a.length() == b.length() && compare(a,b) == 0;
    }

    /**
     * Are we on a character boundary?
     *
     * @param s the sequence
     * @param i char index into {@code s}
     * @return false only if {@code i} lies between a lead surrogate and the trail
     *         surrogate that follows it; indexes at or beyond either end count as boundaries
     * @internal
     * @deprecated This API is ICU internal only.
     */
    @Deprecated
    public static boolean onCharacterBoundary(CharSequence s, int i) {
        return i <= 0
                || i >= s.length()
                || !Character.isHighSurrogate(s.charAt(i-1))
                || !Character.isLowSurrogate(s.charAt(i));
    }

    /**
     * Find code point in string.
     *
     * @param s the sequence to search
     * @param codePoint the code point to look for
     * @return the char index of the first occurrence, or -1 if there is none
     * @internal
     * @deprecated This API is ICU internal only.
     */
    @Deprecated
    public static int indexOf(CharSequence s, int codePoint) {
        int cp;
        for (int i = 0; i < s.length(); i += Character.charCount(cp)) {
            cp = Character.codePointAt(s, i);
            if (cp == codePoint) {
                return i;
            }
        }
        return -1;
    }

    /**
     * Utility function for simplified, more robust loops, such as:
     * <pre>
     *   for (int codePoint : CharSequences.codePoints(string)) {
     *     doSomethingWith(codePoint);
     *   }
     * </pre>
     * Unpaired surrogates are returned as individual elements.
     *
     * @param s the sequence to convert
     * @return the code points of {@code s}, in order
     * @internal
     * @deprecated This API is ICU internal only.
     */
    @Deprecated
    public static int[] codePoints(CharSequence s) {
        int[] result = new int[s.length()]; // in the vast majority of cases, the length is the same
        int j = 0;
        for (int i = 0; i < s.length(); ++i) {
            char cp = s.charAt(i);
            if (cp >= 0xDC00 && cp <= 0xDFFF && i != 0 ) { // hand-code for speed
                // Note: j-1 is safe, because j can only be zero if i is zero. But i!=0 in this block.
                // Compare as int: narrowing to char would make an already-combined
                // supplementary code point such as U+1D800 look like a lead surrogate.
                int last = result[j-1];
                if (last >= 0xD800 && last <= 0xDBFF) {
                    result[j-1] = Character.toCodePoint((char) last, cp);
                    continue;
                }
            }
            result[j++] = cp;
        }
        if (j == result.length) {
            return result;
        }
        // At least one surrogate pair was merged: trim the unused tail.
        int[] shortResult = new int[j];
        System.arraycopy(result, 0, shortResult, 0, j);
        return shortResult;
    }

    /** Not instantiable: static utilities only. */
    private CharSequences() {
    }
}