3 *******************************************************************************
\r
4 * Copyright (C) 1996-2009, International Business Machines Corporation and *
\r
5 * others. All Rights Reserved. *
\r
6 *******************************************************************************
\r
8 package com.ibm.icu.impl;
\r
10 import java.util.ArrayList;
\r
12 import com.ibm.icu.lang.*;
\r
13 import com.ibm.icu.text.*;
\r
14 import com.ibm.icu.impl.UCharacterProperty;
\r
16 public final class Utility {
\r
18 private static final char APOSTROPHE = '\'';
\r
19 private static final char BACKSLASH = '\\';
\r
20 private static final int MAGIC_UNSIGNED = 0x80000000;
\r
23 * Convenience utility to compare two Object[]s.
\r
24 * Ought to be in System
\r
26 public final static boolean arrayEquals(Object[] source, Object target) {
\r
27 if (source == null) return (target == null);
\r
28 if (!(target instanceof Object[])) return false;
\r
29 Object[] targ = (Object[]) target;
\r
30 return (source.length == targ.length
\r
31 && arrayRegionMatches(source, 0, targ, 0, source.length));
\r
35 * Convenience utility to compare two int[]s
\r
36 * Ought to be in System
\r
38 public final static boolean arrayEquals(int[] source, Object target) {
\r
39 if (source == null) return (target == null);
\r
40 if (!(target instanceof int[])) return false;
\r
41 int[] targ = (int[]) target;
\r
42 return (source.length == targ.length
\r
43 && arrayRegionMatches(source, 0, targ, 0, source.length));
\r
47 * Convenience utility to compare two double[]s
\r
48 * Ought to be in System
\r
50 public final static boolean arrayEquals(double[] source, Object target) {
\r
51 if (source == null) return (target == null);
\r
52 if (!(target instanceof double[])) return false;
\r
53 double[] targ = (double[]) target;
\r
54 return (source.length == targ.length
\r
55 && arrayRegionMatches(source, 0, targ, 0, source.length));
\r
57 public final static boolean arrayEquals(byte[] source, Object target) {
\r
58 if (source == null) return (target == null);
\r
59 if (!(target instanceof byte[])) return false;
\r
60 byte[] targ = (byte[]) target;
\r
61 return (source.length == targ.length
\r
62 && arrayRegionMatches(source, 0, targ, 0, source.length));
\r
66 * Convenience utility to compare two Object[]s
\r
67 * Ought to be in System
\r
69 public final static boolean arrayEquals(Object source, Object target) {
\r
70 if (source == null) return (target == null);
\r
71 // for some reason, the correct arrayEquals is not being called
\r
72 // so do it by hand for now.
\r
73 if (source instanceof Object[])
\r
74 return(arrayEquals((Object[]) source,target));
\r
75 if (source instanceof int[])
\r
76 return(arrayEquals((int[]) source,target));
\r
77 if (source instanceof double[])
\r
78 return(arrayEquals((int[]) source,target));
\r
79 if (source instanceof byte[])
\r
80 return(arrayEquals((byte[]) source,target));
\r
81 return source.equals(target);
\r
85 * Convenience utility to compare two Object[]s
\r
86 * Ought to be in System.
\r
87 * @param len the length to compare.
\r
88 * The start indices and start+len must be valid.
\r
90 public final static boolean arrayRegionMatches(Object[] source, int sourceStart,
\r
91 Object[] target, int targetStart,
\r
94 int sourceEnd = sourceStart + len;
\r
95 int delta = targetStart - sourceStart;
\r
96 for (int i = sourceStart; i < sourceEnd; i++) {
\r
97 if (!arrayEquals(source[i],target[i + delta]))
\r
104 * Convenience utility to compare two char[]s
\r
105 * Ought to be in System.
\r
106 * @param len the length to compare.
\r
107 * The start indices and start+len must be valid.
\r
109 public final static boolean arrayRegionMatches(char[] source, int sourceStart,
\r
110 char[] target, int targetStart,
\r
113 int sourceEnd = sourceStart + len;
\r
114 int delta = targetStart - sourceStart;
\r
115 for (int i = sourceStart; i < sourceEnd; i++) {
\r
116 if (source[i]!=target[i + delta])
\r
123 * Convenience utility to compare two int[]s.
\r
124 * @param len the length to compare.
\r
125 * The start indices and start+len must be valid.
\r
126 * Ought to be in System
\r
128 public final static boolean arrayRegionMatches(int[] source, int sourceStart,
\r
129 int[] target, int targetStart,
\r
132 int sourceEnd = sourceStart + len;
\r
133 int delta = targetStart - sourceStart;
\r
134 for (int i = sourceStart; i < sourceEnd; i++) {
\r
135 if (source[i] != target[i + delta])
\r
142 * Convenience utility to compare two arrays of doubles.
\r
143 * @param len the length to compare.
\r
144 * The start indices and start+len must be valid.
\r
145 * Ought to be in System
\r
147 public final static boolean arrayRegionMatches(double[] source, int sourceStart,
\r
148 double[] target, int targetStart,
\r
151 int sourceEnd = sourceStart + len;
\r
152 int delta = targetStart - sourceStart;
\r
153 for (int i = sourceStart; i < sourceEnd; i++) {
\r
154 if (source[i] != target[i + delta])
\r
159 public final static boolean arrayRegionMatches(byte[] source, int sourceStart,
\r
160 byte[] target, int targetStart, int len){
\r
161 int sourceEnd = sourceStart + len;
\r
162 int delta = targetStart - sourceStart;
\r
163 for (int i = sourceStart; i < sourceEnd; i++) {
\r
164 if (source[i] != target[i + delta])
\r
171 * Convenience utility. Does null checks on objects, then calls equals.
\r
173 public final static boolean objectEquals(Object source, Object target) {
\r
174 if (source == null)
\r
175 return (target == null);
\r
177 return source.equals(target);
\r
181 * The ESCAPE character is used during run-length encoding. It signals
\r
182 * a run of identical chars.
\r
184 private static final char ESCAPE = '\uA5A5';
\r
187 * The ESCAPE_BYTE character is used during run-length encoding. It signals
\r
188 * a run of identical bytes.
\r
190 static final byte ESCAPE_BYTE = (byte)0xA5;
\r
193 * Construct a string representing an int array. Use run-length encoding.
\r
194 * A character represents itself, unless it is the ESCAPE character. Then
\r
195 * the following notations are possible:
\r
196 * ESCAPE ESCAPE ESCAPE literal
\r
197 * ESCAPE n c n instances of character c
\r
198 * Since an encoded run occupies 3 characters, we only encode runs of 4 or
\r
199 * more characters. Thus we have n > 0 and n != ESCAPE and n <= 0xFFFF.
\r
200 * If we encounter a run where n == ESCAPE, we represent this as:
\r
202 * The ESCAPE value is chosen so as not to collide with commonly
\r
205 static public final String arrayToRLEString(int[] a) {
\r
206 StringBuffer buffer = new StringBuffer();
\r
208 appendInt(buffer, a.length);
\r
209 int runValue = a[0];
\r
211 for (int i=1; i<a.length; ++i) {
\r
213 if (s == runValue && runLength < 0xFFFF) {
\r
216 encodeRun(buffer, runValue, runLength);
\r
221 encodeRun(buffer, runValue, runLength);
\r
222 return buffer.toString();
\r
226 * Construct a string representing a short array. Use run-length encoding.
\r
227 * A character represents itself, unless it is the ESCAPE character. Then
\r
228 * the following notations are possible:
\r
229 * ESCAPE ESCAPE ESCAPE literal
\r
230 * ESCAPE n c n instances of character c
\r
231 * Since an encoded run occupies 3 characters, we only encode runs of 4 or
\r
232 * more characters. Thus we have n > 0 and n != ESCAPE and n <= 0xFFFF.
\r
233 * If we encounter a run where n == ESCAPE, we represent this as:
\r
235 * The ESCAPE value is chosen so as not to collide with commonly
\r
238 static public final String arrayToRLEString(short[] a) {
\r
239 StringBuffer buffer = new StringBuffer();
\r
240 // for (int i=0; i<a.length; ++i) buffer.append((char) a[i]);
\r
241 buffer.append((char) (a.length >> 16));
\r
242 buffer.append((char) a.length);
\r
243 short runValue = a[0];
\r
245 for (int i=1; i<a.length; ++i) {
\r
247 if (s == runValue && runLength < 0xFFFF) ++runLength;
\r
249 encodeRun(buffer, runValue, runLength);
\r
254 encodeRun(buffer, runValue, runLength);
\r
255 return buffer.toString();
\r
259 * Construct a string representing a char array. Use run-length encoding.
\r
260 * A character represents itself, unless it is the ESCAPE character. Then
\r
261 * the following notations are possible:
\r
262 * ESCAPE ESCAPE ESCAPE literal
\r
263 * ESCAPE n c n instances of character c
\r
264 * Since an encoded run occupies 3 characters, we only encode runs of 4 or
\r
265 * more characters. Thus we have n > 0 and n != ESCAPE and n <= 0xFFFF.
\r
266 * If we encounter a run where n == ESCAPE, we represent this as:
\r
268 * The ESCAPE value is chosen so as not to collide with commonly
\r
271 static public final String arrayToRLEString(char[] a) {
\r
272 StringBuffer buffer = new StringBuffer();
\r
273 buffer.append((char) (a.length >> 16));
\r
274 buffer.append((char) a.length);
\r
275 char runValue = a[0];
\r
277 for (int i=1; i<a.length; ++i) {
\r
279 if (s == runValue && runLength < 0xFFFF) ++runLength;
\r
281 encodeRun(buffer, (short)runValue, runLength);
\r
286 encodeRun(buffer, (short)runValue, runLength);
\r
287 return buffer.toString();
\r
291 * Construct a string representing a byte array. Use run-length encoding.
\r
292 * Two bytes are packed into a single char, with a single extra zero byte at
\r
293 * the end if needed. A byte represents itself, unless it is the
\r
294 * ESCAPE_BYTE. Then the following notations are possible:
\r
295 * ESCAPE_BYTE ESCAPE_BYTE ESCAPE_BYTE literal
\r
296 * ESCAPE_BYTE n b n instances of byte b
\r
297 * Since an encoded run occupies 3 bytes, we only encode runs of 4 or
\r
298 * more bytes. Thus we have n > 0 and n != ESCAPE_BYTE and n <= 0xFF.
\r
299 * If we encounter a run where n == ESCAPE_BYTE, we represent this as:
\r
300 * b ESCAPE_BYTE n-1 b
\r
301 * The ESCAPE_BYTE value is chosen so as not to collide with commonly
\r
304 static public final String arrayToRLEString(byte[] a) {
\r
305 StringBuffer buffer = new StringBuffer();
\r
306 buffer.append((char) (a.length >> 16));
\r
307 buffer.append((char) a.length);
\r
308 byte runValue = a[0];
\r
310 byte[] state = new byte[2];
\r
311 for (int i=1; i<a.length; ++i) {
\r
313 if (b == runValue && runLength < 0xFF) ++runLength;
\r
315 encodeRun(buffer, runValue, runLength, state);
\r
320 encodeRun(buffer, runValue, runLength, state);
\r
322 // We must save the final byte, if there is one, by padding
\r
324 if (state[0] != 0) appendEncodedByte(buffer, (byte)0, state);
\r
326 return buffer.toString();
\r
330 * Encode a run, possibly a degenerate run (of < 4 values).
\r
331 * @param length The length of the run; must be > 0 && <= 0xFFFF.
\r
333 private static final void encodeRun(StringBuffer buffer, int value, int length) {
\r
335 for (int j=0; j<length; ++j) {
\r
336 if (value == ESCAPE) {
\r
337 appendInt(buffer, value);
\r
339 appendInt(buffer, value);
\r
343 if (length == (int) ESCAPE) {
\r
344 if (value == (int) ESCAPE) {
\r
345 appendInt(buffer, ESCAPE);
\r
347 appendInt(buffer, value);
\r
350 appendInt(buffer, ESCAPE);
\r
351 appendInt(buffer, length);
\r
352 appendInt(buffer, value); // Don't need to escape this value
\r
356 private static final void appendInt(StringBuffer buffer, int value) {
\r
357 buffer.append((char)(value >>> 16));
\r
358 buffer.append((char)(value & 0xFFFF));
\r
362 * Encode a run, possibly a degenerate run (of < 4 values).
\r
363 * @param length The length of the run; must be > 0 && <= 0xFFFF.
\r
365 private static final void encodeRun(StringBuffer buffer, short value, int length) {
\r
367 for (int j=0; j<length; ++j) {
\r
368 if (value == (int) ESCAPE) buffer.append(ESCAPE);
\r
369 buffer.append((char) value);
\r
373 if (length == (int) ESCAPE) {
\r
374 if (value == (int) ESCAPE) buffer.append(ESCAPE);
\r
375 buffer.append((char) value);
\r
378 buffer.append(ESCAPE);
\r
379 buffer.append((char) length);
\r
380 buffer.append((char) value); // Don't need to escape this value
\r
385 * Encode a run, possibly a degenerate run (of < 4 values).
\r
386 * @param length The length of the run; must be > 0 && <= 0xFF.
\r
388 private static final void encodeRun(StringBuffer buffer, byte value, int length,
\r
391 for (int j=0; j<length; ++j) {
\r
392 if (value == ESCAPE_BYTE) appendEncodedByte(buffer, ESCAPE_BYTE, state);
\r
393 appendEncodedByte(buffer, value, state);
\r
397 if (length == ESCAPE_BYTE) {
\r
398 if (value == ESCAPE_BYTE) appendEncodedByte(buffer, ESCAPE_BYTE, state);
\r
399 appendEncodedByte(buffer, value, state);
\r
402 appendEncodedByte(buffer, ESCAPE_BYTE, state);
\r
403 appendEncodedByte(buffer, (byte)length, state);
\r
404 appendEncodedByte(buffer, value, state); // Don't need to escape this value
\r
409 * Append a byte to the given StringBuffer, packing two bytes into each
\r
410 * character. The state parameter maintains intermediary data between
\r
412 * @param state A two-element array, with state[0] == 0 if this is the
\r
413 * first byte of a pair, or state[0] != 0 if this is the second byte
\r
414 * of a pair, in which case state[1] is the first byte.
\r
416 private static final void appendEncodedByte(StringBuffer buffer, byte value,
\r
418 if (state[0] != 0) {
\r
419 char c = (char) ((state[1] << 8) | (((int) value) & 0xFF));
\r
430 * Construct an array of ints from a run-length encoded string.
\r
432 static public final int[] RLEStringToIntArray(String s) {
\r
433 int length = getInt(s, 0);
\r
434 int[] array = new int[length];
\r
437 int maxI = s.length() / 2;
\r
438 while (ai < length && i < maxI) {
\r
439 int c = getInt(s, i++);
\r
442 c = getInt(s, i++);
\r
447 int runValue = getInt(s, i++);
\r
448 for (int j=0; j<runLength; ++j) {
\r
449 array[ai++] = runValue;
\r
458 if (ai != length || i != maxI) {
\r
459 throw new IllegalStateException("Bad run-length encoded int array");
\r
464 static final int getInt(String s, int i) {
\r
465 return (((int) s.charAt(2*i)) << 16) | (int) s.charAt(2*i+1);
\r
469 * Construct an array of shorts from a run-length encoded string.
\r
471 static public final short[] RLEStringToShortArray(String s) {
\r
472 int length = (((int) s.charAt(0)) << 16) | ((int) s.charAt(1));
\r
473 short[] array = new short[length];
\r
475 for (int i=2; i<s.length(); ++i) {
\r
476 char c = s.charAt(i);
\r
480 array[ai++] = (short) c;
\r
482 int runLength = (int) c;
\r
483 short runValue = (short) s.charAt(++i);
\r
484 for (int j=0; j<runLength; ++j) array[ai++] = runValue;
\r
488 array[ai++] = (short) c;
\r
493 throw new IllegalStateException("Bad run-length encoded short array");
\r
499 * Construct an array of chars from a run-length encoded string.
\r
501 static public final char[] RLEStringToCharArray(String s) {
\r
502 int length = (((int) s.charAt(0)) << 16) | ((int) s.charAt(1));
\r
503 char[] array = new char[length];
\r
505 for (int i=2; i<s.length(); ++i) {
\r
506 char c = s.charAt(i);
\r
512 int runLength = (int) c;
\r
513 char runValue = s.charAt(++i);
\r
514 for (int j=0; j<runLength; ++j) array[ai++] = runValue;
\r
523 throw new IllegalStateException("Bad run-length encoded short array");
\r
529 * Construct an array of bytes from a run-length encoded string.
\r
531 static public final byte[] RLEStringToByteArray(String s) {
\r
532 int length = (((int) s.charAt(0)) << 16) | ((int) s.charAt(1));
\r
533 byte[] array = new byte[length];
\r
534 boolean nextChar = true;
\r
539 for (int ai=0; ai<length; ) {
\r
540 // This part of the loop places the next byte into the local
\r
541 // variable 'b' each time through the loop. It keeps the
\r
542 // current character in 'c' and uses the boolean 'nextChar'
\r
543 // to see if we've taken both bytes out of 'c' yet.
\r
547 b = (byte) (c >> 8);
\r
551 b = (byte) (c & 0xFF);
\r
555 // This part of the loop is a tiny state machine which handles
\r
556 // the parsing of the run-length encoding. This would be simpler
\r
557 // if we could look ahead, but we can't, so we use 'node' to
\r
558 // move between three nodes in the state machine.
\r
561 // Normal idle node
\r
562 if (b == ESCAPE_BYTE) {
\r
570 // We have seen one ESCAPE_BYTE; we expect either a second
\r
571 // one, or a run length and value.
\r
572 if (b == ESCAPE_BYTE) {
\r
573 array[ai++] = ESCAPE_BYTE;
\r
578 // Interpret signed byte as unsigned
\r
579 if (runLength < 0) runLength += 0x100;
\r
584 // We have seen an ESCAPE_BYTE and length byte. We interpret
\r
585 // the next byte as the value to be repeated.
\r
586 for (int j=0; j<runLength; ++j) array[ai++] = b;
\r
593 throw new IllegalStateException("Bad run-length encoded byte array");
\r
595 if (i != s.length())
\r
596 throw new IllegalStateException("Excess data in RLE byte array string");
\r
601 static public String LINE_SEPARATOR = System.getProperty("line.separator");
\r
604 * Format a String for representation in a source file. This includes
\r
605 * breaking it into lines and escaping characters using octal notation
\r
606 * when necessary (control characters and double quotes).
\r
608 static public final String formatForSource(String s) {
\r
609 StringBuffer buffer = new StringBuffer();
\r
610 for (int i=0; i<s.length();) {
\r
611 if (i > 0) buffer.append('+').append(LINE_SEPARATOR);
\r
612 buffer.append(" \"");
\r
614 while (i<s.length() && count<80) {
\r
615 char c = s.charAt(i++);
\r
616 if (c < '\u0020' || c == '"' || c == '\\') {
\r
618 buffer.append("\\n");
\r
620 } else if (c == '\t') {
\r
621 buffer.append("\\t");
\r
623 } else if (c == '\r') {
\r
624 buffer.append("\\r");
\r
627 // Represent control characters, backslash and double quote
\r
628 // using octal notation; otherwise the string we form
\r
629 // won't compile, since Unicode escape sequences are
\r
630 // processed before tokenization.
\r
631 buffer.append('\\');
\r
632 buffer.append(HEX_DIGIT[(c & 0700) >> 6]); // HEX_DIGIT works for octal
\r
633 buffer.append(HEX_DIGIT[(c & 0070) >> 3]);
\r
634 buffer.append(HEX_DIGIT[(c & 0007)]);
\r
638 else if (c <= '\u007E') {
\r
643 buffer.append("\\u");
\r
644 buffer.append(HEX_DIGIT[(c & 0xF000) >> 12]);
\r
645 buffer.append(HEX_DIGIT[(c & 0x0F00) >> 8]);
\r
646 buffer.append(HEX_DIGIT[(c & 0x00F0) >> 4]);
\r
647 buffer.append(HEX_DIGIT[(c & 0x000F)]);
\r
651 buffer.append('"');
\r
653 return buffer.toString();
\r
656 static final char[] HEX_DIGIT = {'0','1','2','3','4','5','6','7',
\r
657 '8','9','A','B','C','D','E','F'};
\r
660 * Format a String for representation in a source file. Like
\r
661 * formatForSource but does not do line breaking.
\r
663 static public final String format1ForSource(String s) {
\r
664 StringBuffer buffer = new StringBuffer();
\r
665 buffer.append("\"");
\r
666 for (int i=0; i<s.length();) {
\r
667 char c = s.charAt(i++);
\r
668 if (c < '\u0020' || c == '"' || c == '\\') {
\r
670 buffer.append("\\n");
\r
671 } else if (c == '\t') {
\r
672 buffer.append("\\t");
\r
673 } else if (c == '\r') {
\r
674 buffer.append("\\r");
\r
676 // Represent control characters, backslash and double quote
\r
677 // using octal notation; otherwise the string we form
\r
678 // won't compile, since Unicode escape sequences are
\r
679 // processed before tokenization.
\r
680 buffer.append('\\');
\r
681 buffer.append(HEX_DIGIT[(c & 0700) >> 6]); // HEX_DIGIT works for octal
\r
682 buffer.append(HEX_DIGIT[(c & 0070) >> 3]);
\r
683 buffer.append(HEX_DIGIT[(c & 0007)]);
\r
686 else if (c <= '\u007E') {
\r
690 buffer.append("\\u");
\r
691 buffer.append(HEX_DIGIT[(c & 0xF000) >> 12]);
\r
692 buffer.append(HEX_DIGIT[(c & 0x0F00) >> 8]);
\r
693 buffer.append(HEX_DIGIT[(c & 0x00F0) >> 4]);
\r
694 buffer.append(HEX_DIGIT[(c & 0x000F)]);
\r
697 buffer.append('"');
\r
698 return buffer.toString();
\r
702 * Convert characters outside the range U+0020 to U+007F to
\r
703 * Unicode escapes, and convert backslash to a double backslash.
\r
705 public static final String escape(String s) {
\r
706 StringBuffer buf = new StringBuffer();
\r
707 for (int i=0; i<s.length(); ) {
\r
708 int c = UTF16.charAt(s, i);
\r
709 i += UTF16.getCharCount(c);
\r
710 if (c >= ' ' && c <= 0x007F) {
\r
712 buf.append("\\\\"); // That is, "\\"
\r
714 buf.append((char)c);
\r
717 boolean four = c <= 0xFFFF;
\r
718 buf.append(four ? "\\u" : "\\U");
\r
719 hex(c, four ? 4 : 8, buf);
\r
722 return buf.toString();
\r
725 /* This map must be in ASCENDING ORDER OF THE ESCAPE CODE */
\r
726 static private final char[] UNESCAPE_MAP = {
\r
742 * Convert an escape to a 32-bit code point value. We attempt
\r
743 * to parallel the icu4c unescapeAt() function.
\r
744 * @param offset16 an array containing offset to the character
\r
745 * <em>after</em> the backslash. Upon return offset16[0] will
\r
746 * be updated to point after the escape sequence.
\r
747 * @return character value from 0 to 10FFFF, or -1 on error.
\r
749 public static int unescapeAt(String s, int[] offset16) {
\r
755 int bitsPerDigit = 4;
\r
758 boolean braces = false;
\r
760 /* Check that offset is in range */
\r
761 int offset = offset16[0];
\r
762 int length = s.length();
\r
763 if (offset < 0 || offset >= length) {
\r
767 /* Fetch first UChar after '\\' */
\r
768 c = UTF16.charAt(s, offset);
\r
769 offset += UTF16.getCharCount(c);
\r
771 /* Convert hexadecimal and octal escapes */
\r
774 minDig = maxDig = 4;
\r
777 minDig = maxDig = 8;
\r
781 if (offset < length && UTF16.charAt(s, offset) == 0x7B /*{*/) {
\r
790 dig = UCharacter.digit(c, 8);
\r
794 n = 1; /* Already have first octal digit */
\r
801 while (offset < length && n < maxDig) {
\r
802 c = UTF16.charAt(s, offset);
\r
803 dig = UCharacter.digit(c, (bitsPerDigit == 3) ? 8 : 16);
\r
807 result = (result << bitsPerDigit) | dig;
\r
808 offset += UTF16.getCharCount(c);
\r
815 if (c != 0x7D /*}*/) {
\r
820 if (result < 0 || result >= 0x110000) {
\r
823 // If an escape sequence specifies a lead surrogate, see
\r
824 // if there is a trail surrogate after it, either as an
\r
825 // escape or as a literal. If so, join them up into a
\r
827 if (offset < length &&
\r
828 UTF16.isLeadSurrogate((char) result)) {
\r
829 int ahead = offset+1;
\r
830 c = s.charAt(offset); // [sic] get 16-bit code unit
\r
831 if (c == '\\' && ahead < length) {
\r
832 int o[] = new int[] { ahead };
\r
833 c = unescapeAt(s, o);
\r
836 if (UTF16.isTrailSurrogate((char) c)) {
\r
838 result = UCharacterProperty.getRawSupplementary(
\r
839 (char) result, (char) c);
\r
842 offset16[0] = offset;
\r
846 /* Convert C-style escapes in table */
\r
847 for (i=0; i<UNESCAPE_MAP.length; i+=2) {
\r
848 if (c == UNESCAPE_MAP[i]) {
\r
849 offset16[0] = offset;
\r
850 return UNESCAPE_MAP[i+1];
\r
851 } else if (c < UNESCAPE_MAP[i]) {
\r
856 /* Map \cX to control-X: X & 0x1F */
\r
857 if (c == 'c' && offset < length) {
\r
858 c = UTF16.charAt(s, offset);
\r
859 offset16[0] = offset + UTF16.getCharCount(c);
\r
863 /* If no special forms are recognized, then consider
\r
864 * the backslash to generically escape the next character. */
\r
865 offset16[0] = offset;
\r
870 * Convert all escapes in a given string using unescapeAt().
\r
871 * @exception IllegalArgumentException if an invalid escape is
\r
874 public static String unescape(String s) {
\r
875 StringBuffer buf = new StringBuffer();
\r
876 int[] pos = new int[1];
\r
877 for (int i=0; i<s.length(); ) {
\r
878 char c = s.charAt(i++);
\r
881 int e = unescapeAt(s, pos);
\r
883 throw new IllegalArgumentException("Invalid escape sequence " +
\r
884 s.substring(i-1, Math.min(i+8, s.length())));
\r
886 UTF16.append(buf, e);
\r
892 return buf.toString();
\r
896 * Convert all escapes in a given string using unescapeAt().
\r
897 * Leave invalid escape sequences unchanged.
\r
899 public static String unescapeLeniently(String s) {
\r
900 StringBuffer buf = new StringBuffer();
\r
901 int[] pos = new int[1];
\r
902 for (int i=0; i<s.length(); ) {
\r
903 char c = s.charAt(i++);
\r
906 int e = unescapeAt(s, pos);
\r
910 UTF16.append(buf, e);
\r
917 return buf.toString();
\r
921 * Convert a char to 4 hex uppercase digits. E.g., hex('a') =>
\r
924 public static String hex(char ch) {
\r
925 StringBuffer temp = new StringBuffer();
\r
926 return hex(ch, temp).toString();
\r
930 * Convert a string to comma-separated groups of 4 hex uppercase
\r
931 * digits. E.g., hex('ab') => "0041,0042".
\r
933 public static String hex(String s) {
\r
934 StringBuffer temp = new StringBuffer();
\r
935 return hex(s, temp).toString();
\r
939 * Convert a string to comma-separated groups of 4 hex uppercase
\r
940 * digits. E.g., hex('ab') => "0041,0042".
\r
942 public static String hex(StringBuffer s) {
\r
943 return hex(s.toString());
\r
947 * Convert a char to 4 hex uppercase digits. E.g., hex('a') =>
\r
948 * "0041". Append the output to the given StringBuffer.
\r
950 public static StringBuffer hex(char ch, StringBuffer output) {
\r
951 return appendNumber(output, ch, 16, 4);
\r
955 * Convert a integer to size width hex uppercase digits.
\r
956 * E.g., hex('a', 4, str) => "0041".
\r
957 * Append the output to the given StringBuffer.
\r
958 * If width is too small to fit, nothing will be appended to output.
\r
960 public static StringBuffer hex(int ch, int width, StringBuffer output) {
\r
961 return appendNumber(output, ch, 16, width);
\r
965 * Convert a integer to size width (minimum) hex uppercase digits.
\r
966 * E.g., hex('a', 4, str) => "0041". If the integer requires more
\r
967 * than width digits, more will be used.
\r
969 public static String hex(int ch, int width) {
\r
970 StringBuffer buf = new StringBuffer();
\r
971 return appendNumber(buf, ch, 16, width).toString();
\r
974 * Supplies a zero-padded hex representation of an integer (without 0x)
\r
976 static public String hex(long i, int places) {
\r
977 if (i == Long.MIN_VALUE) return "-8000000000000000";
\r
978 boolean negative = i < 0;
\r
982 String result = Long.toString(i, 16).toUpperCase();
\r
983 if (result.length() < places) {
\r
984 result = "0000000000000000".substring(result.length(),places) + result;
\r
987 return '-' + result;
\r
992 public static String hex(long ch) {
\r
997 * Convert a string to comma-separated groups of 4 hex uppercase
\r
998 * digits. E.g., hex("AB") => "0041,0042". Append the output
\r
999 * to the given StringBuffer.
\r
1001 public static StringBuffer hex(String s, StringBuffer result) {
\r
1002 for (int i = 0; i < s.length(); ++i) {
\r
1003 if (i != 0) result.append(',');
\r
1004 hex(s.charAt(i), result);
\r
1010 * Split a string into pieces based on the given divider character
\r
1011 * @param s the string to split
\r
1012 * @param divider the character on which to split. Occurrences of
\r
1013 * this character are not included in the output
\r
1014 * @param output an array to receive the substrings between
\r
1015 * instances of divider. It must be large enough on entry to
\r
1016 * accommodate all output. Adjacent instances of the divider
\r
1017 * character will place empty strings into output. Before
\r
1018 * returning, output is padded out with empty strings.
\r
1020 public static void split(String s, char divider, String[] output) {
\r
1024 for (i = 0; i < s.length(); ++i) {
\r
1025 if (s.charAt(i) == divider) {
\r
1026 output[current++] = s.substring(last,i);
\r
1030 output[current++] = s.substring(last,i);
\r
1031 while (current < output.length) {
\r
1032 output[current++] = "";
\r
1037 * Split a string into pieces based on the given divider character
\r
1038 * @param s the string to split
\r
1039 * @param divider the character on which to split. Occurrences of
\r
1040 * this character are not included in the output
\r
1041 * @return an array containing the substrings between
\r
1042 * instances of divider. Adjacent instances of the divider
\r
1043 * character will place empty strings into output.
\r
1045 public static String[] split(String s, char divider) {
\r
1048 ArrayList output = new ArrayList();
\r
1049 for (i = 0; i < s.length(); ++i) {
\r
1050 if (s.charAt(i) == divider) {
\r
1051 output.add(s.substring(last,i));
\r
1055 output.add( s.substring(last,i));
\r
1056 return (String[]) output.toArray(new String[output.size()]);
\r
1060 * Look up a given string in a string array. Returns the index at
\r
1061 * which the first occurrence of the string was found in the
\r
1062 * array, or -1 if it was not found.
\r
1063 * @param source the string to search for
\r
1064 * @param target the array of zero or more strings in which to
\r
1066 * @return the index of target at which source first occurs, or -1
\r
1069 public static int lookup(String source, String[] target) {
\r
1070 for (int i = 0; i < target.length; ++i) {
\r
1071 if (source.equals(target[i])) return i;
\r
1077 * Skip over a sequence of zero or more white space characters
\r
1078 * at pos. Return the index of the first non-white-space character
\r
1079 * at or after pos, or str.length(), if there is none.
\r
1081 public static int skipWhitespace(String str, int pos) {
\r
1082 while (pos < str.length()) {
\r
1083 int c = UTF16.charAt(str, pos);
\r
1084 if (!UCharacterProperty.isRuleWhiteSpace(c)) {
\r
1087 pos += UTF16.getCharCount(c);
\r
1093 * Skip over a sequence of zero or more white space characters
\r
1094 * at pos[0], advancing it.
\r
1096 public static void skipWhitespace(String str, int[] pos) {
\r
1097 pos[0] = skipWhitespace(str, pos[0]);
\r
1101 * Remove all rule white space from a string.
\r
1103 public static String deleteRuleWhiteSpace(String str) {
\r
1104 StringBuffer buf = new StringBuffer();
\r
1105 for (int i=0; i<str.length(); ) {
\r
1106 int ch = UTF16.charAt(str, i);
\r
1107 i += UTF16.getCharCount(ch);
\r
1108 if (UCharacterProperty.isRuleWhiteSpace(ch)) {
\r
1111 UTF16.append(buf, ch);
\r
1113 return buf.toString();
\r
1117 * Parse a single non-whitespace character 'ch', optionally
\r
1118 * preceded by whitespace.
\r
1119 * @param id the string to be parsed
\r
1120 * @param pos INPUT-OUTPUT parameter. On input, pos[0] is the
\r
1121 * offset of the first character to be parsed. On output, pos[0]
\r
1122 * is the index after the last parsed character. If the parse
\r
1123 * fails, pos[0] will be unchanged.
\r
1124 * @param ch the non-whitespace character to be parsed.
\r
1125 * @return true if 'ch' is seen preceded by zero or more
\r
1126 * whitespace characters.
\r
1128 public static boolean parseChar(String id, int[] pos, char ch) {
\r
1129 int start = pos[0];
\r
1130 skipWhitespace(id, pos);
\r
1131 if (pos[0] == id.length() ||
\r
1132 id.charAt(pos[0]) != ch) {
\r
1141 * Parse a pattern string starting at offset pos. Keywords are
\r
1142 * matched case-insensitively. Spaces may be skipped and may be
\r
1143 * optional or required. Integer values may be parsed, and if
\r
1144 * they are, they will be returned in the given array. If
\r
1145 * successful, the offset of the next non-space character is
\r
1146 * returned. On failure, -1 is returned.
\r
1147 * @param pattern must only contain lowercase characters, which
\r
1148 * will match their uppercase equivalents as well. A space
\r
1149 * character matches one or more required spaces. A '~' character
\r
1150 * matches zero or more optional spaces. A '#' character matches
\r
1151 * an integer and stores it in parsedInts, which the caller must
\r
1152 * ensure has enough capacity.
\r
1153 * @param parsedInts array to receive parsed integers. Caller
\r
1154 * must ensure that parsedInts.length is >= the number of '#'
\r
1155 * signs in 'pattern'.
\r
1156 * @return the position after the last character parsed, or -1 if
\r
1157 * the parse failed
\r
1159 public static int parsePattern(String rule, int pos, int limit,
\r
1160 String pattern, int[] parsedInts) {
\r
1161 // TODO Update this to handle surrogates
\r
1162 int[] p = new int[1];
\r
1163 int intCount = 0; // number of integers parsed
\r
1164 for (int i=0; i<pattern.length(); ++i) {
\r
1165 char cpat = pattern.charAt(i);
\r
1169 if (pos >= limit) {
\r
1172 c = rule.charAt(pos++);
\r
1173 if (!UCharacterProperty.isRuleWhiteSpace(c)) {
\r
1176 // FALL THROUGH to skipWhitespace
\r
1178 pos = skipWhitespace(rule, pos);
\r
1182 parsedInts[intCount++] = parseInteger(rule, p, limit);
\r
1183 if (p[0] == pos) {
\r
1184 // Syntax error; failed to parse integer
\r
1190 if (pos >= limit) {
\r
1193 c = (char) UCharacter.toLowerCase(rule.charAt(pos++));
\r
1204 * Parse a pattern string within the given Replaceable and a parsing
\r
1205 * pattern. Characters are matched literally and case-sensitively
\r
1206 * except for the following special characters:
\r
1208 * ~ zero or more UCharacterProperty.isRuleWhiteSpace chars
\r
1210 * If end of pattern is reached with all matches along the way,
\r
1211 * pos is advanced to the first unparsed index and returned.
\r
1212 * Otherwise -1 is returned.
\r
1213 * @param pat pattern that controls parsing
\r
1214 * @param text text to be parsed, starting at index
\r
1215 * @param index offset to first character to parse
\r
1216 * @param limit offset after last character to parse
\r
1217 * @return index after last parsed character, or -1 on parse failure.
\r
1219 public static int parsePattern(String pat,
\r
1225 // empty pattern matches immediately
\r
1226 if (ipat == pat.length()) {
\r
1230 int cpat = UTF16.charAt(pat, ipat);
\r
1232 while (index < limit) {
\r
1233 int c = text.char32At(index);
\r
1236 if (cpat == '~') {
\r
1237 if (UCharacterProperty.isRuleWhiteSpace(c)) {
\r
1238 index += UTF16.getCharCount(c);
\r
1241 if (++ipat == pat.length()) {
\r
1242 return index; // success; c unparsed
\r
1244 // fall thru; process c again with next cpat
\r
1249 else if (c == cpat) {
\r
1250 int n = UTF16.getCharCount(c);
\r
1253 if (ipat == pat.length()) {
\r
1254 return index; // success; c parsed
\r
1256 // fall thru; get next cpat
\r
1259 // match failure of literal
\r
1264 cpat = UTF16.charAt(pat, ipat);
\r
1267 return -1; // text ended before end of pat
\r
1271 * Parse an integer at pos, either of the form \d+ or of the form
\r
1272 * 0x[0-9A-Fa-f]+ or 0[0-7]+, that is, in standard decimal, hex,
\r
1273 * or octal format.
\r
1274 * @param pos INPUT-OUTPUT parameter. On input, the first
\r
1275 * character to parse. On output, the character after the last
\r
1276 * parsed character.
\r
1278 public static int parseInteger(String rule, int[] pos, int limit) {
\r
1284 if (rule.regionMatches(true, p, "0x", 0, 2)) {
\r
1287 } else if (p < limit && rule.charAt(p) == '0') {
\r
1293 while (p < limit) {
\r
1294 int d = UCharacter.digit(rule.charAt(p++), radix);
\r
1300 int v = (value * radix) + d;
\r
1302 // If there are too many input digits, at some point
\r
1303 // the value will go negative, e.g., if we have seen
\r
1304 // "0x8000000" already and there is another '0', when
\r
1305 // we parse the next 0 the value will go negative.
\r
1317 * Parse a Unicode identifier from the given string at the given
\r
1318 * position. Return the identifier, or null if there is no
\r
1320 * @param str the string to parse
\r
1321 * @param pos INPUT-OUPUT parameter. On INPUT, pos[0] is the
\r
1322 * first character to examine. It must be less than str.length(),
\r
1323 * and it must not point to a whitespace character. That is, must
\r
1324 * have pos[0] < str.length() and
\r
1325 * !UCharacterProperty.isRuleWhiteSpace(UTF16.charAt(str, pos[0])). On
\r
1326 * OUTPUT, the position after the last parsed character.
\r
1327 * @return the Unicode identifier, or null if there is no valid
\r
1328 * identifier at pos[0].
\r
1330 public static String parseUnicodeIdentifier(String str, int[] pos) {
\r
1331 // assert(pos[0] < str.length());
\r
1332 // assert(!UCharacterProperty.isRuleWhiteSpace(UTF16.charAt(str, pos[0])));
\r
1333 StringBuffer buf = new StringBuffer();
\r
1335 while (p < str.length()) {
\r
1336 int ch = UTF16.charAt(str, p);
\r
1337 if (buf.length() == 0) {
\r
1338 if (UCharacter.isUnicodeIdentifierStart(ch)) {
\r
1339 UTF16.append(buf, ch);
\r
1344 if (UCharacter.isUnicodeIdentifierPart(ch)) {
\r
1345 UTF16.append(buf, ch);
\r
1350 p += UTF16.getCharCount(ch);
\r
1353 return buf.toString();
\r
1357 * Trim whitespace from ends of a StringBuffer.
\r
1359 public static StringBuffer trim(StringBuffer b) {
\r
1360 // TODO update to handle surrogates
\r
1362 for (i=0; i<b.length() && UCharacter.isWhitespace(b.charAt(i)); ++i) {}
\r
1364 for (i=b.length()-1; i>=0 && UCharacter.isWhitespace(b.charAt(i)); --i) {}
\r
1365 return b.delete(i+1, b.length());
\r
/**
 * Digit characters for radices up to 36, indexed by digit value:
 * '0'-'9' followed by uppercase 'A'-'Z'.
 */
static final char DIGITS[] = {
    '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
    'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J',
    'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T',
    'U', 'V', 'W', 'X', 'Y', 'Z'
};

/**
 * Append a number to the given StringBuffer in the radix 10
 * generating at least one digit.
 * @param result the digits of the number are appended here
 * @param n the number to be converted to digits; may be negative.
 * If negative, a '-' is prepended to the digits.
 * @return a reference to result
 */
public static StringBuffer appendNumber(StringBuffer result, int n) {
    return appendNumber(result, n, 10, 1);
}

/**
 * Append the digits of a non-negative magnitude to the given
 * <code>StringBuffer</code> in the given radix.  This is
 * done recursively since it is easiest to generate the low-
 * order digit first, but it must be appended last.
 *
 * @param result is the <code>StringBuffer</code> to append to
 * @param n is the magnitude, read as an unsigned 32-bit value so that
 * the result of negating Integer.MIN_VALUE (which overflows back to
 * Integer.MIN_VALUE) is still treated as 2^31
 * @param radix is the radix, from 2 to 36 inclusive
 * @param minDigits is the minimum number of digits to append.
 */
private static void recursiveAppendNumber(StringBuffer result, int n,
                                          int radix, int minDigits)
{
    // Widen without sign extension; for ordinary non-negative n this is just n.
    long magnitude = n & 0xFFFFFFFFL;
    int digit = (int) (magnitude % radix);

    if (magnitude >= radix || minDigits > 1) {
        // radix >= 2, so the quotient always fits in a non-negative int.
        recursiveAppendNumber(result, (int) (magnitude / radix), radix, minDigits - 1);
    }

    result.append(DIGITS[digit]);
}

/**
 * Append a number to the given StringBuffer in the given radix.
 * Standard digits '0'-'9' are used and letters 'A'-'Z' for
 * radices 11 through 36.
 * @param result the digits of the number are appended here
 * @param n the number to be converted to digits; may be negative,
 * including Integer.MIN_VALUE.
 * If negative, a '-' is prepended to the digits.
 * @param radix a radix from 2 to 36 inclusive.
 * @param minDigits the minimum number of digits, not including
 * any '-', to produce.  Values less than 2 have no effect.  One
 * digit is always emitted regardless of this parameter.
 * @return a reference to result
 * @throws IllegalArgumentException if radix is outside 2..36
 */
public static StringBuffer appendNumber(StringBuffer result, int n,
                                        int radix, int minDigits)
    throws IllegalArgumentException
{
    if (radix < 2 || radix > 36) {
        throw new IllegalArgumentException("Illegal radix " + radix);
    }

    int abs = n;

    if (n < 0) {
        // -Integer.MIN_VALUE overflows to Integer.MIN_VALUE; that bit
        // pattern is still the right magnitude when read as unsigned,
        // which is how recursiveAppendNumber interprets it.
        abs = -n;
        result.append("-");
    }

    recursiveAppendNumber(result, abs, radix, minDigits);

    return result;
}
\r
1441 * Parse an unsigned 31-bit integer at the given offset. Use
\r
1442 * UCharacter.digit() to parse individual characters into digits.
\r
1443 * @param text the text to be parsed
\r
1444 * @param pos INPUT-OUTPUT parameter. On entry, pos[0] is the
\r
1445 * offset within text at which to start parsing; it should point
\r
1446 * to a valid digit. On exit, pos[0] is the offset after the last
\r
1447 * parsed character. If the parse failed, it will be unchanged on
\r
1448 * exit. Must be >= 0 on entry.
\r
1449 * @param radix the radix in which to parse; must be >= 2 and <=
\r
1451 * @return a non-negative parsed number, or -1 upon parse failure.
\r
1452 * Parse fails if there are no digits, that is, if pos[0] does not
\r
1453 * point to a valid digit on entry, or if the number to be parsed
\r
1454 * does not fit into a 31-bit unsigned integer.
\r
1456 public static int parseNumber(String text, int[] pos, int radix) {
\r
1457 // assert(pos[0] >= 0);
\r
1458 // assert(radix >= 2);
\r
1459 // assert(radix <= 36);
\r
1462 while (p < text.length()) {
\r
1463 int ch = UTF16.charAt(text, p);
\r
1464 int d = UCharacter.digit(ch, radix);
\r
1469 // ASSUME that when a 32-bit integer overflows it becomes
\r
1470 // negative. E.g., 214748364 * 10 + 8 => negative value.
\r
1476 if (p == pos[0]) {
\r
1484 * Return true if the character is NOT printable ASCII. The tab,
\r
1485 * newline and linefeed characters are considered unprintable.
\r
1487 public static boolean isUnprintable(int c) {
\r
1488 return !(c >= 0x20 && c <= 0x7E);
\r
1492 * Escape unprintable characters using <backslash>uxxxx notation
\r
1493 * for U+0000 to U+FFFF and <backslash>Uxxxxxxxx for U+10000 and
\r
1494 * above. If the character is printable ASCII, then do nothing
\r
1495 * and return FALSE. Otherwise, append the escaped notation and
\r
1498 public static boolean escapeUnprintable(StringBuffer result, int c) {
\r
1499 if (isUnprintable(c)) {
\r
1500 result.append('\\');
\r
1501 if ((c & ~0xFFFF) != 0) {
\r
1502 result.append('U');
\r
1503 result.append(DIGITS[0xF&(c>>28)]);
\r
1504 result.append(DIGITS[0xF&(c>>24)]);
\r
1505 result.append(DIGITS[0xF&(c>>20)]);
\r
1506 result.append(DIGITS[0xF&(c>>16)]);
\r
1508 result.append('u');
\r
1510 result.append(DIGITS[0xF&(c>>12)]);
\r
1511 result.append(DIGITS[0xF&(c>>8)]);
\r
1512 result.append(DIGITS[0xF&(c>>4)]);
\r
1513 result.append(DIGITS[0xF&c]);
\r
1520 * Returns the index of the first character in a set, ignoring quoted text.
\r
1521 * For example, in the string "abc'hide'h", the 'h' in "hide" will not be
\r
1522 * found by a search for "h". Unlike String.indexOf(), this method searches
\r
1523 * not for a single character, but for any character of the string
\r
1524 * <code>setOfChars</code>.
\r
1525 * @param text text to be searched
\r
1526 * @param start the beginning index, inclusive; <code>0 <= start
\r
1527 * <= limit</code>.
\r
1528 * @param limit the ending index, exclusive; <code>start <= limit
\r
1529 * <= text.length()</code>.
\r
1530 * @param setOfChars string with one or more distinct characters
\r
1531 * @return Offset of the first character in <code>setOfChars</code>
\r
1532 * found, or -1 if not found.
\r
1533 * @see String#indexOf
\r
1535 public static int quotedIndexOf(String text, int start, int limit,
\r
1536 String setOfChars) {
\r
1537 for (int i=start; i<limit; ++i) {
\r
1538 char c = text.charAt(i);
\r
1539 if (c == BACKSLASH) {
\r
1541 } else if (c == APOSTROPHE) {
\r
1542 while (++i < limit
\r
1543 && text.charAt(i) != APOSTROPHE) {}
\r
1544 } else if (setOfChars.indexOf(c) >= 0) {
\r
1552 * Similar to StringBuffer.getChars, version 1.3.
\r
1553 * Since JDK 1.2 implements StringBuffer.getChars differently, this method
\r
1554 * is here to provide consistent results.
\r
1555 * To be removed after JDK 1.2 ceased to be the reference platform.
\r
1556 * @param src source string buffer
\r
1557 * @param srcBegin offset to the start of the src to retrieve from
\r
1558 * @param srcEnd offset to the end of the src to retrieve from
\r
1559 * @param dst char array to store the retrieved chars
\r
1560 * @param dstBegin offset to the start of the destination char array to
\r
1561 * store the retrieved chars
\r
1563 public static void getChars(StringBuffer src, int srcBegin, int srcEnd,
\r
1564 char dst[], int dstBegin)
\r
1566 if (srcBegin == srcEnd) {
\r
1569 src.getChars(srcBegin, srcEnd, dst, dstBegin);
\r
1573 * Append a character to a rule that is being built up. To flush
\r
1574 * the quoteBuf to rule, make one final call with isLiteral == true.
\r
1575 * If there is no final character, pass in (int)-1 as c.
\r
1576 * @param rule the string to append the character to
\r
1577 * @param c the character to append, or (int)-1 if none.
\r
1578 * @param isLiteral if true, then the given character should not be
\r
1579 * quoted or escaped. Usually this means it is a syntactic element
\r
1581 * @param escapeUnprintable if true, then unprintable characters
\r
1582 * should be escaped using escapeUnprintable(). These escapes will
\r
1583 * appear outside of quotes.
\r
1584 * @param quoteBuf a buffer which is used to build up quoted
\r
1585 * substrings. The caller should initially supply an empty buffer,
\r
1586 * and thereafter should not modify the buffer. The buffer should be
\r
1587 * cleared out by, at the end, calling this method with a literal
\r
1588 * character (which may be -1).
\r
1590 public static void appendToRule(StringBuffer rule,
\r
1592 boolean isLiteral,
\r
1593 boolean escapeUnprintable,
\r
1594 StringBuffer quoteBuf) {
\r
1595 // If we are escaping unprintables, then escape them outside
\r
1596 // quotes. \\u and \\U are not recognized within quotes. The same
\r
1597 // logic applies to literals, but literals are never escaped.
\r
1599 (escapeUnprintable && Utility.isUnprintable(c))) {
\r
1600 if (quoteBuf.length() > 0) {
\r
1601 // We prefer backslash APOSTROPHE to double APOSTROPHE
\r
1602 // (more readable, less similar to ") so if there are
\r
1603 // double APOSTROPHEs at the ends, we pull them outside
\r
1606 // If the first thing in the quoteBuf is APOSTROPHE
\r
1607 // (doubled) then pull it out.
\r
1608 while (quoteBuf.length() >= 2 &&
\r
1609 quoteBuf.charAt(0) == APOSTROPHE &&
\r
1610 quoteBuf.charAt(1) == APOSTROPHE) {
\r
1611 rule.append(BACKSLASH).append(APOSTROPHE);
\r
1612 quoteBuf.delete(0, 2);
\r
1614 // If the last thing in the quoteBuf is APOSTROPHE
\r
1615 // (doubled) then remove and count it and add it after.
\r
1616 int trailingCount = 0;
\r
1617 while (quoteBuf.length() >= 2 &&
\r
1618 quoteBuf.charAt(quoteBuf.length()-2) == APOSTROPHE &&
\r
1619 quoteBuf.charAt(quoteBuf.length()-1) == APOSTROPHE) {
\r
1620 quoteBuf.setLength(quoteBuf.length()-2);
\r
1623 if (quoteBuf.length() > 0) {
\r
1624 rule.append(APOSTROPHE);
\r
1625 // jdk 1.3.1 does not have append(StringBuffer) yet
\r
1626 if(ICUDebug.isJDK14OrHigher){
\r
1627 rule.append(quoteBuf);
\r
1629 rule.append(quoteBuf.toString());
\r
1631 rule.append(APOSTROPHE);
\r
1632 quoteBuf.setLength(0);
\r
1634 while (trailingCount-- > 0) {
\r
1635 rule.append(BACKSLASH).append(APOSTROPHE);
\r
1639 /* Since spaces are ignored during parsing, they are
\r
1640 * emitted only for readability. We emit one here
\r
1641 * only if there isn't already one at the end of the
\r
1645 int len = rule.length();
\r
1646 if (len > 0 && rule.charAt(len-1) != ' ') {
\r
1649 } else if (!escapeUnprintable || !Utility.escapeUnprintable(rule, c)) {
\r
1650 UTF16.append(rule, c);
\r
1655 // Escape ' and '\' and don't begin a quote just for them
\r
1656 else if (quoteBuf.length() == 0 &&
\r
1657 (c == APOSTROPHE || c == BACKSLASH)) {
\r
1658 rule.append(BACKSLASH).append((char)c);
\r
1661 // Specials (printable ascii that isn't [0-9a-zA-Z]) and
\r
1662 // whitespace need quoting. Also append stuff to quotes if we are
\r
1663 // building up a quoted substring already.
\r
1664 else if (quoteBuf.length() > 0 ||
\r
1665 (c >= 0x0021 && c <= 0x007E &&
\r
1666 !((c >= 0x0030/*'0'*/ && c <= 0x0039/*'9'*/) ||
\r
1667 (c >= 0x0041/*'A'*/ && c <= 0x005A/*'Z'*/) ||
\r
1668 (c >= 0x0061/*'a'*/ && c <= 0x007A/*'z'*/))) ||
\r
1669 UCharacterProperty.isRuleWhiteSpace(c)) {
\r
1670 UTF16.append(quoteBuf, c);
\r
1671 // Double ' within a quote
\r
1672 if (c == APOSTROPHE) {
\r
1673 quoteBuf.append((char)c);
\r
1677 // Otherwise just append
\r
1679 UTF16.append(rule, c);
\r
1684 * Append the given string to the rule. Calls the single-character
\r
1685 * version of appendToRule for each character.
\r
1687 public static void appendToRule(StringBuffer rule,
\r
1689 boolean isLiteral,
\r
1690 boolean escapeUnprintable,
\r
1691 StringBuffer quoteBuf) {
\r
1692 for (int i=0; i<text.length(); ++i) {
\r
1693 // Okay to process in 16-bit code units here
\r
1694 appendToRule(rule, text.charAt(i), isLiteral, escapeUnprintable, quoteBuf);
\r
1699 * Given a matcher reference, which may be null, append its
\r
1700 * pattern as a literal to the given rule.
\r
1702 public static void appendToRule(StringBuffer rule,
\r
1703 UnicodeMatcher matcher,
\r
1704 boolean escapeUnprintable,
\r
1705 StringBuffer quoteBuf) {
\r
1706 if (matcher != null) {
\r
1707 appendToRule(rule, matcher.toPattern(escapeUnprintable),
\r
1708 true, escapeUnprintable, quoteBuf);
\r
1713 * Compares 2 unsigned integers
\r
1714 * @param source 32 bit unsigned integer
\r
1715 * @param target 32 bit unsigned integer
\r
1716 * @return 0 if equals, 1 if source is greater than target and -1
\r
1719 public static final int compareUnsigned(int source, int target)
\r
1721 source += MAGIC_UNSIGNED;
\r
1722 target += MAGIC_UNSIGNED;
\r
1723 if (source < target) {
\r
1726 else if (source > target) {
\r
1733 * Find the highest bit in a positive integer. This is done
\r
1734 * by doing a binary search through the bits.
\r
1736 * @param n is the integer
\r
1738 * @return the bit number of the highest bit, with 0 being
\r
1739 * the low order bit, or -1 if <code>n</code> is not positive
\r
1741 public static final byte highBit(int n)
\r
1749 if (n >= 1 << 16) {
\r
1754 if (n >= 1 << 8) {
\r
1759 if (n >= 1 << 4) {
\r
1764 if (n >= 1 << 2) {
\r
1769 if (n >= 1 << 1) {
\r
1777 * Utility method to take a int[] containing codepoints and return
\r
1778 * a string representation with code units.
\r
1780 public static String valueOf(int[]source){
\r
1781 // TODO: Investigate why this method is not on UTF16 class
\r
1782 StringBuffer result = new StringBuffer(source.length);
\r
1783 for(int i=0; i<source.length; i++){
\r
1784 UTF16.append(result,source[i]);
\r
1786 return result.toString();
\r
1791 * Utility to duplicate a string count times
\r
1795 public static String repeat(String s, int count) {
\r
1796 if (count <= 0) return "";
\r
1797 if (count == 1) return s;
\r
1798 StringBuffer result = new StringBuffer();
\r
1799 for (int i = 0; i < count; ++i) {
\r
1802 return result.toString();
\r
1806 // !!! 1.3 compatibility
\r
1807 public static int indexOf(StringBuffer buf, String s) {
\r
1808 //#if defined(FOUNDATION10) || defined(J2SE13)
\r
1809 //## return buf.toString().indexOf(s);
\r
1811 return buf.indexOf(s);
\r
1815 // !!! 1.3 compatibility
\r
1816 public static int lastIndexOf(StringBuffer buf, String s) {
\r
1817 //#if defined(FOUNDATION10) || defined(J2SE13)
\r
1818 //## return buf.toString().lastIndexOf(s);
\r
1820 return buf.lastIndexOf(s);
\r
1824 // !!! 1.3 compatibility
\r
1825 public static int indexOf(StringBuffer buf, String s, int i) {
\r
1826 //#if defined(FOUNDATION10) || defined(J2SE13)
\r
1827 //## return buf.toString().indexOf(s, i);
\r
1829 return buf.indexOf(s, i);
\r
1833 // !!! 1.3 compatibility
\r
1834 public static int lastIndexOf(StringBuffer buf, String s, int i) {
\r
1835 //#if defined(FOUNDATION10) || defined(J2SE13)
\r
1836 //## return buf.toString().lastIndexOf(s, i);
\r
1838 return buf.lastIndexOf(s, i);
\r
1842 // !!! 1.3/1.4 compatibility
\r
1843 public static String replace(String src, String target, String replacement) {
\r
1844 //#if defined(FOUNDATION10) || defined(J2SE13) || defined(J2SE14)
\r
1845 //## int i = src.indexOf(target);
\r
1846 //## if (i == -1) {
\r
1849 //## StringBuffer buf = new StringBuffer();
\r
1852 //## buf.append(src.substring(n, i));
\r
1853 //## buf.append(replacement);
\r
1854 //## n = i + target.length();
\r
1855 //## i = src.indexOf(target, n);
\r
1856 //## } while (i != -1);
\r
1857 //## if (n < src.length()) {
\r
1858 //## buf.append(src.substring(n));
\r
1860 //## return buf.toString();
\r
1862 return src.replace(target, replacement);
\r
1866 // !!! 1.3 compatibility
\r
1867 public static String replaceAll(String src, String target, String replacement) {
\r
1868 //#if defined(FOUNDATION10) || defined(J2SE13)
\r
1869 //## return replace(src, target, replacement);
\r
1871 return src.replaceAll(target, replacement);
\r
1875 //private static final String REGEX_SPECIALS = ".^$[]*+?|()";
\r
1877 // !!! 1.3 compatibility
\r
1878 // Note: target is not a string literal, not a regular expression.
\r
1879 public static String[] splitString(String src, String target) {
\r
1880 //#if defined(FOUNDATION10) || defined(J2SE13)
\r
1881 //## int i = src.indexOf(target);
\r
1882 //## if (i == -1) {
\r
1883 //## return new String[] { src };
\r
1885 //## ArrayList output = new ArrayList();
\r
1888 //## output.add(src.substring(n, i));
\r
1889 //## n = i + target.length();
\r
1890 //## i = src.indexOf(target, n);
\r
1891 //## } while (i != -1);
\r
1892 //## if (n < src.length()) {
\r
1893 //## output.add(src.substring(n));
\r
1895 //## return (String[]) output.toArray(new String[output.size()]);
\r
1897 return src.split("\\Q" + target + "\\E");
\r
1901 // !!! 1.3 compatibility
\r
1903 * Split the string at runs of ascii whitespace characters.
\r
1905 public static String[] splitWhitespace(String src) {
\r
1906 //#if defined(FOUNDATION10) || defined(J2SE13)
\r
1907 //## char ws[] = "\u0020\u0009\n\u000b\u000c\r".toCharArray();
\r
1908 //## ArrayList output = new ArrayList();
\r
1909 //## boolean inWhitespace = true;
\r
1912 //## for (int i = 0; i < src.length(); ++i) {
\r
1913 //## char c = src.charAt(i);
\r
1914 //## for (int j = 0; j < ws.length; ++j) {
\r
1915 //## if (ws[j] == c) {
\r
1916 //## if (!inWhitespace) {
\r
1917 //## output.add(src.substring(n, i));
\r
1918 //## inWhitespace = true;
\r
1920 //## continue loop;
\r
1923 //## if (inWhitespace) {
\r
1925 //## inWhitespace = false;
\r
1928 //## if (n < src.length()) {
\r
1929 //## output.add(src.substring(n));
\r
1931 //## return (String[]) output.toArray(new String[output.size()]);
\r
1933 return src.split("\\s+");
\r
// !!! 1.3/1.4 compatibility
// Integer constants - Integer.valueOf(int) is not supported in JDK 1.3/1.4
private static final int MAX_INT_CONST = 64;
private static final Integer[] INT_CONST = new Integer[MAX_INT_CONST];

static {
    // Pre-box 0 .. MAX_INT_CONST-1 once so those values can be shared.
    int value = MAX_INT_CONST;
    while (--value >= 0) {
        INT_CONST[value] = new Integer(value);
    }
}

/**
 * Return an Integer holding val.  Values from 0 up to (but not
 * including) MAX_INT_CONST come from a pre-built table, so repeated
 * calls return the same object; any other value is boxed into a new
 * Integer on every call.
 * @param val the value to box
 * @return an Integer whose intValue() is val
 */
public static Integer integerValueOf(int val) {
    final boolean cached = val >= 0 && val < MAX_INT_CONST;
    return cached ? INT_CONST[val] : new Integer(val);
}
\r
1955 // !!! 1.3/1.4 compatibility
\r
1956 // Arrays.toString(Object[])
\r
1957 public static String arrayToString(Object[] a) {
\r
1958 StringBuffer buf = new StringBuffer("[");
\r
1959 for (int i = 0; i < a.length; i++) {
\r
1963 if (a[i] == null) {
\r
1964 buf.append("null");
\r
1966 buf.append(a[i].toString());
\r
1970 return buf.toString();
\r