-//##header\r
-/*\r
- *******************************************************************************\r
- * Copyright (C) 1996-2009, International Business Machines Corporation and *\r
- * others. All Rights Reserved. *\r
- *******************************************************************************\r
- */\r
-package com.ibm.icu.impl;\r
-\r
-import java.util.ArrayList;\r
-\r
-import com.ibm.icu.lang.*;\r
-import com.ibm.icu.text.*;\r
-import com.ibm.icu.impl.UCharacterProperty;\r
-\r
-public final class Utility {\r
-\r
    /** The single-quote character. */
    private static final char APOSTROPHE = '\'';
    /** The backslash character. */
    private static final char BACKSLASH = '\\';
    /** An int with only the top (sign) bit set; its use is not visible in this part of the file. */
    private static final int MAGIC_UNSIGNED = 0x80000000;
-\r
- /**\r
- * Convenience utility to compare two Object[]s.\r
- * Ought to be in System\r
- */\r
- public final static boolean arrayEquals(Object[] source, Object target) {\r
- if (source == null) return (target == null);\r
- if (!(target instanceof Object[])) return false;\r
- Object[] targ = (Object[]) target;\r
- return (source.length == targ.length\r
- && arrayRegionMatches(source, 0, targ, 0, source.length));\r
- }\r
-\r
- /**\r
- * Convenience utility to compare two int[]s\r
- * Ought to be in System\r
- */\r
- public final static boolean arrayEquals(int[] source, Object target) {\r
- if (source == null) return (target == null);\r
- if (!(target instanceof int[])) return false;\r
- int[] targ = (int[]) target;\r
- return (source.length == targ.length\r
- && arrayRegionMatches(source, 0, targ, 0, source.length));\r
- }\r
-\r
- /**\r
- * Convenience utility to compare two double[]s\r
- * Ought to be in System\r
- */\r
- public final static boolean arrayEquals(double[] source, Object target) {\r
- if (source == null) return (target == null);\r
- if (!(target instanceof double[])) return false;\r
- double[] targ = (double[]) target;\r
- return (source.length == targ.length\r
- && arrayRegionMatches(source, 0, targ, 0, source.length));\r
- }\r
- public final static boolean arrayEquals(byte[] source, Object target) {\r
- if (source == null) return (target == null);\r
- if (!(target instanceof byte[])) return false;\r
- byte[] targ = (byte[]) target;\r
- return (source.length == targ.length\r
- && arrayRegionMatches(source, 0, targ, 0, source.length));\r
- }\r
-\r
- /**\r
- * Convenience utility to compare two Object[]s\r
- * Ought to be in System\r
- */\r
- public final static boolean arrayEquals(Object source, Object target) {\r
- if (source == null) return (target == null);\r
- // for some reason, the correct arrayEquals is not being called\r
- // so do it by hand for now.\r
- if (source instanceof Object[])\r
- return(arrayEquals((Object[]) source,target));\r
- if (source instanceof int[])\r
- return(arrayEquals((int[]) source,target));\r
- if (source instanceof double[])\r
- return(arrayEquals((int[]) source,target));\r
- if (source instanceof byte[])\r
- return(arrayEquals((byte[]) source,target));\r
- return source.equals(target);\r
- }\r
-\r
- /**\r
- * Convenience utility to compare two Object[]s\r
- * Ought to be in System.\r
- * @param len the length to compare.\r
- * The start indices and start+len must be valid.\r
- */\r
- public final static boolean arrayRegionMatches(Object[] source, int sourceStart,\r
- Object[] target, int targetStart,\r
- int len)\r
- {\r
- int sourceEnd = sourceStart + len;\r
- int delta = targetStart - sourceStart;\r
- for (int i = sourceStart; i < sourceEnd; i++) {\r
- if (!arrayEquals(source[i],target[i + delta]))\r
- return false;\r
- }\r
- return true;\r
- }\r
-\r
- /**\r
- * Convenience utility to compare two Object[]s\r
- * Ought to be in System.\r
- * @param len the length to compare.\r
- * The start indices and start+len must be valid.\r
- */\r
- public final static boolean arrayRegionMatches(char[] source, int sourceStart,\r
- char[] target, int targetStart,\r
- int len)\r
- {\r
- int sourceEnd = sourceStart + len;\r
- int delta = targetStart - sourceStart;\r
- for (int i = sourceStart; i < sourceEnd; i++) {\r
- if (source[i]!=target[i + delta])\r
- return false;\r
- }\r
- return true;\r
- }\r
-\r
- /** \r
- * Convenience utility to compare two int[]s.\r
- * @param len the length to compare.\r
- * The start indices and start+len must be valid.\r
- * Ought to be in System\r
- */\r
- public final static boolean arrayRegionMatches(int[] source, int sourceStart,\r
- int[] target, int targetStart,\r
- int len)\r
- {\r
- int sourceEnd = sourceStart + len;\r
- int delta = targetStart - sourceStart;\r
- for (int i = sourceStart; i < sourceEnd; i++) {\r
- if (source[i] != target[i + delta])\r
- return false;\r
- }\r
- return true;\r
- }\r
-\r
- /**\r
- * Convenience utility to compare two arrays of doubles.\r
- * @param len the length to compare.\r
- * The start indices and start+len must be valid.\r
- * Ought to be in System\r
- */\r
- public final static boolean arrayRegionMatches(double[] source, int sourceStart,\r
- double[] target, int targetStart,\r
- int len)\r
- {\r
- int sourceEnd = sourceStart + len;\r
- int delta = targetStart - sourceStart;\r
- for (int i = sourceStart; i < sourceEnd; i++) {\r
- if (source[i] != target[i + delta])\r
- return false;\r
- }\r
- return true;\r
- }\r
- public final static boolean arrayRegionMatches(byte[] source, int sourceStart,\r
- byte[] target, int targetStart, int len){\r
- int sourceEnd = sourceStart + len;\r
- int delta = targetStart - sourceStart;\r
- for (int i = sourceStart; i < sourceEnd; i++) {\r
- if (source[i] != target[i + delta])\r
- return false;\r
- }\r
- return true;\r
- }\r
-\r
- /**\r
- * Convenience utility. Does null checks on objects, then calls equals.\r
- */\r
- public final static boolean objectEquals(Object source, Object target) {\r
- if (source == null)\r
- return (target == null);\r
- else\r
- return source.equals(target);\r
- }\r
-\r
    /**
     * The ESCAPE character is used during run-length encoding. It signals
     * a run of identical chars. The int, short and char encoders and
     * decoders in this class all use this marker.
     */
    private static final char ESCAPE = '\uA5A5';

    /**
     * The ESCAPE_BYTE character is used during run-length encoding. It signals
     * a run of identical bytes. As a Java byte its value is negative (-91),
     * so it must not be compared directly with a positive int.
     */
    static final byte ESCAPE_BYTE = (byte)0xA5;
-\r
- /**\r
- * Construct a string representing an int array. Use run-length encoding.\r
- * A character represents itself, unless it is the ESCAPE character. Then\r
- * the following notations are possible:\r
- * ESCAPE ESCAPE ESCAPE literal\r
- * ESCAPE n c n instances of character c\r
- * Since an encoded run occupies 3 characters, we only encode runs of 4 or\r
- * more characters. Thus we have n > 0 and n != ESCAPE and n <= 0xFFFF.\r
- * If we encounter a run where n == ESCAPE, we represent this as:\r
- * c ESCAPE n-1 c\r
- * The ESCAPE value is chosen so as not to collide with commonly\r
- * seen values.\r
- */\r
- static public final String arrayToRLEString(int[] a) {\r
- StringBuffer buffer = new StringBuffer();\r
-\r
- appendInt(buffer, a.length);\r
- int runValue = a[0];\r
- int runLength = 1;\r
- for (int i=1; i<a.length; ++i) {\r
- int s = a[i];\r
- if (s == runValue && runLength < 0xFFFF) {\r
- ++runLength;\r
- } else {\r
- encodeRun(buffer, runValue, runLength);\r
- runValue = s;\r
- runLength = 1;\r
- }\r
- }\r
- encodeRun(buffer, runValue, runLength);\r
- return buffer.toString();\r
- }\r
-\r
- /**\r
- * Construct a string representing a short array. Use run-length encoding.\r
- * A character represents itself, unless it is the ESCAPE character. Then\r
- * the following notations are possible:\r
- * ESCAPE ESCAPE ESCAPE literal\r
- * ESCAPE n c n instances of character c\r
- * Since an encoded run occupies 3 characters, we only encode runs of 4 or\r
- * more characters. Thus we have n > 0 and n != ESCAPE and n <= 0xFFFF.\r
- * If we encounter a run where n == ESCAPE, we represent this as:\r
- * c ESCAPE n-1 c\r
- * The ESCAPE value is chosen so as not to collide with commonly\r
- * seen values.\r
- */\r
- static public final String arrayToRLEString(short[] a) {\r
- StringBuffer buffer = new StringBuffer();\r
- // for (int i=0; i<a.length; ++i) buffer.append((char) a[i]);\r
- buffer.append((char) (a.length >> 16));\r
- buffer.append((char) a.length);\r
- short runValue = a[0];\r
- int runLength = 1;\r
- for (int i=1; i<a.length; ++i) {\r
- short s = a[i];\r
- if (s == runValue && runLength < 0xFFFF) ++runLength;\r
- else {\r
- encodeRun(buffer, runValue, runLength);\r
- runValue = s;\r
- runLength = 1;\r
- }\r
- }\r
- encodeRun(buffer, runValue, runLength);\r
- return buffer.toString();\r
- }\r
-\r
- /**\r
- * Construct a string representing a char array. Use run-length encoding.\r
- * A character represents itself, unless it is the ESCAPE character. Then\r
- * the following notations are possible:\r
- * ESCAPE ESCAPE ESCAPE literal\r
- * ESCAPE n c n instances of character c\r
- * Since an encoded run occupies 3 characters, we only encode runs of 4 or\r
- * more characters. Thus we have n > 0 and n != ESCAPE and n <= 0xFFFF.\r
- * If we encounter a run where n == ESCAPE, we represent this as:\r
- * c ESCAPE n-1 c\r
- * The ESCAPE value is chosen so as not to collide with commonly\r
- * seen values.\r
- */\r
- static public final String arrayToRLEString(char[] a) {\r
- StringBuffer buffer = new StringBuffer();\r
- buffer.append((char) (a.length >> 16));\r
- buffer.append((char) a.length);\r
- char runValue = a[0];\r
- int runLength = 1;\r
- for (int i=1; i<a.length; ++i) {\r
- char s = a[i];\r
- if (s == runValue && runLength < 0xFFFF) ++runLength;\r
- else {\r
- encodeRun(buffer, (short)runValue, runLength);\r
- runValue = s;\r
- runLength = 1;\r
- }\r
- }\r
- encodeRun(buffer, (short)runValue, runLength);\r
- return buffer.toString();\r
- }\r
-\r
- /**\r
- * Construct a string representing a byte array. Use run-length encoding.\r
- * Two bytes are packed into a single char, with a single extra zero byte at\r
- * the end if needed. A byte represents itself, unless it is the\r
- * ESCAPE_BYTE. Then the following notations are possible:\r
- * ESCAPE_BYTE ESCAPE_BYTE ESCAPE_BYTE literal\r
- * ESCAPE_BYTE n b n instances of byte b\r
- * Since an encoded run occupies 3 bytes, we only encode runs of 4 or\r
- * more bytes. Thus we have n > 0 and n != ESCAPE_BYTE and n <= 0xFF.\r
- * If we encounter a run where n == ESCAPE_BYTE, we represent this as:\r
- * b ESCAPE_BYTE n-1 b\r
- * The ESCAPE_BYTE value is chosen so as not to collide with commonly\r
- * seen values.\r
- */\r
- static public final String arrayToRLEString(byte[] a) {\r
- StringBuffer buffer = new StringBuffer();\r
- buffer.append((char) (a.length >> 16));\r
- buffer.append((char) a.length);\r
- byte runValue = a[0];\r
- int runLength = 1;\r
- byte[] state = new byte[2];\r
- for (int i=1; i<a.length; ++i) {\r
- byte b = a[i];\r
- if (b == runValue && runLength < 0xFF) ++runLength;\r
- else {\r
- encodeRun(buffer, runValue, runLength, state);\r
- runValue = b;\r
- runLength = 1;\r
- }\r
- }\r
- encodeRun(buffer, runValue, runLength, state);\r
-\r
- // We must save the final byte, if there is one, by padding\r
- // an extra zero.\r
- if (state[0] != 0) appendEncodedByte(buffer, (byte)0, state);\r
-\r
- return buffer.toString();\r
- }\r
-\r
- /**\r
- * Encode a run, possibly a degenerate run (of < 4 values).\r
- * @param length The length of the run; must be > 0 && <= 0xFFFF.\r
- */\r
- private static final void encodeRun(StringBuffer buffer, int value, int length) {\r
- if (length < 4) {\r
- for (int j=0; j<length; ++j) {\r
- if (value == ESCAPE) {\r
- appendInt(buffer, value);\r
- }\r
- appendInt(buffer, value);\r
- }\r
- }\r
- else {\r
- if (length == (int) ESCAPE) {\r
- if (value == (int) ESCAPE) {\r
- appendInt(buffer, ESCAPE);\r
- }\r
- appendInt(buffer, value);\r
- --length;\r
- }\r
- appendInt(buffer, ESCAPE);\r
- appendInt(buffer, length);\r
- appendInt(buffer, value); // Don't need to escape this value\r
- }\r
- }\r
- \r
- private static final void appendInt(StringBuffer buffer, int value) {\r
- buffer.append((char)(value >>> 16));\r
- buffer.append((char)(value & 0xFFFF));\r
- }\r
-\r
- /**\r
- * Encode a run, possibly a degenerate run (of < 4 values).\r
- * @param length The length of the run; must be > 0 && <= 0xFFFF.\r
- */\r
- private static final void encodeRun(StringBuffer buffer, short value, int length) {\r
- if (length < 4) {\r
- for (int j=0; j<length; ++j) {\r
- if (value == (int) ESCAPE) buffer.append(ESCAPE);\r
- buffer.append((char) value);\r
- }\r
- }\r
- else {\r
- if (length == (int) ESCAPE) {\r
- if (value == (int) ESCAPE) buffer.append(ESCAPE);\r
- buffer.append((char) value);\r
- --length;\r
- }\r
- buffer.append(ESCAPE);\r
- buffer.append((char) length);\r
- buffer.append((char) value); // Don't need to escape this value\r
- }\r
- }\r
-\r
- /**\r
- * Encode a run, possibly a degenerate run (of < 4 values).\r
- * @param length The length of the run; must be > 0 && <= 0xFF.\r
- */\r
- private static final void encodeRun(StringBuffer buffer, byte value, int length,\r
- byte[] state) {\r
- if (length < 4) {\r
- for (int j=0; j<length; ++j) {\r
- if (value == ESCAPE_BYTE) appendEncodedByte(buffer, ESCAPE_BYTE, state);\r
- appendEncodedByte(buffer, value, state);\r
- }\r
- }\r
- else {\r
- if (length == ESCAPE_BYTE) {\r
- if (value == ESCAPE_BYTE) appendEncodedByte(buffer, ESCAPE_BYTE, state);\r
- appendEncodedByte(buffer, value, state);\r
- --length;\r
- }\r
- appendEncodedByte(buffer, ESCAPE_BYTE, state);\r
- appendEncodedByte(buffer, (byte)length, state);\r
- appendEncodedByte(buffer, value, state); // Don't need to escape this value\r
- }\r
- }\r
-\r
- /**\r
- * Append a byte to the given StringBuffer, packing two bytes into each\r
- * character. The state parameter maintains intermediary data between\r
- * calls.\r
- * @param state A two-element array, with state[0] == 0 if this is the\r
- * first byte of a pair, or state[0] != 0 if this is the second byte\r
- * of a pair, in which case state[1] is the first byte.\r
- */\r
- private static final void appendEncodedByte(StringBuffer buffer, byte value,\r
- byte[] state) {\r
- if (state[0] != 0) {\r
- char c = (char) ((state[1] << 8) | (((int) value) & 0xFF));\r
- buffer.append(c);\r
- state[0] = 0;\r
- }\r
- else {\r
- state[0] = 1;\r
- state[1] = value;\r
- }\r
- }\r
-\r
- /**\r
- * Construct an array of ints from a run-length encoded string.\r
- */\r
- static public final int[] RLEStringToIntArray(String s) {\r
- int length = getInt(s, 0);\r
- int[] array = new int[length];\r
- int ai = 0, i = 1;\r
-\r
- int maxI = s.length() / 2;\r
- while (ai < length && i < maxI) {\r
- int c = getInt(s, i++);\r
-\r
- if (c == ESCAPE) {\r
- c = getInt(s, i++);\r
- if (c == ESCAPE) {\r
- array[ai++] = c;\r
- } else {\r
- int runLength = c;\r
- int runValue = getInt(s, i++);\r
- for (int j=0; j<runLength; ++j) {\r
- array[ai++] = runValue;\r
- }\r
- }\r
- }\r
- else {\r
- array[ai++] = c;\r
- }\r
- }\r
-\r
- if (ai != length || i != maxI) {\r
- throw new IllegalStateException("Bad run-length encoded int array");\r
- }\r
-\r
- return array;\r
- }\r
- static final int getInt(String s, int i) {\r
- return (((int) s.charAt(2*i)) << 16) | (int) s.charAt(2*i+1);\r
- }\r
-\r
- /**\r
- * Construct an array of shorts from a run-length encoded string.\r
- */\r
- static public final short[] RLEStringToShortArray(String s) {\r
- int length = (((int) s.charAt(0)) << 16) | ((int) s.charAt(1));\r
- short[] array = new short[length];\r
- int ai = 0;\r
- for (int i=2; i<s.length(); ++i) {\r
- char c = s.charAt(i);\r
- if (c == ESCAPE) {\r
- c = s.charAt(++i);\r
- if (c == ESCAPE) {\r
- array[ai++] = (short) c;\r
- } else {\r
- int runLength = (int) c;\r
- short runValue = (short) s.charAt(++i);\r
- for (int j=0; j<runLength; ++j) array[ai++] = runValue;\r
- }\r
- }\r
- else {\r
- array[ai++] = (short) c;\r
- }\r
- }\r
-\r
- if (ai != length)\r
- throw new IllegalStateException("Bad run-length encoded short array");\r
-\r
- return array;\r
- }\r
-\r
- /**\r
- * Construct an array of shorts from a run-length encoded string.\r
- */\r
- static public final char[] RLEStringToCharArray(String s) {\r
- int length = (((int) s.charAt(0)) << 16) | ((int) s.charAt(1));\r
- char[] array = new char[length];\r
- int ai = 0;\r
- for (int i=2; i<s.length(); ++i) {\r
- char c = s.charAt(i);\r
- if (c == ESCAPE) {\r
- c = s.charAt(++i);\r
- if (c == ESCAPE) {\r
- array[ai++] = c;\r
- } else {\r
- int runLength = (int) c;\r
- char runValue = s.charAt(++i);\r
- for (int j=0; j<runLength; ++j) array[ai++] = runValue;\r
- }\r
- }\r
- else {\r
- array[ai++] = c;\r
- }\r
- }\r
-\r
- if (ai != length)\r
- throw new IllegalStateException("Bad run-length encoded short array");\r
-\r
- return array;\r
- }\r
-\r
- /**\r
- * Construct an array of bytes from a run-length encoded string.\r
- */\r
- static public final byte[] RLEStringToByteArray(String s) {\r
- int length = (((int) s.charAt(0)) << 16) | ((int) s.charAt(1));\r
- byte[] array = new byte[length];\r
- boolean nextChar = true;\r
- char c = 0;\r
- int node = 0;\r
- int runLength = 0;\r
- int i = 2;\r
- for (int ai=0; ai<length; ) {\r
- // This part of the loop places the next byte into the local\r
- // variable 'b' each time through the loop. It keeps the\r
- // current character in 'c' and uses the boolean 'nextChar'\r
- // to see if we've taken both bytes out of 'c' yet.\r
- byte b;\r
- if (nextChar) {\r
- c = s.charAt(i++);\r
- b = (byte) (c >> 8);\r
- nextChar = false;\r
- }\r
- else {\r
- b = (byte) (c & 0xFF);\r
- nextChar = true;\r
- }\r
-\r
- // This part of the loop is a tiny state machine which handles\r
- // the parsing of the run-length encoding. This would be simpler\r
- // if we could look ahead, but we can't, so we use 'node' to\r
- // move between three nodes in the state machine.\r
- switch (node) {\r
- case 0:\r
- // Normal idle node\r
- if (b == ESCAPE_BYTE) {\r
- node = 1;\r
- }\r
- else {\r
- array[ai++] = b;\r
- }\r
- break;\r
- case 1:\r
- // We have seen one ESCAPE_BYTE; we expect either a second\r
- // one, or a run length and value.\r
- if (b == ESCAPE_BYTE) {\r
- array[ai++] = ESCAPE_BYTE;\r
- node = 0;\r
- }\r
- else {\r
- runLength = b;\r
- // Interpret signed byte as unsigned\r
- if (runLength < 0) runLength += 0x100;\r
- node = 2;\r
- }\r
- break;\r
- case 2:\r
- // We have seen an ESCAPE_BYTE and length byte. We interpret\r
- // the next byte as the value to be repeated.\r
- for (int j=0; j<runLength; ++j) array[ai++] = b;\r
- node = 0;\r
- break;\r
- }\r
- }\r
-\r
- if (node != 0)\r
- throw new IllegalStateException("Bad run-length encoded byte array");\r
-\r
- if (i != s.length())\r
- throw new IllegalStateException("Excess data in RLE byte array string");\r
-\r
- return array;\r
- }\r
-\r
    /**
     * The value of the "line.separator" system property, read once when this
     * class is initialized. formatForSource uses it between output lines.
     * NOTE(review): this field is public and not final, so other code can
     * reassign it; confirm whether that is intended.
     */
    static public String LINE_SEPARATOR = System.getProperty("line.separator");
-\r
- /**\r
- * Format a String for representation in a source file. This includes\r
- * breaking it into lines and escaping characters using octal notation\r
- * when necessary (control characters and double quotes).\r
- */\r
- static public final String formatForSource(String s) {\r
- StringBuffer buffer = new StringBuffer();\r
- for (int i=0; i<s.length();) {\r
- if (i > 0) buffer.append('+').append(LINE_SEPARATOR);\r
- buffer.append(" \"");\r
- int count = 11;\r
- while (i<s.length() && count<80) {\r
- char c = s.charAt(i++);\r
- if (c < '\u0020' || c == '"' || c == '\\') {\r
- if (c == '\n') {\r
- buffer.append("\\n");\r
- count += 2;\r
- } else if (c == '\t') {\r
- buffer.append("\\t");\r
- count += 2;\r
- } else if (c == '\r') {\r
- buffer.append("\\r");\r
- count += 2;\r
- } else {\r
- // Represent control characters, backslash and double quote\r
- // using octal notation; otherwise the string we form\r
- // won't compile, since Unicode escape sequences are\r
- // processed before tokenization.\r
- buffer.append('\\');\r
- buffer.append(HEX_DIGIT[(c & 0700) >> 6]); // HEX_DIGIT works for octal\r
- buffer.append(HEX_DIGIT[(c & 0070) >> 3]);\r
- buffer.append(HEX_DIGIT[(c & 0007)]);\r
- count += 4;\r
- }\r
- }\r
- else if (c <= '\u007E') {\r
- buffer.append(c);\r
- count += 1;\r
- }\r
- else {\r
- buffer.append("\\u");\r
- buffer.append(HEX_DIGIT[(c & 0xF000) >> 12]);\r
- buffer.append(HEX_DIGIT[(c & 0x0F00) >> 8]);\r
- buffer.append(HEX_DIGIT[(c & 0x00F0) >> 4]);\r
- buffer.append(HEX_DIGIT[(c & 0x000F)]);\r
- count += 6;\r
- }\r
- }\r
- buffer.append('"');\r
- }\r
- return buffer.toString();\r
- }\r
-\r
    /**
     * Uppercase digit characters indexed by value, 0 through 15. The first
     * eight entries double as octal digits in formatForSource and
     * format1ForSource.
     */
    static final char[] HEX_DIGIT = {'0','1','2','3','4','5','6','7',
                                     '8','9','A','B','C','D','E','F'};
-\r
- /**\r
- * Format a String for representation in a source file. Like\r
- * formatForSource but does not do line breaking.\r
- */\r
- static public final String format1ForSource(String s) {\r
- StringBuffer buffer = new StringBuffer();\r
- buffer.append("\"");\r
- for (int i=0; i<s.length();) {\r
- char c = s.charAt(i++);\r
- if (c < '\u0020' || c == '"' || c == '\\') {\r
- if (c == '\n') {\r
- buffer.append("\\n");\r
- } else if (c == '\t') {\r
- buffer.append("\\t");\r
- } else if (c == '\r') {\r
- buffer.append("\\r");\r
- } else {\r
- // Represent control characters, backslash and double quote\r
- // using octal notation; otherwise the string we form\r
- // won't compile, since Unicode escape sequences are\r
- // processed before tokenization.\r
- buffer.append('\\');\r
- buffer.append(HEX_DIGIT[(c & 0700) >> 6]); // HEX_DIGIT works for octal\r
- buffer.append(HEX_DIGIT[(c & 0070) >> 3]);\r
- buffer.append(HEX_DIGIT[(c & 0007)]);\r
- }\r
- }\r
- else if (c <= '\u007E') {\r
- buffer.append(c);\r
- }\r
- else {\r
- buffer.append("\\u");\r
- buffer.append(HEX_DIGIT[(c & 0xF000) >> 12]);\r
- buffer.append(HEX_DIGIT[(c & 0x0F00) >> 8]);\r
- buffer.append(HEX_DIGIT[(c & 0x00F0) >> 4]);\r
- buffer.append(HEX_DIGIT[(c & 0x000F)]);\r
- }\r
- }\r
- buffer.append('"');\r
- return buffer.toString();\r
- }\r
-\r
- /**\r
- * Convert characters outside the range U+0020 to U+007F to\r
- * Unicode escapes, and convert backslash to a double backslash.\r
- */\r
- public static final String escape(String s) {\r
- StringBuffer buf = new StringBuffer();\r
- for (int i=0; i<s.length(); ) {\r
- int c = UTF16.charAt(s, i);\r
- i += UTF16.getCharCount(c);\r
- if (c >= ' ' && c <= 0x007F) {\r
- if (c == '\\') {\r
- buf.append("\\\\"); // That is, "\\"\r
- } else {\r
- buf.append((char)c);\r
- }\r
- } else {\r
- boolean four = c <= 0xFFFF;\r
- buf.append(four ? "\\u" : "\\U");\r
- hex(c, four ? 4 : 8, buf);\r
- }\r
- }\r
- return buf.toString();\r
- }\r
-\r
    /*
     * Pairs of (escape letter, replacement character) used by unescapeAt()
     * for C-style escapes. Each even index holds the letter that follows the
     * backslash; the next index holds the character it stands for.
     * This map must be in ASCENDING ORDER OF THE ESCAPE CODE, because the
     * lookup loop in unescapeAt() stops at the first entry greater than the
     * letter it is looking for.
     * The commented-out entries map a character to itself; unescapeAt()
     * already returns the character unchanged when nothing else matches.
     */
    static private final char[] UNESCAPE_MAP = {
        /*"   0x22, 0x22 */
        /*'   0x27, 0x27 */
        /*?   0x3F, 0x3F */
        /*\   0x5C, 0x5C */
        /*a*/ 0x61, 0x07,
        /*b*/ 0x62, 0x08,
        /*e*/ 0x65, 0x1b,
        /*f*/ 0x66, 0x0c,
        /*n*/ 0x6E, 0x0a,
        /*r*/ 0x72, 0x0d,
        /*t*/ 0x74, 0x09,
        /*v*/ 0x76, 0x0b
    };
-\r
    /**
     * Convert an escape to a 32-bit code point value. We attempt
     * to parallel the icu4c unescapeAt() function.
     * Recognized forms, in the order they are tried: lowercase-u with
     * exactly 4 hex digits, uppercase-U with exactly 8 hex digits, x with
     * 1 to 2 hex digits or with up to 8 hex digits in braces, 1 to 3 octal
     * digits, the letters listed in UNESCAPE_MAP, and c followed by a
     * character (control character). Any other character is returned
     * unchanged.
     * @param s the string containing the escape
     * @param offset16 an array containing offset to the character
     * <em>after</em> the backslash. Upon return offset16[0] will
     * be updated to point after the escape sequence. It is left
     * unchanged when -1 is returned.
     * @return character value from 0 to 10FFFF, or -1 on error.
     */
    public static int unescapeAt(String s, int[] offset16) {
        int c;
        int result = 0;
        int n = 0;            // number of digits consumed so far
        int minDig = 0;       // stays 0 when the escape is not numeric
        int maxDig = 0;
        int bitsPerDigit = 4; // 4 for hex, 3 for octal
        int dig;
        int i;
        boolean braces = false;

        /* Check that offset is in range */
        int offset = offset16[0];
        int length = s.length();
        if (offset < 0 || offset >= length) {
            return -1;
        }

        /* Fetch first UChar after '\\' */
        c = UTF16.charAt(s, offset);
        offset += UTF16.getCharCount(c);

        /* Convert hexadecimal and octal escapes */
        switch (c) {
        case 'u':
            minDig = maxDig = 4;
            break;
        case 'U':
            minDig = maxDig = 8;
            break;
        case 'x':
            minDig = 1;
            if (offset < length && UTF16.charAt(s, offset) == 0x7B /*{*/) {
                ++offset;
                braces = true;
                maxDig = 8;
            } else {
                maxDig = 2;
            }
            break;
        default:
            dig = UCharacter.digit(c, 8);
            if (dig >= 0) {
                minDig = 1;
                maxDig = 3;
                n = 1; /* Already have first octal digit */
                bitsPerDigit = 3;
                result = dig;
            }
            break;
        }
        if (minDig != 0) {
            // Numeric escape: accumulate digits until maxDig is reached, the
            // string ends, or a non-digit is seen.
            while (offset < length && n < maxDig) {
                c = UTF16.charAt(s, offset);
                dig = UCharacter.digit(c, (bitsPerDigit == 3) ? 8 : 16);
                if (dig < 0) {
                    break;
                }
                result = (result << bitsPerDigit) | dig;
                offset += UTF16.getCharCount(c);
                ++n;
            }
            if (n < minDig) {
                return -1;
            }
            if (braces) {
                // NOTE(review): c is the last character read by the loop
                // above. When the loop ends because n reached maxDig or the
                // string ran out, c is the final digit rather than the
                // character at offset, so a braced escape with 8 digits
                // appears to be rejected here - confirm.
                if (c != 0x7D /*}*/) {
                    return -1;
                }
                ++offset;
            }
            if (result < 0 || result >= 0x110000) {
                return -1;
            }
            // If an escape sequence specifies a lead surrogate, see
            // if there is a trail surrogate after it, either as an
            // escape or as a literal. If so, join them up into a
            // supplementary.
            // NOTE(review): the (char) cast drops the bits above 16, so a
            // supplementary result whose low 16 bits fall in the lead
            // surrogate range also passes this test - confirm intended.
            if (offset < length &&
                UTF16.isLeadSurrogate((char) result)) {
                int ahead = offset+1;
                c = s.charAt(offset); // [sic] get 16-bit code unit
                if (c == '\\' && ahead < length) {
                    int o[] = new int[] { ahead };
                    c = unescapeAt(s, o);
                    ahead = o[0];
                }
                if (UTF16.isTrailSurrogate((char) c)) {
                    offset = ahead;
                    result = UCharacterProperty.getRawSupplementary(
                                                                    (char) result, (char) c);
                }
            }
            offset16[0] = offset;
            return result;
        }

        /* Convert C-style escapes in table */
        for (i=0; i<UNESCAPE_MAP.length; i+=2) {
            if (c == UNESCAPE_MAP[i]) {
                offset16[0] = offset;
                return UNESCAPE_MAP[i+1];
            } else if (c < UNESCAPE_MAP[i]) {
                // The table is sorted, so c cannot appear later.
                break;
            }
        }

        /* Map \cX to control-X: X & 0x1F */
        if (c == 'c' && offset < length) {
            c = UTF16.charAt(s, offset);
            offset16[0] = offset + UTF16.getCharCount(c);
            return 0x1F & c;
        }

        /* If no special forms are recognized, then consider
         * the backslash to generically escape the next character. */
        offset16[0] = offset;
        return c;
    }
-\r
- /**\r
- * Convert all escapes in a given string using unescapeAt().\r
- * @exception IllegalArgumentException if an invalid escape is\r
- * seen.\r
- */\r
- public static String unescape(String s) {\r
- StringBuffer buf = new StringBuffer();\r
- int[] pos = new int[1];\r
- for (int i=0; i<s.length(); ) {\r
- char c = s.charAt(i++);\r
- if (c == '\\') {\r
- pos[0] = i;\r
- int e = unescapeAt(s, pos);\r
- if (e < 0) {\r
- throw new IllegalArgumentException("Invalid escape sequence " +\r
- s.substring(i-1, Math.min(i+8, s.length())));\r
- }\r
- UTF16.append(buf, e);\r
- i = pos[0];\r
- } else {\r
- buf.append(c);\r
- }\r
- }\r
- return buf.toString();\r
- }\r
-\r
- /**\r
- * Convert all escapes in a given string using unescapeAt().\r
- * Leave invalid escape sequences unchanged.\r
- */\r
- public static String unescapeLeniently(String s) {\r
- StringBuffer buf = new StringBuffer();\r
- int[] pos = new int[1];\r
- for (int i=0; i<s.length(); ) {\r
- char c = s.charAt(i++);\r
- if (c == '\\') {\r
- pos[0] = i;\r
- int e = unescapeAt(s, pos);\r
- if (e < 0) {\r
- buf.append(c);\r
- } else {\r
- UTF16.append(buf, e);\r
- i = pos[0];\r
- }\r
- } else {\r
- buf.append(c);\r
- }\r
- }\r
- return buf.toString();\r
- }\r
-\r
- /**\r
- * Convert a char to 4 hex uppercase digits. E.g., hex('a') =>\r
- * "0041".\r
- */\r
- public static String hex(char ch) {\r
- StringBuffer temp = new StringBuffer();\r
- return hex(ch, temp).toString();\r
- }\r
-\r
- /**\r
- * Convert a string to comma-separated groups of 4 hex uppercase\r
- * digits. E.g., hex('ab') => "0041,0042".\r
- */\r
- public static String hex(String s) {\r
- StringBuffer temp = new StringBuffer();\r
- return hex(s, temp).toString();\r
- }\r
-\r
- /**\r
- * Convert a string to comma-separated groups of 4 hex uppercase\r
- * digits. E.g., hex('ab') => "0041,0042".\r
- */\r
- public static String hex(StringBuffer s) {\r
- return hex(s.toString());\r
- }\r
-\r
- /**\r
- * Convert a char to 4 hex uppercase digits. E.g., hex('a') =>\r
- * "0041". Append the output to the given StringBuffer.\r
- */\r
- public static StringBuffer hex(char ch, StringBuffer output) {\r
- return appendNumber(output, ch, 16, 4);\r
- }\r
-\r
- /**\r
- * Convert a integer to size width hex uppercase digits.\r
- * E.g., hex('a', 4, str) => "0041".\r
- * Append the output to the given StringBuffer.\r
- * If width is too small to fit, nothing will be appended to output.\r
- */\r
- public static StringBuffer hex(int ch, int width, StringBuffer output) {\r
- return appendNumber(output, ch, 16, width);\r
- }\r
-\r
- /**\r
- * Convert a integer to size width (minimum) hex uppercase digits.\r
- * E.g., hex('a', 4, str) => "0041". If the integer requires more\r
- * than width digits, more will be used.\r
- */\r
- public static String hex(int ch, int width) {\r
- StringBuffer buf = new StringBuffer();\r
- return appendNumber(buf, ch, 16, width).toString();\r
- }\r
- /**\r
- * Supplies a zero-padded hex representation of an integer (without 0x)\r
- */\r
- static public String hex(long i, int places) {\r
- if (i == Long.MIN_VALUE) return "-8000000000000000";\r
- boolean negative = i < 0;\r
- if (negative) {\r
- i = -i;\r
- }\r
- String result = Long.toString(i, 16).toUpperCase();\r
- if (result.length() < places) {\r
- result = "0000000000000000".substring(result.length(),places) + result;\r
- }\r
- if (negative) {\r
- return '-' + result;\r
- }\r
- return result;\r
- }\r
- \r
- public static String hex(long ch) {\r
- return hex(ch,4);\r
- }\r
- \r
- /**\r
- * Convert a string to comma-separated groups of 4 hex uppercase\r
- * digits. E.g., hex('ab') => "0041,0042". Append the output\r
- * to the given StringBuffer.\r
- */\r
- public static StringBuffer hex(String s, StringBuffer result) {\r
- for (int i = 0; i < s.length(); ++i) {\r
- if (i != 0) result.append(',');\r
- hex(s.charAt(i), result);\r
- }\r
- return result;\r
- }\r
-\r
- /**\r
- * Split a string into pieces based on the given divider character\r
- * @param s the string to split\r
- * @param divider the character on which to split. Occurrences of\r
- * this character are not included in the output\r
- * @param output an array to receive the substrings between\r
- * instances of divider. It must be large enough on entry to\r
- * accomodate all output. Adjacent instances of the divider\r
- * character will place empty strings into output. Before\r
- * returning, output is padded out with empty strings.\r
- */\r
- public static void split(String s, char divider, String[] output) {\r
- int last = 0;\r
- int current = 0;\r
- int i;\r
- for (i = 0; i < s.length(); ++i) {\r
- if (s.charAt(i) == divider) {\r
- output[current++] = s.substring(last,i);\r
- last = i+1;\r
- }\r
- }\r
- output[current++] = s.substring(last,i);\r
- while (current < output.length) {\r
- output[current++] = "";\r
- }\r
- }\r
-\r
- /**\r
- * Split a string into pieces based on the given divider character\r
- * @param s the string to split\r
- * @param divider the character on which to split. Occurrences of\r
- * this character are not included in the output\r
- * @return output an array to receive the substrings between\r
- * instances of divider. Adjacent instances of the divider\r
- * character will place empty strings into output.\r
- */\r
- public static String[] split(String s, char divider) {\r
- int last = 0;\r
- int i;\r
- ArrayList output = new ArrayList();\r
- for (i = 0; i < s.length(); ++i) {\r
- if (s.charAt(i) == divider) {\r
- output.add(s.substring(last,i));\r
- last = i+1;\r
- }\r
- }\r
- output.add( s.substring(last,i));\r
- return (String[]) output.toArray(new String[output.size()]);\r
- }\r
- \r
- /**\r
- * Look up a given string in a string array. Returns the index at\r
- * which the first occurrence of the string was found in the\r
- * array, or -1 if it was not found.\r
- * @param source the string to search for\r
- * @param target the array of zero or more strings in which to\r
- * look for source\r
- * @return the index of target at which source first occurs, or -1\r
- * if not found\r
- */\r
- public static int lookup(String source, String[] target) {\r
- for (int i = 0; i < target.length; ++i) {\r
- if (source.equals(target[i])) return i;\r
- }\r
- return -1;\r
- }\r
-\r
- /**\r
- * Skip over a sequence of zero or more white space characters\r
- * at pos. Return the index of the first non-white-space character\r
- * at or after pos, or str.length(), if there is none.\r
- */\r
- public static int skipWhitespace(String str, int pos) {\r
- while (pos < str.length()) {\r
- int c = UTF16.charAt(str, pos);\r
- if (!UCharacterProperty.isRuleWhiteSpace(c)) {\r
- break;\r
- }\r
- pos += UTF16.getCharCount(c);\r
- }\r
- return pos;\r
- }\r
-\r
- /**\r
- * Skip over a sequence of zero or more white space characters\r
- * at pos[0], advancing it.\r
- */\r
- public static void skipWhitespace(String str, int[] pos) {\r
- pos[0] = skipWhitespace(str, pos[0]);\r
- }\r
-\r
- /**\r
- * Remove all rule white space from a string.\r
- */\r
- public static String deleteRuleWhiteSpace(String str) {\r
- StringBuffer buf = new StringBuffer();\r
- for (int i=0; i<str.length(); ) {\r
- int ch = UTF16.charAt(str, i);\r
- i += UTF16.getCharCount(ch);\r
- if (UCharacterProperty.isRuleWhiteSpace(ch)) {\r
- continue;\r
- }\r
- UTF16.append(buf, ch);\r
- }\r
- return buf.toString();\r
- }\r
-\r
- /**\r
- * Parse a single non-whitespace character 'ch', optionally\r
- * preceded by whitespace.\r
- * @param id the string to be parsed\r
- * @param pos INPUT-OUTPUT parameter. On input, pos[0] is the\r
- * offset of the first character to be parsed. On output, pos[0]\r
- * is the index after the last parsed character. If the parse\r
- * fails, pos[0] will be unchanged.\r
- * @param ch the non-whitespace character to be parsed.\r
- * @return true if 'ch' is seen preceded by zero or more\r
- * whitespace characters.\r
- */\r
- public static boolean parseChar(String id, int[] pos, char ch) {\r
- int start = pos[0];\r
- skipWhitespace(id, pos);\r
- if (pos[0] == id.length() ||\r
- id.charAt(pos[0]) != ch) {\r
- pos[0] = start;\r
- return false;\r
- }\r
- ++pos[0];\r
- return true;\r
- }\r
-\r
- /**\r
- * Parse a pattern string starting at offset pos. Keywords are\r
- * matched case-insensitively. Spaces may be skipped and may be\r
- * optional or required. Integer values may be parsed, and if\r
- * they are, they will be returned in the given array. If\r
- * successful, the offset of the next non-space character is\r
- * returned. On failure, -1 is returned.\r
- * @param pattern must only contain lowercase characters, which\r
- * will match their uppercase equivalents as well. A space\r
- * character matches one or more required spaces. A '~' character\r
- * matches zero or more optional spaces. A '#' character matches\r
- * an integer and stores it in parsedInts, which the caller must\r
- * ensure has enough capacity.\r
- * @param parsedInts array to receive parsed integers. Caller\r
- * must ensure that parsedInts.length is >= the number of '#'\r
- * signs in 'pattern'.\r
- * @return the position after the last character parsed, or -1 if\r
- * the parse failed\r
- */\r
- public static int parsePattern(String rule, int pos, int limit,\r
- String pattern, int[] parsedInts) {\r
- // TODO Update this to handle surrogates\r
- int[] p = new int[1];\r
- int intCount = 0; // number of integers parsed\r
- for (int i=0; i<pattern.length(); ++i) {\r
- char cpat = pattern.charAt(i);\r
- char c;\r
- switch (cpat) {\r
- case ' ':\r
- if (pos >= limit) {\r
- return -1;\r
- }\r
- c = rule.charAt(pos++);\r
- if (!UCharacterProperty.isRuleWhiteSpace(c)) {\r
- return -1;\r
- }\r
- // FALL THROUGH to skipWhitespace\r
- case '~':\r
- pos = skipWhitespace(rule, pos);\r
- break;\r
- case '#':\r
- p[0] = pos;\r
- parsedInts[intCount++] = parseInteger(rule, p, limit);\r
- if (p[0] == pos) {\r
- // Syntax error; failed to parse integer\r
- return -1;\r
- }\r
- pos = p[0];\r
- break;\r
- default:\r
- if (pos >= limit) {\r
- return -1;\r
- }\r
- c = (char) UCharacter.toLowerCase(rule.charAt(pos++));\r
- if (c != cpat) {\r
- return -1;\r
- }\r
- break;\r
- }\r
- }\r
- return pos;\r
- }\r
-\r
- /**\r
- * Parse a pattern string within the given Replaceable and a parsing\r
- * pattern. Characters are matched literally and case-sensitively\r
- * except for the following special characters:\r
- *\r
- * ~ zero or more uprv_isRuleWhiteSpace chars\r
- *\r
- * If end of pattern is reached with all matches along the way,\r
- * pos is advanced to the first unparsed index and returned.\r
- * Otherwise -1 is returned.\r
- * @param pat pattern that controls parsing\r
- * @param text text to be parsed, starting at index\r
- * @param index offset to first character to parse\r
- * @param limit offset after last character to parse\r
- * @return index after last parsed character, or -1 on parse failure.\r
- */\r
- public static int parsePattern(String pat,\r
- Replaceable text,\r
- int index,\r
- int limit) {\r
- int ipat = 0;\r
-\r
- // empty pattern matches immediately\r
- if (ipat == pat.length()) {\r
- return index;\r
- }\r
-\r
- int cpat = UTF16.charAt(pat, ipat);\r
-\r
- while (index < limit) {\r
- int c = text.char32At(index);\r
-\r
- // parse \s*\r
- if (cpat == '~') {\r
- if (UCharacterProperty.isRuleWhiteSpace(c)) {\r
- index += UTF16.getCharCount(c);\r
- continue;\r
- } else {\r
- if (++ipat == pat.length()) {\r
- return index; // success; c unparsed\r
- }\r
- // fall thru; process c again with next cpat\r
- }\r
- }\r
-\r
- // parse literal\r
- else if (c == cpat) {\r
- int n = UTF16.getCharCount(c);\r
- index += n;\r
- ipat += n;\r
- if (ipat == pat.length()) {\r
- return index; // success; c parsed\r
- }\r
- // fall thru; get next cpat\r
- }\r
-\r
- // match failure of literal\r
- else {\r
- return -1;\r
- }\r
-\r
- cpat = UTF16.charAt(pat, ipat);\r
- }\r
-\r
- return -1; // text ended before end of pat\r
- }\r
-\r
- /**\r
- * Parse an integer at pos, either of the form \d+ or of the form\r
- * 0x[0-9A-Fa-f]+ or 0[0-7]+, that is, in standard decimal, hex,\r
- * or octal format.\r
- * @param pos INPUT-OUTPUT parameter. On input, the first\r
- * character to parse. On output, the character after the last\r
- * parsed character.\r
- */\r
- public static int parseInteger(String rule, int[] pos, int limit) {\r
- int count = 0;\r
- int value = 0;\r
- int p = pos[0];\r
- int radix = 10;\r
-\r
- if (rule.regionMatches(true, p, "0x", 0, 2)) {\r
- p += 2;\r
- radix = 16;\r
- } else if (p < limit && rule.charAt(p) == '0') {\r
- p++;\r
- count = 1;\r
- radix = 8;\r
- }\r
-\r
- while (p < limit) {\r
- int d = UCharacter.digit(rule.charAt(p++), radix);\r
- if (d < 0) {\r
- --p;\r
- break;\r
- }\r
- ++count;\r
- int v = (value * radix) + d;\r
- if (v <= value) {\r
- // If there are too many input digits, at some point\r
- // the value will go negative, e.g., if we have seen\r
- // "0x8000000" already and there is another '0', when\r
- // we parse the next 0 the value will go negative.\r
- return 0;\r
- }\r
- value = v;\r
- }\r
- if (count > 0) {\r
- pos[0] = p;\r
- }\r
- return value;\r
- }\r
-\r
- /**\r
- * Parse a Unicode identifier from the given string at the given\r
- * position. Return the identifier, or null if there is no\r
- * identifier.\r
- * @param str the string to parse\r
- * @param pos INPUT-OUPUT parameter. On INPUT, pos[0] is the\r
- * first character to examine. It must be less than str.length(),\r
- * and it must not point to a whitespace character. That is, must\r
- * have pos[0] < str.length() and\r
- * !UCharacterProperty.isRuleWhiteSpace(UTF16.charAt(str, pos[0])). On\r
- * OUTPUT, the position after the last parsed character.\r
- * @return the Unicode identifier, or null if there is no valid\r
- * identifier at pos[0].\r
- */\r
- public static String parseUnicodeIdentifier(String str, int[] pos) {\r
- // assert(pos[0] < str.length());\r
- // assert(!UCharacterProperty.isRuleWhiteSpace(UTF16.charAt(str, pos[0])));\r
- StringBuffer buf = new StringBuffer();\r
- int p = pos[0];\r
- while (p < str.length()) {\r
- int ch = UTF16.charAt(str, p);\r
- if (buf.length() == 0) {\r
- if (UCharacter.isUnicodeIdentifierStart(ch)) {\r
- UTF16.append(buf, ch);\r
- } else {\r
- return null;\r
- }\r
- } else {\r
- if (UCharacter.isUnicodeIdentifierPart(ch)) {\r
- UTF16.append(buf, ch);\r
- } else {\r
- break;\r
- }\r
- }\r
- p += UTF16.getCharCount(ch);\r
- }\r
- pos[0] = p;\r
- return buf.toString();\r
- }\r
-\r
- /**\r
- * Trim whitespace from ends of a StringBuffer.\r
- */\r
- public static StringBuffer trim(StringBuffer b) {\r
- // TODO update to handle surrogates\r
- int i;\r
- for (i=0; i<b.length() && UCharacter.isWhitespace(b.charAt(i)); ++i) {}\r
- b.delete(0, i);\r
- for (i=b.length()-1; i>=0 && UCharacter.isWhitespace(b.charAt(i)); --i) {}\r
- return b.delete(i+1, b.length());\r
- }\r
-\r
- static final char DIGITS[] = {\r
- '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',\r
- 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J',\r
- 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T',\r
- 'U', 'V', 'W', 'X', 'Y', 'Z'\r
- };\r
-\r
- /**\r
- * Append a number to the given StringBuffer in the radix 10\r
- * generating at least one digit.\r
- */\r
- public static StringBuffer appendNumber(StringBuffer result, int n) {\r
- return appendNumber(result, n, 10, 1);\r
- }\r
-\r
- /**\r
- * Append the digits of a positive integer to the given\r
- * <code>StringBuffer</code> in the given radix. This is\r
- * done recursively since it is easiest to generate the low-\r
- * order digit first, but it must be appended last.\r
- *\r
- * @param result is the <code>StringBuffer</code> to append to\r
- * @param n is the positive integer\r
- * @param radix is the radix, from 2 to 36 inclusive\r
- * @param minDigits is the minimum number of digits to append.\r
- */\r
- private static void recursiveAppendNumber(StringBuffer result, int n,\r
- int radix, int minDigits)\r
- {\r
- int digit = n % radix;\r
-\r
- if (n >= radix || minDigits > 1) {\r
- recursiveAppendNumber(result, n / radix, radix, minDigits - 1);\r
- }\r
-\r
- result.append(DIGITS[digit]);\r
- }\r
-\r
- /**\r
- * Append a number to the given StringBuffer in the given radix.\r
- * Standard digits '0'-'9' are used and letters 'A'-'Z' for\r
- * radices 11 through 36.\r
- * @param result the digits of the number are appended here\r
- * @param n the number to be converted to digits; may be negative.\r
- * If negative, a '-' is prepended to the digits.\r
- * @param radix a radix from 2 to 36 inclusive.\r
- * @param minDigits the minimum number of digits, not including\r
- * any '-', to produce. Values less than 2 have no effect. One\r
- * digit is always emitted regardless of this parameter.\r
- * @return a reference to result\r
- */\r
- public static StringBuffer appendNumber(StringBuffer result, int n,\r
- int radix, int minDigits)\r
- throws IllegalArgumentException\r
- {\r
- if (radix < 2 || radix > 36) {\r
- throw new IllegalArgumentException("Illegal radix " + radix);\r
- }\r
-\r
-\r
- int abs = n;\r
-\r
- if (n < 0) {\r
- abs = -n;\r
- result.append("-");\r
- }\r
-\r
- recursiveAppendNumber(result, abs, radix, minDigits);\r
-\r
- return result;\r
- }\r
-\r
- /**\r
- * Parse an unsigned 31-bit integer at the given offset. Use\r
- * UCharacter.digit() to parse individual characters into digits.\r
- * @param text the text to be parsed\r
- * @param pos INPUT-OUTPUT parameter. On entry, pos[0] is the\r
- * offset within text at which to start parsing; it should point\r
- * to a valid digit. On exit, pos[0] is the offset after the last\r
- * parsed character. If the parse failed, it will be unchanged on\r
- * exit. Must be >= 0 on entry.\r
- * @param radix the radix in which to parse; must be >= 2 and <=\r
- * 36.\r
- * @return a non-negative parsed number, or -1 upon parse failure.\r
- * Parse fails if there are no digits, that is, if pos[0] does not\r
- * point to a valid digit on entry, or if the number to be parsed\r
- * does not fit into a 31-bit unsigned integer.\r
- */\r
- public static int parseNumber(String text, int[] pos, int radix) {\r
- // assert(pos[0] >= 0);\r
- // assert(radix >= 2);\r
- // assert(radix <= 36);\r
- int n = 0;\r
- int p = pos[0];\r
- while (p < text.length()) {\r
- int ch = UTF16.charAt(text, p);\r
- int d = UCharacter.digit(ch, radix);\r
- if (d < 0) {\r
- break;\r
- }\r
- n = radix*n + d;\r
- // ASSUME that when a 32-bit integer overflows it becomes\r
- // negative. E.g., 214748364 * 10 + 8 => negative value.\r
- if (n < 0) {\r
- return -1;\r
- }\r
- ++p;\r
- }\r
- if (p == pos[0]) {\r
- return -1;\r
- }\r
- pos[0] = p;\r
- return n;\r
- }\r
-\r
- /**\r
- * Return true if the character is NOT printable ASCII. The tab,\r
- * newline and linefeed characters are considered unprintable.\r
- */\r
- public static boolean isUnprintable(int c) {\r
- return !(c >= 0x20 && c <= 0x7E);\r
- }\r
-\r
- /**\r
- * Escape unprintable characters using <backslash>uxxxx notation\r
- * for U+0000 to U+FFFF and <backslash>Uxxxxxxxx for U+10000 and\r
- * above. If the character is printable ASCII, then do nothing\r
- * and return FALSE. Otherwise, append the escaped notation and\r
- * return TRUE.\r
- */\r
- public static boolean escapeUnprintable(StringBuffer result, int c) {\r
- if (isUnprintable(c)) {\r
- result.append('\\');\r
- if ((c & ~0xFFFF) != 0) {\r
- result.append('U');\r
- result.append(DIGITS[0xF&(c>>28)]);\r
- result.append(DIGITS[0xF&(c>>24)]);\r
- result.append(DIGITS[0xF&(c>>20)]);\r
- result.append(DIGITS[0xF&(c>>16)]);\r
- } else {\r
- result.append('u');\r
- }\r
- result.append(DIGITS[0xF&(c>>12)]);\r
- result.append(DIGITS[0xF&(c>>8)]);\r
- result.append(DIGITS[0xF&(c>>4)]);\r
- result.append(DIGITS[0xF&c]);\r
- return true;\r
- }\r
- return false;\r
- }\r
-\r
- /**\r
- * Returns the index of the first character in a set, ignoring quoted text.\r
- * For example, in the string "abc'hide'h", the 'h' in "hide" will not be\r
- * found by a search for "h". Unlike String.indexOf(), this method searches\r
- * not for a single character, but for any character of the string\r
- * <code>setOfChars</code>.\r
- * @param text text to be searched\r
- * @param start the beginning index, inclusive; <code>0 <= start\r
- * <= limit</code>.\r
- * @param limit the ending index, exclusive; <code>start <= limit\r
- * <= text.length()</code>.\r
- * @param setOfChars string with one or more distinct characters\r
- * @return Offset of the first character in <code>setOfChars</code>\r
- * found, or -1 if not found.\r
- * @see String#indexOf\r
- */\r
- public static int quotedIndexOf(String text, int start, int limit,\r
- String setOfChars) {\r
- for (int i=start; i<limit; ++i) {\r
- char c = text.charAt(i);\r
- if (c == BACKSLASH) {\r
- ++i;\r
- } else if (c == APOSTROPHE) {\r
- while (++i < limit\r
- && text.charAt(i) != APOSTROPHE) {}\r
- } else if (setOfChars.indexOf(c) >= 0) {\r
- return i;\r
- }\r
- }\r
- return -1;\r
- }\r
-\r
- /**\r
- * Similar to StringBuffer.getChars, version 1.3.\r
- * Since JDK 1.2 implements StringBuffer.getChars differently, this method\r
- * is here to provide consistent results.\r
- * To be removed after JDK 1.2 ceased to be the reference platform.\r
- * @param src source string buffer\r
- * @param srcBegin offset to the start of the src to retrieve from\r
- * @param srcEnd offset to the end of the src to retrieve from\r
- * @param dst char array to store the retrieved chars\r
- * @param dstBegin offset to the start of the destination char array to\r
- * store the retrieved chars\r
- */\r
- public static void getChars(StringBuffer src, int srcBegin, int srcEnd,\r
- char dst[], int dstBegin)\r
- {\r
- if (srcBegin == srcEnd) {\r
- return;\r
- }\r
- src.getChars(srcBegin, srcEnd, dst, dstBegin);\r
- }\r
-\r
- /**\r
- * Append a character to a rule that is being built up. To flush\r
- * the quoteBuf to rule, make one final call with isLiteral == true.\r
- * If there is no final character, pass in (int)-1 as c.\r
- * @param rule the string to append the character to\r
- * @param c the character to append, or (int)-1 if none.\r
- * @param isLiteral if true, then the given character should not be\r
- * quoted or escaped. Usually this means it is a syntactic element\r
- * such as > or $\r
- * @param escapeUnprintable if true, then unprintable characters\r
- * should be escaped using escapeUnprintable(). These escapes will\r
- * appear outside of quotes.\r
- * @param quoteBuf a buffer which is used to build up quoted\r
- * substrings. The caller should initially supply an empty buffer,\r
- * and thereafter should not modify the buffer. The buffer should be\r
- * cleared out by, at the end, calling this method with a literal\r
- * character (which may be -1).\r
- */\r
- public static void appendToRule(StringBuffer rule,\r
- int c,\r
- boolean isLiteral,\r
- boolean escapeUnprintable,\r
- StringBuffer quoteBuf) {\r
- // If we are escaping unprintables, then escape them outside\r
- // quotes. \\u and \\U are not recognized within quotes. The same\r
- // logic applies to literals, but literals are never escaped.\r
- if (isLiteral ||\r
- (escapeUnprintable && Utility.isUnprintable(c))) {\r
- if (quoteBuf.length() > 0) {\r
- // We prefer backslash APOSTROPHE to double APOSTROPHE\r
- // (more readable, less similar to ") so if there are\r
- // double APOSTROPHEs at the ends, we pull them outside\r
- // of the quote.\r
-\r
- // If the first thing in the quoteBuf is APOSTROPHE\r
- // (doubled) then pull it out.\r
- while (quoteBuf.length() >= 2 &&\r
- quoteBuf.charAt(0) == APOSTROPHE &&\r
- quoteBuf.charAt(1) == APOSTROPHE) {\r
- rule.append(BACKSLASH).append(APOSTROPHE);\r
- quoteBuf.delete(0, 2);\r
- }\r
- // If the last thing in the quoteBuf is APOSTROPHE\r
- // (doubled) then remove and count it and add it after.\r
- int trailingCount = 0;\r
- while (quoteBuf.length() >= 2 &&\r
- quoteBuf.charAt(quoteBuf.length()-2) == APOSTROPHE &&\r
- quoteBuf.charAt(quoteBuf.length()-1) == APOSTROPHE) {\r
- quoteBuf.setLength(quoteBuf.length()-2);\r
- ++trailingCount;\r
- }\r
- if (quoteBuf.length() > 0) {\r
- rule.append(APOSTROPHE);\r
- // jdk 1.3.1 does not have append(StringBuffer) yet\r
- if(ICUDebug.isJDK14OrHigher){\r
- rule.append(quoteBuf);\r
- }else{\r
- rule.append(quoteBuf.toString());\r
- }\r
- rule.append(APOSTROPHE);\r
- quoteBuf.setLength(0);\r
- }\r
- while (trailingCount-- > 0) {\r
- rule.append(BACKSLASH).append(APOSTROPHE);\r
- }\r
- }\r
- if (c != -1) {\r
- /* Since spaces are ignored during parsing, they are\r
- * emitted only for readability. We emit one here\r
- * only if there isn't already one at the end of the\r
- * rule.\r
- */\r
- if (c == ' ') {\r
- int len = rule.length();\r
- if (len > 0 && rule.charAt(len-1) != ' ') {\r
- rule.append(' ');\r
- }\r
- } else if (!escapeUnprintable || !Utility.escapeUnprintable(rule, c)) {\r
- UTF16.append(rule, c);\r
- }\r
- }\r
- }\r
-\r
- // Escape ' and '\' and don't begin a quote just for them\r
- else if (quoteBuf.length() == 0 &&\r
- (c == APOSTROPHE || c == BACKSLASH)) {\r
- rule.append(BACKSLASH).append((char)c);\r
- }\r
-\r
- // Specials (printable ascii that isn't [0-9a-zA-Z]) and\r
- // whitespace need quoting. Also append stuff to quotes if we are\r
- // building up a quoted substring already.\r
- else if (quoteBuf.length() > 0 ||\r
- (c >= 0x0021 && c <= 0x007E &&\r
- !((c >= 0x0030/*'0'*/ && c <= 0x0039/*'9'*/) ||\r
- (c >= 0x0041/*'A'*/ && c <= 0x005A/*'Z'*/) ||\r
- (c >= 0x0061/*'a'*/ && c <= 0x007A/*'z'*/))) ||\r
- UCharacterProperty.isRuleWhiteSpace(c)) {\r
- UTF16.append(quoteBuf, c);\r
- // Double ' within a quote\r
- if (c == APOSTROPHE) {\r
- quoteBuf.append((char)c);\r
- }\r
- }\r
-\r
- // Otherwise just append\r
- else {\r
- UTF16.append(rule, c);\r
- }\r
- }\r
-\r
- /**\r
- * Append the given string to the rule. Calls the single-character\r
- * version of appendToRule for each character.\r
- */\r
- public static void appendToRule(StringBuffer rule,\r
- String text,\r
- boolean isLiteral,\r
- boolean escapeUnprintable,\r
- StringBuffer quoteBuf) {\r
- for (int i=0; i<text.length(); ++i) {\r
- // Okay to process in 16-bit code units here\r
- appendToRule(rule, text.charAt(i), isLiteral, escapeUnprintable, quoteBuf);\r
- }\r
- }\r
-\r
- /**\r
- * Given a matcher reference, which may be null, append its\r
- * pattern as a literal to the given rule.\r
- */\r
- public static void appendToRule(StringBuffer rule,\r
- UnicodeMatcher matcher,\r
- boolean escapeUnprintable,\r
- StringBuffer quoteBuf) {\r
- if (matcher != null) {\r
- appendToRule(rule, matcher.toPattern(escapeUnprintable),\r
- true, escapeUnprintable, quoteBuf);\r
- }\r
- }\r
-\r
- /**\r
- * Compares 2 unsigned integers\r
- * @param source 32 bit unsigned integer\r
- * @param target 32 bit unsigned integer\r
- * @return 0 if equals, 1 if source is greater than target and -1\r
- * otherwise\r
- */\r
- public static final int compareUnsigned(int source, int target)\r
- {\r
- source += MAGIC_UNSIGNED;\r
- target += MAGIC_UNSIGNED;\r
- if (source < target) {\r
- return -1;\r
- } \r
- else if (source > target) {\r
- return 1;\r
- }\r
- return 0;\r
- }\r
-\r
- /**\r
- * Find the highest bit in a positive integer. This is done\r
- * by doing a binary search through the bits.\r
- *\r
- * @param n is the integer\r
- *\r
- * @return the bit number of the highest bit, with 0 being\r
- * the low order bit, or -1 if <code>n</code> is not positive\r
- */\r
- public static final byte highBit(int n)\r
- {\r
- if (n <= 0) {\r
- return -1;\r
- }\r
-\r
- byte bit = 0;\r
-\r
- if (n >= 1 << 16) {\r
- n >>= 16;\r
- bit += 16;\r
- }\r
-\r
- if (n >= 1 << 8) {\r
- n >>= 8;\r
- bit += 8;\r
- }\r
-\r
- if (n >= 1 << 4) {\r
- n >>= 4;\r
- bit += 4;\r
- }\r
-\r
- if (n >= 1 << 2) {\r
- n >>= 2;\r
- bit += 2;\r
- }\r
-\r
- if (n >= 1 << 1) {\r
- n >>= 1;\r
- bit += 1;\r
- }\r
-\r
- return bit;\r
- }\r
- /**\r
- * Utility method to take a int[] containing codepoints and return\r
- * a string representation with code units. \r
- */\r
- public static String valueOf(int[]source){\r
- // TODO: Investigate why this method is not on UTF16 class\r
- StringBuffer result = new StringBuffer(source.length);\r
- for(int i=0; i<source.length; i++){\r
- UTF16.append(result,source[i]);\r
- }\r
- return result.toString();\r
- }\r
- \r
- \r
- /**\r
- * Utility to duplicate a string count times\r
- * @param s\r
- * @param count\r
- */\r
- public static String repeat(String s, int count) {\r
- if (count <= 0) return "";\r
- if (count == 1) return s;\r
- StringBuffer result = new StringBuffer();\r
- for (int i = 0; i < count; ++i) {\r
- result.append(s);\r
- }\r
- return result.toString();\r
- }\r
-\r
- \r
- // !!! 1.3 compatibility\r
- public static int indexOf(StringBuffer buf, String s) {\r
-//#if defined(FOUNDATION10) || defined(J2SE13)\r
-//## return buf.toString().indexOf(s);\r
-//#else\r
- return buf.indexOf(s);\r
-//#endif\r
- }\r
- \r
- // !!! 1.3 compatibility\r
- public static int lastIndexOf(StringBuffer buf, String s) {\r
-//#if defined(FOUNDATION10) || defined(J2SE13)\r
-//## return buf.toString().lastIndexOf(s);\r
-//#else\r
- return buf.lastIndexOf(s);\r
-//#endif\r
- }\r
- \r
- // !!! 1.3 compatibility\r
- public static int indexOf(StringBuffer buf, String s, int i) {\r
-//#if defined(FOUNDATION10) || defined(J2SE13)\r
-//## return buf.toString().indexOf(s, i);\r
-//#else\r
- return buf.indexOf(s, i);\r
-//#endif\r
- }\r
- \r
- // !!! 1.3 compatibility\r
- public static int lastIndexOf(StringBuffer buf, String s, int i) {\r
-//#if defined(FOUNDATION10) || defined(J2SE13)\r
-//## return buf.toString().lastIndexOf(s, i);\r
-//#else\r
- return buf.lastIndexOf(s, i);\r
-//#endif\r
- }\r
-\r
- // !!! 1.3/1.4 compatibility\r
- public static String replace(String src, String target, String replacement) {\r
-//#if defined(FOUNDATION10) || defined(J2SE13) || defined(J2SE14)\r
-//## int i = src.indexOf(target);\r
-//## if (i == -1) {\r
-//## return src;\r
-//## }\r
-//## StringBuffer buf = new StringBuffer();\r
-//## int n = 0;\r
-//## do {\r
-//## buf.append(src.substring(n, i));\r
-//## buf.append(replacement);\r
-//## n = i + target.length();\r
-//## i = src.indexOf(target, n);\r
-//## } while (i != -1);\r
-//## if (n < src.length()) {\r
-//## buf.append(src.substring(n));\r
-//## }\r
-//## return buf.toString();\r
-//#else\r
- return src.replace(target, replacement);\r
-//#endif\r
- }\r
-\r
- // !!! 1.3 compatibility\r
- public static String replaceAll(String src, String target, String replacement) {\r
-//#if defined(FOUNDATION10) || defined(J2SE13)\r
-//## return replace(src, target, replacement);\r
-//#else\r
- return src.replaceAll(target, replacement);\r
-//#endif\r
- }\r
-\r
- //private static final String REGEX_SPECIALS = ".^$[]*+?|()";\r
-\r
- // !!! 1.3 compatibility\r
- // Note: target is not a string literal, not a regular expression.\r
- public static String[] splitString(String src, String target) {\r
-//#if defined(FOUNDATION10) || defined(J2SE13)\r
-//## int i = src.indexOf(target);\r
-//## if (i == -1) {\r
-//## return new String[] { src };\r
-//## }\r
-//## ArrayList output = new ArrayList();\r
-//## int n = 0;\r
-//## do {\r
-//## output.add(src.substring(n, i));\r
-//## n = i + target.length();\r
-//## i = src.indexOf(target, n);\r
-//## } while (i != -1);\r
-//## if (n < src.length()) {\r
-//## output.add(src.substring(n));\r
-//## }\r
-//## return (String[]) output.toArray(new String[output.size()]);\r
-//#else\r
- return src.split("\\Q" + target + "\\E");\r
-//#endif\r
- }\r
-\r
- // !!! 1.3 compatibility\r
- /**\r
- * Split the string at runs of ascii whitespace characters.\r
- */\r
- public static String[] splitWhitespace(String src) {\r
-//#if defined(FOUNDATION10) || defined(J2SE13)\r
-//## char ws[] = "\u0020\u0009\n\u000b\u000c\r".toCharArray();\r
-//## ArrayList output = new ArrayList();\r
-//## boolean inWhitespace = true;\r
-//## int n = 0;\r
-//## loop:\r
-//## for (int i = 0; i < src.length(); ++i) {\r
-//## char c = src.charAt(i);\r
-//## for (int j = 0; j < ws.length; ++j) {\r
-//## if (ws[j] == c) {\r
-//## if (!inWhitespace) {\r
-//## output.add(src.substring(n, i));\r
-//## inWhitespace = true;\r
-//## }\r
-//## continue loop;\r
-//## }\r
-//## }\r
-//## if (inWhitespace) {\r
-//## n = i;\r
-//## inWhitespace = false;\r
-//## }\r
-//## }\r
-//## if (n < src.length()) {\r
-//## output.add(src.substring(n));\r
-//## }\r
-//## return (String[]) output.toArray(new String[output.size()]);\r
-//#else\r
- return src.split("\\s+");\r
-//#endif\r
- }\r
-\r
- // !!! 1.3/1.4 compatibility\r
- // Integer constants - Integer.valueOf(int) is not supported in JDK 1.3/1.4\r
- private static final int MAX_INT_CONST = 64;\r
- private static final Integer[] INT_CONST = new Integer[MAX_INT_CONST];\r
-\r
- static {\r
- for (int i = 0; i < MAX_INT_CONST; i++) {\r
- INT_CONST[i] = new Integer(i);\r
- }\r
- }\r
-\r
- public static Integer integerValueOf(int val) {\r
- if (0 <= val && val < MAX_INT_CONST) {\r
- return INT_CONST[val];\r
- }\r
- return new Integer(val);\r
- }\r
-\r
- // !!! 1.3/1.4 compatibility\r
- // Arrays.toString(Object[])\r
- public static String arrayToString(Object[] a) {\r
- StringBuffer buf = new StringBuffer("[");\r
- for (int i = 0; i < a.length; i++) {\r
- if (i != 0) {\r
- buf.append(", ");\r
- }\r
- if (a[i] == null) {\r
- buf.append("null");\r
- } else {\r
- buf.append(a[i].toString());\r
- }\r
- }\r
- buf.append("]");\r
- return buf.toString();\r
- }\r
-}\r
+//##header J2SE15
+/*
+ *******************************************************************************
+ * Copyright (C) 1996-2009, International Business Machines Corporation and *
+ * others. All Rights Reserved. *
+ *******************************************************************************
+ */
+package com.ibm.icu.impl;
+
+import java.util.ArrayList;
+
+import com.ibm.icu.lang.*;
+import com.ibm.icu.text.*;
+import com.ibm.icu.impl.UCharacterProperty;
+
+public final class Utility {
+
+ // Single-quote character (U+0027).
+ private static final char APOSTROPHE = '\'';
+ // Backslash character (U+005C).
+ private static final char BACKSLASH = '\\';
+ // Only the sign bit is set (same bit pattern as Integer.MIN_VALUE).
+ // NOTE(review): presumably a bias that lets signed comparison order ints as
+ // unsigned values, as the name suggests -- confirm against its use sites.
+ private static final int MAGIC_UNSIGNED = 0x80000000;
+
+    /**
+     * Tests whether an Object[] and an arbitrary object hold equal contents.
+     * @param source the array to compare; may be null
+     * @param target the object to compare against; may be null
+     * @return true if both are null, or if target is an Object[] of the same
+     * length whose elements all match those of source
+     */
+    public final static boolean arrayEquals(Object[] source, Object target) {
+        if (source == null) {
+            return target == null;
+        }
+        if (target instanceof Object[]) {
+            Object[] other = (Object[]) target;
+            if (source.length != other.length) {
+                return false;
+            }
+            return arrayRegionMatches(source, 0, other, 0, source.length);
+        }
+        return false;
+    }
+
+    /**
+     * Tests whether an int[] and an arbitrary object hold equal contents.
+     * @param source the array to compare; may be null
+     * @param target the object to compare against; may be null
+     * @return true if both are null, or if target is an int[] of the same
+     * length with the same values at every index
+     */
+    public final static boolean arrayEquals(int[] source, Object target) {
+        if (source == null) {
+            return target == null;
+        }
+        if (!(target instanceof int[])) {
+            return false;
+        }
+        final int[] other = (int[]) target;
+        final int count = source.length;
+        return count == other.length
+            && arrayRegionMatches(source, 0, other, 0, count);
+    }
+
+    /**
+     * Tests whether a double[] and an arbitrary object hold equal contents.
+     * @param source the array to compare; may be null
+     * @param target the object to compare against; may be null
+     * @return true if both are null, or if target is a double[] of the same
+     * length whose region comparison against source succeeds
+     */
+    public final static boolean arrayEquals(double[] source, Object target) {
+        if (source == null) {
+            return target == null;
+        }
+        boolean sameType = target instanceof double[];
+        if (!sameType) {
+            return false;
+        }
+        double[] other = (double[]) target;
+        if (other.length != source.length) {
+            return false;
+        }
+        return arrayRegionMatches(source, 0, other, 0, source.length);
+    }
+    /**
+     * Tests whether a byte[] and an arbitrary object hold equal contents.
+     * @param source the array to compare; may be null
+     * @param target the object to compare against; may be null
+     * @return true if both are null, or if target is a byte[] of the same
+     * length with the same values at every index
+     */
+    public final static boolean arrayEquals(byte[] source, Object target) {
+        if (source == null) {
+            return target == null;
+        }
+        if (target instanceof byte[]) {
+            byte[] other = (byte[]) target;
+            return source.length == other.length
+                && arrayRegionMatches(source, 0, other, 0, source.length);
+        }
+        return false;
+    }
+
+    /**
+     * Compares two objects for equality, comparing contents rather than
+     * identity when source is an Object[], int[], double[] or byte[].
+     * Any other source, including arrays of other primitive types, is
+     * compared with its own equals() method.
+     * @param source the first object; may be null
+     * @param target the second object; may be null
+     * @return true if both are null, if source is one of the supported array
+     * types and the matching arrayEquals overload reports equality, or
+     * otherwise if source.equals(target)
+     */
+    public final static boolean arrayEquals(Object source, Object target) {
+        if (source == null) {
+            return target == null;
+        }
+        // Overloads are chosen at compile time from the static argument type,
+        // so a value typed as Object never reaches the array-specific
+        // overloads on its own; dispatch on the runtime type by hand.
+        if (source instanceof Object[]) {
+            return arrayEquals((Object[]) source, target);
+        }
+        if (source instanceof int[]) {
+            return arrayEquals((int[]) source, target);
+        }
+        if (source instanceof double[]) {
+            // The cast must be to double[]; casting a double[] to int[] fails
+            // with ClassCastException for every input reaching this branch.
+            return arrayEquals((double[]) source, target);
+        }
+        if (source instanceof byte[]) {
+            return arrayEquals((byte[]) source, target);
+        }
+        return source.equals(target);
+    }
+
+    /**
+     * Compares a region of one Object[] with a region of another. Each pair
+     * of elements is compared with arrayEquals(Object, Object).
+     * @param source the first array
+     * @param sourceStart index of the first element of the region in source
+     * @param target the second array
+     * @param targetStart index of the first element of the region in target
+     * @param len the number of elements to compare. Both start indices and
+     * start+len must be valid for their arrays.
+     * @return true if every pair of corresponding elements matches
+     */
+    public final static boolean arrayRegionMatches(Object[] source, int sourceStart,
+                                            Object[] target, int targetStart,
+                                            int len)
+    {
+        final int limit = sourceStart + len;
+        int t = targetStart;
+        for (int s = sourceStart; s < limit; s++, t++) {
+            boolean same = arrayEquals(source[s], target[t]);
+            if (!same) {
+                return false;
+            }
+        }
+        return true;
+    }
+
+    /**
+     * Compares a region of one char[] with a region of another.
+     * @param source the first array
+     * @param sourceStart index of the first char of the region in source
+     * @param target the second array
+     * @param targetStart index of the first char of the region in target
+     * @param len the number of chars to compare. Both start indices and
+     * start+len must be valid for their arrays.
+     * @return true if the two regions hold the same chars in the same order
+     */
+    public final static boolean arrayRegionMatches(char[] source, int sourceStart,
+                                            char[] target, int targetStart,
+                                            int len)
+    {
+        final int limit = sourceStart + len;
+        int s = sourceStart;
+        int t = targetStart;
+        while (s < limit) {
+            if (source[s] != target[t]) {
+                return false;
+            }
+            s++;
+            t++;
+        }
+        return true;
+    }
+
+    /**
+     * Compares a region of one int[] with a region of another.
+     * @param source the first array
+     * @param sourceStart index of the first int of the region in source
+     * @param target the second array
+     * @param targetStart index of the first int of the region in target
+     * @param len the number of ints to compare. Both start indices and
+     * start+len must be valid for their arrays.
+     * @return true if the two regions hold the same values in the same order
+     */
+    public final static boolean arrayRegionMatches(int[] source, int sourceStart,
+                                            int[] target, int targetStart,
+                                            int len)
+    {
+        final int stop = sourceStart + len;
+        final int shift = targetStart - sourceStart;
+        int pos = sourceStart;
+        while (pos < stop) {
+            if (source[pos] != target[pos + shift]) {
+                return false;
+            }
+            ++pos;
+        }
+        return true;
+    }
+
+    /**
+     * Compares a region of one double[] with a region of another. Values are
+     * compared with the primitive != operator, so a NaN never matches
+     * anything (itself included) and 0.0 matches -0.0.
+     * @param source the first array
+     * @param sourceStart index of the first double of the region in source
+     * @param target the second array
+     * @param targetStart index of the first double of the region in target
+     * @param len the number of doubles to compare. Both start indices and
+     * start+len must be valid for their arrays.
+     * @return true if no pair of corresponding values differs
+     */
+    public final static boolean arrayRegionMatches(double[] source, int sourceStart,
+                                            double[] target, int targetStart,
+                                            int len)
+    {
+        final int limit = sourceStart + len;
+        for (int s = sourceStart, t = targetStart; s < limit; s++, t++) {
+            double left = source[s];
+            double right = target[t];
+            if (left != right) {
+                return false;
+            }
+        }
+        return true;
+    }
+    /**
+     * Tests whether a region of one byte[] matches a region of another.
+     * @param source the first array
+     * @param sourceStart index in source of the first byte to compare
+     * @param target the second array
+     * @param targetStart index in target of the first byte to compare
+     * @param len the number of bytes to compare.
+     * The start indices and start+len must be valid.
+     * @return true if every compared pair of bytes is equal
+     */
+    public final static boolean arrayRegionMatches(byte[] source, int sourceStart,
+                                                   byte[] target, int targetStart, int len) {
+        final int stop = sourceStart + len;
+        int from = sourceStart;
+        int to = targetStart;
+        while (from < stop) {
+            if (source[from++] != target[to++]) {
+                return false;
+            }
+        }
+        return true;
+    }
+
+ /**
+ * Convenience utility. Does null checks on objects, then calls equals.
+ */
+ public final static boolean objectEquals(Object source, Object target) {
+ if (source == null)
+ return (target == null);
+ else
+ return source.equals(target);
+ }
+
+ /**
+ * The ESCAPE character is used during run-length encoding. It signals
+ * a run of identical values: in an encoded string it is followed either
+ * by a second ESCAPE (together standing for one literal ESCAPE value) or
+ * by a run length and the value to repeat. It is used by the int, short
+ * and char encodings in this class.
+ */
+ private static final char ESCAPE = '\uA5A5';
+
+ /**
+ * The ESCAPE_BYTE character is used during run-length encoding. It signals
+ * a run of identical bytes. Because byte is signed, the stored value of
+ * this constant is -91; mask with 0xFF before comparing it with an
+ * unsigned quantity such as a run length.
+ */
+ static final byte ESCAPE_BYTE = (byte)0xA5;
+
+    /**
+     * Construct a string representing an int array. Use run-length encoding.
+     * Every int, including the leading array length, is written as two
+     * chars (high 16 bits first). An int represents itself, unless it is
+     * the ESCAPE value. Then the following notations are possible:
+     *   ESCAPE ESCAPE   ESCAPE literal
+     *   ESCAPE n c      n instances of value c
+     * Since an encoded run occupies 3 units, we only encode runs of 4 or
+     * more values. Thus we have n > 0 and n != ESCAPE and n <= 0xFFFF.
+     * If we encounter a run where n == ESCAPE, we represent this as:
+     *   c ESCAPE n-1 c
+     * The ESCAPE value is chosen so as not to collide with commonly
+     * seen values.
+     * @param a the array to encode; an empty array is encoded as just
+     * its (zero) length
+     * @return the encoded string, readable by RLEStringToIntArray
+     */
+    static public final String arrayToRLEString(int[] a) {
+        StringBuffer buffer = new StringBuffer();
+
+        appendInt(buffer, a.length);
+        if (a.length == 0) {
+            // No values to encode; the length header alone is a complete
+            // encoding (previously a[0] threw ArrayIndexOutOfBoundsException).
+            return buffer.toString();
+        }
+        int runValue = a[0];
+        int runLength = 1;
+        for (int i = 1; i < a.length; ++i) {
+            int s = a[i];
+            if (s == runValue && runLength < 0xFFFF) {
+                ++runLength;
+            } else {
+                encodeRun(buffer, runValue, runLength);
+                runValue = s;
+                runLength = 1;
+            }
+        }
+        // Flush the run that was still open when the input ended.
+        encodeRun(buffer, runValue, runLength);
+        return buffer.toString();
+    }
+
+ /**
+ * Construct a string representing a short array. Use run-length encoding.
+ * A character represents itself, unless it is the ESCAPE character. Then
+ * the following notations are possible:
+ * ESCAPE ESCAPE ESCAPE literal
+ * ESCAPE n c n instances of character c
+ * Since an encoded run occupies 3 characters, we only encode runs of 4 or
+ * more characters. Thus we have n > 0 and n != ESCAPE and n <= 0xFFFF.
+ * If we encounter a run where n == ESCAPE, we represent this as:
+ * c ESCAPE n-1 c
+ * The ESCAPE value is chosen so as not to collide with commonly
+ * seen values.
+ */
+ static public final String arrayToRLEString(short[] a) {
+ StringBuffer buffer = new StringBuffer();
+ // for (int i=0; i<a.length; ++i) buffer.append((char) a[i]);
+ buffer.append((char) (a.length >> 16));
+ buffer.append((char) a.length);
+ short runValue = a[0];
+ int runLength = 1;
+ for (int i=1; i<a.length; ++i) {
+ short s = a[i];
+ if (s == runValue && runLength < 0xFFFF) ++runLength;
+ else {
+ encodeRun(buffer, runValue, runLength);
+ runValue = s;
+ runLength = 1;
+ }
+ }
+ encodeRun(buffer, runValue, runLength);
+ return buffer.toString();
+ }
+
+ /**
+ * Construct a string representing a char array. Use run-length encoding.
+ * A character represents itself, unless it is the ESCAPE character. Then
+ * the following notations are possible:
+ * ESCAPE ESCAPE ESCAPE literal
+ * ESCAPE n c n instances of character c
+ * Since an encoded run occupies 3 characters, we only encode runs of 4 or
+ * more characters. Thus we have n > 0 and n != ESCAPE and n <= 0xFFFF.
+ * If we encounter a run where n == ESCAPE, we represent this as:
+ * c ESCAPE n-1 c
+ * The ESCAPE value is chosen so as not to collide with commonly
+ * seen values.
+ */
+ static public final String arrayToRLEString(char[] a) {
+ StringBuffer buffer = new StringBuffer();
+ buffer.append((char) (a.length >> 16));
+ buffer.append((char) a.length);
+ char runValue = a[0];
+ int runLength = 1;
+ for (int i=1; i<a.length; ++i) {
+ char s = a[i];
+ if (s == runValue && runLength < 0xFFFF) ++runLength;
+ else {
+ encodeRun(buffer, (short)runValue, runLength);
+ runValue = s;
+ runLength = 1;
+ }
+ }
+ encodeRun(buffer, (short)runValue, runLength);
+ return buffer.toString();
+ }
+
+ /**
+ * Construct a string representing a byte array. Use run-length encoding.
+ * Two bytes are packed into a single char, with a single extra zero byte at
+ * the end if needed. A byte represents itself, unless it is the
+ * ESCAPE_BYTE. Then the following notations are possible:
+ * ESCAPE_BYTE ESCAPE_BYTE ESCAPE_BYTE literal
+ * ESCAPE_BYTE n b n instances of byte b
+ * Since an encoded run occupies 3 bytes, we only encode runs of 4 or
+ * more bytes. Thus we have n > 0 and n != ESCAPE_BYTE and n <= 0xFF.
+ * If we encounter a run where n == ESCAPE_BYTE, we represent this as:
+ * b ESCAPE_BYTE n-1 b
+ * The ESCAPE_BYTE value is chosen so as not to collide with commonly
+ * seen values.
+ */
+ static public final String arrayToRLEString(byte[] a) {
+ StringBuffer buffer = new StringBuffer();
+ buffer.append((char) (a.length >> 16));
+ buffer.append((char) a.length);
+ byte runValue = a[0];
+ int runLength = 1;
+ byte[] state = new byte[2];
+ for (int i=1; i<a.length; ++i) {
+ byte b = a[i];
+ if (b == runValue && runLength < 0xFF) ++runLength;
+ else {
+ encodeRun(buffer, runValue, runLength, state);
+ runValue = b;
+ runLength = 1;
+ }
+ }
+ encodeRun(buffer, runValue, runLength, state);
+
+ // We must save the final byte, if there is one, by padding
+ // an extra zero.
+ if (state[0] != 0) appendEncodedByte(buffer, (byte)0, state);
+
+ return buffer.toString();
+ }
+
+ /**
+ * Encode a run, possibly a degenerate run (of < 4 values).
+ * @param length The length of the run; must be > 0 && <= 0xFFFF.
+ */
+ private static final void encodeRun(StringBuffer buffer, int value, int length) {
+ if (length < 4) {
+ for (int j=0; j<length; ++j) {
+ if (value == ESCAPE) {
+ appendInt(buffer, value);
+ }
+ appendInt(buffer, value);
+ }
+ }
+ else {
+ if (length == (int) ESCAPE) {
+ if (value == (int) ESCAPE) {
+ appendInt(buffer, ESCAPE);
+ }
+ appendInt(buffer, value);
+ --length;
+ }
+ appendInt(buffer, ESCAPE);
+ appendInt(buffer, length);
+ appendInt(buffer, value); // Don't need to escape this value
+ }
+ }
+
+    /**
+     * Appends a 32-bit value to the buffer as two chars: the high 16 bits
+     * first, then the low 16 bits.
+     */
+    private static final void appendInt(StringBuffer buffer, int value) {
+        final char high = (char) (value >>> 16);
+        final char low = (char) value;
+        buffer.append(high).append(low);
+    }
+
+    /**
+     * Writes one run of a short (or char) array to the buffer, one char
+     * per value. Runs shorter than 4 are written value by value, doubling
+     * any value equal to ESCAPE; longer runs are written as ESCAPE,
+     * length, value.
+     * @param buffer receives the encoded run
+     * @param value the repeated value; its 16 bits are written as a char
+     * @param length The length of the run; must be > 0 && <= 0xFFFF.
+     */
+    private static final void encodeRun(StringBuffer buffer, short value, int length) {
+        // Compare as a char: a short is sign-extended when widened to int,
+        // so it can never equal (int) ESCAPE (0xA5A5 > Short.MAX_VALUE) and
+        // the ESCAPE value would go out unescaped.
+        final char unit = (char) value;
+        final boolean needsEscape = (unit == ESCAPE);
+        if (length < 4) {
+            for (int j = 0; j < length; ++j) {
+                if (needsEscape) {
+                    buffer.append(ESCAPE);
+                }
+                buffer.append(unit);
+            }
+        }
+        else {
+            if (length == (int) ESCAPE) {
+                // A count equal to ESCAPE would be read back as a literal
+                // ESCAPE, so write one value on its own and shorten the run.
+                if (needsEscape) {
+                    buffer.append(ESCAPE);
+                }
+                buffer.append(unit);
+                --length;
+            }
+            buffer.append(ESCAPE);
+            buffer.append((char) length);
+            buffer.append(unit); // Don't need to escape this value
+        }
+    }
+
+    /**
+     * Writes one run of a byte array through appendEncodedByte. Runs
+     * shorter than 4 are written byte by byte, doubling any byte equal to
+     * ESCAPE_BYTE; longer runs are written as ESCAPE_BYTE, length, value.
+     * @param buffer receives the encoded run
+     * @param value the repeated byte
+     * @param length The length of the run; must be > 0 && <= 0xFF.
+     * @param state the byte-pairing state passed on to appendEncodedByte
+     */
+    private static final void encodeRun(StringBuffer buffer, byte value, int length,
+                                        byte[] state) {
+        if (length < 4) {
+            for (int j = 0; j < length; ++j) {
+                if (value == ESCAPE_BYTE) appendEncodedByte(buffer, ESCAPE_BYTE, state);
+                appendEncodedByte(buffer, value, state);
+            }
+        }
+        else {
+            // ESCAPE_BYTE is a signed byte (-91) while length is a positive
+            // int, so mask before comparing. Without the mask a run of
+            // exactly 0xA5 bytes was written as ESCAPE_BYTE ESCAPE_BYTE value,
+            // which decodes as a literal ESCAPE_BYTE followed by one value.
+            if (length == (ESCAPE_BYTE & 0xFF)) {
+                if (value == ESCAPE_BYTE) appendEncodedByte(buffer, ESCAPE_BYTE, state);
+                appendEncodedByte(buffer, value, state);
+                --length;
+            }
+            appendEncodedByte(buffer, ESCAPE_BYTE, state);
+            appendEncodedByte(buffer, (byte) length, state);
+            appendEncodedByte(buffer, value, state); // Don't need to escape this value
+        }
+    }
+
+    /**
+     * Feeds one byte into the two-bytes-per-char packing. The first byte
+     * of a pair is only remembered in state; the second byte completes the
+     * pair and one char (first byte in the high 8 bits) is appended.
+     * @param buffer receives a char whenever a pair is completed
+     * @param value the byte to add
+     * @param state A two-element array, with state[0] == 0 if this is the
+     * first byte of a pair, or state[0] != 0 if this is the second byte
+     * of a pair, in which case state[1] is the first byte.
+     */
+    private static final void appendEncodedByte(StringBuffer buffer, byte value,
+                                                byte[] state) {
+        if (state[0] == 0) {
+            // First byte of a pair: park it until its partner arrives.
+            state[0] = 1;
+            state[1] = value;
+            return;
+        }
+        final int high = state[1] << 8;
+        final int low = ((int) value) & 0xFF;
+        buffer.append((char) (high | low));
+        state[0] = 0;
+    }
+
+ /**
+ * Construct an array of ints from a run-length encoded string.
+ */
+ static public final int[] RLEStringToIntArray(String s) {
+ int length = getInt(s, 0);
+ int[] array = new int[length];
+ int ai = 0, i = 1;
+
+ int maxI = s.length() / 2;
+ while (ai < length && i < maxI) {
+ int c = getInt(s, i++);
+
+ if (c == ESCAPE) {
+ c = getInt(s, i++);
+ if (c == ESCAPE) {
+ array[ai++] = c;
+ } else {
+ int runLength = c;
+ int runValue = getInt(s, i++);
+ for (int j=0; j<runLength; ++j) {
+ array[ai++] = runValue;
+ }
+ }
+ }
+ else {
+ array[ai++] = c;
+ }
+ }
+
+ if (ai != length || i != maxI) {
+ throw new IllegalStateException("Bad run-length encoded int array");
+ }
+
+ return array;
+ }
+    /**
+     * Reads the i-th 32-bit value from a string that stores each int as
+     * two chars, high 16 bits first.
+     */
+    static final int getInt(String s, int i) {
+        final int base = 2 * i;
+        final int high = s.charAt(base);
+        final int low = s.charAt(base + 1);
+        return (high << 16) | low;
+    }
+
+ /**
+ * Construct an array of shorts from a run-length encoded string.
+ */
+ static public final short[] RLEStringToShortArray(String s) {
+ int length = (((int) s.charAt(0)) << 16) | ((int) s.charAt(1));
+ short[] array = new short[length];
+ int ai = 0;
+ for (int i=2; i<s.length(); ++i) {
+ char c = s.charAt(i);
+ if (c == ESCAPE) {
+ c = s.charAt(++i);
+ if (c == ESCAPE) {
+ array[ai++] = (short) c;
+ } else {
+ int runLength = (int) c;
+ short runValue = (short) s.charAt(++i);
+ for (int j=0; j<runLength; ++j) array[ai++] = runValue;
+ }
+ }
+ else {
+ array[ai++] = (short) c;
+ }
+ }
+
+ if (ai != length)
+ throw new IllegalStateException("Bad run-length encoded short array");
+
+ return array;
+ }
+
+    /**
+     * Construct an array of chars from a run-length encoded string. The
+     * first two chars hold the array length (high 16 bits first); the rest
+     * is a sequence of literal chars, ESCAPE ESCAPE pairs (one literal
+     * ESCAPE) and ESCAPE, length, value triples (a run).
+     * @param s the encoded string
+     * @return the decoded array
+     * @exception IllegalStateException if the decoded element count
+     * differs from the stored length
+     */
+    static public final char[] RLEStringToCharArray(String s) {
+        int length = (((int) s.charAt(0)) << 16) | ((int) s.charAt(1));
+        char[] array = new char[length];
+        int ai = 0;
+        for (int i = 2; i < s.length(); ++i) {
+            char c = s.charAt(i);
+            if (c == ESCAPE) {
+                c = s.charAt(++i);
+                if (c == ESCAPE) {
+                    // Doubled ESCAPE stands for one literal ESCAPE.
+                    array[ai++] = c;
+                } else {
+                    int runLength = (int) c;
+                    char runValue = s.charAt(++i);
+                    for (int j = 0; j < runLength; ++j) array[ai++] = runValue;
+                }
+            }
+            else {
+                array[ai++] = c;
+            }
+        }
+
+        // The message used to say "short array", copied from the short
+        // decoder; name the type this method actually decodes.
+        if (ai != length)
+            throw new IllegalStateException("Bad run-length encoded char array");
+
+        return array;
+    }
+
+ /**
+ * Construct an array of bytes from a run-length encoded string.
+ * The first two chars of the string hold the array length (high 16 bits
+ * first). Every following char carries two bytes, high byte first. The
+ * byte stream consists of literal bytes, ESCAPE_BYTE ESCAPE_BYTE pairs
+ * (one literal ESCAPE_BYTE) and ESCAPE_BYTE, length, value triples (a
+ * run of length bytes, with length read as an unsigned byte).
+ * @param s the encoded string
+ * @return the decoded array
+ * @exception IllegalStateException if decoding stops in the middle of
+ * an escape sequence, or if chars of s are left unread once the array
+ * has been filled
+ */
+ static public final byte[] RLEStringToByteArray(String s) {
+ int length = (((int) s.charAt(0)) << 16) | ((int) s.charAt(1));
+ byte[] array = new byte[length];
+ boolean nextChar = true; // true when the next byte must come from a fresh char of s
+ char c = 0;
+ int node = 0; // state-machine node: 0 = idle, 1 = after ESCAPE_BYTE, 2 = after run length
+ int runLength = 0;
+ int i = 2; // index of the next unread char of s (0 and 1 hold the length)
+ for (int ai=0; ai<length; ) {
+ // This part of the loop places the next byte into the local
+ // variable 'b' each time through the loop. It keeps the
+ // current character in 'c' and uses the boolean 'nextChar'
+ // to see if we've taken both bytes out of 'c' yet.
+ byte b;
+ if (nextChar) {
+ c = s.charAt(i++);
+ b = (byte) (c >> 8); // high byte of the pair comes first
+ nextChar = false;
+ }
+ else {
+ b = (byte) (c & 0xFF); // low byte of the same char
+ nextChar = true;
+ }
+
+ // This part of the loop is a tiny state machine which handles
+ // the parsing of the run-length encoding. This would be simpler
+ // if we could look ahead, but we can't, so we use 'node' to
+ // move between three nodes in the state machine.
+ switch (node) {
+ case 0:
+ // Normal idle node
+ if (b == ESCAPE_BYTE) {
+ node = 1;
+ }
+ else {
+ array[ai++] = b;
+ }
+ break;
+ case 1:
+ // We have seen one ESCAPE_BYTE; we expect either a second
+ // one, or a run length and value.
+ if (b == ESCAPE_BYTE) {
+ array[ai++] = ESCAPE_BYTE;
+ node = 0;
+ }
+ else {
+ runLength = b;
+ // Interpret signed byte as unsigned
+ if (runLength < 0) runLength += 0x100;
+ node = 2;
+ }
+ break;
+ case 2:
+ // We have seen an ESCAPE_BYTE and length byte. We interpret
+ // the next byte as the value to be repeated.
+ for (int j=0; j<runLength; ++j) array[ai++] = b;
+ node = 0;
+ break;
+ }
+ }
+
+ if (node != 0)
+ throw new IllegalStateException("Bad run-length encoded byte array");
+
+ if (i != s.length())
+ throw new IllegalStateException("Excess data in RLE byte array string");
+
+ return array;
+ }
+
+ static public String LINE_SEPARATOR = System.getProperty("line.separator"); // "line.separator" system property, read once when this field is initialized; not final, so it can be reassigned
+
+ /**
+ * Format a String for representation in a source file. This includes
+ * breaking it into lines and escaping characters using octal notation
+ * when necessary (control characters and double quotes).
+ */
+ static public final String formatForSource(String s) {
+ StringBuffer buffer = new StringBuffer();
+ for (int i=0; i<s.length();) {
+ if (i > 0) buffer.append('+').append(LINE_SEPARATOR);
+ buffer.append(" \"");
+ int count = 11;
+ while (i<s.length() && count<80) {
+ char c = s.charAt(i++);
+ if (c < '\u0020' || c == '"' || c == '\\') {
+ if (c == '\n') {
+ buffer.append("\\n");
+ count += 2;
+ } else if (c == '\t') {
+ buffer.append("\\t");
+ count += 2;
+ } else if (c == '\r') {
+ buffer.append("\\r");
+ count += 2;
+ } else {
+ // Represent control characters, backslash and double quote
+ // using octal notation; otherwise the string we form
+ // won't compile, since Unicode escape sequences are
+ // processed before tokenization.
+ buffer.append('\\');
+ buffer.append(HEX_DIGIT[(c & 0700) >> 6]); // HEX_DIGIT works for octal
+ buffer.append(HEX_DIGIT[(c & 0070) >> 3]);
+ buffer.append(HEX_DIGIT[(c & 0007)]);
+ count += 4;
+ }
+ }
+ else if (c <= '\u007E') {
+ buffer.append(c);
+ count += 1;
+ }
+ else {
+ buffer.append("\\u");
+ buffer.append(HEX_DIGIT[(c & 0xF000) >> 12]);
+ buffer.append(HEX_DIGIT[(c & 0x0F00) >> 8]);
+ buffer.append(HEX_DIGIT[(c & 0x00F0) >> 4]);
+ buffer.append(HEX_DIGIT[(c & 0x000F)]);
+ count += 6;
+ }
+ }
+ buffer.append('"');
+ }
+ return buffer.toString();
+ }
+
+ static final char[] HEX_DIGIT = {'0','1','2','3','4','5','6','7', // uppercase hex digit for each value 0-15
+ '8','9','A','B','C','D','E','F'};
+
+ /**
+ * Format a String for representation in a source file. Like
+ * formatForSource but does not do line breaking.
+ */
+ static public final String format1ForSource(String s) {
+ StringBuffer buffer = new StringBuffer();
+ buffer.append("\"");
+ for (int i=0; i<s.length();) {
+ char c = s.charAt(i++);
+ if (c < '\u0020' || c == '"' || c == '\\') {
+ if (c == '\n') {
+ buffer.append("\\n");
+ } else if (c == '\t') {
+ buffer.append("\\t");
+ } else if (c == '\r') {
+ buffer.append("\\r");
+ } else {
+ // Represent control characters, backslash and double quote
+ // using octal notation; otherwise the string we form
+ // won't compile, since Unicode escape sequences are
+ // processed before tokenization.
+ buffer.append('\\');
+ buffer.append(HEX_DIGIT[(c & 0700) >> 6]); // HEX_DIGIT works for octal
+ buffer.append(HEX_DIGIT[(c & 0070) >> 3]);
+ buffer.append(HEX_DIGIT[(c & 0007)]);
+ }
+ }
+ else if (c <= '\u007E') {
+ buffer.append(c);
+ }
+ else {
+ buffer.append("\\u");
+ buffer.append(HEX_DIGIT[(c & 0xF000) >> 12]);
+ buffer.append(HEX_DIGIT[(c & 0x0F00) >> 8]);
+ buffer.append(HEX_DIGIT[(c & 0x00F0) >> 4]);
+ buffer.append(HEX_DIGIT[(c & 0x000F)]);
+ }
+ }
+ buffer.append('"');
+ return buffer.toString();
+ }
+
+    /**
+     * Escapes a string code point by code point. Code points in the range
+     * U+0020 to U+007F are copied unchanged, except that a backslash is
+     * doubled. Every other code point becomes a backslash-u escape with 4
+     * hex digits when it is at most U+FFFF, or a backslash-U escape with
+     * 8 hex digits otherwise.
+     * @param s the string to escape
+     * @return the escaped string
+     */
+    public static final String escape(String s) {
+        StringBuffer buf = new StringBuffer();
+        int i = 0;
+        while (i < s.length()) {
+            final int c = UTF16.charAt(s, i);
+            i += UTF16.getCharCount(c);
+            if (c < ' ' || c > 0x007F) {
+                final boolean four = c <= 0xFFFF;
+                buf.append(four ? "\\u" : "\\U");
+                hex(c, four ? 4 : 8, buf);
+                continue;
+            }
+            if (c == '\\') {
+                buf.append("\\\\"); // That is, "\\"
+            } else {
+                buf.append((char) c);
+            }
+        }
+        return buf.toString();
+    }
+
+ /* Pairs of (escape letter, replacement character) for C-style escapes.
+ * This map must be in ASCENDING ORDER OF THE ESCAPE CODE: the lookup in
+ * unescapeAt() stops at the first entry whose letter is greater than the
+ * one being looked up. The commented-out entries below are not part of
+ * the table. */
+ static private final char[] UNESCAPE_MAP = {
+ /*" 0x22, 0x22 */
+ /*' 0x27, 0x27 */
+ /*? 0x3F, 0x3F */
+ /*\ 0x5C, 0x5C */
+ /*a*/ 0x61, 0x07,
+ /*b*/ 0x62, 0x08,
+ /*e*/ 0x65, 0x1b,
+ /*f*/ 0x66, 0x0c,
+ /*n*/ 0x6E, 0x0a,
+ /*r*/ 0x72, 0x0d,
+ /*t*/ 0x74, 0x09,
+ /*v*/ 0x76, 0x0b
+ };
+
+ /**
+ * Convert an escape to a 32-bit code point value. We attempt
+ * to parallel the icu4c unescapeAt() function.
+ * Recognized forms, in the order they are tried: backslash-u with
+ * exactly 4 hex digits, backslash-U with exactly 8 hex digits, \xh or
+ * \xhh, \x{h...} (1 to 8 hex digits), 1 to 3 octal digits, the letters
+ * in UNESCAPE_MAP, and \cX (control-X, computed as X & 0x1F). Any other
+ * character after the backslash is returned as itself. When a numeric
+ * escape yields a lead surrogate and a trail surrogate follows (as a
+ * literal or as another escape), the two are combined into one
+ * supplementary code point.
+ * NOTE(review): in the braces form the closing '}' is tested against
+ * the last character fetched by the digit loop, so an escape with all
+ * 8 digits present appears to be rejected even when '}' follows --
+ * confirm whether that is intended.
+ * NOTE(review): the lead-surrogate test casts the result to char, which
+ * drops bits above 16 -- confirm how results above U+FFFF should be
+ * treated there.
+ * @param s the string containing the escape
+ * @param offset16 an array containing offset to the character
+ * <em>after</em> the backslash. Upon return offset16[0] will
+ * be updated to point after the escape sequence. It is left
+ * unchanged when -1 is returned.
+ * @return character value from 0 to 10FFFF, or -1 on error.
+ */
+ public static int unescapeAt(String s, int[] offset16) {
+ int c;
+ int result = 0;
+ int n = 0; // number of digits consumed so far
+ int minDig = 0; // stays 0 unless a numeric escape was recognized
+ int maxDig = 0;
+ int bitsPerDigit = 4; // 4 for hex escapes, 3 for octal
+ int dig;
+ int i;
+ boolean braces = false;
+
+ /* Check that offset is in range */
+ int offset = offset16[0];
+ int length = s.length();
+ if (offset < 0 || offset >= length) {
+ return -1;
+ }
+
+ /* Fetch first UChar after '\\' */
+ c = UTF16.charAt(s, offset);
+ offset += UTF16.getCharCount(c);
+
+ /* Convert hexadecimal and octal escapes */
+ switch (c) {
+ case 'u':
+ minDig = maxDig = 4;
+ break;
+ case 'U':
+ minDig = maxDig = 8;
+ break;
+ case 'x':
+ minDig = 1;
+ if (offset < length && UTF16.charAt(s, offset) == 0x7B /*{*/) {
+ ++offset;
+ braces = true;
+ maxDig = 8;
+ } else {
+ maxDig = 2;
+ }
+ break;
+ default:
+ dig = UCharacter.digit(c, 8);
+ if (dig >= 0) {
+ minDig = 1;
+ maxDig = 3;
+ n = 1; /* Already have first octal digit */
+ bitsPerDigit = 3;
+ result = dig;
+ }
+ break;
+ }
+ if (minDig != 0) {
+ while (offset < length && n < maxDig) {
+ c = UTF16.charAt(s, offset);
+ dig = UCharacter.digit(c, (bitsPerDigit == 3) ? 8 : 16);
+ if (dig < 0) {
+ break;
+ }
+ result = (result << bitsPerDigit) | dig;
+ offset += UTF16.getCharCount(c);
+ ++n;
+ }
+ if (n < minDig) {
+ return -1;
+ }
+ if (braces) {
+ if (c != 0x7D /*}*/) {
+ return -1;
+ }
+ ++offset;
+ }
+ if (result < 0 || result >= 0x110000) {
+ return -1;
+ }
+ // If an escape sequence specifies a lead surrogate, see
+ // if there is a trail surrogate after it, either as an
+ // escape or as a literal. If so, join them up into a
+ // supplementary.
+ if (offset < length &&
+ UTF16.isLeadSurrogate((char) result)) {
+ int ahead = offset+1;
+ c = s.charAt(offset); // [sic] get 16-bit code unit
+ if (c == '\\' && ahead < length) {
+ int o[] = new int[] { ahead };
+ c = unescapeAt(s, o); // recursive call parses the escape that follows
+ ahead = o[0];
+ }
+ if (UTF16.isTrailSurrogate((char) c)) {
+ offset = ahead;
+ result = UCharacterProperty.getRawSupplementary(
+ (char) result, (char) c);
+ }
+ }
+ offset16[0] = offset;
+ return result;
+ }
+
+ /* Convert C-style escapes in table */
+ for (i=0; i<UNESCAPE_MAP.length; i+=2) {
+ if (c == UNESCAPE_MAP[i]) {
+ offset16[0] = offset;
+ return UNESCAPE_MAP[i+1];
+ } else if (c < UNESCAPE_MAP[i]) {
+ break; // table is sorted by escape letter, so no later entry can match
+ }
+ }
+
+ /* Map \cX to control-X: X & 0x1F */
+ if (c == 'c' && offset < length) {
+ c = UTF16.charAt(s, offset);
+ offset16[0] = offset + UTF16.getCharCount(c);
+ return 0x1F & c;
+ }
+
+ /* If no special forms are recognized, then consider
+ * the backslash to generically escape the next character. */
+ offset16[0] = offset;
+ return c;
+ }
+
+    /**
+     * Replaces every backslash escape in a string with the character it
+     * denotes, as determined by unescapeAt(). Characters that are not
+     * part of an escape are copied unchanged.
+     * @param s the string to unescape
+     * @return the unescaped string
+     * @exception IllegalArgumentException if an invalid escape is
+     * seen.
+     */
+    public static String unescape(String s) {
+        StringBuffer buf = new StringBuffer();
+        int[] pos = new int[1];
+        int i = 0;
+        while (i < s.length()) {
+            final char c = s.charAt(i++);
+            if (c != '\\') {
+                buf.append(c);
+                continue;
+            }
+            pos[0] = i;
+            final int e = unescapeAt(s, pos);
+            if (e < 0) {
+                throw new IllegalArgumentException("Invalid escape sequence " +
+                                                   s.substring(i-1, Math.min(i+8, s.length())));
+            }
+            UTF16.append(buf, e);
+            i = pos[0];
+        }
+        return buf.toString();
+    }
+
+    /**
+     * Replaces every valid backslash escape in a string with the
+     * character it denotes, as determined by unescapeAt(). When
+     * unescapeAt() rejects an escape, the backslash is copied to the
+     * output and scanning resumes with the character after it.
+     * @param s the string to unescape
+     * @return the unescaped string
+     */
+    public static String unescapeLeniently(String s) {
+        StringBuffer buf = new StringBuffer();
+        int[] pos = new int[1];
+        int i = 0;
+        while (i < s.length()) {
+            final char c = s.charAt(i++);
+            if (c != '\\') {
+                buf.append(c);
+                continue;
+            }
+            pos[0] = i;
+            final int e = unescapeAt(s, pos);
+            if (e >= 0) {
+                UTF16.append(buf, e);
+                i = pos[0];
+            } else {
+                // Invalid escape: keep the backslash itself.
+                buf.append(c);
+            }
+        }
+        return buf.toString();
+    }
+
+    /**
+     * Convert a char to 4 hex uppercase digits. E.g., hex('A') =>
+     * "0041".
+     * @param ch the char to format
+     * @return the formatted text
+     */
+    public static String hex(char ch) {
+        return hex(ch, new StringBuffer()).toString();
+    }
+
+    /**
+     * Convert a string to comma-separated groups of 4 hex uppercase
+     * digits, one group per char. E.g., hex("AB") => "0041,0042".
+     * @param s the string to format
+     * @return the formatted text
+     */
+    public static String hex(String s) {
+        return hex(s, new StringBuffer()).toString();
+    }
+
+ /**
+ * Convert the contents of a StringBuffer to comma-separated groups of
+ * 4 hex uppercase digits. E.g., hex("AB") => "0041,0042". The buffer is
+ * converted to a String and passed to hex(String); it is not modified.
+ * @param s the buffer whose contents are formatted
+ * @return the formatted text
+ */
+ public static String hex(StringBuffer s) {
+ return hex(s.toString());
+ }
+
+ /**
+ * Convert a char to 4 hex uppercase digits. E.g., hex('A') =>
+ * "0041". Append the output to the given StringBuffer.
+ * Delegates to appendNumber with radix 16 and a minimum of 4 digits.
+ * @param ch the char to format
+ * @param output the buffer to append to
+ * @return the value returned by appendNumber (presumably output itself;
+ * confirm against appendNumber)
+ */
+ public static StringBuffer hex(char ch, StringBuffer output) {
+ return appendNumber(output, ch, 16, 4);
+ }
+
+ /**
+ * Convert a integer to size width hex uppercase digits.
+ * E.g., hex('A', 4, str) => "0041".
+ * Append the output to the given StringBuffer.
+ * Delegates to appendNumber with radix 16 and the given width.
+ * If width is too small to fit, nothing will be appended to output.
+ * NOTE(review): hex(int, int) makes the same appendNumber call but
+ * documents that extra digits are used when the value does not fit;
+ * confirm the actual behavior against appendNumber.
+ * @param ch the value to format
+ * @param width the number of digits requested
+ * @param output the buffer to append to
+ * @return the value returned by appendNumber (presumably output itself;
+ * confirm against appendNumber)
+ */
+ public static StringBuffer hex(int ch, int width, StringBuffer output) {
+ return appendNumber(output, ch, 16, width);
+ }
+
+    /**
+     * Convert a integer to size width (minimum) hex uppercase digits.
+     * E.g., hex('A', 4) => "0041". If the integer requires more
+     * than width digits, more will be used.
+     * @param ch the value to format
+     * @param width the minimum number of digits
+     * @return the formatted text
+     */
+    public static String hex(int ch, int width) {
+        return appendNumber(new StringBuffer(), ch, 16, width).toString();
+    }
+    /**
+     * Supplies a zero-padded hex representation of an integer (without 0x).
+     * Digits are uppercase. A negative value is rendered as '-' followed
+     * by the padded magnitude. Long.MIN_VALUE, whose magnitude does not
+     * fit in a long, is always rendered as "-8000000000000000".
+     * @param i the value to format
+     * @param places the minimum number of digits; shorter results are
+     * left-padded with zeros, longer ones are returned in full. Any
+     * padding width is accepted (widths above 16 used to throw
+     * StringIndexOutOfBoundsException).
+     * @return the formatted text
+     */
+    static public String hex(long i, int places) {
+        if (i == Long.MIN_VALUE) return "-8000000000000000";
+        boolean negative = i < 0;
+        if (negative) {
+            i = -i;
+        }
+        String digits = Long.toString(i, 16).toUpperCase();
+        StringBuffer result = new StringBuffer();
+        if (negative) {
+            result.append('-');
+        }
+        // Pad by appending rather than slicing a fixed 16-zero string, so
+        // that places > 16 cannot run off the end of that string.
+        for (int pad = places - digits.length(); pad > 0; --pad) {
+            result.append('0');
+        }
+        return result.append(digits).toString();
+    }
+
+    /**
+     * Supplies a hex representation of an integer, zero-padded to at
+     * least 4 digits. Equivalent to hex(ch, 4).
+     * @param ch the value to format
+     * @return the formatted text
+     */
+    public static String hex(long ch) {
+        return hex(ch, 4);
+    }
+
+ /**
+ * Convert a string to comma-separated groups of 4 hex uppercase
+ * digits. E.g., hex('ab') => "0041,0042". Append the output
+ * to the given StringBuffer.
+ */
+ public static StringBuffer hex(String s, StringBuffer result) {
+ for (int i = 0; i < s.length(); ++i) {
+ if (i != 0) result.append(',');
+ hex(s.charAt(i), result);
+ }
+ return result;
+ }
+
+    /**
+     * Split a string into pieces based on the given divider character,
+     * storing the pieces into a caller-supplied array.
+     * @param s the string to split
+     * @param divider the character on which to split. Occurrences of
+     * this character are not included in the output
+     * @param output an array to receive the substrings between
+     * instances of divider. It must be large enough on entry to
+     * accommodate all output. Adjacent instances of the divider
+     * character will place empty strings into output. Before
+     * returning, output is padded out with empty strings.
+     */
+    public static void split(String s, char divider, String[] output) {
+        int start = 0;   // start of the piece currently being collected
+        int slot = 0;    // next free index in output
+        for (int hit = s.indexOf(divider, start); hit >= 0;
+             hit = s.indexOf(divider, start)) {
+            output[slot++] = s.substring(start, hit);
+            start = hit + 1;
+        }
+        // The text after the last divider (or the whole string).
+        output[slot++] = s.substring(start);
+        while (slot < output.length) {
+            output[slot++] = "";
+        }
+    }
+
+    /**
+     * Split a string into pieces based on the given divider character
+     * and return the pieces as a new array.
+     * @param s the string to split
+     * @param divider the character on which to split. Occurrences of
+     * this character are not included in the output
+     * @return an array of the substrings between instances of divider,
+     * always containing at least one element. Adjacent instances of the
+     * divider character produce empty strings.
+     */
+    public static String[] split(String s, char divider) {
+        ArrayList output = new ArrayList();
+        int start = 0;   // start of the piece currently being collected
+        for (int hit = s.indexOf(divider, start); hit >= 0;
+             hit = s.indexOf(divider, start)) {
+            output.add(s.substring(start, hit));
+            start = hit + 1;
+        }
+        // The text after the last divider (or the whole string).
+        output.add(s.substring(start));
+        return (String[]) output.toArray(new String[output.size()]);
+    }
+
+ /**
+ * Look up a given string in a string array. Returns the index at
+ * which the first occurrence of the string was found in the
+ * array, or -1 if it was not found.
+ * @param source the string to search for
+ * @param target the array of zero or more strings in which to
+ * look for source
+ * @return the index of target at which source first occurs, or -1
+ * if not found
+ */
+ public static int lookup(String source, String[] target) {
+ for (int i = 0; i < target.length; ++i) {
+ if (source.equals(target[i])) return i;
+ }
+ return -1;
+ }
+
+    /**
+     * Skip over a sequence of zero or more white space characters
+     * at pos, as classified by UCharacterProperty.isRuleWhiteSpace.
+     * @param str the string to scan
+     * @param pos the index at which to start
+     * @return the index of the first non-white-space character
+     * at or after pos, or str.length(), if there is none.
+     */
+    public static int skipWhitespace(String str, int pos) {
+        for (int c; pos < str.length(); pos += UTF16.getCharCount(c)) {
+            c = UTF16.charAt(str, pos);
+            if (!UCharacterProperty.isRuleWhiteSpace(c)) {
+                return pos;
+            }
+        }
+        return pos;
+    }
+
+ /**
+ * Skip over a sequence of zero or more white space characters
+ * at pos[0], advancing it.
+ * @param str the string to scan
+ * @param pos INPUT-OUTPUT parameter. On input, pos[0] is the index at
+ * which to start. On output, pos[0] is the value returned by
+ * skipWhitespace(str, pos[0]).
+ */
+ public static void skipWhitespace(String str, int[] pos) {
+ pos[0] = skipWhitespace(str, pos[0]);
+ }
+
+    /**
+     * Returns a copy of a string with every code point for which
+     * UCharacterProperty.isRuleWhiteSpace is true removed.
+     * @param str the string to filter
+     * @return the string without rule white space
+     */
+    public static String deleteRuleWhiteSpace(String str) {
+        StringBuffer buf = new StringBuffer();
+        int i = 0;
+        while (i < str.length()) {
+            final int ch = UTF16.charAt(str, i);
+            i += UTF16.getCharCount(ch);
+            if (!UCharacterProperty.isRuleWhiteSpace(ch)) {
+                UTF16.append(buf, ch);
+            }
+        }
+        return buf.toString();
+    }
+
+    /**
+     * Parse a single non-whitespace character 'ch', optionally
+     * preceded by whitespace.
+     * @param id the string to be parsed
+     * @param pos INPUT-OUTPUT parameter. On input, pos[0] is the
+     * offset of the first character to be parsed. On output, pos[0]
+     * is the index after the last parsed character. If the parse
+     * fails, pos[0] will be unchanged.
+     * @param ch the non-whitespace character to be parsed.
+     * @return true if 'ch' is seen preceded by zero or more
+     * whitespace characters.
+     */
+    public static boolean parseChar(String id, int[] pos, char ch) {
+        final int start = pos[0];
+        skipWhitespace(id, pos);
+        final int at = pos[0];
+        if (at != id.length() && id.charAt(at) == ch) {
+            pos[0] = at + 1;
+            return true;
+        }
+        // Not found: undo the whitespace skip.
+        pos[0] = start;
+        return false;
+    }
+
+ /**
+ * Parse a pattern string starting at offset pos. Keywords are
+ * matched case-insensitively. Spaces may be skipped and may be
+ * optional or required. Integer values may be parsed, and if
+ * they are, they will be returned in the given array. If
+ * successful, the offset of the next non-space character is
+ * returned. On failure, -1 is returned.
+ * NOTE: the whitespace skipping for ' ' and '~' is done by
+ * skipWhitespace(rule, pos), which scans to the end of rule and does
+ * not stop at limit.
+ * @param rule the text to be parsed
+ * @param pos offset in rule of the first character to parse
+ * @param limit offset after the last character to parse; checked before
+ * each required space and each literal character, and passed to
+ * parseInteger
+ * @param pattern must only contain lowercase characters, which
+ * will match their uppercase equivalents as well. A space
+ * character matches one or more required spaces. A '~' character
+ * matches zero or more optional spaces. A '#' character matches
+ * an integer and stores it in parsedInts, which the caller must
+ * ensure has enough capacity.
+ * @param parsedInts array to receive parsed integers. Caller
+ * must ensure that parsedInts.length is >= the number of '#'
+ * signs in 'pattern'.
+ * @return the position after the last character parsed, or -1 if
+ * the parse failed
+ */
+ public static int parsePattern(String rule, int pos, int limit,
+ String pattern, int[] parsedInts) {
+ // TODO Update this to handle surrogates
+ int[] p = new int[1]; // in/out position holder for parseInteger
+ int intCount = 0; // number of integers parsed
+ for (int i=0; i<pattern.length(); ++i) {
+ char cpat = pattern.charAt(i);
+ char c;
+ switch (cpat) {
+ case ' ':
+ if (pos >= limit) {
+ return -1;
+ }
+ c = rule.charAt(pos++);
+ if (!UCharacterProperty.isRuleWhiteSpace(c)) {
+ return -1;
+ }
+ // FALL THROUGH to skipWhitespace
+ case '~':
+ pos = skipWhitespace(rule, pos);
+ break;
+ case '#':
+ p[0] = pos;
+ parsedInts[intCount++] = parseInteger(rule, p, limit);
+ if (p[0] == pos) {
+ // Syntax error; failed to parse integer
+ return -1;
+ }
+ pos = p[0];
+ break;
+ default:
+ if (pos >= limit) {
+ return -1;
+ }
+ c = (char) UCharacter.toLowerCase(rule.charAt(pos++)); // lowercase the text so it can match the lowercase pattern
+ if (c != cpat) {
+ return -1;
+ }
+ break;
+ }
+ }
+ return pos;
+ }
+
+ /**
+ * Parse a pattern string within the given Replaceable and a parsing
+ * pattern. Characters are matched literally and case-sensitively
+ * except for the following special characters:
+ *
+ * ~ zero or more uprv_isRuleWhiteSpace chars
+ *
+ * If end of pattern is reached with all matches along the way,
+ * pos is advanced to the first unparsed index and returned.
+ * Otherwise -1 is returned.
+ * An empty pattern matches immediately and returns index. A non-empty
+ * pattern can only succeed while there is still text before limit; if
+ * the text runs out first, -1 is returned even when only a '~' remains
+ * in the pattern.
+ * @param pat pattern that controls parsing
+ * @param text text to be parsed, starting at index
+ * @param index offset to first character to parse
+ * @param limit offset after last character to parse
+ * @return index after last parsed character, or -1 on parse failure.
+ */
+ public static int parsePattern(String pat,
+ Replaceable text,
+ int index,
+ int limit) {
+ int ipat = 0; // offset in pat of the pattern character being matched
+
+ // empty pattern matches immediately
+ if (ipat == pat.length()) {
+ return index;
+ }
+
+ int cpat = UTF16.charAt(pat, ipat);
+
+ while (index < limit) {
+ int c = text.char32At(index);
+
+ // parse \s*
+ if (cpat == '~') {
+ if (UCharacterProperty.isRuleWhiteSpace(c)) {
+ index += UTF16.getCharCount(c);
+ continue;
+ } else {
+ if (++ipat == pat.length()) {
+ return index; // success; c unparsed
+ }
+ // fall thru; process c again with next cpat
+ }
+ }
+
+ // parse literal
+ else if (c == cpat) {
+ int n = UTF16.getCharCount(c);
+ index += n;
+ ipat += n;
+ if (ipat == pat.length()) {
+ return index; // success; c parsed
+ }
+ // fall thru; get next cpat
+ }
+
+ // match failure of literal
+ else {
+ return -1;
+ }
+
+ cpat = UTF16.charAt(pat, ipat);
+ }
+
+ return -1; // text ended before end of pat
+ }
+
+    /**
+     * Parse an integer at pos, either of the form \d+ or of the form
+     * 0x[0-9A-Fa-f]+ or 0[0-7]+, that is, in standard decimal, hex,
+     * or octal format.
+     * @param rule the text to be parsed
+     * @param pos INPUT-OUTPUT parameter. On input, the first
+     * character to parse. On output, the character after the last
+     * parsed character. It is left unchanged if no digits were parsed
+     * or if the value does not fit in a non-negative int.
+     * @param limit offset after the last character that may be parsed
+     * @return the parsed value, or 0 if nothing could be parsed (check
+     * whether pos[0] moved to tell the two apart)
+     */
+    public static int parseInteger(String rule, int[] pos, int limit) {
+        int count = 0;
+        int value = 0;
+        int p = pos[0];
+        int radix = 10;
+
+        if (rule.regionMatches(true, p, "0x", 0, 2)) {
+            p += 2;
+            radix = 16;
+        } else if (p < limit && rule.charAt(p) == '0') {
+            // The leading zero is itself a digit, so "0" alone parses.
+            p++;
+            count = 1;
+            radix = 8;
+        }
+
+        while (p < limit) {
+            int d = UCharacter.digit(rule.charAt(p), radix);
+            if (d < 0) {
+                break;
+            }
+            ++p;
+            ++count;
+            // Accumulate in a long so overflow is detected exactly. The
+            // previous test (v <= value) rejected a zero digit while the
+            // value was still 0 (e.g. "0x0", "00") and missed overflows
+            // that wrapped around to a larger positive int.
+            long v = ((long) value * radix) + d;
+            if (v > Integer.MAX_VALUE) {
+                // Too many input digits: fail, leaving pos[0] untouched.
+                return 0;
+            }
+            value = (int) v;
+        }
+        if (count > 0) {
+            pos[0] = p;
+        }
+        return value;
+    }
+
+    /**
+     * Parse a Unicode identifier out of a string, starting at a given
+     * position.
+     * @param str the string to parse
+     * @param pos INPUT-OUTPUT parameter.  On INPUT, pos[0] is the index of
+     * the first character to examine.  It must be less than str.length()
+     * and must not point to a whitespace character, that is,
+     * pos[0] < str.length() and
+     * !UCharacterProperty.isRuleWhiteSpace(UTF16.charAt(str, pos[0])).
+     * On OUTPUT, the index after the last parsed character; it is not
+     * modified when null is returned.
+     * @return the identifier, or null if the character at pos[0] cannot
+     * start a Unicode identifier
+     */
+    public static String parseUnicodeIdentifier(String str, int[] pos) {
+        // assert(pos[0] < str.length());
+        // assert(!UCharacterProperty.isRuleWhiteSpace(UTF16.charAt(str, pos[0])));
+        StringBuffer ident = new StringBuffer();
+        final int end = str.length();
+        int cursor = pos[0];
+        while (cursor < end) {
+            int cp = UTF16.charAt(str, cursor);
+            boolean atStart = (ident.length() == 0);
+            // The first code point must be an identifier start; every
+            // later one only needs to be an identifier part.
+            boolean accepted = atStart
+                ? UCharacter.isUnicodeIdentifierStart(cp)
+                : UCharacter.isUnicodeIdentifierPart(cp);
+            if (!accepted) {
+                if (atStart) {
+                    return null;
+                }
+                break;
+            }
+            UTF16.append(ident, cp);
+            cursor += UTF16.getCharCount(cp);
+        }
+        pos[0] = cursor;
+        return ident.toString();
+    }
+
+    /**
+     * Remove leading and trailing whitespace from a StringBuffer in place.
+     * Whitespace is tested one 16-bit code unit at a time.
+     * @param b the buffer to trim; it is modified
+     * @return b itself
+     */
+    public static StringBuffer trim(StringBuffer b) {
+        // TODO update to handle surrogates
+        int lead = 0;
+        while (lead < b.length() && UCharacter.isWhitespace(b.charAt(lead))) {
+            ++lead;
+        }
+        b.delete(0, lead);
+
+        // Walk back from the end to just past the last non-whitespace unit.
+        int keep = b.length();
+        while (keep > 0 && UCharacter.isWhitespace(b.charAt(keep - 1))) {
+            --keep;
+        }
+        return b.delete(keep, b.length());
+    }
+
+ // Digit characters indexed by digit value: '0'-'9' for the values 0-9
+ // and 'A'-'Z' for the values 10-35 (36 entries in total).
+ static final char DIGITS[] = {
+ '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
+ 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J',
+ 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T',
+ 'U', 'V', 'W', 'X', 'Y', 'Z'
+ };
+
+ /**
+ * Append a number to the given StringBuffer in the radix 10
+ * generating at least one digit. Delegates to the four-argument
+ * appendNumber with radix 10 and minDigits 1.
+ * @param result the buffer the digits are appended to
+ * @param n the number to append
+ * @return the value returned by the four-argument appendNumber
+ */
+ public static StringBuffer appendNumber(StringBuffer result, int n) {
+ return appendNumber(result, n, 10, 1);
+ }
+
+    /**
+     * Append the digits of a positive integer, in the given radix, to a
+     * <code>StringBuffer</code>.  The low-order digit is the easiest one
+     * to compute but has to be written last, so the higher-order digits
+     * are emitted by a recursive call before it is appended.
+     *
+     * @param result is the <code>StringBuffer</code> to append to
+     * @param n is the positive integer
+     * @param radix is the radix, from 2 to 36 inclusive
+     * @param minDigits is the minimum number of digits to append.
+     */
+    private static void recursiveAppendNumber(StringBuffer result, int n,
+                                              int radix, int minDigits)
+    {
+        // Recurse while there are higher-order digits left, or while
+        // padding is still needed to reach minDigits.
+        boolean needsHigherDigits = (n >= radix) || (minDigits > 1);
+        if (needsHigherDigits) {
+            recursiveAppendNumber(result, n / radix, radix, minDigits - 1);
+        }
+
+        result.append(DIGITS[n % radix]);
+    }
+
+    /**
+     * Append a number to the given StringBuffer in the given radix.
+     * Standard digits '0'-'9' are used and letters 'A'-'Z' for
+     * radices 11 through 36.
+     * @param result the digits of the number are appended here
+     * @param n the number to be converted to digits; may be negative,
+     * including Integer.MIN_VALUE.  If negative, a '-' is prepended to
+     * the digits.
+     * @param radix a radix from 2 to 36 inclusive.
+     * @param minDigits the minimum number of digits, not including
+     * any '-', to produce.  Values less than 2 have no effect.  One
+     * digit is always emitted regardless of this parameter.
+     * @return a reference to result
+     * @throws IllegalArgumentException if radix is outside 2..36
+     */
+    public static StringBuffer appendNumber(StringBuffer result, int n,
+                                            int radix, int minDigits)
+        throws IllegalArgumentException
+    {
+        if (radix < 2 || radix > 36) {
+            throw new IllegalArgumentException("Illegal radix " + radix);
+        }
+
+        if (n >= 0) {
+            recursiveAppendNumber(result, n, radix, minDigits);
+            return result;
+        }
+
+        result.append("-");
+        if (n == Integer.MIN_VALUE) {
+            // -n overflows back to Integer.MIN_VALUE, so the magnitude
+            // cannot be passed on as a positive int.  Split off the last
+            // digit instead: n / radix is small enough to negate, and
+            // n % radix lies in (-radix, 0].
+            recursiveAppendNumber(result, -(n / radix), radix, minDigits - 1);
+            result.append(DIGITS[-(n % radix)]);
+        } else {
+            recursiveAppendNumber(result, -n, radix, minDigits);
+        }
+
+        return result;
+    }
+
+    /**
+     * Parse an unsigned 31-bit integer at the given offset.  Use
+     * UCharacter.digit() to parse individual characters into digits.
+     * @param text the text to be parsed
+     * @param pos INPUT-OUTPUT parameter.  On entry, pos[0] is the
+     * offset within text at which to start parsing; it should point
+     * to a valid digit.  On exit, pos[0] is the offset after the last
+     * parsed character.  If the parse failed, it will be unchanged on
+     * exit.  Must be >= 0 on entry.
+     * @param radix the radix in which to parse; must be >= 2 and <=
+     * 36.
+     * @return a non-negative parsed number, or -1 upon parse failure.
+     * Parse fails if there are no digits, that is, if pos[0] does not
+     * point to a valid digit on entry, or if the number to be parsed
+     * does not fit into a 31-bit unsigned integer.
+     */
+    public static int parseNumber(String text, int[] pos, int radix) {
+        // assert(pos[0] >= 0);
+        // assert(radix >= 2);
+        // assert(radix <= 36);
+        int n = 0;
+        int p = pos[0];
+        while (p < text.length()) {
+            int ch = UTF16.charAt(text, p);
+            int d = UCharacter.digit(ch, radix);
+            if (d < 0) {
+                break;
+            }
+            // Accumulate in 64 bits: a 32-bit overflow does not always
+            // produce a negative value (e.g. 500000000 * 10 wraps to a
+            // positive number), so a sign test alone misses overflows.
+            long wide = ((long) radix * n) + d;
+            if (wide > Integer.MAX_VALUE) {
+                return -1;
+            }
+            n = (int) wide;
+            // ch is a code point; step over both code units of a
+            // supplementary digit so that it is not read a second time.
+            p += UTF16.getCharCount(ch);
+        }
+        if (p == pos[0]) {
+            return -1;
+        }
+        pos[0] = p;
+        return n;
+    }
+
+    /**
+     * Tell whether a character lies outside the printable ASCII range
+     * U+0020..U+007E.  Tab, newline and linefeed therefore count as
+     * unprintable, as does every non-ASCII character.
+     * @param c the code point to test
+     * @return true if c is NOT printable ASCII
+     */
+    public static boolean isUnprintable(int c) {
+        return c < 0x20 || c > 0x7E;
+    }
+
+    /**
+     * Append the escaped form of an unprintable character to a buffer:
+     * <backslash>uxxxx for U+0000 to U+FFFF and <backslash>Uxxxxxxxx for
+     * U+10000 and above.  Printable ASCII characters are not appended at
+     * all.
+     * @param result the buffer that receives the escape
+     * @param c the code point to examine
+     * @return true if an escape was appended, false if c is printable
+     * ASCII and nothing was done
+     */
+    public static boolean escapeUnprintable(StringBuffer result, int c) {
+        if (!isUnprintable(c)) {
+            return false;
+        }
+
+        result.append('\\');
+
+        // Eight hex digits (starting at bit 28) when anything above the
+        // low 16 bits is set, otherwise four (starting at bit 12).
+        int shift;
+        if ((c & ~0xFFFF) != 0) {
+            result.append('U');
+            shift = 28;
+        } else {
+            result.append('u');
+            shift = 12;
+        }
+        for (; shift >= 0; shift -= 4) {
+            result.append(DIGITS[0xF & (c >> shift)]);
+        }
+        return true;
+    }
+
+    /**
+     * Find the first character of <code>text</code> that belongs to
+     * <code>setOfChars</code>, skipping over quoted text.  A character
+     * that follows a backslash is skipped, and so is everything between
+     * a pair of apostrophes.  For example, in the string "abc'hide'h",
+     * a search for "h" does not find the 'h' in "hide".  Unlike
+     * String.indexOf(), the search is for any one of several characters
+     * rather than for a single character.
+     * @param text text to be searched
+     * @param start the beginning index, inclusive; <code>0 <= start
+     * <= limit</code>.
+     * @param limit the ending index, exclusive; <code>start <= limit
+     * <= text.length()</code>.
+     * @param setOfChars string with one or more distinct characters
+     * @return Offset of the first character in <code>setOfChars</code>
+     * found, or -1 if not found.
+     * @see String#indexOf
+     */
+    public static int quotedIndexOf(String text, int start, int limit,
+                                    String setOfChars) {
+        int i = start;
+        while (i < limit) {
+            char unit = text.charAt(i);
+            if (unit == BACKSLASH) {
+                // Step over the backslash and the character it escapes.
+                i += 2;
+                continue;
+            }
+            if (unit == APOSTROPHE) {
+                // Advance to the closing apostrophe (or to limit) ...
+                do {
+                    ++i;
+                } while (i < limit && text.charAt(i) != APOSTROPHE);
+                // ... and then step over it.
+                ++i;
+                continue;
+            }
+            if (setOfChars.indexOf(unit) >= 0) {
+                return i;
+            }
+            ++i;
+        }
+        return -1;
+    }
+
+    /**
+     * Copy a range of characters out of a StringBuffer, in the manner of
+     * StringBuffer.getChars, version 1.3.
+     * JDK 1.2 implements StringBuffer.getChars differently, so this
+     * method exists to give consistent results.  An empty range
+     * (srcBegin == srcEnd) is a no-op that never touches src or dst.
+     * To be removed after JDK 1.2 ceased to be the reference platform.
+     * @param src source string buffer
+     * @param srcBegin offset to the start of the src to retrieve from
+     * @param srcEnd offset to the end of the src to retrieve from
+     * @param dst char array to store the retrieved chars
+     * @param dstBegin offset to the start of the destination char array to
+     * store the retrieved chars
+     */
+    public static void getChars(StringBuffer src, int srcBegin, int srcEnd,
+                                char dst[], int dstBegin)
+    {
+        if (srcBegin != srcEnd) {
+            src.getChars(srcBegin, srcEnd, dst, dstBegin);
+        }
+    }
+
+ /**
+ * Append a character to a rule that is being built up. To flush
+ * the quoteBuf to rule, make one final call with isLiteral == true.
+ * If there is no final character, pass in (int)-1 as c.
+ * <p>Characters that need quoting are collected in quoteBuf rather
+ * than written to rule directly; the collected text is written to
+ * rule, wrapped in apostrophes, the next time a literal or an
+ * escaped unprintable character is appended.
+ * @param rule the string to append the character to
+ * @param c the character to append, or (int)-1 if none.
+ * @param isLiteral if true, then the given character should not be
+ * quoted or escaped. Usually this means it is a syntactic element
+ * such as > or $
+ * @param escapeUnprintable if true, then unprintable characters
+ * should be escaped using escapeUnprintable(). These escapes will
+ * appear outside of quotes.
+ * @param quoteBuf a buffer which is used to build up quoted
+ * substrings. The caller should initially supply an empty buffer,
+ * and thereafter should not modify the buffer. The buffer should be
+ * cleared out by, at the end, calling this method with a literal
+ * character (which may be -1).
+ */
+ public static void appendToRule(StringBuffer rule,
+ int c,
+ boolean isLiteral,
+ boolean escapeUnprintable,
+ StringBuffer quoteBuf) {
+ // If we are escaping unprintables, then escape them outside
+ // quotes. \\u and \\U are not recognized within quotes. The same
+ // logic applies to literals, but literals are never escaped.
+ if (isLiteral ||
+ (escapeUnprintable && Utility.isUnprintable(c))) {
+ // Flush any pending quoted text before c itself is written.
+ if (quoteBuf.length() > 0) {
+ // We prefer backslash APOSTROPHE to double APOSTROPHE
+ // (more readable, less similar to ") so if there are
+ // double APOSTROPHEs at the ends, we pull them outside
+ // of the quote.
+
+ // If the first thing in the quoteBuf is APOSTROPHE
+ // (doubled) then pull it out.
+ while (quoteBuf.length() >= 2 &&
+ quoteBuf.charAt(0) == APOSTROPHE &&
+ quoteBuf.charAt(1) == APOSTROPHE) {
+ rule.append(BACKSLASH).append(APOSTROPHE);
+ quoteBuf.delete(0, 2);
+ }
+ // If the last thing in the quoteBuf is APOSTROPHE
+ // (doubled) then remove and count it and add it after.
+ int trailingCount = 0;
+ while (quoteBuf.length() >= 2 &&
+ quoteBuf.charAt(quoteBuf.length()-2) == APOSTROPHE &&
+ quoteBuf.charAt(quoteBuf.length()-1) == APOSTROPHE) {
+ quoteBuf.setLength(quoteBuf.length()-2);
+ ++trailingCount;
+ }
+ // Whatever is left (possibly nothing, if quoteBuf held only
+ // doubled apostrophes) is emitted between a pair of apostrophes.
+ if (quoteBuf.length() > 0) {
+ rule.append(APOSTROPHE);
+ // jdk 1.3.1 does not have append(StringBuffer) yet
+ if(ICUDebug.isJDK14OrHigher){
+ rule.append(quoteBuf);
+ }else{
+ rule.append(quoteBuf.toString());
+ }
+ rule.append(APOSTROPHE);
+ quoteBuf.setLength(0);
+ }
+ // Re-emit the trailing doubled apostrophes counted above, each
+ // as backslash APOSTROPHE after the closing quote.
+ while (trailingCount-- > 0) {
+ rule.append(BACKSLASH).append(APOSTROPHE);
+ }
+ }
+ // c == -1 means the call was made only to flush quoteBuf.
+ if (c != -1) {
+ /* Since spaces are ignored during parsing, they are
+ * emitted only for readability. We emit one here
+ * only if there isn't already one at the end of the
+ * rule.
+ */
+ if (c == ' ') {
+ int len = rule.length();
+ // Note that no space is emitted into an empty rule either.
+ if (len > 0 && rule.charAt(len-1) != ' ') {
+ rule.append(' ');
+ }
+ // Utility.escapeUnprintable() appends the escape itself and
+ // returns true when c is unprintable; c is appended as is
+ // only when escaping is off or c turned out to be printable.
+ } else if (!escapeUnprintable || !Utility.escapeUnprintable(rule, c)) {
+ UTF16.append(rule, c);
+ }
+ }
+ }
+
+ // Escape ' and '\' and don't begin a quote just for them
+ else if (quoteBuf.length() == 0 &&
+ (c == APOSTROPHE || c == BACKSLASH)) {
+ rule.append(BACKSLASH).append((char)c);
+ }
+
+ // Specials (printable ascii that isn't [0-9a-zA-Z]) and
+ // whitespace need quoting. Also append stuff to quotes if we are
+ // building up a quoted substring already.
+ else if (quoteBuf.length() > 0 ||
+ (c >= 0x0021 && c <= 0x007E &&
+ !((c >= 0x0030/*'0'*/ && c <= 0x0039/*'9'*/) ||
+ (c >= 0x0041/*'A'*/ && c <= 0x005A/*'Z'*/) ||
+ (c >= 0x0061/*'a'*/ && c <= 0x007A/*'z'*/))) ||
+ UCharacterProperty.isRuleWhiteSpace(c)) {
+ UTF16.append(quoteBuf, c);
+ // Double ' within a quote
+ if (c == APOSTROPHE) {
+ quoteBuf.append((char)c);
+ }
+ }
+
+ // Otherwise just append
+ else {
+ UTF16.append(rule, c);
+ }
+ }
+
+    /**
+     * Append a whole string to the rule by passing each of its 16-bit
+     * code units, in order, to the single-character version of
+     * appendToRule with the same flags and quote buffer.
+     */
+    public static void appendToRule(StringBuffer rule,
+                                    String text,
+                                    boolean isLiteral,
+                                    boolean escapeUnprintable,
+                                    StringBuffer quoteBuf) {
+        final int length = text.length();
+        int offset = 0;
+        while (offset < length) {
+            // Okay to process in 16-bit code units here
+            char unit = text.charAt(offset++);
+            appendToRule(rule, unit, isLiteral, escapeUnprintable, quoteBuf);
+        }
+    }
+
+    /**
+     * Append the pattern of a matcher to the given rule as a literal.
+     * A null matcher is allowed and appends nothing.
+     */
+    public static void appendToRule(StringBuffer rule,
+                                    UnicodeMatcher matcher,
+                                    boolean escapeUnprintable,
+                                    StringBuffer quoteBuf) {
+        if (matcher == null) {
+            return;
+        }
+        String pattern = matcher.toPattern(escapeUnprintable);
+        appendToRule(rule, pattern, true, escapeUnprintable, quoteBuf);
+    }
+
+    /**
+     * Compare two ints as if they were unsigned 32-bit values.
+     * @param source 32 bit unsigned integer
+     * @param target 32 bit unsigned integer
+     * @return 0 if equals, 1 if source is greater than target and -1
+     * otherwise
+     */
+    public static final int compareUnsigned(int source, int target)
+    {
+        // Adding MAGIC_UNSIGNED to both operands lets an ordinary signed
+        // comparison yield the unsigned ordering.
+        final int biasedSource = source + MAGIC_UNSIGNED;
+        final int biasedTarget = target + MAGIC_UNSIGNED;
+        if (biasedSource == biasedTarget) {
+            return 0;
+        }
+        return (biasedSource < biasedTarget) ? -1 : 1;
+    }
+
+    /**
+     * Find the highest set bit of a positive integer by a binary
+     * search through the bits: the search window is halved at every
+     * step (16, 8, 4, 2 and finally 1 bits).
+     *
+     * @param n is the integer
+     *
+     * @return the bit number of the highest bit, with 0 being
+     * the low order bit, or -1 if <code>n</code> is not positive
+     */
+    public static final byte highBit(int n)
+    {
+        if (n <= 0) {
+            return -1;
+        }
+
+        byte bit = 0;
+
+        for (int shift = 16; shift >= 1; shift >>= 1) {
+            // If the highest bit lies in the upper half of the window,
+            // drop the lower half and remember how far n was shifted.
+            if (n >= (1 << shift)) {
+                n >>= shift;
+                bit += shift;
+            }
+        }
+
+        return bit;
+    }
+    /**
+     * Build a String from an array of code points by appending each
+     * one, in order, with UTF16.append.
+     * @param source the code points to convert
+     * @return the resulting string
+     */
+    public static String valueOf(int[]source){
+        // TODO: Investigate why this method is not on UTF16 class
+        final int count = source.length;
+        StringBuffer units = new StringBuffer(count);
+        int index = 0;
+        while (index < count) {
+            UTF16.append(units, source[index++]);
+        }
+        return units.toString();
+    }
+
+
+    /**
+     * Concatenate count copies of a string.
+     * @param s the string to duplicate
+     * @param count the number of copies; values of zero or less give
+     * the empty string, and a count of one returns s itself
+     * @return the repeated string
+     */
+    public static String repeat(String s, int count) {
+        if (count <= 0) {
+            return "";
+        }
+        if (count == 1) {
+            return s;
+        }
+        StringBuffer copies = new StringBuffer();
+        int remaining = count;
+        while (remaining-- > 0) {
+            copies.append(s);
+        }
+        return copies.toString();
+    }
+
+
+ // !!! 1.3 compatibility
+ /**
+ * Returns the result of indexOf(s) on the given buffer.
+ * The //#if ... //#endif lines are conditional-build directives:
+ * presumably, for the FOUNDATION10 and J2SE13 targets, the //## line
+ * (which searches a String copy of the buffer) replaces the live one.
+ */
+ public static int indexOf(StringBuffer buf, String s) {
+//#if defined(FOUNDATION10) || defined(J2SE13)
+//## return buf.toString().indexOf(s);
+//#else
+ return buf.indexOf(s);
+//#endif
+ }
+
+ // !!! 1.3 compatibility
+ /**
+ * Returns the result of lastIndexOf(s) on the given buffer.
+ * The //#if ... //#endif lines are conditional-build directives:
+ * presumably, for the FOUNDATION10 and J2SE13 targets, the //## line
+ * (which searches a String copy of the buffer) replaces the live one.
+ */
+ public static int lastIndexOf(StringBuffer buf, String s) {
+//#if defined(FOUNDATION10) || defined(J2SE13)
+//## return buf.toString().lastIndexOf(s);
+//#else
+ return buf.lastIndexOf(s);
+//#endif
+ }
+
+ // !!! 1.3 compatibility
+ /**
+ * Returns the result of indexOf(s, i) on the given buffer, where i
+ * is the index passed through as the place to start searching.
+ * The //#if ... //#endif lines are conditional-build directives:
+ * presumably, for the FOUNDATION10 and J2SE13 targets, the //## line
+ * (which searches a String copy of the buffer) replaces the live one.
+ */
+ public static int indexOf(StringBuffer buf, String s, int i) {
+//#if defined(FOUNDATION10) || defined(J2SE13)
+//## return buf.toString().indexOf(s, i);
+//#else
+ return buf.indexOf(s, i);
+//#endif
+ }
+
+ // !!! 1.3 compatibility
+ /**
+ * Returns the result of lastIndexOf(s, i) on the given buffer, where
+ * i is the index passed through as the place to start searching.
+ * The //#if ... //#endif lines are conditional-build directives:
+ * presumably, for the FOUNDATION10 and J2SE13 targets, the //## line
+ * (which searches a String copy of the buffer) replaces the live one.
+ */
+ public static int lastIndexOf(StringBuffer buf, String s, int i) {
+//#if defined(FOUNDATION10) || defined(J2SE13)
+//## return buf.toString().lastIndexOf(s, i);
+//#else
+ return buf.lastIndexOf(s, i);
+//#endif
+ }
+
+ // !!! 1.3/1.4 compatibility
+ /**
+ * Returns src with the occurrences of target replaced by replacement,
+ * as done by src.replace(target, replacement).
+ * The //#if ... //#endif lines are conditional-build directives:
+ * presumably, for the FOUNDATION10, J2SE13 and J2SE14 targets, the
+ * //## lines replace the live statement. That variant scans src left
+ * to right with indexOf, copies the text between matches, and returns
+ * src itself when target does not occur.
+ * NOTE(review): with an empty target the //## variant looks like it
+ * never terminates (n stays equal to i, so indexOf never returns -1)
+ * - confirm before relying on it.
+ */
+ public static String replace(String src, String target, String replacement) {
+//#if defined(FOUNDATION10) || defined(J2SE13) || defined(J2SE14)
+//## int i = src.indexOf(target);
+//## if (i == -1) {
+//## return src;
+//## }
+//## StringBuffer buf = new StringBuffer();
+//## int n = 0;
+//## do {
+//## buf.append(src.substring(n, i));
+//## buf.append(replacement);
+//## n = i + target.length();
+//## i = src.indexOf(target, n);
+//## } while (i != -1);
+//## if (n < src.length()) {
+//## buf.append(src.substring(n));
+//## }
+//## return buf.toString();
+//#else
+ return src.replace(target, replacement);
+//#endif
+ }
+
+ // !!! 1.3 compatibility
+ /**
+ * Returns the result of src.replaceAll(target, replacement).
+ * The //#if ... //#endif lines are conditional-build directives:
+ * presumably, for the FOUNDATION10 and J2SE13 targets, the //## line
+ * replaces the live statement.
+ * NOTE(review): the two variants do not agree. The live statement
+ * passes target to String.replaceAll, whereas the //## variant calls
+ * replace(), which searches for target with indexOf, i.e. literally.
+ * Confirm which interpretation callers rely on.
+ */
+ public static String replaceAll(String src, String target, String replacement) {
+//#if defined(FOUNDATION10) || defined(J2SE13)
+//## return replace(src, target, replacement);
+//#else
+ return src.replaceAll(target, replacement);
+//#endif
+ }
+
+ //private static final String REGEX_SPECIALS = ".^$[]*+?|()";
+
+    // !!! 1.3 compatibility
+    /**
+     * Split src around the occurrences of target.
+     * Note: target is a string literal, not a regular expression.
+     * The //#if ... //#endif lines are conditional-build directives;
+     * presumably the //## lines replace the live statement for the
+     * FOUNDATION10 and J2SE13 targets.
+     * @param src the string to split
+     * @param target the literal separator
+     * @return the pieces of src found between the separators
+     */
+    public static String[] splitString(String src, String target) {
+//#if defined(FOUNDATION10) || defined(J2SE13)
+//## int i = src.indexOf(target);
+//## if (i == -1) {
+//## return new String[] { src };
+//## }
+//## ArrayList output = new ArrayList();
+//## int n = 0;
+//## do {
+//## output.add(src.substring(n, i));
+//## n = i + target.length();
+//## i = src.indexOf(target, n);
+//## } while (i != -1);
+//## if (n < src.length()) {
+//## output.add(src.substring(n));
+//## }
+//## return (String[]) output.toArray(new String[output.size()]);
+//#else
+        // Quote target with \Q...\E so that it is matched literally.  A
+        // "\E" inside target would end the quotation early, so each one
+        // closes the quotation, is matched as an escaped backslash plus
+        // 'E', and reopens the quotation.  (Pattern.quote would do the
+        // same but is not available on every supported JDK.)
+        return src.split("\\Q" + replace(target, "\\E", "\\E\\\\E\\Q") + "\\E");
+//#endif
+    }
+
+ // !!! 1.3 compatibility
+ /**
+ * Split the string at runs of ascii whitespace characters.
+ * The //#if ... //#endif lines are conditional-build directives:
+ * presumably, for the FOUNDATION10 and J2SE13 targets, the //## lines
+ * replace the live statement. That variant treats space, tab,
+ * newline, U+000B, form feed and carriage return as whitespace.
+ * NOTE(review): the variants appear to differ when src starts with
+ * whitespace: the //## variant starts out in the "in whitespace"
+ * state and so adds no leading empty string, while String.split is
+ * documented to include a leading empty substring in that case -
+ * confirm against callers.
+ * @param src the string to split
+ * @return the pieces of src between the whitespace runs
+ */
+ public static String[] splitWhitespace(String src) {
+//#if defined(FOUNDATION10) || defined(J2SE13)
+//## char ws[] = "\u0020\u0009\n\u000b\u000c\r".toCharArray();
+//## ArrayList output = new ArrayList();
+//## boolean inWhitespace = true;
+//## int n = 0;
+//## loop:
+//## for (int i = 0; i < src.length(); ++i) {
+//## char c = src.charAt(i);
+//## for (int j = 0; j < ws.length; ++j) {
+//## if (ws[j] == c) {
+//## if (!inWhitespace) {
+//## output.add(src.substring(n, i));
+//## inWhitespace = true;
+//## }
+//## continue loop;
+//## }
+//## }
+//## if (inWhitespace) {
+//## n = i;
+//## inWhitespace = false;
+//## }
+//## }
+//## if (n < src.length()) {
+//## output.add(src.substring(n));
+//## }
+//## return (String[]) output.toArray(new String[output.size()]);
+//#else
+ return src.split("\\s+");
+//#endif
+ }
+
+    // !!! 1.3/1.4 compatibility
+    // Integer constants - Integer.valueOf(int) is not supported in JDK 1.3/1.4
+    // Number of cached values; the cache covers 0 .. MAX_INT_CONST - 1.
+    private static final int MAX_INT_CONST = 64;
+    private static final Integer[] INT_CONST = new Integer[MAX_INT_CONST];
+
+    static {
+        // Fill the cache once, when the class is initialized.
+        int value = 0;
+        while (value < MAX_INT_CONST) {
+            INT_CONST[value] = new Integer(value);
+            ++value;
+        }
+    }
+
+    /**
+     * Return an Integer object for the given value.  Values from 0 up
+     * to (but not including) MAX_INT_CONST come from a shared cache;
+     * for any other value a new Integer is created on every call.
+     * @param val the int value to box
+     * @return an Integer holding val
+     */
+    public static Integer integerValueOf(int val) {
+        boolean cached = (val >= 0) && (val < MAX_INT_CONST);
+        return cached ? INT_CONST[val] : new Integer(val);
+    }
+
+    // !!! 1.3/1.4 compatibility
+    // Arrays.toString(Object[])
+    /**
+     * Format an array as "[e0, e1, ...]", using toString() for each
+     * element and the text "null" for null elements.
+     * @param a the array to format
+     * @return the formatted string; "[]" for an empty array
+     */
+    public static String arrayToString(Object[] a) {
+        StringBuffer out = new StringBuffer("[");
+        // Empty before the first element, ", " before every later one.
+        String separator = "";
+        for (int i = 0; i < a.length; i++) {
+            out.append(separator);
+            separator = ", ";
+            Object element = a[i];
+            out.append(element == null ? "null" : element.toString());
+        }
+        out.append("]");
+        return out.toString();
+    }
+}