-//##header\r
-/*\r
- *******************************************************************************\r
- * Copyright (C) 2006-2009, Google, International Business Machines Corporation *\r
- * and others. All Rights Reserved. *\r
- *******************************************************************************\r
- */\r
-package com.ibm.icu.impl;\r
-\r
-import com.ibm.icu.text.UTF16;\r
-import com.ibm.icu.text.UnicodeSet;\r
-\r
-/**\r
- * A simple parsing class for patterns and rules. Handles '...' quotations, \\uxxxx and \\Uxxxxxxxx, and symple syntax.\r
- * The '' (two quotes) is treated as a single quote, inside or outside a quote\r
- * <ul>\r
- * <li>Any ignorable characters are ignored in parsing.</li>\r
- * <li>Any syntax characters are broken into separate tokens</li>\r
- * <li>Quote characters can be specified: '...', "...", and \x </li>\r
- * <li>Other characters are treated as literals</li>\r
- * </ul>\r
- */\r
-public class PatternTokenizer {\r
- // settings used in the interpretation of the pattern\r
- private UnicodeSet ignorableCharacters = new UnicodeSet();\r
- private UnicodeSet syntaxCharacters = new UnicodeSet();\r
- private UnicodeSet extraQuotingCharacters = new UnicodeSet();\r
- private UnicodeSet escapeCharacters = new UnicodeSet();\r
- private boolean usingSlash = false;\r
- private boolean usingQuote = false;\r
- \r
- // transient data, set when needed. Null it out for any changes in the above fields.\r
- private transient UnicodeSet needingQuoteCharacters = null;\r
- \r
- // data about the current pattern being parsed. start gets moved as we go along.\r
- private int start;\r
- private int limit;\r
- private String pattern;\r
- \r
- public UnicodeSet getIgnorableCharacters() {\r
- return (UnicodeSet) ignorableCharacters.clone();\r
- }\r
- /**\r
- * Sets the characters to be ignored in parsing, eg new UnicodeSet("[:pattern_whitespace:]");\r
- * @param ignorableCharacters\r
- * @return\r
- */\r
- public PatternTokenizer setIgnorableCharacters(UnicodeSet ignorableCharacters) {\r
- this.ignorableCharacters = (UnicodeSet) ignorableCharacters.clone();\r
- needingQuoteCharacters = null;\r
- return this;\r
- }\r
- public UnicodeSet getSyntaxCharacters() {\r
- return (UnicodeSet) syntaxCharacters.clone();\r
- }\r
- public UnicodeSet getExtraQuotingCharacters() {\r
- return (UnicodeSet) extraQuotingCharacters.clone();\r
- }\r
- /**\r
- * Sets the characters to be interpreted as syntax characters in parsing, eg new UnicodeSet("[:pattern_syntax:]")\r
- * @param syntaxCharacters\r
- * @return\r
- */\r
- public PatternTokenizer setSyntaxCharacters(UnicodeSet syntaxCharacters) {\r
- this.syntaxCharacters = (UnicodeSet) syntaxCharacters.clone();\r
- needingQuoteCharacters = null;\r
- return this;\r
- } \r
- /**\r
- * Sets the extra characters to be quoted in literals\r
- * @param syntaxCharacters\r
- * @return\r
- */\r
- public PatternTokenizer setExtraQuotingCharacters(UnicodeSet syntaxCharacters) {\r
- this.extraQuotingCharacters = (UnicodeSet) syntaxCharacters.clone();\r
- needingQuoteCharacters = null;\r
- return this;\r
- } \r
- \r
- public UnicodeSet getEscapeCharacters() {\r
- return (UnicodeSet) escapeCharacters.clone();\r
- }\r
- /**\r
- * Set characters to be escaped in literals, in quoteLiteral and normalize, eg new UnicodeSet("[^\\u0020-\\u007E]");\r
- * @param escapeCharacters\r
- * @return\r
- */\r
- public PatternTokenizer setEscapeCharacters(UnicodeSet escapeCharacters) {\r
- this.escapeCharacters = (UnicodeSet) escapeCharacters.clone();\r
- return this;\r
- }\r
- public boolean isUsingQuote() {\r
- return usingQuote;\r
- }\r
- public PatternTokenizer setUsingQuote(boolean usingQuote) {\r
- this.usingQuote = usingQuote;\r
- needingQuoteCharacters = null;\r
- return this;\r
- }\r
- public boolean isUsingSlash() {\r
- return usingSlash;\r
- }\r
- public PatternTokenizer setUsingSlash(boolean usingSlash) {\r
- this.usingSlash = usingSlash;\r
- needingQuoteCharacters = null;\r
- return this;\r
- }\r
- // public UnicodeSet getQuoteCharacters() {\r
-// return (UnicodeSet) quoteCharacters.clone();\r
-// }\r
-// public PatternTokenizer setQuoteCharacters(UnicodeSet quoteCharacters) {\r
-// this.quoteCharacters = (UnicodeSet) quoteCharacters.clone();\r
-// needingQuoteCharacters = null;\r
-// return this;\r
-// }\r
- public int getLimit() {\r
- return limit;\r
- }\r
- public PatternTokenizer setLimit(int limit) {\r
- this.limit = limit;\r
- return this;\r
- }\r
- public int getStart() {\r
- return start;\r
- }\r
- public PatternTokenizer setStart(int start) {\r
- this.start = start;\r
- return this;\r
- }\r
-\r
-//#if defined(FOUNDATION10) || defined(J2SE13)\r
-//## public PatternTokenizer setPattern(StringBuffer pattern) {\r
-//## return setPattern(pattern.toString());\r
-//## }\r
-//#else \r
- public PatternTokenizer setPattern(CharSequence pattern) {\r
- return setPattern(pattern.toString());\r
- }\r
-//#endif\r
-\r
- public PatternTokenizer setPattern(String pattern) {\r
- if (pattern == null) {\r
- throw new IllegalArgumentException("Inconsistent arguments");\r
- }\r
- this.start = 0;\r
- this.limit = pattern.length();\r
- this.pattern = pattern;\r
- return this;\r
- }\r
-\r
- public static final char SINGLE_QUOTE = '\'';\r
- public static final char BACK_SLASH = '\\';\r
- private static int NO_QUOTE = -1, IN_QUOTE = -2;\r
-\r
-//#if defined(FOUNDATION10) || defined(J2SE13)\r
-//## public String quoteLiteral(StringBuffer string) {\r
-//## return quoteLiteral(string.toString());\r
-//## }\r
-//#else\r
- public String quoteLiteral(CharSequence string) {\r
- return quoteLiteral(string.toString());\r
- }\r
-//#endif\r
-\r
- /**\r
- * Quote a literal string, using the available settings. Thus syntax characters, quote characters, and ignorable characters will be put into quotes.\r
- * @param string\r
- * @return\r
- */\r
- public String quoteLiteral(String string) {\r
- if (needingQuoteCharacters == null) {\r
- needingQuoteCharacters = new UnicodeSet().addAll(syntaxCharacters).addAll(ignorableCharacters).addAll(extraQuotingCharacters); // .addAll(quoteCharacters)\r
- if (usingSlash) needingQuoteCharacters.add(BACK_SLASH);\r
- if (usingQuote) needingQuoteCharacters.add(SINGLE_QUOTE);\r
- }\r
- StringBuffer result = new StringBuffer();\r
- int quotedChar = NO_QUOTE;\r
- int cp;\r
- for (int i = 0; i < string.length(); i += UTF16.getCharCount(cp)) {\r
- cp = UTF16.charAt(string, i);\r
- if (escapeCharacters.contains(cp)) {\r
- // we may have to fix up previous characters\r
- if (quotedChar == IN_QUOTE) {\r
- result.append(SINGLE_QUOTE);\r
- quotedChar = NO_QUOTE;\r
- }\r
- appendEscaped(result, cp);\r
- continue;\r
- }\r
- \r
- if (needingQuoteCharacters.contains(cp)) {\r
- // if we have already started a quote\r
- if (quotedChar == IN_QUOTE) {\r
- UTF16.append(result, cp);\r
- if (usingQuote && cp == SINGLE_QUOTE) { // double it\r
- result.append(SINGLE_QUOTE);\r
- }\r
- continue;\r
- }\r
- // otherwise not already in quote\r
- if (usingSlash) {\r
- result.append(BACK_SLASH);\r
- UTF16.append(result, cp);\r
- continue;\r
- }\r
- if (usingQuote) {\r
- if (cp == SINGLE_QUOTE) { // double it and continue\r
- result.append(SINGLE_QUOTE);\r
- result.append(SINGLE_QUOTE);\r
- continue;\r
- }\r
- result.append(SINGLE_QUOTE);\r
- UTF16.append(result, cp);\r
- quotedChar = IN_QUOTE;\r
- continue;\r
- }\r
- // we have no choice but to use \\u or \\U\r
- appendEscaped(result, cp);\r
- continue;\r
- }\r
- // otherwise cp doesn't need quoting\r
- // we may have to fix up previous characters\r
- if (quotedChar == IN_QUOTE) {\r
- result.append(SINGLE_QUOTE);\r
- quotedChar = NO_QUOTE;\r
- }\r
- UTF16.append(result, cp);\r
- }\r
- // all done. \r
- // we may have to fix up previous characters\r
- if (quotedChar == IN_QUOTE) {\r
- result.append(SINGLE_QUOTE);\r
- }\r
- return result.toString();\r
- }\r
-\r
- private void appendEscaped(StringBuffer result, int cp) {\r
- if (cp <= 0xFFFF) {\r
- result.append("\\u").append(Utility.hex(cp,4));\r
- } else {\r
- result.append("\\U").append(Utility.hex(cp,8));\r
- }\r
- }\r
- \r
- public String normalize() {\r
- int oldStart = start;\r
- StringBuffer result = new StringBuffer();\r
- StringBuffer buffer = new StringBuffer();\r
- while (true) {\r
- buffer.setLength(0);\r
- int status = next(buffer);\r
- if (status == DONE) {\r
- start = oldStart;\r
- return result.toString();\r
- }\r
- if (status != SYNTAX) {\r
- result.append(quoteLiteral(buffer));\r
- } else {\r
- result.append(buffer);\r
- }\r
- }\r
- }\r
- \r
- public static final int DONE = 0, SYNTAX = 1, LITERAL = 2, BROKEN_QUOTE = 3, BROKEN_ESCAPE = 4, UNKNOWN = 5;\r
- \r
- private static final int AFTER_QUOTE = -1, NONE = 0, START_QUOTE = 1, NORMAL_QUOTE = 2, SLASH_START = 3, HEX = 4;\r
- \r
- public int next(StringBuffer buffer) {\r
- if (start >= limit) return DONE;\r
- int status = UNKNOWN;\r
- int lastQuote = UNKNOWN;\r
- int quoteStatus = NONE;\r
- int hexCount = 0;\r
- int hexValue = 0;\r
- int cp;\r
- main:\r
- for (int i = start; i < limit; i += UTF16.getCharCount(cp)) {\r
- cp = UTF16.charAt(pattern, i);\r
- // if we are in a quote, then handle it.\r
- switch (quoteStatus) {\r
- case SLASH_START:\r
- switch (cp) {\r
- case 'u':\r
- quoteStatus = HEX;\r
- hexCount = 4;\r
- hexValue = 0;\r
- continue main;\r
- case 'U': \r
- quoteStatus = HEX;\r
- hexCount = 8;\r
- hexValue = 0;\r
- continue main;\r
- default:\r
- if (usingSlash) {\r
- UTF16.append(buffer, cp);\r
- quoteStatus = NONE;\r
- continue main;\r
- } else {\r
- buffer.append(BACK_SLASH);\r
- quoteStatus = NONE;\r
- }\r
- }\r
- break; // fall through to NONE\r
- case HEX:\r
- hexValue <<= 4;\r
- hexValue += cp;\r
- switch (cp) {\r
- case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9':\r
- hexValue -= '0'; break;\r
- case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':\r
- hexValue -= 'a' - 10; break;\r
- case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':\r
- hexValue -= 'A' - 10; break;\r
- default:\r
- start = i;\r
- return BROKEN_ESCAPE;\r
- }\r
- --hexCount;\r
- if (hexCount == 0) {\r
- quoteStatus = NONE;\r
- UTF16.append(buffer, hexValue);\r
- }\r
- continue main;\r
- case AFTER_QUOTE:\r
- // see if we get another quote character\r
- // if we just ended a quote BUT the following character is the lastQuote character, then we have a situation like '...''...', so we restart the quote\r
- if (cp == lastQuote) {\r
- UTF16.append(buffer, cp);\r
- quoteStatus = NORMAL_QUOTE;\r
- continue main;\r
- }\r
- quoteStatus = NONE;\r
- break; // fall through to NONE\r
- case START_QUOTE:\r
- // if we are at the very start of a quote, and we hit another quote mark then we emit a literal quote character and end the quote\r
- if (cp == lastQuote) {\r
- UTF16.append(buffer, cp);\r
- quoteStatus = NONE; // get out of quote, with no trace remaining\r
- continue; \r
- }\r
- // otherwise get into quote\r
- UTF16.append(buffer, cp);\r
- quoteStatus = NORMAL_QUOTE;\r
- continue main;\r
- case NORMAL_QUOTE: \r
- if (cp == lastQuote) {\r
- quoteStatus = AFTER_QUOTE; // get out of quote\r
- continue main;\r
- }\r
- UTF16.append(buffer, cp);\r
- continue main;\r
- }\r
- \r
- if (ignorableCharacters.contains(cp)) {\r
- continue;\r
- }\r
- // do syntax characters\r
- if (syntaxCharacters.contains(cp)) {\r
- if (status == UNKNOWN) {\r
- UTF16.append(buffer, cp);\r
- start = i + UTF16.getCharCount(cp);\r
- return SYNTAX;\r
- } else { // LITERAL, so back up and break\r
- start = i;\r
- return status;\r
- }\r
- }\r
- // otherwise it is a literal; keep on going\r
- status = LITERAL;\r
- if (cp == BACK_SLASH) {\r
- quoteStatus = SLASH_START;\r
- continue;\r
- } else if (usingQuote && cp == SINGLE_QUOTE) {\r
- lastQuote = cp;\r
- quoteStatus = START_QUOTE;\r
- continue;\r
- }\r
- // normal literals\r
- UTF16.append(buffer, cp);\r
- }\r
- // handle final cleanup\r
- start = limit;\r
- switch (quoteStatus) {\r
- case HEX:\r
- status = BROKEN_ESCAPE;\r
- break;\r
- case SLASH_START:\r
- if (usingSlash) {\r
- status = BROKEN_ESCAPE;\r
- } else {\r
- buffer.append(BACK_SLASH);\r
- }\r
- break;\r
- case START_QUOTE: case NORMAL_QUOTE:\r
- status = BROKEN_QUOTE;\r
- break;\r
- }\r
- return status;\r
- }\r
- \r
- \r
-}\r
-//eof\r
+//##header J2SE15
+/*
+ *******************************************************************************
+ * Copyright (C) 2006-2009, Google, International Business Machines Corporation *
+ * and others. All Rights Reserved. *
+ *******************************************************************************
+ */
+package com.ibm.icu.impl;
+
+import com.ibm.icu.text.UTF16;
+import com.ibm.icu.text.UnicodeSet;
+
+/**
+ * A simple parsing class for patterns and rules. Handles '...' quotations, \\uxxxx and \\Uxxxxxxxx, and simple syntax.
+ * The '' (two quotes) is treated as a single quote, inside or outside a quote.
+ * <ul>
+ * <li>Any ignorable characters are ignored in parsing.</li>
+ * <li>Any syntax characters are broken into separate tokens</li>
+ * <li>Literals can be quoted with '...' (if usingQuote is set) or \x (if usingSlash is set)</li>
+ * <li>Other characters are treated as literals</li>
+ * </ul>
+ */
+public class PatternTokenizer {
+ // settings used in the interpretation of the pattern
+ private UnicodeSet ignorableCharacters = new UnicodeSet();
+ private UnicodeSet syntaxCharacters = new UnicodeSet();
+ private UnicodeSet extraQuotingCharacters = new UnicodeSet();
+ private UnicodeSet escapeCharacters = new UnicodeSet();
+ private boolean usingSlash = false;
+ private boolean usingQuote = false;
+
+ // transient data, set when needed. Null it out for any changes in the above fields.
+ private transient UnicodeSet needingQuoteCharacters = null;
+
+ // data about the current pattern being parsed. start gets moved as we go along.
+ private int start;
+ private int limit;
+ private String pattern;
+
+ public UnicodeSet getIgnorableCharacters() {
+ return (UnicodeSet) ignorableCharacters.clone();
+ }
+ /**
+ * Sets the characters to be ignored in parsing, eg new UnicodeSet("[:pattern_whitespace:]");
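+ * @param ignorableCharacters the set of characters to skip while parsing; a clone of it is stored
+ * @return this tokenizer, so that calls can be chained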
+ */
+ public PatternTokenizer setIgnorableCharacters(UnicodeSet ignorableCharacters) {
+ this.ignorableCharacters = (UnicodeSet) ignorableCharacters.clone();
+ needingQuoteCharacters = null;
+ return this;
+ }
+ public UnicodeSet getSyntaxCharacters() {
+ return (UnicodeSet) syntaxCharacters.clone();
+ }
+ public UnicodeSet getExtraQuotingCharacters() {
+ return (UnicodeSet) extraQuotingCharacters.clone();
+ }
+ /**
+ * Sets the characters to be interpreted as syntax characters in parsing, eg new UnicodeSet("[:pattern_syntax:]")
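+ * @param syntaxCharacters the set of characters to return as separate syntax tokens; a clone of it is stored
+ * @return this tokenizer, so that calls can be chained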
+ */
+ public PatternTokenizer setSyntaxCharacters(UnicodeSet syntaxCharacters) {
+ this.syntaxCharacters = (UnicodeSet) syntaxCharacters.clone();
+ needingQuoteCharacters = null;
+ return this;
+ }
+ /**
+ * Sets the extra characters to be quoted in literals
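+ * @param syntaxCharacters the additional characters that quoteLiteral must quote; a clone of it is stored
+ * @return this tokenizer, so that calls can be chained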
+ */
+ public PatternTokenizer setExtraQuotingCharacters(UnicodeSet syntaxCharacters) {
+ this.extraQuotingCharacters = (UnicodeSet) syntaxCharacters.clone();
+ needingQuoteCharacters = null;
+ return this;
+ }
+
+ public UnicodeSet getEscapeCharacters() {
+ return (UnicodeSet) escapeCharacters.clone();
+ }
+ /**
+ * Set characters to be escaped in literals, in quoteLiteral and normalize, eg new UnicodeSet("[^\\u0020-\\u007E]");
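+ * @param escapeCharacters the set of characters to write as \\uxxxx or \\Uxxxxxxxx escapes; a clone of it is stored
+ * @return this tokenizer, so that calls can be chained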
+ */
+ public PatternTokenizer setEscapeCharacters(UnicodeSet escapeCharacters) {
+ this.escapeCharacters = (UnicodeSet) escapeCharacters.clone();
+ return this;
+ }
+ public boolean isUsingQuote() {
+ return usingQuote;
+ }
+ public PatternTokenizer setUsingQuote(boolean usingQuote) {
+ this.usingQuote = usingQuote;
+ needingQuoteCharacters = null;
+ return this;
+ }
+ public boolean isUsingSlash() {
+ return usingSlash;
+ }
+ public PatternTokenizer setUsingSlash(boolean usingSlash) {
+ this.usingSlash = usingSlash;
+ needingQuoteCharacters = null;
+ return this;
+ }
+ // public UnicodeSet getQuoteCharacters() {
+// return (UnicodeSet) quoteCharacters.clone();
+// }
+// public PatternTokenizer setQuoteCharacters(UnicodeSet quoteCharacters) {
+// this.quoteCharacters = (UnicodeSet) quoteCharacters.clone();
+// needingQuoteCharacters = null;
+// return this;
+// }
+ public int getLimit() {
+ return limit;
+ }
+ public PatternTokenizer setLimit(int limit) {
+ this.limit = limit;
+ return this;
+ }
+ public int getStart() {
+ return start;
+ }
+ public PatternTokenizer setStart(int start) {
+ this.start = start;
+ return this;
+ }
+
+//#if defined(FOUNDATION10) || defined(J2SE13)
+//## public PatternTokenizer setPattern(StringBuffer pattern) {
+//## return setPattern(pattern.toString());
+//## }
+//#else
+ public PatternTokenizer setPattern(CharSequence pattern) {
+ return setPattern(pattern.toString());
+ }
+//#endif
+
+ public PatternTokenizer setPattern(String pattern) {
+ if (pattern == null) {
+ throw new IllegalArgumentException("Inconsistent arguments");
+ }
+ this.start = 0;
+ this.limit = pattern.length();
+ this.pattern = pattern;
+ return this;
+ }
+
+ public static final char SINGLE_QUOTE = '\'';
+ public static final char BACK_SLASH = '\\';
+ private static int NO_QUOTE = -1, IN_QUOTE = -2;
+
+//#if defined(FOUNDATION10) || defined(J2SE13)
+//## public String quoteLiteral(StringBuffer string) {
+//## return quoteLiteral(string.toString());
+//## }
+//#else
+ public String quoteLiteral(CharSequence string) {
+ return quoteLiteral(string.toString());
+ }
+//#endif
+
+ /**
+ * Quote a literal string, using the available settings. Thus syntax characters, quote characters, and ignorable characters will be put into quotes.
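+ * @param string the literal text to quote
+ * @return the quoted text: escape characters become \\uxxxx or \\Uxxxxxxxx, and other characters needing quoting are backslash-escaped if usingSlash, else single-quoted if usingQuote, else hex-escaped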
+ */
+ public String quoteLiteral(String string) {
+ if (needingQuoteCharacters == null) {
+ needingQuoteCharacters = new UnicodeSet().addAll(syntaxCharacters).addAll(ignorableCharacters).addAll(extraQuotingCharacters); // .addAll(quoteCharacters)
+ if (usingSlash) needingQuoteCharacters.add(BACK_SLASH);
+ if (usingQuote) needingQuoteCharacters.add(SINGLE_QUOTE);
+ }
+ StringBuffer result = new StringBuffer();
+ int quotedChar = NO_QUOTE;
+ int cp;
+ for (int i = 0; i < string.length(); i += UTF16.getCharCount(cp)) {
+ cp = UTF16.charAt(string, i);
+ if (escapeCharacters.contains(cp)) {
+ // we may have to fix up previous characters
+ if (quotedChar == IN_QUOTE) {
+ result.append(SINGLE_QUOTE);
+ quotedChar = NO_QUOTE;
+ }
+ appendEscaped(result, cp);
+ continue;
+ }
+
+ if (needingQuoteCharacters.contains(cp)) {
+ // if we have already started a quote
+ if (quotedChar == IN_QUOTE) {
+ UTF16.append(result, cp);
+ if (usingQuote && cp == SINGLE_QUOTE) { // double it
+ result.append(SINGLE_QUOTE);
+ }
+ continue;
+ }
+ // otherwise not already in quote
+ if (usingSlash) {
+ result.append(BACK_SLASH);
+ UTF16.append(result, cp);
+ continue;
+ }
+ if (usingQuote) {
+ if (cp == SINGLE_QUOTE) { // double it and continue
+ result.append(SINGLE_QUOTE);
+ result.append(SINGLE_QUOTE);
+ continue;
+ }
+ result.append(SINGLE_QUOTE);
+ UTF16.append(result, cp);
+ quotedChar = IN_QUOTE;
+ continue;
+ }
+ // we have no choice but to use \\u or \\U
+ appendEscaped(result, cp);
+ continue;
+ }
+ // otherwise cp doesn't need quoting
+ // we may have to fix up previous characters
+ if (quotedChar == IN_QUOTE) {
+ result.append(SINGLE_QUOTE);
+ quotedChar = NO_QUOTE;
+ }
+ UTF16.append(result, cp);
+ }
+ // all done.
+ // we may have to fix up previous characters
+ if (quotedChar == IN_QUOTE) {
+ result.append(SINGLE_QUOTE);
+ }
+ return result.toString();
+ }
+
+ private void appendEscaped(StringBuffer result, int cp) {
+ if (cp <= 0xFFFF) {
+ result.append("\\u").append(Utility.hex(cp,4));
+ } else {
+ result.append("\\U").append(Utility.hex(cp,8));
+ }
+ }
+
+ public String normalize() {
+ int oldStart = start;
+ StringBuffer result = new StringBuffer();
+ StringBuffer buffer = new StringBuffer();
+ while (true) {
+ buffer.setLength(0);
+ int status = next(buffer);
+ if (status == DONE) {
+ start = oldStart;
+ return result.toString();
+ }
+ if (status != SYNTAX) {
+ result.append(quoteLiteral(buffer));
+ } else {
+ result.append(buffer);
+ }
+ }
+ }
+
+ public static final int DONE = 0, SYNTAX = 1, LITERAL = 2, BROKEN_QUOTE = 3, BROKEN_ESCAPE = 4, UNKNOWN = 5;
+
+ private static final int AFTER_QUOTE = -1, NONE = 0, START_QUOTE = 1, NORMAL_QUOTE = 2, SLASH_START = 3, HEX = 4;
+
+ public int next(StringBuffer buffer) {
+ if (start >= limit) return DONE;
+ int status = UNKNOWN;
+ int lastQuote = UNKNOWN;
+ int quoteStatus = NONE;
+ int hexCount = 0;
+ int hexValue = 0;
+ int cp;
+ main:
+ for (int i = start; i < limit; i += UTF16.getCharCount(cp)) {
+ cp = UTF16.charAt(pattern, i);
+ // if we are in a quote, then handle it.
+ switch (quoteStatus) {
+ case SLASH_START:
+ switch (cp) {
+ case 'u':
+ quoteStatus = HEX;
+ hexCount = 4;
+ hexValue = 0;
+ continue main;
+ case 'U':
+ quoteStatus = HEX;
+ hexCount = 8;
+ hexValue = 0;
+ continue main;
+ default:
+ if (usingSlash) {
+ UTF16.append(buffer, cp);
+ quoteStatus = NONE;
+ continue main;
+ } else {
+ buffer.append(BACK_SLASH);
+ quoteStatus = NONE;
+ }
+ }
+ break; // fall through to NONE
+ case HEX:
+ hexValue <<= 4;
+ hexValue += cp;
+ switch (cp) {
+ case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9':
+ hexValue -= '0'; break;
+ case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
+ hexValue -= 'a' - 10; break;
+ case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
+ hexValue -= 'A' - 10; break;
+ default:
+ start = i;
+ return BROKEN_ESCAPE;
+ }
+ --hexCount;
+ if (hexCount == 0) {
+ quoteStatus = NONE;
+ UTF16.append(buffer, hexValue);
+ }
+ continue main;
+ case AFTER_QUOTE:
+ // see if we get another quote character
+ // if we just ended a quote BUT the following character is the lastQuote character, then we have a situation like '...''...', so we restart the quote
+ if (cp == lastQuote) {
+ UTF16.append(buffer, cp);
+ quoteStatus = NORMAL_QUOTE;
+ continue main;
+ }
+ quoteStatus = NONE;
+ break; // fall through to NONE
+ case START_QUOTE:
+ // if we are at the very start of a quote, and we hit another quote mark then we emit a literal quote character and end the quote
+ if (cp == lastQuote) {
+ UTF16.append(buffer, cp);
+ quoteStatus = NONE; // get out of quote, with no trace remaining
+ continue;
+ }
+ // otherwise get into quote
+ UTF16.append(buffer, cp);
+ quoteStatus = NORMAL_QUOTE;
+ continue main;
+ case NORMAL_QUOTE:
+ if (cp == lastQuote) {
+ quoteStatus = AFTER_QUOTE; // get out of quote
+ continue main;
+ }
+ UTF16.append(buffer, cp);
+ continue main;
+ }
+
+ if (ignorableCharacters.contains(cp)) {
+ continue;
+ }
+ // do syntax characters
+ if (syntaxCharacters.contains(cp)) {
+ if (status == UNKNOWN) {
+ UTF16.append(buffer, cp);
+ start = i + UTF16.getCharCount(cp);
+ return SYNTAX;
+ } else { // LITERAL, so back up and break
+ start = i;
+ return status;
+ }
+ }
+ // otherwise it is a literal; keep on going
+ status = LITERAL;
+ if (cp == BACK_SLASH) {
+ quoteStatus = SLASH_START;
+ continue;
+ } else if (usingQuote && cp == SINGLE_QUOTE) {
+ lastQuote = cp;
+ quoteStatus = START_QUOTE;
+ continue;
+ }
+ // normal literals
+ UTF16.append(buffer, cp);
+ }
+ // handle final cleanup
+ start = limit;
+ switch (quoteStatus) {
+ case HEX:
+ status = BROKEN_ESCAPE;
+ break;
+ case SLASH_START:
+ if (usingSlash) {
+ status = BROKEN_ESCAPE;
+ } else {
+ buffer.append(BACK_SLASH);
+ }
+ break;
+ case START_QUOTE: case NORMAL_QUOTE:
+ status = BROKEN_QUOTE;
+ break;
+ }
+ return status;
+ }
+
+
+}
+//eof