-//##header\r
-//#if defined(FOUNDATION10) || defined(J2SE13)\r
-//#else\r
-/*\r
- *******************************************************************************\r
- * Copyright (C) 2002-2009, International Business Machines Corporation and *\r
- * others. All Rights Reserved. *\r
- *******************************************************************************\r
- */\r
-package com.ibm.icu.dev.test.util;\r
-\r
-import java.text.ParsePosition;\r
-\r
-import com.ibm.icu.text.*;\r
-import com.ibm.icu.lang.*;\r
-\r
-import java.util.HashMap;\r
-import java.util.HashSet;\r
-import java.util.Set;\r
-import java.util.Map;\r
-\r
-public class Tokenizer {\r
- protected String source;\r
- \r
- protected StringBuffer buffer = new StringBuffer();\r
- protected long number;\r
- protected UnicodeSet unicodeSet = null;\r
- protected int index;\r
- boolean backedup = false;\r
- protected int lastIndex = -1;\r
- protected int nextIndex;\r
- int lastValue = BACKEDUP_TOO_FAR;\r
- TokenSymbolTable symbolTable = new TokenSymbolTable();\r
-\r
- private static final char\r
- QUOTE = '\'',\r
- BSLASH = '\\';\r
- private static final UnicodeSet QUOTERS = new UnicodeSet().add(QUOTE).add(BSLASH);\r
- private static final UnicodeSet WHITESPACE = new UnicodeSet("[" +\r
- "\\u0009-\\u000D\\u0020\\u0085\\u200E\\u200F\\u2028\\u2029" +\r
- "]");\r
- private static final UnicodeSet SYNTAX = new UnicodeSet("[" +\r
- "\\u0021-\\u002F\\u003A-\\u0040\\u005B-\\u0060\\u007B-\\u007E" +\r
- "\\u00A1-\\u00A7\\u00A9\\u00AB-\\u00AC\\u00AE" +\r
- "\\u00B0-\\u00B1\\u00B6\\u00B7\\u00BB\\u00BF\\u00D7\\u00F7" +\r
- "\\u2010-\\u2027\\u2030-\\u205E\\u2190-\\u2BFF" +\r
- "\\u3001\\u3003\\u3008-\\u3020\\u3030" +\r
- "\\uFD3E\\uFD3F\\uFE45\\uFE46" +\r
- "]").removeAll(QUOTERS).remove('$');\r
- private static final UnicodeSet NEWLINE = new UnicodeSet("[\\u000A\\u000D\\u0085\\u2028\\u2029]");\r
- //private static final UnicodeSet DECIMAL = new UnicodeSet("[:Nd:]");\r
- private static final UnicodeSet NON_STRING = new UnicodeSet()\r
- .addAll(WHITESPACE)\r
- .addAll(SYNTAX);\r
- \r
- protected UnicodeSet whiteSpace = WHITESPACE;\r
- protected UnicodeSet syntax = SYNTAX;\r
- private UnicodeSet non_string = NON_STRING;\r
-\r
- private void fixSets() {\r
- if (syntax.containsSome(QUOTERS) || syntax.containsSome(whiteSpace)) {\r
- syntax = ((UnicodeSet)syntax.clone()).removeAll(QUOTERS).removeAll(whiteSpace);\r
- }\r
- if (whiteSpace.containsSome(QUOTERS)) {\r
- whiteSpace = ((UnicodeSet)whiteSpace.clone()).removeAll(QUOTERS);\r
- }\r
- non_string = new UnicodeSet(syntax)\r
- .addAll(whiteSpace);\r
- }\r
- \r
- public Tokenizer setSource(String source) {\r
- this.source = source;\r
- this.index = 0;\r
- return this; // for chaining\r
- }\r
- \r
- public Tokenizer setIndex(int index) {\r
- this.index = index;\r
- return this; // for chaining\r
- }\r
- \r
- public static final int \r
- DONE = -1, \r
- NUMBER = -2, \r
- STRING = -3, \r
- UNICODESET = -4, \r
- UNTERMINATED_QUOTE = -5,\r
- BACKEDUP_TOO_FAR = -6;\r
- \r
- private static final int\r
- //FIRST = 0,\r
- //IN_NUMBER = 1,\r
- //IN_SPACE = 2,\r
- AFTER_QUOTE = 3, // warning: order is important for switch statement\r
- IN_STRING = 4, \r
- AFTER_BSLASH = 5, \r
- IN_QUOTE = 6;\r
- \r
- public String toString(int type, boolean backedupBefore) {\r
- String s = backedup ? "@" : "*";\r
- switch(type) {\r
- case DONE: \r
- return s+"Done"+s;\r
- case BACKEDUP_TOO_FAR:\r
- return s+"Illegal Backup"+s;\r
- case UNTERMINATED_QUOTE: \r
- return s+"Unterminated Quote=" + getString() + s;\r
- case STRING:\r
- return s+"s=" + getString() + s;\r
- case NUMBER:\r
- return s+"n=" + getNumber() + s;\r
- case UNICODESET:\r
- return s+"n=" + getUnicodeSet() + s; \r
- default:\r
- return s+"c=" + usf.getName(type,true) + s;\r
- }\r
- }\r
- \r
- private static final BagFormatter usf = new BagFormatter();\r
- \r
- public void backup() {\r
- if (backedup) throw new IllegalArgumentException("backup too far");\r
- backedup = true;\r
- nextIndex = index;\r
- index = lastIndex;\r
- }\r
- \r
- /*\r
- public int next2() {\r
- boolean backedupBefore = backedup;\r
- int result = next();\r
- System.out.println(toString(result, backedupBefore));\r
- return result;\r
- } \r
- */\r
- \r
- public int next() {\r
- if (backedup) {\r
- backedup = false;\r
- index = nextIndex;\r
- return lastValue;\r
- }\r
- int cp = 0;\r
- boolean inComment = false;\r
- // clean off any leading whitespace or comments\r
- while (true) {\r
- if (index >= source.length()) return lastValue = DONE;\r
- cp = nextChar();\r
- if (inComment) {\r
- if (NEWLINE.contains(cp)) inComment = false;\r
- } else {\r
- if (cp == '#') inComment = true;\r
- else if (!whiteSpace.contains(cp)) break;\r
- }\r
- }\r
- // record the last index in case we have to backup\r
- lastIndex = index;\r
- \r
- if (cp == '[') {\r
- ParsePosition pos = new ParsePosition(index-1);\r
- unicodeSet = new UnicodeSet(source,pos,symbolTable);\r
- index = pos.getIndex();\r
- return lastValue = UNICODESET;\r
- }\r
- // get syntax character\r
- if (syntax.contains(cp)) return lastValue = cp;\r
- \r
- // get number, if there is one\r
- if (UCharacter.getType(cp) == Character.DECIMAL_DIGIT_NUMBER) {\r
- number = UCharacter.getNumericValue(cp);\r
- while (index < source.length()) {\r
- cp = nextChar();\r
- if (UCharacter.getType(cp) != Character.DECIMAL_DIGIT_NUMBER) {\r
- index -= UTF16.getCharCount(cp); // BACKUP!\r
- break;\r
- }\r
- number *= 10;\r
- number += UCharacter.getNumericValue(cp);\r
- }\r
- return lastValue = NUMBER;\r
- }\r
- buffer.setLength(0);\r
- int status = IN_STRING;\r
- main:\r
- while (true) {\r
- switch (status) {\r
- case AFTER_QUOTE: // check for double ''?\r
- if (cp == QUOTE) {\r
- UTF16.append(buffer, QUOTE);\r
- status = IN_QUOTE;\r
- break;\r
- }\r
- // OTHERWISE FALL THROUGH!!!\r
- case IN_STRING: \r
- if (cp == QUOTE) status = IN_QUOTE;\r
- else if (cp == BSLASH) status = AFTER_BSLASH;\r
- else if (non_string.contains(cp)) {\r
- index -= UTF16.getCharCount(cp); // BACKUP!\r
- break main;\r
- } else UTF16.append(buffer,cp);\r
- break;\r
- case IN_QUOTE:\r
- if (cp == QUOTE) status = AFTER_QUOTE;\r
- else UTF16.append(buffer,cp);\r
- break;\r
- case AFTER_BSLASH:\r
- switch(cp) {\r
- case 'n': cp = '\n'; break;\r
- case 'r': cp = '\r'; break;\r
- case 't': cp = '\t'; break;\r
- }\r
- UTF16.append(buffer,cp);\r
- status = IN_STRING;\r
- break;\r
- default: throw new IllegalArgumentException("Internal Error");\r
- }\r
- if (index >= source.length()) break;\r
- cp = nextChar();\r
- }\r
- if (status > IN_STRING) return lastValue = UNTERMINATED_QUOTE;\r
- return lastValue = STRING;\r
- }\r
- \r
- public String getString() {\r
- return buffer.toString();\r
- }\r
- \r
- public String toString() {\r
- return source.substring(0,index) + "$$$" + source.substring(index);\r
- }\r
- \r
- public long getNumber() {\r
- return number;\r
- }\r
- \r
- public UnicodeSet getUnicodeSet() {\r
- return unicodeSet;\r
- }\r
- \r
- private int nextChar() {\r
- int cp = UTF16.charAt(source,index);\r
- index += UTF16.getCharCount(cp);\r
- return cp;\r
- }\r
- public int getIndex() {\r
- return index;\r
- }\r
- public String getSource() {\r
- return source;\r
- }\r
- public UnicodeSet getSyntax() {\r
- return syntax;\r
- }\r
- public UnicodeSet getWhiteSpace() {\r
- return whiteSpace;\r
- }\r
- public void setSyntax(UnicodeSet set) {\r
- syntax = set;\r
- fixSets();\r
- }\r
- public void setWhiteSpace(UnicodeSet set) {\r
- whiteSpace = set;\r
- fixSets();\r
- }\r
- \r
- public Set getLookedUpItems() {\r
- return symbolTable.itemsLookedUp;\r
- }\r
- \r
- public void addSymbol(String var, String value, int start, int limit) {\r
- // the limit is after the ';', so remove it\r
- --limit;\r
- char[] body = new char[limit - start];\r
- value.getChars(start, limit, body, 0);\r
- symbolTable.add(var, body);\r
- }\r
- \r
- public class TokenSymbolTable implements SymbolTable {\r
- Map contents = new HashMap();\r
- Set itemsLookedUp = new HashSet();\r
- \r
- public void add(String var, char[] body) {\r
- // start from 1 to avoid the $\r
- contents.put(var.substring(1), body);\r
- }\r
- \r
- /* (non-Javadoc)\r
- * @see com.ibm.icu.text.SymbolTable#lookup(java.lang.String)\r
- */\r
- public char[] lookup(String s) {\r
- itemsLookedUp.add('$' + s);\r
- return (char[])contents.get(s);\r
- }\r
- \r
- /* (non-Javadoc)\r
- * @see com.ibm.icu.text.SymbolTable#lookupMatcher(int)\r
- */\r
- public UnicodeMatcher lookupMatcher(int ch) {\r
- // TODO Auto-generated method stub\r
- return null;\r
- }\r
- \r
- /* (non-Javadoc)\r
- * @see com.ibm.icu.text.SymbolTable#parseReference(java.lang.String, java.text.ParsePosition, int)\r
- */\r
- public String parseReference(String text, ParsePosition pos, int limit) {\r
- int cp;\r
- int start = pos.getIndex();\r
- int i;\r
- for (i = start; i < limit; i += UTF16.getCharCount(cp)) {\r
- cp = UTF16.charAt(text, i);\r
- if (!com.ibm.icu.lang.UCharacter.isUnicodeIdentifierPart(cp)) {\r
- break;\r
- }\r
- }\r
- pos.setIndex(i);\r
- return text.substring(start,i);\r
- }\r
- \r
- }\r
-}\r
-\r
-//#endif\r
+//##header J2SE15
+//#if defined(FOUNDATION10) || defined(J2SE13)
+//#else
+/*
+ *******************************************************************************
+ * Copyright (C) 2002-2009, International Business Machines Corporation and *
+ * others. All Rights Reserved. *
+ *******************************************************************************
+ */
+package com.ibm.icu.dev.test.util;
+
+import java.text.ParsePosition;
+
+import com.ibm.icu.text.*;
+import com.ibm.icu.lang.*;
+
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Set;
+import java.util.Map;
+
+/**
+ * Pull-style tokenizer over a source string.
+ *
+ * <p>Each call to {@link #next()} skips whitespace and <code>#</code> comments
+ * (a comment runs up to the next newline character) and then returns either a
+ * negative token-type constant ({@link #DONE}, {@link #NUMBER}, {@link #STRING},
+ * {@link #UNICODESET}, {@link #UNTERMINATED_QUOTE}, {@link #BACKEDUP_TOO_FAR})
+ * or, for a single syntax character, the code point itself. The payload of the
+ * last token is available through {@link #getNumber()}, {@link #getString()}
+ * and {@link #getUnicodeSet()}. One token of push-back is supported through
+ * {@link #backup()}.
+ */
+public class Tokenizer {
+    /** Text being tokenized; assigned by {@link #setSource(String)}. */
+    protected String source;
+
+    /** Collects the text of the most recent STRING / UNTERMINATED_QUOTE token. */
+    protected StringBuffer buffer = new StringBuffer();
+    /** Value of the most recent NUMBER token. */
+    protected long number;
+    /** Set parsed for the most recent UNICODESET token. */
+    protected UnicodeSet unicodeSet = null;
+    /** Offset in {@link #source} of the next UTF-16 code unit to read. */
+    protected int index;
+    /** True between a call to {@link #backup()} and the following {@link #next()}. */
+    boolean backedup = false;
+    /** Value of {@link #index} recorded by next() right after the first code point of the last token. */
+    protected int lastIndex = -1;
+    /** Index to restore when a backed-up token is delivered again. */
+    protected int nextIndex;
+    /** Result of the most recent next(); BACKEDUP_TOO_FAR until a token has been read. */
+    int lastValue = BACKEDUP_TOO_FAR;
+    /** Variable table handed to the UnicodeSet pattern parser. */
+    TokenSymbolTable symbolTable = new TokenSymbolTable();
+
+    private static final char
+        QUOTE = '\'',
+        BSLASH = '\\';
+    /** The quoting characters; fixSets() keeps them out of the syntax and whitespace sets. */
+    private static final UnicodeSet QUOTERS = new UnicodeSet().add(QUOTE).add(BSLASH);
+    /** Default set of characters skipped between tokens. */
+    private static final UnicodeSet WHITESPACE = new UnicodeSet("[" +
+        "\\u0009-\\u000D\\u0020\\u0085\\u200E\\u200F\\u2028\\u2029" +
+        "]");
+    /** Default set of single-character tokens; excludes the quoters and '$'. */
+    private static final UnicodeSet SYNTAX = new UnicodeSet("[" +
+        "\\u0021-\\u002F\\u003A-\\u0040\\u005B-\\u0060\\u007B-\\u007E" +
+        "\\u00A1-\\u00A7\\u00A9\\u00AB-\\u00AC\\u00AE" +
+        "\\u00B0-\\u00B1\\u00B6\\u00B7\\u00BB\\u00BF\\u00D7\\u00F7" +
+        "\\u2010-\\u2027\\u2030-\\u205E\\u2190-\\u2BFF" +
+        "\\u3001\\u3003\\u3008-\\u3020\\u3030" +
+        "\\uFD3E\\uFD3F\\uFE45\\uFE46" +
+        "]").removeAll(QUOTERS).remove('$');
+    /** Characters that terminate a '#' comment. */
+    private static final UnicodeSet NEWLINE = new UnicodeSet("[\\u000A\\u000D\\u0085\\u2028\\u2029]");
+    /** Default set of characters that end an unquoted string. */
+    private static final UnicodeSet NON_STRING = new UnicodeSet()
+        .addAll(WHITESPACE)
+        .addAll(SYNTAX);
+
+    /** Whitespace set currently in effect. */
+    protected UnicodeSet whiteSpace = WHITESPACE;
+    /** Syntax set currently in effect. */
+    protected UnicodeSet syntax = SYNTAX;
+    /** Union of the syntax and whitespace sets; rebuilt by fixSets(). */
+    private UnicodeSet non_string = NON_STRING;
+
+    /**
+     * Re-establishes the invariants between the configurable sets: the syntax
+     * set contains neither quoters nor whitespace, the whitespace set contains
+     * no quoters, and non_string is the union of the two. A set that needs
+     * changing is cloned first, so the set object that was passed in (or the
+     * shared default) is not modified.
+     */
+    private void fixSets() {
+        if (syntax.containsSome(QUOTERS) || syntax.containsSome(whiteSpace)) {
+            syntax = ((UnicodeSet) syntax.clone()).removeAll(QUOTERS).removeAll(whiteSpace);
+        }
+        if (whiteSpace.containsSome(QUOTERS)) {
+            whiteSpace = ((UnicodeSet) whiteSpace.clone()).removeAll(QUOTERS);
+        }
+        non_string = new UnicodeSet(syntax)
+            .addAll(whiteSpace);
+    }
+
+    /**
+     * Starts tokenizing a new string from its beginning.
+     *
+     * <p>Any push-back state left over from the previous source is discarded,
+     * so the first next() reads from the new text instead of re-delivering a
+     * token that was backed up on the old one.
+     *
+     * @param source the text to tokenize
+     * @return this object, for chaining
+     */
+    public Tokenizer setSource(String source) {
+        this.source = source;
+        this.index = 0;
+        this.backedup = false;
+        this.lastIndex = -1;
+        this.lastValue = BACKEDUP_TOO_FAR;
+        return this; // for chaining
+    }
+
+    /**
+     * Moves the read position.
+     *
+     * @param index offset in the source, in UTF-16 code units
+     * @return this object, for chaining
+     */
+    public Tokenizer setIndex(int index) {
+        this.index = index;
+        return this; // for chaining
+    }
+
+    /** Token types returned by {@link #next()}; all negative so they cannot collide with a code point. */
+    public static final int
+        DONE = -1,
+        NUMBER = -2,
+        STRING = -3,
+        UNICODESET = -4,
+        UNTERMINATED_QUOTE = -5,
+        BACKEDUP_TOO_FAR = -6;
+
+    /** States of the string scanner in readString(). */
+    private static final int
+        AFTER_QUOTE = 3,   // just saw the quote that closes a quoted run
+        IN_STRING = 4,     // unquoted text
+        AFTER_BSLASH = 5,  // just saw a backslash
+        IN_QUOTE = 6;      // inside a quoted run
+
+    /**
+     * Formats a token for tracing.
+     *
+     * <p>The result is wrapped in "@" when the token came from a backup and in
+     * "*" otherwise. next() clears the backup flag, so a tracing wrapper has to
+     * sample the flag before calling next() and pass that sample in here:
+     * <pre>
+     *   boolean backedupBefore = backedup;
+     *   int result = next();
+     *   System.out.println(toString(result, backedupBefore));
+     * </pre>
+     *
+     * @param type value returned by next()
+     * @param backedupBefore whether a backup was pending when next() was called
+     * @return a short description of the token and its payload
+     */
+    public String toString(int type, boolean backedupBefore) {
+        // Use the caller's sample: the field has already been reset by next().
+        String s = backedupBefore ? "@" : "*";
+        switch (type) {
+        case DONE:
+            return s + "Done" + s;
+        case BACKEDUP_TOO_FAR:
+            return s + "Illegal Backup" + s;
+        case UNTERMINATED_QUOTE:
+            return s + "Unterminated Quote=" + getString() + s;
+        case STRING:
+            return s + "s=" + getString() + s;
+        case NUMBER:
+            return s + "n=" + getNumber() + s;
+        case UNICODESET:
+            return s + "n=" + getUnicodeSet() + s;
+        default:
+            // a syntax character: show its name
+            return s + "c=" + usf.getName(type, true) + s;
+        }
+    }
+
+    /** Supplies character names for toString(int, boolean). */
+    private static final BagFormatter usf = new BagFormatter();
+
+    /**
+     * Pushes the last token back so that the following next() returns it again.
+     * Only one token can be pushed back at a time.
+     *
+     * @throws IllegalArgumentException if a backup is already pending
+     */
+    public void backup() {
+        if (backedup) throw new IllegalArgumentException("backup too far");
+        backedup = true;
+        nextIndex = index;
+        index = lastIndex;
+    }
+
+    /**
+     * Reads the next token.
+     *
+     * <p>If a backup is pending, the previous result is returned again and the
+     * read position is restored. Otherwise whitespace and '#' comments are
+     * skipped and the first remaining code point decides the token type, in
+     * this order: '[' starts a UnicodeSet pattern, a member of the syntax set is
+     * returned as itself, a decimal digit starts a number, anything else starts
+     * a string.
+     *
+     * @return DONE at end of input, NUMBER, STRING, UNICODESET or
+     *         UNTERMINATED_QUOTE, the code point of a syntax character, or
+     *         BACKEDUP_TOO_FAR when backup() was called before any token was read
+     */
+    public int next() {
+        if (backedup) {
+            backedup = false;
+            index = nextIndex;
+            return lastValue;
+        }
+        int cp = 0;
+        boolean inComment = false;
+        // clean off any leading whitespace or comments
+        while (true) {
+            if (index >= source.length()) return lastValue = DONE;
+            cp = nextChar();
+            if (inComment) {
+                if (NEWLINE.contains(cp)) inComment = false;
+            } else if (cp == '#') {
+                inComment = true;
+            } else if (!whiteSpace.contains(cp)) {
+                break;
+            }
+        }
+        // record the last index in case we have to backup
+        lastIndex = index;
+
+        if (cp == '[') {
+            // let UnicodeSet parse the whole pattern, starting at the '[' itself
+            ParsePosition pos = new ParsePosition(index - 1);
+            unicodeSet = new UnicodeSet(source, pos, symbolTable);
+            index = pos.getIndex();
+            return lastValue = UNICODESET;
+        }
+        // get syntax character
+        if (syntax.contains(cp)) return lastValue = cp;
+
+        // get number, if there is one
+        if (UCharacter.getType(cp) == Character.DECIMAL_DIGIT_NUMBER) {
+            return readNumber(cp);
+        }
+        return readString(cp);
+    }
+
+    /**
+     * Reads the remaining decimal digits of a number whose first digit has
+     * already been consumed, and stores the value in {@link #number}.
+     * The value is accumulated in a long without overflow checking.
+     *
+     * @param firstDigit the digit code point that was already read
+     * @return NUMBER
+     */
+    private int readNumber(int firstDigit) {
+        number = UCharacter.getNumericValue(firstDigit);
+        while (index < source.length()) {
+            int cp = nextChar();
+            if (UCharacter.getType(cp) != Character.DECIMAL_DIGIT_NUMBER) {
+                index -= UTF16.getCharCount(cp); // BACKUP!
+                break;
+            }
+            number = number * 10 + UCharacter.getNumericValue(cp);
+        }
+        return lastValue = NUMBER;
+    }
+
+    /**
+     * Reads a string token whose first code point has already been consumed,
+     * collecting its text in {@link #buffer}.
+     *
+     * <p>Outside quotes the string ends at the first syntax or whitespace
+     * character, which is left unread. A single quote opens and closes a quoted
+     * run in which every character is literal; two quotes in a row inside a
+     * quoted run stand for one quote character. A backslash outside quotes
+     * makes the next code point literal, with n, r and t mapped to newline,
+     * carriage return and tab.
+     *
+     * @param cp the first code point of the string
+     * @return STRING, or UNTERMINATED_QUOTE if the input ends inside a quoted
+     *         run or right after a backslash
+     */
+    private int readString(int cp) {
+        buffer.setLength(0);
+        int status = IN_STRING;
+        main:
+        while (true) {
+            switch (status) {
+            case AFTER_QUOTE: // check for double ''?
+                if (cp == QUOTE) {
+                    UTF16.append(buffer, QUOTE);
+                    status = IN_QUOTE;
+                    break;
+                }
+                // The quoted run has really ended. Go back to the unquoted
+                // state so that a later quote opens a new run instead of being
+                // mistaken for the second half of a doubled quote.
+                status = IN_STRING;
+                // OTHERWISE FALL THROUGH!!!
+            case IN_STRING:
+                if (cp == QUOTE) {
+                    status = IN_QUOTE;
+                } else if (cp == BSLASH) {
+                    status = AFTER_BSLASH;
+                } else if (non_string.contains(cp)) {
+                    index -= UTF16.getCharCount(cp); // BACKUP!
+                    break main;
+                } else {
+                    UTF16.append(buffer, cp);
+                }
+                break;
+            case IN_QUOTE:
+                if (cp == QUOTE) {
+                    status = AFTER_QUOTE;
+                } else {
+                    UTF16.append(buffer, cp);
+                }
+                break;
+            case AFTER_BSLASH:
+                switch (cp) {
+                case 'n': cp = '\n'; break;
+                case 'r': cp = '\r'; break;
+                case 't': cp = '\t'; break;
+                }
+                UTF16.append(buffer, cp);
+                status = IN_STRING;
+                break;
+            default:
+                throw new IllegalArgumentException("Internal Error");
+            }
+            if (index >= source.length()) break;
+            cp = nextChar();
+        }
+        if (status == IN_QUOTE || status == AFTER_BSLASH) {
+            return lastValue = UNTERMINATED_QUOTE;
+        }
+        return lastValue = STRING;
+    }
+
+    /** @return the text of the most recent STRING or UNTERMINATED_QUOTE token */
+    public String getString() {
+        return buffer.toString();
+    }
+
+    /** @return the source text with "$$$" inserted at the current read position */
+    public String toString() {
+        return source.substring(0, index) + "$$$" + source.substring(index);
+    }
+
+    /** @return the value of the most recent NUMBER token */
+    public long getNumber() {
+        return number;
+    }
+
+    /** @return the set parsed for the most recent UNICODESET token, or null if there has been none */
+    public UnicodeSet getUnicodeSet() {
+        return unicodeSet;
+    }
+
+    /** Reads the code point at the current position and advances past it. */
+    private int nextChar() {
+        int cp = UTF16.charAt(source, index);
+        index += UTF16.getCharCount(cp);
+        return cp;
+    }
+
+    /** @return the current read position, in UTF-16 code units */
+    public int getIndex() {
+        return index;
+    }
+
+    /** @return the text being tokenized */
+    public String getSource() {
+        return source;
+    }
+
+    /** @return the syntax set currently in effect */
+    public UnicodeSet getSyntax() {
+        return syntax;
+    }
+
+    /** @return the whitespace set currently in effect */
+    public UnicodeSet getWhiteSpace() {
+        return whiteSpace;
+    }
+
+    /**
+     * Replaces the set of single-character tokens. Quoters and whitespace
+     * characters are removed from the effective set.
+     *
+     * @param set the new syntax characters
+     */
+    public void setSyntax(UnicodeSet set) {
+        syntax = set;
+        fixSets();
+    }
+
+    /**
+     * Replaces the set of characters skipped between tokens. Quoters are
+     * removed from the effective set, and the syntax set is adjusted so that
+     * it does not overlap the new whitespace.
+     *
+     * @param set the new whitespace characters
+     */
+    public void setWhiteSpace(UnicodeSet set) {
+        whiteSpace = set;
+        fixSets();
+    }
+
+    /**
+     * @return the live set of variable names (each with its leading '$') that
+     *         have been looked up in the symbol table so far
+     */
+    public Set getLookedUpItems() {
+        return symbolTable.itemsLookedUp;
+    }
+
+    /**
+     * Defines a variable for use in UnicodeSet patterns.
+     *
+     * @param var the variable name, including its leading '$'
+     * @param value text containing the variable's body
+     * @param start offset in value where the body starts
+     * @param limit offset in value just past the terminating ';'
+     */
+    public void addSymbol(String var, String value, int start, int limit) {
+        // the limit is after the ';', so remove it
+        int bodyLimit = limit - 1;
+        char[] body = new char[bodyLimit - start];
+        value.getChars(start, bodyLimit, body, 0);
+        symbolTable.add(var, body);
+    }
+
+    /**
+     * Symbol table backing '$' variable references inside UnicodeSet patterns.
+     * It also records every name that is looked up.
+     */
+    public class TokenSymbolTable implements SymbolTable {
+        /** Maps a variable name (without '$') to its body as a char[]. */
+        Map contents = new HashMap();
+        /** Names (with '$') passed to lookup(), whether or not they were defined. */
+        Set itemsLookedUp = new HashSet();
+
+        /**
+         * Defines a variable.
+         *
+         * @param var the variable name, including its leading '$'
+         * @param body the replacement text
+         */
+        public void add(String var, char[] body) {
+            // start from 1 to avoid the $
+            contents.put(var.substring(1), body);
+        }
+
+        /**
+         * Returns the body of a variable and records that it was requested.
+         *
+         * @param s the variable name, without '$'
+         * @return the body, or null if the variable is not defined
+         * @see com.ibm.icu.text.SymbolTable#lookup(java.lang.String)
+         */
+        public char[] lookup(String s) {
+            itemsLookedUp.add('$' + s);
+            return (char[]) contents.get(s);
+        }
+
+        /**
+         * Not supported by this table; always returns null.
+         *
+         * @see com.ibm.icu.text.SymbolTable#lookupMatcher(int)
+         */
+        public UnicodeMatcher lookupMatcher(int ch) {
+            return null;
+        }
+
+        /**
+         * Parses a variable name: the longest run of Unicode identifier-part
+         * code points starting at pos and ending before limit.
+         *
+         * @param text the text being parsed
+         * @param pos on entry, where the name starts; on success, advanced
+         *        past the name
+         * @param limit offset at which to stop scanning
+         * @return the name, or null (with pos unchanged) if there is no
+         *         identifier character at pos
+         * @see com.ibm.icu.text.SymbolTable#parseReference(java.lang.String, java.text.ParsePosition, int)
+         */
+        public String parseReference(String text, ParsePosition pos, int limit) {
+            int start = pos.getIndex();
+            int i = start;
+            while (i < limit) {
+                int cp = UTF16.charAt(text, i);
+                if (!UCharacter.isUnicodeIdentifierPart(cp)) {
+                    break;
+                }
+                i += UTF16.getCharCount(cp);
+            }
+            if (i == start) {
+                // no name here: report that instead of an empty reference
+                return null;
+            }
+            pos.setIndex(i);
+            return text.substring(start, i);
+        }
+
+    }
+}
+
+//#endif