go

[Dictionary.git] / jars / icu4j-4_2_1-src / src / com / ibm / icu / text / TransliteratorParser.java
diff --git a/jars/icu4j-4_2_1-src/src/com/ibm/icu/text/TransliteratorParser.java b/jars/icu4j-4_2_1-src/src/com/ibm/icu/text/TransliteratorParser.java

old mode 100755 (executable)

new mode 100644 (file)

index 9ca3b26..3fc6402
--- a/jars/icu4j-4_2_1-src/src/com/ibm/icu/text/TransliteratorParser.java
+++ b/jars/icu4j-4_2_1-src/src/com/ibm/icu/text/TransliteratorParser.java
@@ -1,1565 +1,1565 @@
-//##header\r
-/*\r
-**********************************************************************\r
-*   Copyright (c) 2001-2009, International Business Machines\r
-*   Corporation and others.  All Rights Reserved.\r
-**********************************************************************\r
-*/\r
-package com.ibm.icu.text;\r
-\r
-import com.ibm.icu.impl.IllegalIcuArgumentException;\r
-import com.ibm.icu.impl.Utility;\r
-\r
-import java.util.ArrayList;\r
-import java.util.List;\r
-import java.util.Vector;\r
-import java.util.Hashtable;\r
-import java.text.ParsePosition;\r
-import com.ibm.icu.lang.*;\r
-import com.ibm.icu.impl.UCharacterProperty;\r
-\r
-class TransliteratorParser {\r
-\r
-    //----------------------------------------------------------------------\r
-    // Data members\r
-    //----------------------------------------------------------------------\r
-\r
-    /**\r
-     * PUBLIC data member.\r
-     * A Vector of RuleBasedTransliterator.Data objects, one for each discrete group\r
-     * of rules in the rule set\r
-     */\r
-    public Vector dataVector;\r
-\r
-    /**\r
-     * PUBLIC data member.\r
-     * A Vector of Strings containing all of the ID blocks in the rule set\r
-     */\r
-    public Vector idBlockVector;\r
-\r
-    /**\r
-     * The current data object for which we are parsing rules\r
-     */\r
-    private RuleBasedTransliterator.Data curData;\r
-\r
-    /**\r
-     * PUBLIC data member containing the parsed compound filter, if any.\r
-     */\r
-    public UnicodeSet compoundFilter;\r
-\r
-\r
-    private int direction;\r
-\r
-    /**\r
-     * Temporary symbol table used during parsing.\r
-     */\r
-    private ParseData parseData;\r
-\r
-    /**\r
-     * Temporary vector of set variables.  When parsing is complete, this\r
-     * is copied into the array data.variables.  As with data.variables,\r
-     * element 0 corresponds to character data.variablesBase.\r
-     */\r
-    private Vector variablesVector;\r
-\r
-    /**\r
-     * Temporary table of variable names.  When parsing is complete, this is\r
-     * copied into data.variableNames.\r
-     */\r
-    private Hashtable variableNames;\r
-\r
-    /**\r
-     * String of standins for segments.  Used during the parsing of a single\r
-     * rule.  segmentStandins.charAt(0) is the standin for "$1" and corresponds\r
-     * to StringMatcher object segmentObjects.elementAt(0), etc.\r
-     */\r
-    private StringBuffer segmentStandins;\r
-\r
-    /**\r
-     * Vector of StringMatcher objects for segments.  Used during the\r
-     * parsing of a single rule.  \r
-     * segmentStandins.charAt(0) is the standin for "$1" and corresponds\r
-     * to StringMatcher object segmentObjects.elementAt(0), etc.\r
-     */\r
-    private Vector segmentObjects;\r
-\r
-    /**\r
-     * The next available stand-in for variables.  This starts at some point in\r
-     * the private use area (discovered dynamically) and increments up toward\r
-     * <code>variableLimit</code>.  At any point during parsing, available\r
-     * variables are <code>variableNext..variableLimit-1</code>.\r
-     */\r
-    private char variableNext;\r
-\r
-    /**\r
-     * The last available stand-in for variables.  This is discovered\r
-     * dynamically.  At any point during parsing, available variables are\r
-     * <code>variableNext..variableLimit-1</code>.  During variable definition\r
-     * we use the special value variableLimit-1 as a placeholder.\r
-     */\r
-    private char variableLimit;\r
-\r
-    /**\r
-     * When we encounter an undefined variable, we do not immediately signal\r
-     * an error, in case we are defining this variable, e.g., "$a = [a-z];".\r
-     * Instead, we save the name of the undefined variable, and substitute\r
-     * in the placeholder char variableLimit - 1, and decrement\r
-     * variableLimit.\r
-     */\r
-    private String undefinedVariableName;\r
-\r
-    /**\r
-     * The stand-in character for the 'dot' set, represented by '.' in\r
-     * patterns.  This is allocated the first time it is needed, and\r
-     * reused thereafter.\r
-     */\r
-    private int dotStandIn = -1;\r
-\r
-    //----------------------------------------------------------------------\r
-    // Constants\r
-    //----------------------------------------------------------------------\r
-\r
-    // Indicator for ID blocks\r
-    private static final String ID_TOKEN = "::";\r
-    private static final int ID_TOKEN_LEN = 2;\r
-\r
-/*\r
-(reserved for future expansion)\r
-    // markers for beginning and end of rule groups\r
-    private static final String BEGIN_TOKEN = "BEGIN";\r
-    private static final String END_TOKEN = "END";\r
-*/\r
-\r
-    // Operators\r
-    private static final char VARIABLE_DEF_OP   = '=';\r
-    private static final char FORWARD_RULE_OP   = '>';\r
-    private static final char REVERSE_RULE_OP   = '<';\r
-    private static final char FWDREV_RULE_OP    = '~'; // internal rep of <> op\r
-\r
-    private static final String OPERATORS = "=><\u2190\u2192\u2194";\r
-    private static final String HALF_ENDERS = "=><\u2190\u2192\u2194;";\r
-\r
-    // Other special characters\r
-    private static final char QUOTE               = '\'';\r
-    private static final char ESCAPE              = '\\';\r
-    private static final char END_OF_RULE         = ';';\r
-    private static final char RULE_COMMENT_CHAR   = '#';\r
-\r
-    private static final char CONTEXT_ANTE        = '{'; // ante{key\r
-    private static final char CONTEXT_POST        = '}'; // key}post\r
-    private static final char CURSOR_POS          = '|';\r
-    private static final char CURSOR_OFFSET       = '@';\r
-    private static final char ANCHOR_START        = '^';\r
-\r
-    private static final char KLEENE_STAR         = '*';\r
-    private static final char ONE_OR_MORE         = '+';\r
-    private static final char ZERO_OR_ONE         = '?';\r
-\r
-    private static final char DOT                 = '.';\r
-    private static final String DOT_SET           = "[^[:Zp:][:Zl:]\\r\\n$]";\r
-\r
-    // By definition, the ANCHOR_END special character is a\r
-    // trailing SymbolTable.SYMBOL_REF character.\r
-    // private static final char ANCHOR_END       = '$';\r
-\r
-    // Segments of the input string are delimited by "(" and ")".  In the\r
-    // output string these segments are referenced as "$1", "$2", etc.\r
-    private static final char SEGMENT_OPEN        = '(';\r
-    private static final char SEGMENT_CLOSE       = ')';\r
-\r
-    // A function is denoted &Source-Target/Variant(text)\r
-    private static final char FUNCTION            = '&';\r
-\r
-    // Aliases for some of the syntax characters. These are provided so\r
-    // transliteration rules can be expressed in XML without clashing with\r
-    // XML syntax characters '<', '>', and '&'.\r
-    private static final char ALT_REVERSE_RULE_OP = '\u2190'; // Left Arrow\r
-    private static final char ALT_FORWARD_RULE_OP = '\u2192'; // Right Arrow\r
-    private static final char ALT_FWDREV_RULE_OP  = '\u2194'; // Left Right Arrow\r
-    private static final char ALT_FUNCTION        = '\u2206'; // Increment (~Greek Capital Delta)\r
-    \r
-    // Special characters disallowed at the top level\r
-    private static UnicodeSet ILLEGAL_TOP = new UnicodeSet("[\\)]");\r
-\r
-    // Special characters disallowed within a segment\r
-    private static UnicodeSet ILLEGAL_SEG = new UnicodeSet("[\\{\\}\\|\\@]");\r
-\r
-    // Special characters disallowed within a function argument\r
-    private static UnicodeSet ILLEGAL_FUNC = new UnicodeSet("[\\^\\(\\.\\*\\+\\?\\{\\}\\|\\@]");\r
-\r
-    //----------------------------------------------------------------------\r
-    // class ParseData\r
-    //----------------------------------------------------------------------\r
-\r
-    /**\r
-     * This class implements the SymbolTable interface.  It is used\r
-     * during parsing to give UnicodeSet access to variables that\r
-     * have been defined so far.  Note that it uses variablesVector,\r
-     * _not_ data.variables.\r
-     */\r
-    private class ParseData implements SymbolTable {\r
-\r
-        /**\r
-         * Implement SymbolTable API.\r
-         */\r
-        public char[] lookup(String name) {\r
-            return (char[]) variableNames.get(name);\r
-        }\r
-\r
-        /**\r
-         * Implement SymbolTable API.\r
-         */\r
-        public UnicodeMatcher lookupMatcher(int ch) {\r
-            // Note that we cannot use data.lookup() because the\r
-            // set array has not been constructed yet.\r
-            int i = ch - curData.variablesBase;\r
-            if (i >= 0 && i < variablesVector.size()) {\r
-                return (UnicodeMatcher) variablesVector.elementAt(i);\r
-            }\r
-            return null;\r
-        }\r
-\r
-        /**\r
-         * Implement SymbolTable API.  Parse out a symbol reference\r
-         * name.\r
-         */\r
-        public String parseReference(String text, ParsePosition pos, int limit) {\r
-            int start = pos.getIndex();\r
-            int i = start;\r
-            while (i < limit) {\r
-                char c = text.charAt(i);\r
-                if ((i==start && !UCharacter.isUnicodeIdentifierStart(c)) ||\r
-                    !UCharacter.isUnicodeIdentifierPart(c)) {\r
-                    break;\r
-                }\r
-                ++i;\r
-            }\r
-            if (i == start) { // No valid name chars\r
-                return null;\r
-            }\r
-            pos.setIndex(i);\r
-            return text.substring(start, i);\r
-        }\r
-\r
-        /**\r
-         * Return true if the given character is a matcher standin or a plain\r
-         * character (non standin).\r
-         */\r
-        public boolean isMatcher(int ch) {\r
-            // Note that we cannot use data.lookup() because the\r
-            // set array has not been constructed yet.\r
-            int i = ch - curData.variablesBase;\r
-            if (i >= 0 && i < variablesVector.size()) {\r
-                return variablesVector.elementAt(i) instanceof UnicodeMatcher;\r
-            }\r
-            return true;\r
-        }\r
-\r
-        /**\r
-         * Return true if the given character is a replacer standin or a plain\r
-         * character (non standin).\r
-         */\r
-        public boolean isReplacer(int ch) {\r
-            // Note that we cannot use data.lookup() because the\r
-            // set array has not been constructed yet.\r
-            int i = ch - curData.variablesBase;\r
-            if (i >= 0 && i < variablesVector.size()) {\r
-                return variablesVector.elementAt(i) instanceof UnicodeReplacer;\r
-            }\r
-            return true;\r
-        }\r
-    }\r
-\r
-    //----------------------------------------------------------------------\r
-    // classes RuleBody, RuleArray, and RuleReader\r
-    //----------------------------------------------------------------------\r
-\r
-    /**\r
-     * A private abstract class representing the interface to rule\r
-     * source code that is broken up into lines.  Handles the\r
-     * folding of lines terminated by a backslash.  This folding\r
-     * is limited; it does not account for comments, quotes, or\r
-     * escapes, so its use to be limited.\r
-     */\r
-    private static abstract class RuleBody {\r
-\r
-        /**\r
-         * Retrieve the next line of the source, or return null if\r
-         * none.  Folds lines terminated by a backslash into the\r
-         * next line, without regard for comments, quotes, or\r
-         * escapes.\r
-         */\r
-        String nextLine() {\r
-            String s = handleNextLine();\r
-            if (s != null &&\r
-                s.length() > 0 &&\r
-                s.charAt(s.length() - 1) == '\\') {\r
-\r
-                StringBuffer b = new StringBuffer(s);\r
-                do {\r
-                    b.deleteCharAt(b.length()-1);\r
-                    s = handleNextLine();\r
-                    if (s == null) {\r
-                        break;\r
-                    }\r
-                    b.append(s);\r
-                } while (s.length() > 0 &&\r
-                         s.charAt(s.length() - 1) == '\\');\r
-\r
-                s = b.toString();\r
-            }\r
-            return s;\r
-        }\r
-\r
-        /**\r
-         * Reset to the first line of the source.\r
-         */\r
-        abstract void reset();\r
-\r
-        /**\r
-         * Subclass method to return the next line of the source.\r
-         */\r
-        abstract String handleNextLine();\r
-    }\r
-\r
-    /**\r
-     * RuleBody subclass for a String[] array.\r
-     */\r
-    private static class RuleArray extends RuleBody {\r
-        String[] array;\r
-        int i;\r
-        public RuleArray(String[] array) { this.array = array; i = 0; }\r
-        public String handleNextLine() {\r
-            return (i < array.length) ? array[i++] : null;\r
-        }\r
-        public void reset() {\r
-            i = 0;\r
-        }\r
-    }\r
-\r
-    /*\r
-     * RuleBody subclass for a ResourceReader.\r
-     */\r
-/*    private static class RuleReader extends RuleBody {\r
-        ResourceReader reader;\r
-        public RuleReader(ResourceReader reader) { this.reader = reader; }\r
-        public String handleNextLine() {\r
-            try {\r
-                return reader.readLine();\r
-            } catch (java.io.IOException e) {}\r
-            return null;\r
-        }\r
-        public void reset() {\r
-            reader.reset();\r
-        }\r
-    }*/\r
-\r
-    //----------------------------------------------------------------------\r
-    // class RuleHalf\r
-    //----------------------------------------------------------------------\r
-\r
-    /**\r
-     * A class representing one side of a rule.  This class knows how to\r
-     * parse half of a rule.  It is tightly coupled to the method\r
-     * TransliteratorParser.parseRule().\r
-     */\r
-    private static class RuleHalf {\r
-\r
-        public String text;\r
-\r
-        public int cursor = -1; // position of cursor in text\r
-        public int ante = -1;   // position of ante context marker '{' in text\r
-        public int post = -1;   // position of post context marker '}' in text\r
-\r
-        // Record the offset to the cursor either to the left or to the\r
-        // right of the key.  This is indicated by characters on the output\r
-        // side that allow the cursor to be positioned arbitrarily within\r
-        // the matching text.  For example, abc{def} > | @@@ xyz; changes\r
-        // def to xyz and moves the cursor to before abc.  Offset characters\r
-        // must be at the start or end, and they cannot move the cursor past\r
-        // the ante- or postcontext text.  Placeholders are only valid in\r
-        // output text.  The length of the ante and post context is\r
-        // determined at runtime, because of supplementals and quantifiers.\r
-        public int cursorOffset = 0; // only nonzero on output side\r
-\r
-        // Position of first CURSOR_OFFSET on _right_.  This will be -1\r
-        // for |@, -2 for |@@, etc., and 1 for @|, 2 for @@|, etc.\r
-        private int cursorOffsetPos = 0;\r
-\r
-        public boolean anchorStart = false;\r
-        public boolean anchorEnd   = false;\r
-\r
-        /**\r
-         * The segment number from 1..n of the next '(' we see\r
-         * during parsing; 1-based.\r
-         */\r
-        private int nextSegmentNumber = 1;\r
-\r
-        /**\r
-         * Parse one side of a rule, stopping at either the limit,\r
-         * the END_OF_RULE character, or an operator.\r
-         * @return the index after the terminating character, or\r
-         * if limit was reached, limit\r
-         */\r
-        public int parse(String rule, int pos, int limit,\r
-                         TransliteratorParser parser) {\r
-            int start = pos;\r
-            StringBuffer buf = new StringBuffer();\r
-            pos = parseSection(rule, pos, limit, parser, buf, ILLEGAL_TOP, false);\r
-            text = buf.toString();\r
-\r
-            if (cursorOffset > 0 && cursor != cursorOffsetPos) {\r
-                syntaxError("Misplaced " + CURSOR_POS, rule, start);\r
-            }\r
-\r
-            return pos;\r
-        }\r
-\r
-        /**\r
-         * Parse a section of one side of a rule, stopping at either\r
-         * the limit, the END_OF_RULE character, an operator, or a\r
-         * segment close character.  This method parses both a\r
-         * top-level rule half and a segment within such a rule half.\r
-         * It calls itself recursively to parse segments and nested\r
-         * segments.\r
-         * @param buf buffer into which to accumulate the rule pattern\r
-         * characters, either literal characters from the rule or\r
-         * standins for UnicodeMatcher objects including segments.\r
-         * @param illegal the set of special characters that is illegal during\r
-         * this parse.\r
-         * @param isSegment if true, then we've already seen a '(' and\r
-         * pos on entry points right after it.  Accumulate everything\r
-         * up to the closing ')', put it in a segment matcher object,\r
-         * generate a standin for it, and add the standin to buf.  As\r
-         * a side effect, update the segments vector with a reference\r
-         * to the segment matcher.  This works recursively for nested\r
-         * segments.  If isSegment is false, just accumulate\r
-         * characters into buf.\r
-         * @return the index after the terminating character, or\r
-         * if limit was reached, limit\r
-         */\r
-        private int parseSection(String rule, int pos, int limit,\r
-                                 TransliteratorParser parser,\r
-                                 StringBuffer buf,\r
-                                 UnicodeSet illegal,\r
-                                 boolean isSegment) {\r
-            int start = pos;\r
-            ParsePosition pp = null;\r
-            int quoteStart = -1; // Most recent 'single quoted string'\r
-            int quoteLimit = -1;\r
-            int varStart = -1; // Most recent $variableReference\r
-            int varLimit = -1;\r
-            int[] iref = new int[1];\r
-            int bufStart = buf.length();\r
-\r
-        main:\r
-            while (pos < limit) {\r
-                // Since all syntax characters are in the BMP, fetching\r
-                // 16-bit code units suffices here.\r
-                char c = rule.charAt(pos++);\r
-                if (UCharacterProperty.isRuleWhiteSpace(c)) {\r
-                    continue;\r
-                }\r
-                // HALF_ENDERS is all chars that end a rule half: "<>=;"\r
-                if (HALF_ENDERS.indexOf(c) >= 0) {\r
-                    if (isSegment) {\r
-                        syntaxError("Unclosed segment", rule, start);\r
-                    }\r
-                    break main;\r
-                }\r
-                if (anchorEnd) {\r
-                    // Text after a presumed end anchor is a syntax err\r
-                    syntaxError("Malformed variable reference", rule, start);\r
-                }\r
-                if (UnicodeSet.resemblesPattern(rule, pos-1)) {\r
-                    if (pp == null) {\r
-                        pp = new ParsePosition(0);\r
-                    }\r
-                    pp.setIndex(pos-1); // Backup to opening '['\r
-                    buf.append(parser.parseSet(rule, pp));\r
-                    pos = pp.getIndex();                    \r
-                    continue;\r
-                }\r
-                // Handle escapes\r
-                if (c == ESCAPE) {\r
-                    if (pos == limit) {\r
-                        syntaxError("Trailing backslash", rule, start);\r
-                    }\r
-                    iref[0] = pos;\r
-                    int escaped = Utility.unescapeAt(rule, iref);\r
-                    pos = iref[0];\r
-                    if (escaped == -1) {\r
-                        syntaxError("Malformed escape", rule, start);\r
-                    }\r
-                    parser.checkVariableRange(escaped, rule, start);\r
-                    UTF16.append(buf, escaped);\r
-                    continue;\r
-                }\r
-                // Handle quoted matter\r
-                if (c == QUOTE) {\r
-                    int iq = rule.indexOf(QUOTE, pos);\r
-                    if (iq == pos) {\r
-                        buf.append(c); // Parse [''] outside quotes as [']\r
-                        ++pos;\r
-                    } else {\r
-                        /* This loop picks up a run of quoted text of the\r
-                         * form 'aaaa' each time through.  If this run\r
-                         * hasn't really ended ('aaaa''bbbb') then it keeps\r
-                         * looping, each time adding on a new run.  When it\r
-                         * reaches the final quote it breaks.\r
-                         */\r
-                        quoteStart = buf.length();\r
-                        for (;;) {\r
-                            if (iq < 0) {\r
-                                syntaxError("Unterminated quote", rule, start);\r
-                            }\r
-                            buf.append(rule.substring(pos, iq));\r
-                            pos = iq+1;\r
-                            if (pos < limit && rule.charAt(pos) == QUOTE) {\r
-                            // Parse [''] inside quotes as [']\r
-                                iq = rule.indexOf(QUOTE, pos+1);\r
-                            // Continue looping\r
-                            } else {\r
-                                break;\r
-                            }\r
-                        }\r
-                        quoteLimit = buf.length();\r
-                        \r
-                        for (iq=quoteStart; iq<quoteLimit; ++iq) {\r
-                            parser.checkVariableRange(buf.charAt(iq), rule, start);\r
-                        }\r
-                    }\r
-                    continue;\r
-                }\r
-\r
-                parser.checkVariableRange(c, rule, start);\r
-\r
-                if (illegal.contains(c)) {\r
-                    syntaxError("Illegal character '" + c + '\'', rule, start);\r
-                }\r
-\r
-                switch (c) {\r
-                    \r
-                //------------------------------------------------------\r
-                // Elements allowed within and out of segments\r
-                //------------------------------------------------------\r
-                case ANCHOR_START:\r
-                    if (buf.length() == 0 && !anchorStart) {\r
-                        anchorStart = true;\r
-                    } else {\r
-                        syntaxError("Misplaced anchor start",\r
-                                    rule, start);\r
-                    }\r
-                    break;\r
-                case SEGMENT_OPEN:\r
-                    {\r
-                        // bufSegStart is the offset in buf to the first\r
-                        // character of the segment we are parsing.\r
-                        int bufSegStart = buf.length();\r
-\r
-                        // Record segment number now, since nextSegmentNumber\r
-                        // will be incremented during the call to parseSection\r
-                        // if there are nested segments.\r
-                        int segmentNumber = nextSegmentNumber++; // 1-based\r
-\r
-                        // Parse the segment\r
-                        pos = parseSection(rule, pos, limit, parser, buf, ILLEGAL_SEG, true);\r
-\r
-                        // After parsing a segment, the relevant characters are\r
-                        // in buf, starting at offset bufSegStart.  Extract them\r
-                        // into a string matcher, and replace them with a\r
-                        // standin for that matcher.\r
-                        StringMatcher m =\r
-                            new StringMatcher(buf.substring(bufSegStart),\r
-                                              segmentNumber, parser.curData);\r
-\r
-                        // Record and associate object and segment number\r
-                        parser.setSegmentObject(segmentNumber, m);\r
-                        buf.setLength(bufSegStart);\r
-                        buf.append(parser.getSegmentStandin(segmentNumber));\r
-                    }\r
-                    break;\r
-                case FUNCTION:\r
-                case ALT_FUNCTION:\r
-                    {\r
-                        iref[0] = pos;\r
-                        TransliteratorIDParser.SingleID single = TransliteratorIDParser.parseFilterID(rule, iref);\r
-                        // The next character MUST be a segment open\r
-                        if (single == null ||\r
-                            !Utility.parseChar(rule, iref, SEGMENT_OPEN)) {\r
-                            syntaxError("Invalid function", rule, start);\r
-                        }\r
-\r
-                        Transliterator t = single.getInstance();\r
-                        if (t == null) {\r
-                            syntaxError("Invalid function ID", rule, start);\r
-                        }\r
-\r
-                        // bufSegStart is the offset in buf to the first\r
-                        // character of the segment we are parsing.\r
-                        int bufSegStart = buf.length();\r
-\r
-                        // Parse the segment\r
-                        pos = parseSection(rule, iref[0], limit, parser, buf, ILLEGAL_FUNC, true);\r
-\r
-                        // After parsing a segment, the relevant characters are\r
-                        // in buf, starting at offset bufSegStart.\r
-                        FunctionReplacer r =\r
-                            new FunctionReplacer(t,\r
-                                new StringReplacer(buf.substring(bufSegStart), parser.curData));\r
-\r
-                        // Replace the buffer contents with a stand-in\r
-                        buf.setLength(bufSegStart);\r
-                        buf.append(parser.generateStandInFor(r));\r
-                    }\r
-                    break;\r
-                case SymbolTable.SYMBOL_REF:\r
-                    // Handle variable references and segment references "$1" .. "$9"\r
-                    {\r
-                        // A variable reference must be followed immediately\r
-                        // by a Unicode identifier start and zero or more\r
-                        // Unicode identifier part characters, or by a digit\r
-                        // 1..9 if it is a segment reference.\r
-                        if (pos == limit) {\r
-                            // A variable ref character at the end acts as\r
-                            // an anchor to the context limit, as in perl.\r
-                            anchorEnd = true;\r
-                            break;\r
-                        }\r
-                        // Parse "$1" "$2" .. "$9" .. (no upper limit)\r
-                        c = rule.charAt(pos);\r
-                        int r = UCharacter.digit(c, 10);\r
-                        if (r >= 1 && r <= 9) {\r
-                            iref[0] = pos;\r
-                            r = Utility.parseNumber(rule, iref, 10);\r
-                            if (r < 0) {\r
-                                syntaxError("Undefined segment reference",\r
-                                            rule, start);\r
-                            }\r
-                            pos = iref[0];\r
-                            buf.append(parser.getSegmentStandin(r));\r
-                        } else {\r
-                            if (pp == null) { // Lazy create\r
-                                pp = new ParsePosition(0);\r
-                            }\r
-                            pp.setIndex(pos);\r
-                            String name = parser.parseData.\r
-                                parseReference(rule, pp, limit);\r
-                            if (name == null) {\r
-                                // This means the '$' was not followed by a\r
-                                // valid name.  Try to interpret it as an\r
-                                // end anchor then.  If this also doesn't work\r
-                                // (if we see a following character) then signal\r
-                                // an error.\r
-                                anchorEnd = true;\r
-                                break;\r
-                            }\r
-                            pos = pp.getIndex();\r
-                            // If this is a variable definition statement,\r
-                            // then the LHS variable will be undefined.  In\r
-                            // that case appendVariableDef() will append the\r
-                            // special placeholder char variableLimit-1.\r
-                            varStart = buf.length();\r
-                            parser.appendVariableDef(name, buf);\r
-                            varLimit = buf.length();\r
-                        }\r
-                    }\r
-                    break;\r
-                case DOT:\r
-                    buf.append(parser.getDotStandIn());\r
-                    break;\r
-                case KLEENE_STAR:\r
-                case ONE_OR_MORE:\r
-                case ZERO_OR_ONE:\r
-                    // Quantifiers.  We handle single characters, quoted strings,\r
-                    // variable references, and segments.\r
-                    //  a+      matches  aaa\r
-                    //  'foo'+  matches  foofoofoo\r
-                    //  $v+     matches  xyxyxy if $v == xy\r
-                    //  (seg)+  matches  segsegseg\r
-                    {\r
-                        if (isSegment && buf.length() == bufStart) {\r
-                            // The */+ immediately follows '('\r
-                            syntaxError("Misplaced quantifier", rule, start);\r
-                            break;\r
-                        } \r
- \r
-                        int qstart, qlimit;\r
-                        // The */+ follows an isolated character or quote\r
-                        // or variable reference\r
-                        if (buf.length() == quoteLimit) {\r
-                            // The */+ follows a 'quoted string'\r
-                            qstart = quoteStart;\r
-                            qlimit = quoteLimit;\r
-                        } else if (buf.length() == varLimit) {\r
-                            // The */+ follows a $variableReference\r
-                            qstart = varStart;\r
-                            qlimit = varLimit;\r
-                        } else {\r
-                            // The */+ follows a single character, possibly\r
-                            // a segment standin\r
-                            qstart = buf.length() - 1;\r
-                            qlimit = qstart + 1;\r
-                        }\r
-\r
-                        UnicodeMatcher m;\r
-                        try {\r
-                            m = new StringMatcher(buf.toString(), qstart, qlimit,\r
-                                              0, parser.curData);\r
-                        } catch (RuntimeException e) {\r
-                            final String precontext = pos < 50 ? rule.substring(0, pos) : "..." + rule.substring(pos - 50, pos);\r
-                            final String postContext = limit-pos <= 50 ? rule.substring(pos, limit) : rule.substring(pos, pos+50) + "...";\r
-                            throw (RuntimeException)\r
-                                new IllegalIcuArgumentException("Failure in rule: " + precontext + "$$$"\r
-                                        + postContext)\r
-//#if defined(FOUNDATION10) || defined(J2SE13)\r
-//#else\r
-                                .initCause(e)\r
-//#endif\r
-                                ;\r
-                        }\r
-                        int min = 0;\r
-                        int max = Quantifier.MAX;\r
-                        switch (c) {\r
-                        case ONE_OR_MORE:\r
-                            min = 1;\r
-                            break;\r
-                        case ZERO_OR_ONE:\r
-                            min = 0;\r
-                            max = 1;\r
-                            break;\r
-                            // case KLEENE_STAR:\r
-                            //    do nothing -- min, max already set\r
-                        }\r
-                        m = new Quantifier(m, min, max);\r
-                        buf.setLength(qstart);\r
-                        buf.append(parser.generateStandInFor(m));\r
-                    }\r
-                    break;\r
-\r
-                //------------------------------------------------------\r
-                // Elements allowed ONLY WITHIN segments\r
-                //------------------------------------------------------\r
-                case SEGMENT_CLOSE:\r
-                    // assert(isSegment);\r
-                    // We're done parsing a segment.\r
-                    break main;\r
-\r
-                //------------------------------------------------------\r
-                // Elements allowed ONLY OUTSIDE segments\r
-                //------------------------------------------------------\r
-                case CONTEXT_ANTE:\r
-                    if (ante >= 0) {\r
-                        syntaxError("Multiple ante contexts", rule, start);\r
-                    }\r
-                    ante = buf.length();\r
-                    break;\r
-                case CONTEXT_POST:\r
-                    if (post >= 0) {\r
-                        syntaxError("Multiple post contexts", rule, start);\r
-                    }\r
-                    post = buf.length();\r
-                    break;\r
-                case CURSOR_POS:\r
-                    if (cursor >= 0) {\r
-                        syntaxError("Multiple cursors", rule, start);\r
-                    }\r
-                    cursor = buf.length();\r
-                    break;\r
-                case CURSOR_OFFSET:\r
-                    if (cursorOffset < 0) {\r
-                        if (buf.length() > 0) {\r
-                            syntaxError("Misplaced " + c, rule, start);\r
-                        }\r
-                        --cursorOffset;\r
-                    } else if (cursorOffset > 0) {\r
-                        if (buf.length() != cursorOffsetPos || cursor >= 0) {\r
-                            syntaxError("Misplaced " + c, rule, start);\r
-                        }\r
-                        ++cursorOffset;\r
-                    } else {\r
-                        if (cursor == 0 && buf.length() == 0) {\r
-                            cursorOffset = -1;\r
-                        } else if (cursor < 0) {\r
-                            cursorOffsetPos = buf.length();\r
-                            cursorOffset = 1;\r
-                        } else {\r
-                            syntaxError("Misplaced " + c, rule, start);\r
-                        }\r
-                    }\r
-                    break;\r
-\r
-                //------------------------------------------------------\r
-                // Non-special characters\r
-                //------------------------------------------------------\r
-                default:\r
-                    // Disallow unquoted characters other than [0-9A-Za-z]\r
-                    // in the printable ASCII range.  These characters are\r
-                    // reserved for possible future use.\r
-                    if (c >= 0x0021 && c <= 0x007E &&\r
-                        !((c >= '0' && c <= '9') ||\r
-                          (c >= 'A' && c <= 'Z') ||\r
-                          (c >= 'a' && c <= 'z'))) {\r
-                        syntaxError("Unquoted " + c, rule, start);\r
-                    }\r
-                    buf.append(c);\r
-                    break;\r
-                }\r
-            }\r
-            return pos;\r
-        }\r
-\r
-        /**\r
-         * Strip the ante and post context from this half, keeping only the\r
-         * text that lies between them, then clear the context offsets and\r
-         * the anchor flags.  A negative (unset) ante is treated as the start\r
-         * of the text and a negative post as its end.\r
-         */\r
-        void removeContext() {\r
-            int keyStart = 0;\r
-            if (ante >= 0) {\r
-                keyStart = ante;\r
-            }\r
-            int keyLimit = text.length();\r
-            if (post >= 0) {\r
-                keyLimit = post;\r
-            }\r
-            text = text.substring(keyStart, keyLimit);\r
-            ante = -1;\r
-            post = -1;\r
-            anchorStart = false;\r
-            anchorEnd = false;\r
-        }\r
-\r
-        /**\r
-         * Check that every code point of this half's text is acceptable on\r
-         * the output side of a rule, as judged by parseData.isReplacer().\r
-         * @param parser the parser whose parseData classifies each code point\r
-         * @return false as soon as one code point is rejected; true otherwise,\r
-         * including when the text is empty\r
-         */\r
-        public boolean isValidOutput(TransliteratorParser parser) {\r
-            int offset = 0;\r
-            while (offset < text.length()) {\r
-                final int codePoint = UTF16.charAt(text, offset);\r
-                if (!parser.parseData.isReplacer(codePoint)) {\r
-                    return false;\r
-                }\r
-                // Step over one or two code units, as the code point requires.\r
-                offset += UTF16.getCharCount(codePoint);\r
-            }\r
-            return true;\r
-        }\r
-\r
-        /**\r
-         * Check that every code point of this half's text is acceptable on\r
-         * the input side of a rule, as judged by parseData.isMatcher().\r
-         * @param parser the parser whose parseData classifies each code point\r
-         * @return false as soon as one code point is rejected; true otherwise,\r
-         * including when the text is empty\r
-         */\r
-        public boolean isValidInput(TransliteratorParser parser) {\r
-            int offset = 0;\r
-            while (offset < text.length()) {\r
-                final int codePoint = UTF16.charAt(text, offset);\r
-                final boolean accepted = parser.parseData.isMatcher(codePoint);\r
-                if (!accepted) {\r
-                    return false;\r
-                }\r
-                // Step over one or two code units, as the code point requires.\r
-                offset += UTF16.getCharCount(codePoint);\r
-            }\r
-            return true;\r
-        }\r
-    }\r
-\r
-    //----------------------------------------------------------------------\r
-    // PUBLIC methods\r
-    //----------------------------------------------------------------------\r
-\r
-    /**\r
-     * Create a parser.  No state is set up here; parseRules() initializes\r
-     * every member it uses each time it runs.\r
-     */\r
-    public TransliteratorParser() {\r
-        super();\r
-    }\r
-\r
-    /**\r
-     * Parse a rule set supplied as a single string.  The string is wrapped\r
-     * in a one-element RuleArray and handed to parseRules(); when that\r
-     * returns, the results are available in the public data members.\r
-     * @param rules the text of the rules\r
-     * @param dir the direction to parse for, passed through to parseRules()\r
-     */\r
-    public void parse(String rules, int dir) {\r
-        final String[] lines = new String[] { rules };\r
-        final RuleArray body = new RuleArray(lines);\r
-        parseRules(body, dir);\r
-    }\r
-   \r
-    /*\r
-     * Parse a set of rules.  After the parse completes, examine the public\r
-     * data members for results.\r
-     */\r
-/*    public void parse(ResourceReader rules, int direction) {\r
-        parseRules(new RuleReader(rules), direction);\r
-    }*/\r
-\r
-    //----------------------------------------------------------------------\r
-    // PRIVATE methods\r
-    //----------------------------------------------------------------------\r
-\r
-    /**\r
-     * Parse an array of zero or more rules.  The strings in the array are\r
-     * treated as if they were concatenated together, with rule terminators\r
-     * inserted between array elements if not present already.\r
-     *\r
-     * Any previous rules are discarded.  Typically this method is called exactly\r
-     * once, during construction.\r
-     *\r
-     * On return, dataVector holds the RuleBasedTransliterator.Data objects\r
-     * built from the rules and idBlockVector the accumulated ::ID blocks;\r
-     * for Transliterator.FORWARD entries are appended, otherwise they are\r
-     * inserted at the front.  compoundFilter holds the global filter, if\r
-     * one was given.\r
-     *\r
-     * Errors are collected while parsing continues (parsing stops once more\r
-     * than 30 have been seen).  If any were collected, the first one is thrown\r
-     * with the remaining ones attached through its cause chain.\r
-     *\r
-     * @param ruleArray the source of rule lines; it is reset before reading\r
-     * @param dir the direction to parse for; compared against\r
-     * Transliterator.FORWARD and Transliterator.REVERSE\r
-     * @exception IllegalIcuArgumentException if there is a syntax error in the\r
-     * rules\r
-     */\r
-    void parseRules(RuleBody ruleArray, int dir) {\r
-        boolean parsingIDs = true;\r
-        int ruleCount = 0;\r
-\r
-        dataVector = new Vector();\r
-        idBlockVector = new Vector();\r
-        curData = null;\r
-        direction = dir;\r
-        compoundFilter = null;\r
-        variablesVector = new Vector();\r
-        variableNames = new Hashtable();\r
-        parseData = new ParseData();\r
-\r
-        List errors = new ArrayList();\r
-        int errorCount = 0;\r
-\r
-        ruleArray.reset();\r
-\r
-        StringBuffer idBlockResult = new StringBuffer();\r
-\r
-        // compoundFilterOffset records the value of ruleCount at the moment\r
-        // the global filter was parsed, or -1 if none was seen.  After the\r
-        // main loop it is checked against the only position the filter may\r
-        // occupy: the first counted item (1) for FORWARD, the last one\r
-        // (ruleCount) for REVERSE.\r
-        int compoundFilterOffset = -1;\r
-\r
-    main:\r
-        for (;;) {\r
-            String rule = ruleArray.nextLine();\r
-            if (rule == null) {\r
-                break;\r
-            }\r
-            int pos = 0;\r
-            int limit = rule.length();\r
-            while (pos < limit) {\r
-                char c = rule.charAt(pos++);\r
-                if (UCharacterProperty.isRuleWhiteSpace(c)) {\r
-                    continue;\r
-                }\r
-                // Skip lines starting with the comment character\r
-                if (c == RULE_COMMENT_CHAR) {\r
-                    pos = rule.indexOf("\n", pos) + 1;\r
-                    if (pos == 0) {\r
-                        break; // No "\n" found; rest of rule is a comment\r
-                    }\r
-                    continue; // Either fall out or restart with next line\r
-                }\r
-\r
-                // skip empty rules\r
-                if (c == END_OF_RULE)\r
-                    continue;\r
-\r
-                // Often a rule file contains multiple errors.  It's\r
-                // convenient to the rule author if these are all reported\r
-                // at once.  We keep parsing rules even after a failure, up\r
-                // to a specified limit, and report all errors at once.\r
-                try {\r
-                    ++ruleCount;\r
-\r
-                    // We've found the start of a rule or ID.  c is its first\r
-                    // character, and pos points past c.\r
-                    --pos;\r
-                    // Look for an ID token.  Must have at least ID_TOKEN_LEN + 1\r
-                    // chars left.\r
-                    if ((pos + ID_TOKEN_LEN + 1) <= limit &&\r
-                            rule.regionMatches(pos, ID_TOKEN, 0, ID_TOKEN_LEN)) {\r
-                        pos += ID_TOKEN_LEN;\r
-                        // Skip whitespace after the ID token.  The bound is\r
-                        // tested before each read so that a rule consisting of\r
-                        // the ID token followed only by whitespace cannot index\r
-                        // past the end of the string.\r
-                        while (pos < limit &&\r
-                               UCharacterProperty.isRuleWhiteSpace(rule.charAt(pos))) {\r
-                            ++pos;\r
-                        }\r
-                        int[] p = new int[] { pos };\r
-\r
-                        if (!parsingIDs) {\r
-                            // An ID after a run of rules closes the current\r
-                            // rule group.\r
-                            if (curData != null) {\r
-                                if (direction == Transliterator.FORWARD)\r
-                                    dataVector.add(curData);\r
-                                else\r
-                                    dataVector.insertElementAt(curData, 0);\r
-                                curData = null;\r
-                            }\r
-                            parsingIDs = true;\r
-                        }\r
-\r
-                        TransliteratorIDParser.SingleID id =\r
-                            TransliteratorIDParser.parseSingleID(\r
-                                          rule, p, direction);\r
-                        if (p[0] != pos && Utility.parseChar(rule, p, END_OF_RULE)) {\r
-                            // Successful ::ID parse.\r
-\r
-                            if (direction == Transliterator.FORWARD) {\r
-                                idBlockResult.append(id.canonID).append(END_OF_RULE);\r
-                            } else {\r
-                                idBlockResult.insert(0, id.canonID + END_OF_RULE);\r
-                            }\r
-\r
-                        } else {\r
-                            // Couldn't parse an ID.  Try to parse a global filter\r
-                            int[] withParens = new int[] { -1 };\r
-                            UnicodeSet f = TransliteratorIDParser.parseGlobalFilter(rule, p, direction, withParens, null);\r
-                            if (f != null && Utility.parseChar(rule, p, END_OF_RULE)) {\r
-                                if ((direction == Transliterator.FORWARD) ==\r
-                                    (withParens[0] == 0)) {\r
-                                    if (compoundFilter != null) {\r
-                                        // Multiple compound filters\r
-                                        syntaxError("Multiple global filters", rule, pos);\r
-                                    }\r
-                                    compoundFilter = f;\r
-                                    compoundFilterOffset = ruleCount;\r
-                                }\r
-                            } else {\r
-                                // Invalid ::id\r
-                                // Can be parsed as neither an ID nor a global filter\r
-                                syntaxError("Invalid ::ID", rule, pos);\r
-                            }\r
-                        }\r
-\r
-                        pos = p[0];\r
-                    } else {\r
-                        if (parsingIDs) {\r
-                            // A rule after a run of IDs closes the current ID\r
-                            // block and opens a new rule group.\r
-                            if (direction == Transliterator.FORWARD)\r
-                                idBlockVector.add(idBlockResult.toString());\r
-                            else\r
-                                idBlockVector.insertElementAt(idBlockResult.toString(), 0);\r
-                            idBlockResult.delete(0, idBlockResult.length());\r
-                            parsingIDs = false;\r
-                            curData = new RuleBasedTransliterator.Data();\r
-\r
-                            // By default, rules use part of the private use area\r
-                            // E000..F8FF for variables and other stand-ins.  Currently\r
-                            // the range F000..F8FF is typically sufficient.  The 'use\r
-                            // variable range' pragma allows rule sets to modify this.\r
-                            setVariableRange(0xF000, 0xF8FF);\r
-                        }\r
-\r
-                        if (resemblesPragma(rule, pos, limit)) {\r
-                            int ppp = parsePragma(rule, pos, limit);\r
-                            if (ppp < 0) {\r
-                                syntaxError("Unrecognized pragma", rule, pos);\r
-                            }\r
-                            pos = ppp;\r
-                        // Parse a rule\r
-                        } else {\r
-                            pos = parseRule(rule, pos, limit);\r
-                        }\r
-                    }\r
-                } catch (IllegalArgumentException e) {\r
-                    if (errorCount == 30) {\r
-                        errors.add(new IllegalIcuArgumentException("\nMore than 30 errors; further messages squelched")\r
-//#if defined(FOUNDATION10) || defined(J2SE13)\r
-//#else\r
-                            .initCause(e)\r
-//#endif\r
-                            );\r
-                        break main;\r
-                    }\r
-                    e.fillInStackTrace();\r
-                    errors.add(e);\r
-                    ++errorCount;\r
-                    pos = ruleEnd(rule, pos, limit) + 1; // +1 advances past ';'\r
-                }\r
-            }\r
-        }\r
-        // Flush whichever block was still open when the input ended.\r
-        if (parsingIDs && idBlockResult.length() > 0) {\r
-            if (direction == Transliterator.FORWARD)\r
-                idBlockVector.add(idBlockResult.toString());\r
-            else\r
-                idBlockVector.insertElementAt(idBlockResult.toString(), 0);\r
-        }\r
-        else if (!parsingIDs && curData != null) {\r
-            if (direction == Transliterator.FORWARD)\r
-                dataVector.add(curData);\r
-            else\r
-                dataVector.insertElementAt(curData, 0);\r
-        }\r
-\r
-        // Convert the set vector to an array.  Every data object receives its\r
-        // own copy of the complete variables array and variable-name table.\r
-        for (int i = 0; i < dataVector.size(); i++) {\r
-            RuleBasedTransliterator.Data data = (RuleBasedTransliterator.Data)dataVector.get(i);\r
-            data.variables = new Object[variablesVector.size()];\r
-            variablesVector.copyInto(data.variables);\r
-            data.variableNames = new Hashtable();\r
-            data.variableNames.putAll(variableNames);\r
-        }\r
-        variablesVector = null;\r
-\r
-        // Do more syntax checking and index the rules\r
-        try {\r
-            if (compoundFilter != null) {\r
-                if ((direction == Transliterator.FORWARD &&\r
-                     compoundFilterOffset != 1) ||\r
-                    (direction == Transliterator.REVERSE &&\r
-                     compoundFilterOffset != ruleCount)) {\r
-                    throw new IllegalIcuArgumentException("Compound filters misplaced");\r
-                }\r
-            }\r
-\r
-            for (int i = 0; i < dataVector.size(); i++) {\r
-                RuleBasedTransliterator.Data data = (RuleBasedTransliterator.Data)dataVector.get(i);\r
-                data.ruleSet.freeze();\r
-            }\r
-\r
-            if (idBlockVector.size() == 1 && ((String)idBlockVector.get(0)).length() == 0)\r
-                idBlockVector.remove(0);\r
-\r
-        } catch (IllegalArgumentException e) {\r
-            e.fillInStackTrace();\r
-            errors.add(e);\r
-        }\r
-\r
-        if (errors.size() != 0) {\r
-//#if defined(FOUNDATION10) || defined(J2SE13)\r
-//#else\r
-            // Link the errors into one chain: each error is attached to the\r
-            // end of the cause chain of the error before it.  The chain is\r
-            // walked as Throwable because a cause need not be a\r
-            // RuntimeException.\r
-            for (int i = errors.size()-1; i > 0; --i) {\r
-                Throwable previous = (Throwable) errors.get(i-1);\r
-                while (previous.getCause() != null) {\r
-                    previous = previous.getCause(); // chain specially\r
-                }\r
-                previous.initCause((Throwable) errors.get(i));\r
-            }\r
-//#endif\r
-            throw (RuntimeException) errors.get(0);\r
-            // if initCause not supported: throw new IllegalArgumentException(errors.toString());\r
-        }\r
-    }\r
-\r
-    /**\r
-     * MAIN PARSER.  Parse the next rule in the given rule string, starting\r
-     * at pos.  Return the index after the last character parsed.  Do not\r
-     * parse characters at or after limit.\r
-     *\r
-     * Important:  The character at pos must be a non-whitespace character\r
-     * that is not the comment character.\r
-     *\r
-     * This method handles quoting, escaping, and whitespace removal.  It\r
-     * parses the end-of-rule character.  It recognizes context and cursor\r
-     * indicators.  Once it does a lexical breakdown of the rule at pos, it\r
-     * creates a rule object and adds it to our rule list.\r
-     *\r
-     * This method is tightly coupled to the inner class RuleHalf.\r
-     *\r
-     * Outline of the steps below:\r
-     * - parse the left half, the operator, and the right half;\r
-     * - for a variable definition (VARIABLE_DEF_OP), store the right-hand\r
-     *   text in variableNames under undefinedVariableName, decrease the\r
-     *   remaining variable range by advancing variableLimit, and return\r
-     *   without adding a rule;\r
-     * - otherwise verify the segments, and return without adding a rule if\r
-     *   the operator does not apply to the current direction;\r
-     * - swap the halves for Transliterator.REVERSE, drop the elements that\r
-     *   do not apply to a forward-reverse rule, validate both halves, and\r
-     *   add a TransliterationRule to curData.ruleSet.\r
-     *\r
-     * Every problem is reported through syntaxError(), which throws.\r
-     *\r
-     * @param rule the string containing the rule\r
-     * @param pos index of the first character of the rule\r
-     * @param limit index at or after which characters are not parsed\r
-     * @return the index after the last character parsed\r
-     */\r
-    private int parseRule(String rule, int pos, int limit) {\r
-        // Locate the left side, operator, and right side\r
-        int start = pos;\r
-        char operator = 0;\r
-\r
-        // Set up segments data\r
-        segmentStandins = new StringBuffer();\r
-        segmentObjects = new Vector();\r
-\r
-        RuleHalf left  = new RuleHalf();\r
-        RuleHalf right = new RuleHalf();\r
-\r
-        undefinedVariableName = null;\r
-        pos = left.parse(rule, pos, limit, this);\r
-\r
-        if (pos == limit ||\r
-            OPERATORS.indexOf(operator = rule.charAt(--pos)) < 0) {\r
-            syntaxError("No operator pos=" + pos, rule, start);\r
-        }\r
-        ++pos;\r
-\r
-        // Found an operator char.  Check for forward-reverse operator.\r
-        if (operator == REVERSE_RULE_OP &&\r
-            (pos < limit && rule.charAt(pos) == FORWARD_RULE_OP)) {\r
-            ++pos;\r
-            operator = FWDREV_RULE_OP;\r
-        }\r
-\r
-        // Translate alternate op characters.\r
-        switch (operator) {\r
-        case ALT_FORWARD_RULE_OP:\r
-            operator = FORWARD_RULE_OP;\r
-            break;\r
-        case ALT_REVERSE_RULE_OP:\r
-            operator = REVERSE_RULE_OP;\r
-            break;\r
-        case ALT_FWDREV_RULE_OP:\r
-            operator = FWDREV_RULE_OP;\r
-            break;\r
-        }\r
-\r
-        pos = right.parse(rule, pos, limit, this);\r
-\r
-        if (pos < limit) {\r
-            if (rule.charAt(--pos) == END_OF_RULE) {\r
-                ++pos;\r
-            } else {\r
-                // RuleHalf parser must have terminated at an operator\r
-                syntaxError("Unquoted operator", rule, start);\r
-            }\r
-        }\r
-\r
-        if (operator == VARIABLE_DEF_OP) {\r
-            // LHS is the name.  RHS is a single character, either a literal\r
-            // or a set (already parsed).  If RHS is longer than one\r
-            // character, it is either a multi-character string, or multiple\r
-            // sets, or a mixture of chars and sets -- syntax error.\r
-\r
-            // We expect to see a single undefined variable (the one being\r
-            // defined).\r
-            if (undefinedVariableName == null) {\r
-                syntaxError("Missing '$' or duplicate definition", rule, start);\r
-            }\r
-            if (left.text.length() != 1 || left.text.charAt(0) != variableLimit) {\r
-                syntaxError("Malformed LHS", rule, start);\r
-            }\r
-            if (left.anchorStart || left.anchorEnd ||\r
-                right.anchorStart || right.anchorEnd) {\r
-                syntaxError("Malformed variable def", rule, start);\r
-            }\r
-            // We allow anything on the right, including an empty string.\r
-            int n = right.text.length();\r
-            char[] value = new char[n];\r
-            right.text.getChars(0, n, value, 0);\r
-            variableNames.put(undefinedVariableName, value);\r
-\r
-            ++variableLimit;\r
-            return pos;\r
-        }\r
-\r
-        // If this is not a variable definition rule, we shouldn't have\r
-        // any undefined variable names.\r
-        if (undefinedVariableName != null) {\r
-            syntaxError("Undefined variable $" + undefinedVariableName,\r
-                        rule, start);\r
-        }\r
-\r
-        // Verify segments\r
-        if (segmentStandins.length() > segmentObjects.size()) {\r
-            syntaxError("Undefined segment reference", rule, start);\r
-        }\r
-        for (int i=0; i<segmentStandins.length(); ++i) {\r
-            if (segmentStandins.charAt(i) == 0) {\r
-                syntaxError("Internal error", rule, start); // will never happen\r
-            }\r
-        }\r
-        for (int i=0; i<segmentObjects.size(); ++i) {\r
-            if (segmentObjects.elementAt(i) == null) {\r
-                syntaxError("Internal error", rule, start); // will never happen\r
-            }\r
-        }\r
-\r
-        // If the direction we want doesn't match the rule\r
-        // direction, do nothing.\r
-        if (operator != FWDREV_RULE_OP &&\r
-            ((direction == Transliterator.FORWARD) != (operator == FORWARD_RULE_OP))) {\r
-            return pos;\r
-        }\r
-\r
-        // Transform the rule into a forward rule by swapping the\r
-        // sides if necessary.\r
-        if (direction == Transliterator.REVERSE) {\r
-            RuleHalf temp = left;\r
-            left = right;\r
-            right = temp;\r
-        }\r
-\r
-        // Remove non-applicable elements in forward-reverse\r
-        // rules.  Bidirectional rules ignore elements that do not\r
-        // apply.\r
-        if (operator == FWDREV_RULE_OP) {\r
-            right.removeContext();\r
-            left.cursor = -1;\r
-            left.cursorOffset = 0;\r
-        }\r
-\r
-        // Normalize context\r
-        if (left.ante < 0) {\r
-            left.ante = 0;\r
-        }\r
-        if (left.post < 0) {\r
-            left.post = left.text.length();\r
-        }\r
-\r
-        // Context is only allowed on the input side.  Cursors are only\r
-        // allowed on the output side.  Segment delimiters can only appear\r
-        // on the left, and references on the right.  Cursor offset\r
-        // cannot appear without an explicit cursor.  Cursor offset\r
-        // cannot place the cursor outside the limits of the context.\r
-        // Anchors are only allowed on the input side.\r
-        if (right.ante >= 0 || right.post >= 0 || left.cursor >= 0 ||\r
-            (right.cursorOffset != 0 && right.cursor < 0) ||\r
-            // - The following two checks were used to ensure that the\r
-            // - the cursor offset stayed within the ante- or postcontext.\r
-            // - However, with the addition of quantifiers, we have to\r
-            // - allow arbitrary cursor offsets and do runtime checking.\r
-            //(right.cursorOffset > (left.text.length() - left.post)) ||\r
-            //(-right.cursorOffset > left.ante) ||\r
-            right.anchorStart || right.anchorEnd ||\r
-            !left.isValidInput(this) || !right.isValidOutput(this) ||\r
-            left.ante > left.post) {\r
-            syntaxError("Malformed rule", rule, start);\r
-        }\r
-\r
-        // Flatten segment objects vector to an array\r
-        UnicodeMatcher[] segmentsArray = null;\r
-        if (segmentObjects.size() > 0) {\r
-            segmentsArray = new UnicodeMatcher[segmentObjects.size()];\r
-            segmentObjects.toArray(segmentsArray);\r
-        }\r
-\r
-        curData.ruleSet.addRule(new TransliterationRule(\r
-                                     left.text, left.ante, left.post,\r
-                                     right.text, right.cursor, right.cursorOffset,\r
-                                     segmentsArray,\r
-                                     left.anchorStart, left.anchorEnd,\r
-                                     curData));\r
-\r
-        return pos;\r
-    }\r
-\r
-    /**\r
-     * Install [start, end] (inclusive) as the range used for variables and\r
-     * stand-ins.  The base of the current data object is always updated; the\r
-     * allocation cursor and limit are only initialized while dataVector is\r
-     * still empty.\r
-     * @param start first code unit of the range\r
-     * @param end last code unit of the range\r
-     * @exception IllegalIcuArgumentException if start is negative, start is\r
-     * greater than end, or end is greater than 0xFFFF\r
-     */\r
-    private void setVariableRange(int start, int end) {\r
-        final boolean wellFormed = start >= 0 && start <= end && end <= 0xFFFF;\r
-        if (!wellFormed) {\r
-            throw new IllegalIcuArgumentException("Invalid variable range " + start + ", " + end);\r
-        }\r
-\r
-        // first private use\r
-        final char base = (char) start;\r
-        curData.variablesBase = base;\r
-\r
-        if (dataVector.isEmpty()) {\r
-            variableNext = base;\r
-            variableLimit = (char) (end + 1);\r
-        }\r
-    }\r
-\r
-    /**\r
-     * Signal a syntax error if the given character lies inside the variable\r
-     * range, that is, at or above curData.variablesBase and below\r
-     * variableLimit.  This is necessary to ensure that the variable range\r
-     * does not overlap characters used in a rule.\r
-     * @param ch the character to test\r
-     * @param rule the rule text, used for the error message\r
-     * @param start position of the first character of the current rule\r
-     */\r
-    private void checkVariableRange(int ch, String rule, int start) {\r
-        final boolean belowRange = ch < curData.variablesBase;\r
-        final boolean aboveRange = ch >= variableLimit;\r
-        if (belowRange || aboveRange) {\r
-            return;\r
-        }\r
-        syntaxError("Variable range character in rule", rule, start);\r
-    }\r
-\r
-    // (The following method is part of an unimplemented feature.\r
-    // Remove this clover pragma after the feature is implemented.\r
-    // 2003-06-11 ICU 2.6 Alan)\r
-    ///CLOVER:OFF\r
-    /**\r
-     * Handler for the 'use maximum backup' pragma.  The feature is not\r
-     * implemented, so this always throws.\r
-     * @param backup the requested maximum backup (currently ignored)\r
-     * @exception IllegalIcuArgumentException always\r
-     */\r
-    private void pragmaMaximumBackup(int backup) {\r
-        //TODO Finish\r
-        final String message = "use maximum backup pragma not implemented yet";\r
-        throw new IllegalIcuArgumentException(message);\r
-    }\r
-    ///CLOVER:ON\r
-\r
-    // (The following method is part of an unimplemented feature.\r
-    // Remove this clover pragma after the feature is implemented.\r
-    // 2003-06-11 ICU 2.6 Alan)\r
-    ///CLOVER:OFF\r
-    /**\r
-     * Handler for the 'use nfd rules' and 'use nfc rules' pragmas.  The\r
-     * feature is not implemented, so this always throws.\r
-     * @param mode the requested normalization mode (currently ignored)\r
-     * @exception IllegalIcuArgumentException always\r
-     */\r
-    private void pragmaNormalizeRules(Normalizer.Mode mode) {\r
-        //TODO Finish\r
-        final String message = "use normalize rules pragma not implemented yet";\r
-        throw new IllegalIcuArgumentException(message);\r
-    }\r
-    ///CLOVER:ON\r
-\r
-    /**\r
-     * Tell whether the rule at pos looks like a pragma, i.e. whether\r
-     * Utility.parsePattern() matches the pattern "use " there.\r
-     * @param rule the rule text\r
-     * @param pos offset to the first non-whitespace character\r
-     * of the rule.\r
-     * @param limit pointer past the last character of the rule.\r
-     * @return true if the pattern matched\r
-     */\r
-    static boolean resemblesPragma(String rule, int pos, int limit) {\r
-        // Must start with /use\s/i\r
-        final int matchEnd = Utility.parsePattern(rule, pos, limit, "use ", null);\r
-        return matchEnd >= 0;\r
-    }\r
-\r
-    /**\r
-     * Parse a pragma and run its handler.  The caller must already have\r
-     * checked the rule with resemblesPragma().\r
-     * @param rule the rule text\r
-     * @param pos offset to the first non-whitespace character\r
-     * of the rule.\r
-     * @param limit pointer past the last character of the rule.\r
-     * @return the position index after the final ';' of the pragma,\r
-     * or -1 on failure.\r
-     */\r
-    private int parsePragma(String rule, int pos, int limit) {\r
-        final int[] numbers = new int[2];\r
-\r
-        // resemblesPragma() has already matched /use\s/i at pos, so the\r
-        // pragma body begins 4 characters further on.\r
-        final int bodyStart = pos + 4;\r
-\r
-        // The recognized pragmas, tried in this order:\r
-        //   use variable range 0xE000 0xEFFF;\r
-        //   use maximum backup 16;\r
-        //   use nfd rules;\r
-        //   use nfc rules;\r
-        int end;\r
-        if ((end = Utility.parsePattern(rule, bodyStart, limit, "~variable range # #~;", numbers)) >= 0) {\r
-            setVariableRange(numbers[0], numbers[1]);\r
-        } else if ((end = Utility.parsePattern(rule, bodyStart, limit, "~maximum backup #~;", numbers)) >= 0) {\r
-            pragmaMaximumBackup(numbers[0]);\r
-        } else if ((end = Utility.parsePattern(rule, bodyStart, limit, "~nfd rules~;", null)) >= 0) {\r
-            pragmaNormalizeRules(Normalizer.NFD);\r
-        } else if ((end = Utility.parsePattern(rule, bodyStart, limit, "~nfc rules~;", null)) >= 0) {\r
-            pragmaNormalizeRules(Normalizer.NFC);\r
-        } else {\r
-            // Syntax error: unable to parse pragma\r
-            end = -1;\r
-        }\r
-        return end;\r
-    }\r
-\r
-    /**\r
-     * Report a syntax error by throwing.  The message quotes the escaped\r
-     * rule text from start up to the probable end of the rule as found by\r
-     * ruleEnd(); if no end-of-rule marker is found, the text runs to the\r
-     * end of the string.\r
-     * @param msg error description\r
-     * @param rule pattern string\r
-     * @param start position of first character of current rule\r
-     * @exception IllegalIcuArgumentException always\r
-     */\r
-    static final void syntaxError(String msg, String rule, int start) {\r
-        final int end = ruleEnd(rule, start, rule.length());\r
-        final String excerpt = Utility.escape(rule.substring(start, end));\r
-        throw new IllegalIcuArgumentException(msg + " in \"" + excerpt + '"');\r
-    }\r
-\r
-    /**\r
-     * Locate the end of the rule that begins at start.\r
-     * @param rule the rule text\r
-     * @param start index at which to begin searching\r
-     * @param limit index at which to stop searching\r
-     * @return the index Utility.quotedIndexOf() reports for ";" between\r
-     * start and limit, or limit if it reports a negative value\r
-     */\r
-    static final int ruleEnd(String rule, int start, int limit) {\r
-        final int semicolon = Utility.quotedIndexOf(rule, start, limit, ";");\r
-        return semicolon < 0 ? limit : semicolon;\r
-    }\r
-\r
-    /**\r
-     * Parse a UnicodeSet out, store it, and return the stand-in character\r
-     * used to represent it.\r
-     */\r
-    private final char parseSet(String rule, ParsePosition pos) {\r
-        UnicodeSet set = new UnicodeSet(rule, pos, parseData);\r
-        if (variableNext >= variableLimit) {\r
-            throw new RuntimeException("Private use variables exhausted");\r
-        }\r
-        set.compact();\r
-        return generateStandInFor(set);\r
-    }\r
-\r
-    /**\r
-     * Generate and return a stand-in for a new UnicodeMatcher or UnicodeReplacer.\r
-     * Store the object.\r
-     */\r
-    char generateStandInFor(Object obj) {\r
-        // assert(obj != null);\r
-\r
-        // Look up previous stand-in, if any.  This is a short list\r
-        // (typical n is 0, 1, or 2); linear search is optimal.\r
-        for (int i=0; i<variablesVector.size(); ++i) {\r
-            if (variablesVector.elementAt(i) == obj) { // [sic] pointer comparison\r
-                return (char) (curData.variablesBase + i);\r
-            }\r
-        }\r
-\r
-        if (variableNext >= variableLimit) {\r
-            throw new RuntimeException("Variable range exhausted");\r
-        }\r
-        variablesVector.addElement(obj);\r
-        return variableNext++;\r
-    }\r
-\r
-    /**\r
-     * Return the standin for segment seg (1-based).\r
-     */\r
-    public char getSegmentStandin(int seg) {\r
-        if (segmentStandins.length() < seg) {\r
-            segmentStandins.setLength(seg);\r
-        }\r
-        char c = segmentStandins.charAt(seg-1);\r
-        if (c == 0) {\r
-            if (variableNext >= variableLimit) {\r
-                throw new RuntimeException("Variable range exhausted");\r
-            }\r
-            c = variableNext++;\r
-            // Set a placeholder in the master variables vector that will be\r
-            // filled in later by setSegmentObject().  We know that we will get\r
-            // called first because setSegmentObject() will call us.\r
-            variablesVector.addElement(null);\r
-            segmentStandins.setCharAt(seg-1, c);\r
-        }\r
-        return c;\r
-    }\r
-    \r
-    /**\r
-     * Set the object for segment seg (1-based).\r
-     */\r
-    public void setSegmentObject(int seg, StringMatcher obj) {\r
-        // Since we call parseSection() recursively, nested\r
-        // segments will result in segment i+1 getting parsed\r
-        // and stored before segment i; be careful with the\r
-        // vector handling here.\r
-        if (segmentObjects.size() < seg) {\r
-            segmentObjects.setSize(seg);\r
-        }\r
-        int index = getSegmentStandin(seg) - curData.variablesBase;\r
-        if (segmentObjects.elementAt(seg-1) != null ||\r
-            variablesVector.elementAt(index) != null) {\r
-            throw new RuntimeException(); // should never happen\r
-        }\r
-        segmentObjects.setElementAt(obj, seg-1);\r
-        variablesVector.setElementAt(obj, index);\r
-    }\r
-\r
-    /**\r
-     * Return the stand-in for the dot set.  It is allocated the first\r
-     * time and reused thereafter.\r
-     */\r
-    char getDotStandIn() {\r
-        if (dotStandIn == -1) {\r
-            dotStandIn = generateStandInFor(new UnicodeSet(DOT_SET));\r
-        }\r
-        return (char) dotStandIn;\r
-    }\r
-\r
-    /**\r
-     * Append the value of the given variable name to the given\r
-     * StringBuffer.\r
-     * @exception IllegalIcuArgumentException if the name is unknown.\r
-     */\r
-    private void appendVariableDef(String name, StringBuffer buf) {\r
-        char[] ch = (char[]) variableNames.get(name);\r
-        if (ch == null) {\r
-            // We allow one undefined variable so that variable definition\r
-            // statements work.  For the first undefined variable we return\r
-            // the special placeholder variableLimit-1, and save the variable\r
-            // name.\r
-            if (undefinedVariableName == null) {\r
-                undefinedVariableName = name;\r
-                if (variableNext >= variableLimit) {\r
-                    throw new RuntimeException("Private use variables exhausted");\r
-                }\r
-                buf.append((char) --variableLimit);\r
-            } else {\r
-                throw new IllegalIcuArgumentException("Undefined variable $"\r
-                                                   + name);\r
-            }\r
-        } else {\r
-            buf.append(ch);\r
-        }\r
-    }\r
-}\r
-\r
-//eof\r
+//##header J2SE15
+/*
+**********************************************************************
+*   Copyright (c) 2001-2009, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+**********************************************************************
+*/
+package com.ibm.icu.text;
+
+import com.ibm.icu.impl.IllegalIcuArgumentException;
+import com.ibm.icu.impl.Utility;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Vector;
+import java.util.Hashtable;
+import java.text.ParsePosition;
+import com.ibm.icu.lang.*;
+import com.ibm.icu.impl.UCharacterProperty;
+
+class TransliteratorParser {
+
+    //----------------------------------------------------------------------
+    // Data members
+    //----------------------------------------------------------------------
+
+    /**
+     * PUBLIC data member.
+     * A Vector of RuleBasedTransliterator.Data objects, one for each discrete group
+     * of rules in the rule set
+     */
+    public Vector dataVector;
+
+    /**
+     * PUBLIC data member.
+     * A Vector of Strings containing all of the ID blocks in the rule set
+     */
+    public Vector idBlockVector;
+
+    /**
+     * The current data object for which we are parsing rules
+     */
+    private RuleBasedTransliterator.Data curData;
+
+    /**
+     * PUBLIC data member containing the parsed compound filter, if any.
+     */
+    public UnicodeSet compoundFilter;
+
+
+    private int direction;
+
+    /**
+     * Temporary symbol table used during parsing.
+     */
+    private ParseData parseData;
+
+    /**
+     * Temporary vector of set variables.  When parsing is complete, this
+     * is copied into the array data.variables.  As with data.variables,
+     * element 0 corresponds to character data.variablesBase.
+     */
+    private Vector variablesVector;
+
+    /**
+     * Temporary table of variable names.  When parsing is complete, this is
+     * copied into data.variableNames.
+     */
+    private Hashtable variableNames;
+
+    /**
+     * String of standins for segments.  Used during the parsing of a single
+     * rule.  segmentStandins.charAt(0) is the standin for "$1" and corresponds
+     * to StringMatcher object segmentObjects.elementAt(0), etc.
+     */
+    private StringBuffer segmentStandins;
+
+    /**
+     * Vector of StringMatcher objects for segments.  Used during the
+     * parsing of a single rule.  
+     * segmentStandins.charAt(0) is the standin for "$1" and corresponds
+     * to StringMatcher object segmentObjects.elementAt(0), etc.
+     */
+    private Vector segmentObjects;
+
+    /**
+     * The next available stand-in for variables.  This starts at some point in
+     * the private use area (discovered dynamically) and increments up toward
+     * <code>variableLimit</code>.  At any point during parsing, available
+     * variables are <code>variableNext..variableLimit-1</code>.
+     */
+    private char variableNext;
+
+    /**
+     * The exclusive upper limit of variable stand-ins.  This is discovered
+     * dynamically.  At any point during parsing, available variables are
+     * <code>variableNext..variableLimit-1</code>.  During variable definition
+     * we use the special value variableLimit-1 as a placeholder.
+     */
+    private char variableLimit;
+
+    /**
+     * When we encounter an undefined variable, we do not immediately signal
+     * an error, in case we are defining this variable, e.g., "$a = [a-z];".
+     * Instead, we save the name of the undefined variable, and substitute
+     * in the placeholder char variableLimit - 1, and decrement
+     * variableLimit.
+     */
+    private String undefinedVariableName;
+
+    /**
+     * The stand-in character for the 'dot' set, represented by '.' in
+     * patterns.  This is allocated the first time it is needed, and
+     * reused thereafter.
+     */
+    private int dotStandIn = -1;
+
+    //----------------------------------------------------------------------
+    // Constants
+    //----------------------------------------------------------------------
+
+    // Indicator for ID blocks
+    private static final String ID_TOKEN = "::";
+    private static final int ID_TOKEN_LEN = 2;
+
+/*
+(reserved for future expansion)
+    // markers for beginning and end of rule groups
+    private static final String BEGIN_TOKEN = "BEGIN";
+    private static final String END_TOKEN = "END";
+*/
+
+    // Operators
+    private static final char VARIABLE_DEF_OP   = '=';
+    private static final char FORWARD_RULE_OP   = '>';
+    private static final char REVERSE_RULE_OP   = '<';
+    private static final char FWDREV_RULE_OP    = '~'; // internal rep of <> op
+
+    private static final String OPERATORS = "=><\u2190\u2192\u2194";
+    private static final String HALF_ENDERS = "=><\u2190\u2192\u2194;";
+
+    // Other special characters
+    private static final char QUOTE               = '\'';
+    private static final char ESCAPE              = '\\';
+    private static final char END_OF_RULE         = ';';
+    private static final char RULE_COMMENT_CHAR   = '#';
+
+    private static final char CONTEXT_ANTE        = '{'; // ante{key
+    private static final char CONTEXT_POST        = '}'; // key}post
+    private static final char CURSOR_POS          = '|';
+    private static final char CURSOR_OFFSET       = '@';
+    private static final char ANCHOR_START        = '^';
+
+    private static final char KLEENE_STAR         = '*';
+    private static final char ONE_OR_MORE         = '+';
+    private static final char ZERO_OR_ONE         = '?';
+
+    private static final char DOT                 = '.';
+    private static final String DOT_SET           = "[^[:Zp:][:Zl:]\\r\\n$]";
+
+    // By definition, the ANCHOR_END special character is a
+    // trailing SymbolTable.SYMBOL_REF character.
+    // private static final char ANCHOR_END       = '$';
+
+    // Segments of the input string are delimited by "(" and ")".  In the
+    // output string these segments are referenced as "$1", "$2", etc.
+    private static final char SEGMENT_OPEN        = '(';
+    private static final char SEGMENT_CLOSE       = ')';
+
+    // A function is denoted &Source-Target/Variant(text)
+    private static final char FUNCTION            = '&';
+
+    // Aliases for some of the syntax characters. These are provided so
+    // transliteration rules can be expressed in XML without clashing with
+    // XML syntax characters '<', '>', and '&'.
+    private static final char ALT_REVERSE_RULE_OP = '\u2190'; // Left Arrow
+    private static final char ALT_FORWARD_RULE_OP = '\u2192'; // Right Arrow
+    private static final char ALT_FWDREV_RULE_OP  = '\u2194'; // Left Right Arrow
+    private static final char ALT_FUNCTION        = '\u2206'; // Increment (~Greek Capital Delta)
+    
+    // Special characters disallowed at the top level
+    private static UnicodeSet ILLEGAL_TOP = new UnicodeSet("[\\)]");
+
+    // Special characters disallowed within a segment
+    private static UnicodeSet ILLEGAL_SEG = new UnicodeSet("[\\{\\}\\|\\@]");
+
+    // Special characters disallowed within a function argument
+    private static UnicodeSet ILLEGAL_FUNC = new UnicodeSet("[\\^\\(\\.\\*\\+\\?\\{\\}\\|\\@]");
+
+    //----------------------------------------------------------------------
+    // class ParseData
+    //----------------------------------------------------------------------
+
+    /**
+     * This class implements the SymbolTable interface.  It is used
+     * during parsing to give UnicodeSet access to variables that
+     * have been defined so far.  Note that it uses variablesVector,
+     * _not_ data.variables.
+     */
+    private class ParseData implements SymbolTable {
+
+        /**
+         * Implement SymbolTable API.
+         */
+        public char[] lookup(String name) {
+            return (char[]) variableNames.get(name);
+        }
+
+        /**
+         * Implement SymbolTable API.
+         */
+        public UnicodeMatcher lookupMatcher(int ch) {
+            // Note that we cannot use data.lookup() because the
+            // set array has not been constructed yet.
+            int i = ch - curData.variablesBase;
+            if (i >= 0 && i < variablesVector.size()) {
+                return (UnicodeMatcher) variablesVector.elementAt(i);
+            }
+            return null;
+        }
+
+        /**
+         * Implement SymbolTable API.  Parse out a symbol reference
+         * name.
+         */
+        public String parseReference(String text, ParsePosition pos, int limit) {
+            int start = pos.getIndex();
+            int i = start;
+            while (i < limit) {
+                char c = text.charAt(i);
+                if ((i==start && !UCharacter.isUnicodeIdentifierStart(c)) ||
+                    !UCharacter.isUnicodeIdentifierPart(c)) {
+                    break;
+                }
+                ++i;
+            }
+            if (i == start) { // No valid name chars
+                return null;
+            }
+            pos.setIndex(i);
+            return text.substring(start, i);
+        }
+
+        /**
+         * Return true if the given character is a matcher standin or a plain
+         * character (non standin).
+         */
+        public boolean isMatcher(int ch) {
+            // Note that we cannot use data.lookup() because the
+            // set array has not been constructed yet.
+            int i = ch - curData.variablesBase;
+            if (i >= 0 && i < variablesVector.size()) {
+                return variablesVector.elementAt(i) instanceof UnicodeMatcher;
+            }
+            return true;
+        }
+
+        /**
+         * Return true if the given character is a replacer standin or a plain
+         * character (non standin).
+         */
+        public boolean isReplacer(int ch) {
+            // Note that we cannot use data.lookup() because the
+            // set array has not been constructed yet.
+            int i = ch - curData.variablesBase;
+            if (i >= 0 && i < variablesVector.size()) {
+                return variablesVector.elementAt(i) instanceof UnicodeReplacer;
+            }
+            return true;
+        }
+    }
+
+    //----------------------------------------------------------------------
+    // classes RuleBody, RuleArray, and RuleReader
+    //----------------------------------------------------------------------
+
+    /**
+     * A private abstract class representing the interface to rule
+     * source code that is broken up into lines.  Handles the
+     * folding of lines terminated by a backslash.  This folding
+     * is limited; it does not account for comments, quotes, or
+     * escapes, so its use should be limited.
+     */
+    private static abstract class RuleBody {
+
+        /**
+         * Retrieve the next line of the source, or return null if
+         * none.  Folds lines terminated by a backslash into the
+         * next line, without regard for comments, quotes, or
+         * escapes.
+         */
+        String nextLine() {
+            String s = handleNextLine();
+            if (s != null &&
+                s.length() > 0 &&
+                s.charAt(s.length() - 1) == '\\') {
+
+                StringBuffer b = new StringBuffer(s);
+                do {
+                    b.deleteCharAt(b.length()-1);
+                    s = handleNextLine();
+                    if (s == null) {
+                        break;
+                    }
+                    b.append(s);
+                } while (s.length() > 0 &&
+                         s.charAt(s.length() - 1) == '\\');
+
+                s = b.toString();
+            }
+            return s;
+        }
+
+        /**
+         * Reset to the first line of the source.
+         */
+        abstract void reset();
+
+        /**
+         * Subclass method to return the next line of the source.
+         */
+        abstract String handleNextLine();
+    }
+
+    /**
+     * RuleBody subclass for a String[] array.
+     */
+    private static class RuleArray extends RuleBody {
+        String[] array;
+        int i;
+        public RuleArray(String[] array) { this.array = array; i = 0; }
+        public String handleNextLine() {
+            return (i < array.length) ? array[i++] : null;
+        }
+        public void reset() {
+            i = 0;
+        }
+    }
+
+    /*
+     * RuleBody subclass for a ResourceReader.
+     */
+/*    private static class RuleReader extends RuleBody {
+        ResourceReader reader;
+        public RuleReader(ResourceReader reader) { this.reader = reader; }
+        public String handleNextLine() {
+            try {
+                return reader.readLine();
+            } catch (java.io.IOException e) {}
+            return null;
+        }
+        public void reset() {
+            reader.reset();
+        }
+    }*/
+
+    //----------------------------------------------------------------------
+    // class RuleHalf
+    //----------------------------------------------------------------------
+
+    /**
+     * A class representing one side of a rule.  This class knows how to
+     * parse half of a rule.  It is tightly coupled to the method
+     * TransliteratorParser.parseRule().
+     */
+    private static class RuleHalf {
+
+        public String text;
+
+        public int cursor = -1; // position of cursor in text
+        public int ante = -1;   // position of ante context marker '{' in text
+        public int post = -1;   // position of post context marker '}' in text
+
+        // Record the offset to the cursor either to the left or to the
+        // right of the key.  This is indicated by characters on the output
+        // side that allow the cursor to be positioned arbitrarily within
+        // the matching text.  For example, abc{def} > | @@@ xyz; changes
+        // def to xyz and moves the cursor to before abc.  Offset characters
+        // must be at the start or end, and they cannot move the cursor past
+        // the ante- or postcontext text.  Placeholders are only valid in
+        // output text.  The length of the ante and post context is
+        // determined at runtime, because of supplementals and quantifiers.
+        public int cursorOffset = 0; // only nonzero on output side
+
+        // Position of first CURSOR_OFFSET on _right_.  This will be -1
+        // for |@, -2 for |@@, etc., and 1 for @|, 2 for @@|, etc.
+        private int cursorOffsetPos = 0;
+
+        public boolean anchorStart = false;
+        public boolean anchorEnd   = false;
+
+        /**
+         * The segment number from 1..n of the next '(' we see
+         * during parsing; 1-based.
+         */
+        private int nextSegmentNumber = 1;
+
+        /**
+         * Parse one side of a rule, stopping at either the limit,
+         * the END_OF_RULE character, or an operator.
+         * @return the index after the terminating character, or
+         * if limit was reached, limit
+         */
+        public int parse(String rule, int pos, int limit,
+                         TransliteratorParser parser) {
+            int start = pos;
+            StringBuffer buf = new StringBuffer();
+            pos = parseSection(rule, pos, limit, parser, buf, ILLEGAL_TOP, false);
+            text = buf.toString();
+
+            if (cursorOffset > 0 && cursor != cursorOffsetPos) {
+                syntaxError("Misplaced " + CURSOR_POS, rule, start);
+            }
+
+            return pos;
+        }
+
+        /**
+         * Parse a section of one side of a rule, stopping at either
+         * the limit, the END_OF_RULE character, an operator, or a
+         * segment close character.  This method parses both a
+         * top-level rule half and a segment within such a rule half.
+         * It calls itself recursively to parse segments and nested
+         * segments.
+         * @param buf buffer into which to accumulate the rule pattern
+         * characters, either literal characters from the rule or
+         * standins for UnicodeMatcher objects including segments.
+         * @param illegal the set of special characters that is illegal during
+         * this parse.
+         * @param isSegment if true, then we've already seen a '(' and
+         * pos on entry points right after it.  Accumulate everything
+         * up to the closing ')', put it in a segment matcher object,
+         * generate a standin for it, and add the standin to buf.  As
+         * a side effect, update the segments vector with a reference
+         * to the segment matcher.  This works recursively for nested
+         * segments.  If isSegment is false, just accumulate
+         * characters into buf.
+         * @return the index after the terminating character, or
+         * if limit was reached, limit
+         */
+        private int parseSection(String rule, int pos, int limit,
+                                 TransliteratorParser parser,
+                                 StringBuffer buf,
+                                 UnicodeSet illegal,
+                                 boolean isSegment) {
+            int start = pos;
+            ParsePosition pp = null;
+            int quoteStart = -1; // Most recent 'single quoted string'
+            int quoteLimit = -1;
+            int varStart = -1; // Most recent $variableReference
+            int varLimit = -1;
+            int[] iref = new int[1];
+            int bufStart = buf.length();
+
+        main:
+            while (pos < limit) {
+                // Since all syntax characters are in the BMP, fetching
+                // 16-bit code units suffices here.
+                char c = rule.charAt(pos++);
+                if (UCharacterProperty.isRuleWhiteSpace(c)) {
+                    continue;
+                }
+                // HALF_ENDERS is all chars that end a rule half: "<>=;"
+                if (HALF_ENDERS.indexOf(c) >= 0) {
+                    if (isSegment) {
+                        syntaxError("Unclosed segment", rule, start);
+                    }
+                    break main;
+                }
+                if (anchorEnd) {
+                    // Text after a presumed end anchor is a syntax err
+                    syntaxError("Malformed variable reference", rule, start);
+                }
+                if (UnicodeSet.resemblesPattern(rule, pos-1)) {
+                    if (pp == null) {
+                        pp = new ParsePosition(0);
+                    }
+                    pp.setIndex(pos-1); // Backup to opening '['
+                    buf.append(parser.parseSet(rule, pp));
+                    pos = pp.getIndex();                    
+                    continue;
+                }
+                // Handle escapes
+                if (c == ESCAPE) {
+                    if (pos == limit) {
+                        syntaxError("Trailing backslash", rule, start);
+                    }
+                    iref[0] = pos;
+                    int escaped = Utility.unescapeAt(rule, iref);
+                    pos = iref[0];
+                    if (escaped == -1) {
+                        syntaxError("Malformed escape", rule, start);
+                    }
+                    parser.checkVariableRange(escaped, rule, start);
+                    UTF16.append(buf, escaped);
+                    continue;
+                }
+                // Handle quoted matter
+                if (c == QUOTE) {
+                    int iq = rule.indexOf(QUOTE, pos);
+                    if (iq == pos) {
+                        buf.append(c); // Parse [''] outside quotes as [']
+                        ++pos;
+                    } else {
+                        /* This loop picks up a run of quoted text of the
+                         * form 'aaaa' each time through.  If this run
+                         * hasn't really ended ('aaaa''bbbb') then it keeps
+                         * looping, each time adding on a new run.  When it
+                         * reaches the final quote it breaks.
+                         */
+                        quoteStart = buf.length();
+                        for (;;) {
+                            if (iq < 0) {
+                                syntaxError("Unterminated quote", rule, start);
+                            }
+                            buf.append(rule.substring(pos, iq));
+                            pos = iq+1;
+                            if (pos < limit && rule.charAt(pos) == QUOTE) {
+                            // Parse [''] inside quotes as [']
+                                iq = rule.indexOf(QUOTE, pos+1);
+                            // Continue looping
+                            } else {
+                                break;
+                            }
+                        }
+                        quoteLimit = buf.length();
+                        
+                        for (iq=quoteStart; iq<quoteLimit; ++iq) {
+                            parser.checkVariableRange(buf.charAt(iq), rule, start);
+                        }
+                    }
+                    continue;
+                }
+
+                parser.checkVariableRange(c, rule, start);
+
+                if (illegal.contains(c)) {
+                    syntaxError("Illegal character '" + c + '\'', rule, start);
+                }
+
+                switch (c) {
+                    
+                //------------------------------------------------------
+                // Elements allowed within and out of segments
+                //------------------------------------------------------
+                case ANCHOR_START:
+                    if (buf.length() == 0 && !anchorStart) {
+                        anchorStart = true;
+                    } else {
+                        syntaxError("Misplaced anchor start",
+                                    rule, start);
+                    }
+                    break;
+                case SEGMENT_OPEN:
+                    {
+                        // bufSegStart is the offset in buf to the first
+                        // character of the segment we are parsing.
+                        int bufSegStart = buf.length();
+
+                        // Record segment number now, since nextSegmentNumber
+                        // will be incremented during the call to parseSection
+                        // if there are nested segments.
+                        int segmentNumber = nextSegmentNumber++; // 1-based
+
+                        // Parse the segment
+                        pos = parseSection(rule, pos, limit, parser, buf, ILLEGAL_SEG, true);
+
+                        // After parsing a segment, the relevant characters are
+                        // in buf, starting at offset bufSegStart.  Extract them
+                        // into a string matcher, and replace them with a
+                        // standin for that matcher.
+                        StringMatcher m =
+                            new StringMatcher(buf.substring(bufSegStart),
+                                              segmentNumber, parser.curData);
+
+                        // Record and associate object and segment number
+                        parser.setSegmentObject(segmentNumber, m);
+                        buf.setLength(bufSegStart);
+                        buf.append(parser.getSegmentStandin(segmentNumber));
+                    }
+                    break;
+                case FUNCTION:
+                case ALT_FUNCTION:
+                    {
+                        iref[0] = pos;
+                        TransliteratorIDParser.SingleID single = TransliteratorIDParser.parseFilterID(rule, iref);
+                        // The next character MUST be a segment open
+                        if (single == null ||
+                            !Utility.parseChar(rule, iref, SEGMENT_OPEN)) {
+                            syntaxError("Invalid function", rule, start);
+                        }
+
+                        Transliterator t = single.getInstance();
+                        if (t == null) {
+                            syntaxError("Invalid function ID", rule, start);
+                        }
+
+                        // bufSegStart is the offset in buf to the first
+                        // character of the segment we are parsing.
+                        int bufSegStart = buf.length();
+
+                        // Parse the segment
+                        pos = parseSection(rule, iref[0], limit, parser, buf, ILLEGAL_FUNC, true);
+
+                        // After parsing a segment, the relevant characters are
+                        // in buf, starting at offset bufSegStart.
+                        FunctionReplacer r =
+                            new FunctionReplacer(t,
+                                new StringReplacer(buf.substring(bufSegStart), parser.curData));
+
+                        // Replace the buffer contents with a stand-in
+                        buf.setLength(bufSegStart);
+                        buf.append(parser.generateStandInFor(r));
+                    }
+                    break;
+                case SymbolTable.SYMBOL_REF:
+                    // Handle variable references and segment references "$1" .. "$9"
+                    {
+                        // A variable reference must be followed immediately
+                        // by a Unicode identifier start and zero or more
+                        // Unicode identifier part characters, or by a digit
+                        // 1..9 if it is a segment reference.
+                        if (pos == limit) {
+                            // A variable ref character at the end acts as
+                            // an anchor to the context limit, as in perl.
+                            anchorEnd = true;
+                            break;
+                        }
+                        // Parse "$1" "$2" .. "$9" .. (no upper limit)
+                        c = rule.charAt(pos);
+                        int r = UCharacter.digit(c, 10);
+                        if (r >= 1 && r <= 9) {
+                            iref[0] = pos;
+                            r = Utility.parseNumber(rule, iref, 10);
+                            if (r < 0) {
+                                syntaxError("Undefined segment reference",
+                                            rule, start);
+                            }
+                            pos = iref[0];
+                            buf.append(parser.getSegmentStandin(r));
+                        } else {
+                            if (pp == null) { // Lazy create
+                                pp = new ParsePosition(0);
+                            }
+                            pp.setIndex(pos);
+                            String name = parser.parseData.
+                                parseReference(rule, pp, limit);
+                            if (name == null) {
+                                // This means the '$' was not followed by a
+                                // valid name.  Try to interpret it as an
+                                // end anchor then.  If this also doesn't work
+                                // (if we see a following character) then signal
+                                // an error.
+                                anchorEnd = true;
+                                break;
+                            }
+                            pos = pp.getIndex();
+                            // If this is a variable definition statement,
+                            // then the LHS variable will be undefined.  In
+                            // that case appendVariableDef() will append the
+                            // special placeholder char variableLimit-1.
+                            varStart = buf.length();
+                            parser.appendVariableDef(name, buf);
+                            varLimit = buf.length();
+                        }
+                    }
+                    break;
+                case DOT:
+                    buf.append(parser.getDotStandIn());
+                    break;
+                case KLEENE_STAR:
+                case ONE_OR_MORE:
+                case ZERO_OR_ONE:
+                    // Quantifiers.  We handle single characters, quoted strings,
+                    // variable references, and segments.
+                    //  a+      matches  aaa
+                    //  'foo'+  matches  foofoofoo
+                    //  $v+     matches  xyxyxy if $v == xy
+                    //  (seg)+  matches  segsegseg
+                    {
+                        if (isSegment && buf.length() == bufStart) {
+                            // The */+ immediately follows '('
+                            syntaxError("Misplaced quantifier", rule, start);
+                            break;
+                        } 
+ 
+                        int qstart, qlimit;
+                        // The */+ follows an isolated character or quote
+                        // or variable reference
+                        if (buf.length() == quoteLimit) {
+                            // The */+ follows a 'quoted string'
+                            qstart = quoteStart;
+                            qlimit = quoteLimit;
+                        } else if (buf.length() == varLimit) {
+                            // The */+ follows a $variableReference
+                            qstart = varStart;
+                            qlimit = varLimit;
+                        } else {
+                            // The */+ follows a single character, possibly
+                            // a segment standin
+                            qstart = buf.length() - 1;
+                            qlimit = qstart + 1;
+                        }
+
+                        UnicodeMatcher m;
+                        try {
+                            m = new StringMatcher(buf.toString(), qstart, qlimit,
+                                              0, parser.curData);
+                        } catch (RuntimeException e) {
+                            final String precontext = pos < 50 ? rule.substring(0, pos) : "..." + rule.substring(pos - 50, pos);
+                            final String postContext = limit-pos <= 50 ? rule.substring(pos, limit) : rule.substring(pos, pos+50) + "...";
+                            throw (RuntimeException)
+                                new IllegalIcuArgumentException("Failure in rule: " + precontext + "$$$"
+                                        + postContext)
+//#if defined(FOUNDATION10) || defined(J2SE13)
+//#else
+                                .initCause(e)
+//#endif
+                                ;
+                        }
+                        int min = 0;
+                        int max = Quantifier.MAX;
+                        switch (c) {
+                        case ONE_OR_MORE:
+                            min = 1;
+                            break;
+                        case ZERO_OR_ONE:
+                            min = 0;
+                            max = 1;
+                            break;
+                            // case KLEENE_STAR:
+                            //    do nothing -- min, max already set
+                        }
+                        m = new Quantifier(m, min, max);
+                        buf.setLength(qstart);
+                        buf.append(parser.generateStandInFor(m));
+                    }
+                    break;
+
+                //------------------------------------------------------
+                // Elements allowed ONLY WITHIN segments
+                //------------------------------------------------------
+                case SEGMENT_CLOSE:
+                    // assert(isSegment);
+                    // We're done parsing a segment.
+                    break main;
+
+                //------------------------------------------------------
+                // Elements allowed ONLY OUTSIDE segments
+                //------------------------------------------------------
+                case CONTEXT_ANTE:
+                    if (ante >= 0) {
+                        syntaxError("Multiple ante contexts", rule, start);
+                    }
+                    ante = buf.length();
+                    break;
+                case CONTEXT_POST:
+                    if (post >= 0) {
+                        syntaxError("Multiple post contexts", rule, start);
+                    }
+                    post = buf.length();
+                    break;
+                case CURSOR_POS:
+                    if (cursor >= 0) {
+                        syntaxError("Multiple cursors", rule, start);
+                    }
+                    cursor = buf.length();
+                    break;
+                case CURSOR_OFFSET:
+                    if (cursorOffset < 0) {
+                        if (buf.length() > 0) {
+                            syntaxError("Misplaced " + c, rule, start);
+                        }
+                        --cursorOffset;
+                    } else if (cursorOffset > 0) {
+                        if (buf.length() != cursorOffsetPos || cursor >= 0) {
+                            syntaxError("Misplaced " + c, rule, start);
+                        }
+                        ++cursorOffset;
+                    } else {
+                        if (cursor == 0 && buf.length() == 0) {
+                            cursorOffset = -1;
+                        } else if (cursor < 0) {
+                            cursorOffsetPos = buf.length();
+                            cursorOffset = 1;
+                        } else {
+                            syntaxError("Misplaced " + c, rule, start);
+                        }
+                    }
+                    break;
+
+                //------------------------------------------------------
+                // Non-special characters
+                //------------------------------------------------------
+                default:
+                    // Disallow unquoted characters other than [0-9A-Za-z]
+                    // in the printable ASCII range.  These characters are
+                    // reserved for possible future use.
+                    if (c >= 0x0021 && c <= 0x007E &&
+                        !((c >= '0' && c <= '9') ||
+                          (c >= 'A' && c <= 'Z') ||
+                          (c >= 'a' && c <= 'z'))) {
+                        syntaxError("Unquoted " + c, rule, start);
+                    }
+                    buf.append(c);
+                    break;
+                }
+            }
+            return pos;
+        }
+
+        /**
+         * Remove context.
+         */
+        void removeContext() {
+            text = text.substring(ante < 0 ? 0 : ante,
+                                  post < 0 ? text.length() : post);
+            ante = post = -1;
+            anchorStart = anchorEnd = false;
+        }
+
+        /**
+         * Return true if this half looks like valid output, that is, does not
+         * contain quantifiers or other special input-only elements.
+         */
+        public boolean isValidOutput(TransliteratorParser parser) {
+            for (int i=0; i<text.length(); ) {
+                int c = UTF16.charAt(text, i);
+                i += UTF16.getCharCount(c);
+                if (!parser.parseData.isReplacer(c)) {
+                    return false;
+                }
+            }
+            return true;
+        }
+
+        /**
+         * Return true if this half looks like valid input, that is, does not
+         * contain functions or other special output-only elements.
+         */
+        public boolean isValidInput(TransliteratorParser parser) {
+            for (int i=0; i<text.length(); ) {
+                int c = UTF16.charAt(text, i);
+                i += UTF16.getCharCount(c);
+                if (!parser.parseData.isMatcher(c)) {
+                    return false;
+                }
+            }
+            return true;
+        }
+    }
+
+    //----------------------------------------------------------------------
+    // PUBLIC methods
+    //----------------------------------------------------------------------
+
+    /**
+     * Constructor.  The constructor body is empty; the working fields
+     * (dataVector, idBlockVector, compoundFilter, and so on) are assigned
+     * by parseRules() each time a rule set is parsed.
+     */
+    public TransliteratorParser() {
+    }
+
+    /**
+     * Parse a set of rules.  After the parse completes, examine the public
+     * data members for results.
+     */
+    public void parse(String rules, int dir) {
+        parseRules(new RuleArray(new String[] { rules }), dir);
+    }
+   
+    /*
+     * Parse a set of rules.  After the parse completes, examine the public
+     * data members for results.
+     */
+/*    public void parse(ResourceReader rules, int direction) {
+        parseRules(new RuleReader(rules), direction);
+    }*/
+
+    //----------------------------------------------------------------------
+    // PRIVATE methods
+    //----------------------------------------------------------------------
+
+    /**
+     * Parse an array of zero or more rules.  The strings in the array are
+     * treated as if they were concatenated together, with rule terminators
+     * inserted between array elements if not present already.
+     *
+     * Any previous rules are discarded.  Typically this method is called exactly
+     * once, during construction.
+     *
+     * On return, dataVector holds one RuleBasedTransliterator.Data object per
+     * group of rules (it is empty if there are no rules), idBlockVector holds
+     * the ::ID blocks, and compoundFilter holds the global filter, if any.
+     *
+     * Errors are collected rather than thrown immediately: parsing resumes
+     * after the offending rule, stopping once more than 30 errors have been
+     * seen.  At the end the first error is thrown, with the later ones
+     * chained to it as causes where initCause is available.
+     *
+     * @param ruleArray source of rule lines; reset() is called on it and
+     * lines are read with nextLine() until that returns null
+     * @param dir the direction to parse for; compared against
+     * Transliterator.FORWARD and Transliterator.REVERSE
+     * @exception IllegalArgumentException (normally an
+     * IllegalIcuArgumentException raised by syntaxError) if there is a
+     * syntax error in the rules
+     */
+    void parseRules(RuleBody ruleArray, int dir) {
+        boolean parsingIDs = true;
+        int ruleCount = 0;
+
+        dataVector = new Vector();
+        idBlockVector = new Vector();
+        curData = null;
+        direction = dir;
+        compoundFilter = null;
+        variablesVector = new Vector();
+        variableNames = new Hashtable();
+        parseData = new ParseData();
+
+        List errors = new ArrayList();
+        int errorCount = 0;
+
+        ruleArray.reset();
+
+        StringBuffer idBlockResult = new StringBuffer();
+
+        // Ordinal (value of ruleCount, so 1-based) of the rule in which the
+        // global filter was seen, or -1 if none has been seen.  It is checked
+        // after the main loop: the filter must be the first rule when parsing
+        // FORWARD and the last rule when parsing REVERSE.
+        int compoundFilterOffset = -1;
+
+    main:
+        for (;;) {
+            String rule = ruleArray.nextLine();
+            if (rule == null) {
+                break;
+            }
+            int pos = 0;
+            int limit = rule.length();
+            while (pos < limit) {
+                char c = rule.charAt(pos++);
+                if (UCharacterProperty.isRuleWhiteSpace(c)) {
+                    continue;
+                }
+                // Skip lines starting with the comment character
+                if (c == RULE_COMMENT_CHAR) {
+                    pos = rule.indexOf("\n", pos) + 1;
+                    if (pos == 0) {
+                        break; // No "\n" found; rest of rule is a comment
+                    }
+                    continue; // Either fall out or restart with next line
+                }
+
+                // skip empty rules
+                if (c == END_OF_RULE)
+                    continue;
+
+                // Often a rule file contains multiple errors.  It's
+                // convenient to the rule author if these are all reported
+                // at once.  We keep parsing rules even after a failure, up
+                // to a specified limit, and report all errors at once.
+                try {
+                    ++ruleCount;
+
+                    // We've found the start of a rule or ID.  c is its first
+                    // character, and pos points past c.
+                    --pos;
+                    // Look for an ID token.  Must have at least ID_TOKEN_LEN + 1
+                    // chars left.
+                    if ((pos + ID_TOKEN_LEN + 1) <= limit &&
+                            rule.regionMatches(pos, ID_TOKEN, 0, ID_TOKEN_LEN)) {
+                        pos += ID_TOKEN_LEN;
+                        // Skip whitespace after the ID token.  The bound is
+                        // tested BEFORE each read so that a token followed
+                        // only by whitespace cannot index past the end of the
+                        // rule; such input then falls through to the
+                        // "Invalid ::ID" syntax error below instead of
+                        // escaping as a StringIndexOutOfBoundsException.
+                        while (pos < limit &&
+                               UCharacterProperty.isRuleWhiteSpace(rule.charAt(pos))) {
+                            ++pos;
+                        }
+                        int[] p = new int[] { pos };
+
+                        if (!parsingIDs) {
+                            // An ID after a run of rules closes the current
+                            // rule group.
+                            if (curData != null) {
+                                if (direction == Transliterator.FORWARD)
+                                    dataVector.add(curData);
+                                else
+                                    dataVector.insertElementAt(curData, 0);
+                                curData = null;
+                            }
+                            parsingIDs = true;
+                        }
+
+                        TransliteratorIDParser.SingleID id =
+                            TransliteratorIDParser.parseSingleID(
+                                          rule, p, direction);
+                        if (p[0] != pos && Utility.parseChar(rule, p, END_OF_RULE)) {
+                            // Successful ::ID parse.
+
+                            if (direction == Transliterator.FORWARD) {
+                                idBlockResult.append(id.canonID).append(END_OF_RULE);
+                            } else {
+                                idBlockResult.insert(0, id.canonID + END_OF_RULE);
+                            }
+
+                        } else {
+                            // Couldn't parse an ID.  Try to parse a global filter
+                            int[] withParens = new int[] { -1 };
+                            UnicodeSet f = TransliteratorIDParser.parseGlobalFilter(rule, p, direction, withParens, null);
+                            if (f != null && Utility.parseChar(rule, p, END_OF_RULE)) {
+                                if ((direction == Transliterator.FORWARD) ==
+                                    (withParens[0] == 0)) {
+                                    if (compoundFilter != null) {
+                                        // Multiple compound filters
+                                        syntaxError("Multiple global filters", rule, pos);
+                                    }
+                                    compoundFilter = f;
+                                    compoundFilterOffset = ruleCount;
+                                }
+                            } else {
+                                // Invalid ::id
+                                // Can be parsed as neither an ID nor a global filter
+                                syntaxError("Invalid ::ID", rule, pos);
+                            }
+                        }
+
+                        pos = p[0];
+                    } else {
+                        if (parsingIDs) {
+                            // First rule after a run of IDs: close the ID
+                            // block and open a new rule group.
+                            if (direction == Transliterator.FORWARD)
+                                idBlockVector.add(idBlockResult.toString());
+                            else
+                                idBlockVector.insertElementAt(idBlockResult.toString(), 0);
+                            idBlockResult.delete(0, idBlockResult.length());
+                            parsingIDs = false;
+                            curData = new RuleBasedTransliterator.Data();
+
+                            // By default, rules use part of the private use area
+                            // E000..F8FF for variables and other stand-ins.  Currently
+                            // the range F000..F8FF is typically sufficient.  The 'use
+                            // variable range' pragma allows rule sets to modify this.
+                            setVariableRange(0xF000, 0xF8FF);
+                        }
+
+                        if (resemblesPragma(rule, pos, limit)) {
+                            int ppp = parsePragma(rule, pos, limit);
+                            if (ppp < 0) {
+                                syntaxError("Unrecognized pragma", rule, pos);
+                            }
+                            pos = ppp;
+                        } else {
+                            // Parse a rule
+                            pos = parseRule(rule, pos, limit);
+                        }
+                    }
+                } catch (IllegalArgumentException e) {
+                    if (errorCount == 30) {
+                        errors.add(new IllegalIcuArgumentException("\nMore than 30 errors; further messages squelched")
+//#if defined(FOUNDATION10) || defined(J2SE13)
+//#else
+                            .initCause(e)
+//#endif
+                            );
+                        break main;
+                    }
+                    e.fillInStackTrace();
+                    errors.add(e);
+                    ++errorCount;
+                    pos = ruleEnd(rule, pos, limit) + 1; // +1 advances past ';'
+                }
+            }
+        }
+        // Flush whichever kind of block was still open when input ended.
+        if (parsingIDs && idBlockResult.length() > 0) {
+            if (direction == Transliterator.FORWARD)
+                idBlockVector.add(idBlockResult.toString());
+            else
+                idBlockVector.insertElementAt(idBlockResult.toString(), 0);
+        }
+        else if (!parsingIDs && curData != null) {
+            if (direction == Transliterator.FORWARD)
+                dataVector.add(curData);
+            else
+                dataVector.insertElementAt(curData, 0);
+        }
+
+        // Convert the set vector to an array
+        for (int i = 0; i < dataVector.size(); i++) {
+            RuleBasedTransliterator.Data data = (RuleBasedTransliterator.Data)dataVector.get(i);
+            data.variables = new Object[variablesVector.size()];
+            variablesVector.copyInto(data.variables);
+            data.variableNames = new Hashtable();
+            data.variableNames.putAll(variableNames);
+        }
+        variablesVector = null;
+
+        // Do more syntax checking and index the rules
+        try {
+            if (compoundFilter != null) {
+                if ((direction == Transliterator.FORWARD &&
+                     compoundFilterOffset != 1) ||
+                    (direction == Transliterator.REVERSE &&
+                     compoundFilterOffset != ruleCount)) {
+                    throw new IllegalIcuArgumentException("Compound filters misplaced");
+                }
+            }
+
+            for (int i = 0; i < dataVector.size(); i++) {
+                RuleBasedTransliterator.Data data = (RuleBasedTransliterator.Data)dataVector.get(i);
+                data.ruleSet.freeze();
+            }
+
+            if (idBlockVector.size() == 1 && ((String)idBlockVector.get(0)).length() == 0)
+                idBlockVector.remove(0);
+
+        } catch (IllegalArgumentException e) {
+            e.fillInStackTrace();
+            errors.add(e);
+        }
+
+        if (errors.size() != 0) {
+//#if defined(FOUNDATION10) || defined(J2SE13)
+//#else
+            // Link the errors into a single cause chain, last to first.  The
+            // chain is walked as Throwable so that a cause which is not a
+            // RuntimeException cannot trigger a ClassCastException here.
+            for (int i = errors.size()-1; i > 0; --i) {
+                Throwable previous = (Throwable) errors.get(i-1);
+                while (previous.getCause() != null) {
+                    previous = previous.getCause(); // chain specially
+                }
+                previous.initCause((Throwable) errors.get(i));
+            }
+//#endif
+            throw (RuntimeException) errors.get(0);
+            // if initCause not supported: throw new IllegalArgumentException(errors.toString());
+        }
+    }
+
+    /**
+     * MAIN PARSER.  Parse the next rule in the given rule string, starting
+     * at pos.  Return the index after the last character parsed.  Do not
+     * parse characters at or after limit.
+     *
+     * Important:  The character at pos must be a non-whitespace character
+     * that is not the comment character.
+     *
+     * This method handles quoting, escaping, and whitespace removal.  It
+     * parses the end-of-rule character.  It recognizes context and cursor
+     * indicators.  Once it does a lexical breakdown of the rule at pos, it
+     * creates a rule object and adds it to our rule list.
+     *
+     * A variable definition (operator VARIABLE_DEF_OP) does not produce a
+     * rule; it records the right-hand text in variableNames instead.  A
+     * rule whose operator does not apply to the current direction is
+     * parsed and validated up to that point but not added.
+     *
+     * This method is tightly coupled to the inner class RuleHalf.
+     *
+     * @param rule the string containing the rule
+     * @param pos index of the first character of the rule
+     * @param limit index past the last character that may be parsed
+     * @return the index after the last character parsed
+     */
+    private int parseRule(String rule, int pos, int limit) {
+        // Locate the left side, operator, and right side
+        int start = pos;
+        char operator = 0;
+
+        // Set up segments data
+        segmentStandins = new StringBuffer();
+        segmentObjects = new Vector();
+
+        RuleHalf left  = new RuleHalf();
+        RuleHalf right = new RuleHalf();
+
+        undefinedVariableName = null;
+        pos = left.parse(rule, pos, limit, this);
+
+        // Step back one character and read it as the candidate operator.
+        // NOTE(review): this presumes left.parse() returns the index just
+        // past the character that terminated the half -- confirm in
+        // RuleHalf.parse().
+        if (pos == limit ||
+            OPERATORS.indexOf(operator = rule.charAt(--pos)) < 0) {
+            syntaxError("No operator pos=" + pos, rule, start);
+        }
+        ++pos;
+
+        // Found an operator char.  Check for forward-reverse operator.
+        if (operator == REVERSE_RULE_OP &&
+            (pos < limit && rule.charAt(pos) == FORWARD_RULE_OP)) {
+            ++pos;
+            operator = FWDREV_RULE_OP;
+        }
+
+        // Translate alternate op characters.
+        switch (operator) {
+        case ALT_FORWARD_RULE_OP:
+            operator = FORWARD_RULE_OP;
+            break;
+        case ALT_REVERSE_RULE_OP:
+            operator = REVERSE_RULE_OP;
+            break;
+        case ALT_FWDREV_RULE_OP:
+            operator = FWDREV_RULE_OP;
+            break;
+        }
+
+        pos = right.parse(rule, pos, limit, this);
+
+        // If the right half stopped before limit, the character it stopped
+        // on must be the end-of-rule marker; anything else is an error.
+        if (pos < limit) {
+            if (rule.charAt(--pos) == END_OF_RULE) {
+                ++pos;
+            } else {
+                // RuleHalf parser must have terminated at an operator
+                syntaxError("Unquoted operator", rule, start);
+            }
+        }
+
+        if (operator == VARIABLE_DEF_OP) {
+            // LHS is the name.  RHS is a single character, either a literal
+            // or a set (already parsed).  If RHS is longer than one
+            // character, it is either a multi-character string, or multiple
+            // sets, or a mixture of chars and sets -- syntax error.
+
+            // We expect to see a single undefined variable (the one being
+            // defined).
+            if (undefinedVariableName == null) {
+                syntaxError("Missing '$' or duplicate definition", rule, start);
+            }
+            // The LHS must consist of exactly one character equal to the
+            // current variableLimit.
+            // NOTE(review): presumably that is the placeholder appended by
+            // appendVariableDef() for an undefined name -- confirm there.
+            if (left.text.length() != 1 || left.text.charAt(0) != variableLimit) {
+                syntaxError("Malformed LHS", rule, start);
+            }
+            if (left.anchorStart || left.anchorEnd ||
+                right.anchorStart || right.anchorEnd) {
+                syntaxError("Malformed variable def", rule, start);
+            }
+            // We allow anything on the right, including an empty string.
+            int n = right.text.length();
+            char[] value = new char[n];
+            right.text.getChars(0, n, value, 0);
+            variableNames.put(undefinedVariableName, value);
+
+            // NOTE(review): presumably restores variableLimit after a
+            // decrement made when the placeholder was generated -- confirm
+            // in appendVariableDef().
+            ++variableLimit;
+            return pos;
+        }
+
+        // If this is not a variable definition rule, we shouldn't have
+        // any undefined variable names.
+        if (undefinedVariableName != null) {
+            syntaxError("Undefined variable $" + undefinedVariableName,
+                        rule, start);
+        }
+
+        // Verify segments
+        if (segmentStandins.length() > segmentObjects.size()) {
+            syntaxError("Undefined segment reference", rule, start);
+        }
+        for (int i=0; i<segmentStandins.length(); ++i) {
+            if (segmentStandins.charAt(i) == 0) {
+                syntaxError("Internal error", rule, start); // will never happen
+            }
+        }
+        for (int i=0; i<segmentObjects.size(); ++i) {
+            if (segmentObjects.elementAt(i) == null) {
+                syntaxError("Internal error", rule, start); // will never happen
+            }
+        }
+
+        // If the direction we want doesn't match the rule
+        // direction, do nothing.
+        if (operator != FWDREV_RULE_OP &&
+            ((direction == Transliterator.FORWARD) != (operator == FORWARD_RULE_OP))) {
+            return pos;
+        }
+
+        // Transform the rule into a forward rule by swapping the
+        // sides if necessary.
+        if (direction == Transliterator.REVERSE) {
+            RuleHalf temp = left;
+            left = right;
+            right = temp;
+        }
+
+        // Remove non-applicable elements in forward-reverse
+        // rules.  Bidirectional rules ignore elements that do not
+        // apply.
+        if (operator == FWDREV_RULE_OP) {
+            right.removeContext();
+            left.cursor = -1;
+            left.cursorOffset = 0;
+        }
+
+        // Normalize context
+        if (left.ante < 0) {
+            left.ante = 0;
+        }
+        if (left.post < 0) {
+            left.post = left.text.length();
+        }
+
+        // Context is only allowed on the input side.  Cursors are only
+        // allowed on the output side.  Segment delimiters can only appear
+        // on the left, and references on the right.  Cursor offset
+        // cannot appear without an explicit cursor.  Cursor offset
+        // cannot place the cursor outside the limits of the context.
+        // Anchors are only allowed on the input side.
+        if (right.ante >= 0 || right.post >= 0 || left.cursor >= 0 ||
+            (right.cursorOffset != 0 && right.cursor < 0) ||
+            // - The following two checks were used to ensure that the
+            // - the cursor offset stayed within the ante- or postcontext.
+            // - However, with the addition of quantifiers, we have to
+            // - allow arbitrary cursor offsets and do runtime checking.
+            //(right.cursorOffset > (left.text.length() - left.post)) ||
+            //(-right.cursorOffset > left.ante) ||
+            right.anchorStart || right.anchorEnd ||
+            !left.isValidInput(this) || !right.isValidOutput(this) ||
+            left.ante > left.post) {
+            syntaxError("Malformed rule", rule, start);
+        }
+
+        // Flatten segment objects vector to an array
+        // (left null when the rule defines no segments)
+        UnicodeMatcher[] segmentsArray = null;
+        if (segmentObjects.size() > 0) {
+            segmentsArray = new UnicodeMatcher[segmentObjects.size()];
+            segmentObjects.toArray(segmentsArray);
+        }
+
+        curData.ruleSet.addRule(new TransliterationRule(
+                                     left.text, left.ante, left.post,
+                                     right.text, right.cursor, right.cursorOffset,
+                                     segmentsArray,
+                                     left.anchorStart, left.anchorEnd,
+                                     curData));
+
+        return pos;
+    }
+
+    /**
+     * Set the variable range to [start, end] (inclusive).
+     */
+    private void setVariableRange(int start, int end) {
+        if (start > end || start < 0 || end > 0xFFFF) {
+            throw new IllegalIcuArgumentException("Invalid variable range " + start + ", " + end);
+        }
+        
+        curData.variablesBase = (char) start; // first private use
+
+        if (dataVector.size() == 0) {
+            variableNext = (char) start;
+            variableLimit = (char) (end + 1);
+        }
+    }
+
+    /**
+     * Assert that the given character is NOT within the variable range.
+     * If it is, signal an error.  This is necessary to ensure that the
+     * variable range does not overlap characters used in a rule.
+     *
+     * @param ch the character to test against
+     * [curData.variablesBase, variableLimit)
+     * @param rule the rule text, used for the error message
+     * @param start index of the start of the current rule within rule
+     */
+    private void checkVariableRange(int ch, String rule, int start) {
+        if (ch < curData.variablesBase || ch >= variableLimit) {
+            return; // outside the reserved range: nothing to report
+        }
+        syntaxError("Variable range character in rule", rule, start);
+    }
+
+    // (The following method is part of an unimplemented feature.
+    // Remove this clover pragma after the feature is implemented.
+    // 2003-06-11 ICU 2.6 Alan)
+    ///CLOVER:OFF
+    /**
+     * Set the maximum backup to 'backup', in response to a pragma
+     * statement.
+     */
+    private void pragmaMaximumBackup(int backup) {
+        //TODO Finish
+        throw new IllegalIcuArgumentException("use maximum backup pragma not implemented yet");
+    }
+    ///CLOVER:ON
+
+    // (The following method is part of an unimplemented feature.
+    // Remove this clover pragma after the feature is implemented.
+    // 2003-06-11 ICU 2.6 Alan)
+    ///CLOVER:OFF
+    /**
+     * Begin normalizing all rules using the given mode, in response
+     * to a pragma statement.
+     */
+    private void pragmaNormalizeRules(Normalizer.Mode mode) {
+        //TODO Finish
+        throw new IllegalIcuArgumentException("use normalize rules pragma not implemented yet");
+    }
+    ///CLOVER:ON
+
+    /**
+     * Return true if the given rule looks like a pragma.
+     * @param pos offset to the first non-whitespace character
+     * of the rule.
+     * @param limit pointer past the last character of the rule.
+     */
+    static boolean resemblesPragma(String rule, int pos, int limit) {
+        // Must start with /use\s/i
+        return Utility.parsePattern(rule, pos, limit, "use ", null) >= 0;
+    }
+
+    /**
+     * Parse a pragma and apply its effect to this parser.  The caller
+     * must already have obtained true from resemblesPragma() for the
+     * same rule and position.
+     * @param rule the rule text containing the pragma
+     * @param pos offset to the first non-whitespace character
+     * of the rule.
+     * @param limit pointer past the last character of the rule.
+     * @return the position index after the final ';' of the pragma,
+     * or -1 on failure.
+     */
+    private int parsePragma(String rule, int pos, int limit) {
+        // resemblesPragma() has already matched /use\s/i at pos, so the
+        // pragma body begins 4 characters further on.
+        pos += 4;
+
+        // Receives the numeric arguments (at most two) of a pragma.
+        int[] numbers = new int[2];
+
+        // Recognized pragmas, tried in this order:
+        //   use variable range 0xE000 0xEFFF;
+        //   use maximum backup 16;
+        //   use nfd rules;
+        //   use nfc rules;
+        int end = Utility.parsePattern(rule, pos, limit, "~variable range # #~;", numbers);
+        if (end >= 0) {
+            setVariableRange(numbers[0], numbers[1]);
+            return end;
+        }
+
+        end = Utility.parsePattern(rule, pos, limit, "~maximum backup #~;", numbers);
+        if (end >= 0) {
+            pragmaMaximumBackup(numbers[0]);
+            return end;
+        }
+
+        end = Utility.parsePattern(rule, pos, limit, "~nfd rules~;", null);
+        if (end >= 0) {
+            pragmaNormalizeRules(Normalizer.NFD);
+            return end;
+        }
+
+        end = Utility.parsePattern(rule, pos, limit, "~nfc rules~;", null);
+        if (end >= 0) {
+            pragmaNormalizeRules(Normalizer.NFC);
+            return end;
+        }
+
+        // None of the known pragma patterns matched: syntax error.
+        return -1;
+    }
+
+    /**
+     * Throw an exception indicating a syntax error.  Search the rule string
+     * for the probable end of the rule.  Of course, if the error is that
+     * the end of rule marker is missing, then the rule end will not be found.
+     * In any case the rule start will be correctly reported.
+     * @param msg error description
+     * @param rule pattern string
+     * @param start position of first character of current rule
+     */
+    static final void syntaxError(String msg, String rule, int start) {
+        int end = ruleEnd(rule, start, rule.length());
+        throw new IllegalIcuArgumentException(msg + " in \"" +
+                                           Utility.escape(rule.substring(start, end)) + '"');
+    }
+
+    static final int ruleEnd(String rule, int start, int limit) {
+        int end = Utility.quotedIndexOf(rule, start, limit, ";");
+        if (end < 0) {
+            end = limit;
+        }
+        return end;
+    }
+
+    /**
+     * Parse a UnicodeSet out, store it, and return the stand-in character
+     * used to represent it.
+     */
+    private final char parseSet(String rule, ParsePosition pos) {
+        UnicodeSet set = new UnicodeSet(rule, pos, parseData);
+        if (variableNext >= variableLimit) {
+            throw new RuntimeException("Private use variables exhausted");
+        }
+        set.compact();
+        return generateStandInFor(set);
+    }
+
+    /**
+     * Generate and return a stand-in for a new UnicodeMatcher or UnicodeReplacer.
+     * Store the object.
+     */
+    char generateStandInFor(Object obj) {
+        // assert(obj != null);
+
+        // Look up previous stand-in, if any.  This is a short list
+        // (typical n is 0, 1, or 2); linear search is optimal.
+        for (int i=0; i<variablesVector.size(); ++i) {
+            if (variablesVector.elementAt(i) == obj) { // [sic] pointer comparison
+                return (char) (curData.variablesBase + i);
+            }
+        }
+
+        if (variableNext >= variableLimit) {
+            throw new RuntimeException("Variable range exhausted");
+        }
+        variablesVector.addElement(obj);
+        return variableNext++;
+    }
+
+    /**
+     * Return the standin for segment seg (1-based).
+     */
+    public char getSegmentStandin(int seg) {
+        if (segmentStandins.length() < seg) {
+            segmentStandins.setLength(seg);
+        }
+        char c = segmentStandins.charAt(seg-1);
+        if (c == 0) {
+            if (variableNext >= variableLimit) {
+                throw new RuntimeException("Variable range exhausted");
+            }
+            c = variableNext++;
+            // Set a placeholder in the master variables vector that will be
+            // filled in later by setSegmentObject().  We know that we will get
+            // called first because setSegmentObject() will call us.
+            variablesVector.addElement(null);
+            segmentStandins.setCharAt(seg-1, c);
+        }
+        return c;
+    }
+    
+    /**
+     * Set the object for segment seg (1-based).
+     */
+    public void setSegmentObject(int seg, StringMatcher obj) {
+        // Since we call parseSection() recursively, nested
+        // segments will result in segment i+1 getting parsed
+        // and stored before segment i; be careful with the
+        // vector handling here.
+        if (segmentObjects.size() < seg) {
+            segmentObjects.setSize(seg);
+        }
+        int index = getSegmentStandin(seg) - curData.variablesBase;
+        if (segmentObjects.elementAt(seg-1) != null ||
+            variablesVector.elementAt(index) != null) {
+            throw new RuntimeException(); // should never happen
+        }
+        segmentObjects.setElementAt(obj, seg-1);
+        variablesVector.setElementAt(obj, index);
+    }
+
+    /**
+     * Return the stand-in for the dot set.  The stand-in is created on
+     * the first call and the cached value is returned on later calls.
+     * @return the stand-in character for the dot set
+     */
+    char getDotStandIn() {
+        if (dotStandIn != -1) {
+            // Already allocated.
+            return (char) dotStandIn;
+        }
+        // -1 marks "not allocated yet": create and remember the stand-in.
+        dotStandIn = generateStandInFor(new UnicodeSet(DOT_SET));
+        return (char) dotStandIn;
+    }
+
+    /**
+     * Append the value of the given variable name to the given
+     * StringBuffer.  The first unknown name is tolerated: it is
+     * remembered and a placeholder character is appended in its place.
+     * @param name the variable name to look up
+     * @param buf the buffer to append to
+     * @exception IllegalIcuArgumentException if the name is unknown and
+     * an earlier unknown name is still recorded.
+     * @exception RuntimeException if no placeholder character is
+     * available for the first unknown name.
+     */
+    private void appendVariableDef(String name, StringBuffer buf) {
+        char[] definition = (char[]) variableNames.get(name);
+        if (definition != null) {
+            buf.append(definition);
+            return;
+        }
+
+        // We allow one undefined variable so that variable definition
+        // statements work.  Any further undefined variable is an error.
+        if (undefinedVariableName != null) {
+            throw new IllegalIcuArgumentException("Undefined variable $"
+                                               + name);
+        }
+
+        // First undefined variable: save its name and append the special
+        // placeholder variableLimit-1, shrinking the range by one.
+        undefinedVariableName = name;
+        if (variableNext >= variableLimit) {
+            throw new RuntimeException("Private use variables exhausted");
+        }
+        buf.append((char) --variableLimit);
+    }
+}
+
+//eof