2 **********************************************************************
\r
3 * Copyright (c) 2001-2010, International Business Machines
\r
4 * Corporation and others. All Rights Reserved.
\r
5 **********************************************************************
\r
7 package com.ibm.icu.text;
\r
9 import java.text.ParsePosition;
\r
10 import java.util.ArrayList;
\r
11 import java.util.Hashtable;
\r
12 import java.util.List;
\r
13 import java.util.Vector;
\r
15 import com.ibm.icu.impl.IllegalIcuArgumentException;
\r
16 import com.ibm.icu.impl.UCharacterProperty;
\r
17 import com.ibm.icu.impl.Utility;
\r
18 import com.ibm.icu.lang.UCharacter;
\r
19 import com.ibm.icu.text.RuleBasedTransliterator.Data;
\r
21 class TransliteratorParser {
\r
23 //----------------------------------------------------------------------
\r
25 //----------------------------------------------------------------------
\r
28 * PUBLIC data member.
\r
29 * A Vector of RuleBasedTransliterator.Data objects, one for each discrete group
\r
30 * of rules in the rule set
\r
32 public Vector<Data> dataVector;
\r
35 * PUBLIC data member.
\r
36 * A Vector of Strings containing all of the ID blocks in the rule set
\r
38 public Vector<String> idBlockVector;
\r
41 * The current data object for which we are parsing rules
\r
43 private Data curData;
\r
46 * PUBLIC data member containing the parsed compound filter, if any.
\r
48 public UnicodeSet compoundFilter;
\r
51 private int direction;
\r
54 * Temporary symbol table used during parsing.
\r
56 private ParseData parseData;
\r
59 * Temporary vector of set variables. When parsing is complete, this
\r
60 * is copied into the array data.variables. As with data.variables,
\r
61 * element 0 corresponds to character data.variablesBase.
\r
63 private Vector<Object> variablesVector;
\r
66 * Temporary table of variable names. When parsing is complete, this is
\r
67 * copied into data.variableNames.
\r
69 private Hashtable<String, char[]> variableNames;
\r
72 * String of standins for segments. Used during the parsing of a single
\r
73 * rule. segmentStandins.charAt(0) is the standin for "$1" and corresponds
\r
74 * to StringMatcher object segmentObjects.elementAt(0), etc.
\r
76 private StringBuffer segmentStandins;
\r
79 * Vector of StringMatcher objects for segments. Used during the
\r
80 * parsing of a single rule.
\r
81 * segmentStandins.charAt(0) is the standin for "$1" and corresponds
\r
82 * to StringMatcher object segmentObjects.elementAt(0), etc.
\r
84 private Vector<StringMatcher> segmentObjects;
\r
87 * The next available stand-in for variables. This starts at some point in
\r
88 * the private use area (discovered dynamically) and increments up toward
\r
89 * <code>variableLimit</code>. At any point during parsing, available
\r
90 * variables are <code>variableNext..variableLimit-1</code>.
\r
92 private char variableNext;
\r
95 * The last available stand-in for variables. This is discovered
\r
96 * dynamically. At any point during parsing, available variables are
\r
97 * <code>variableNext..variableLimit-1</code>. During variable definition
\r
98 * we use the special value variableLimit-1 as a placeholder.
\r
100 private char variableLimit;
\r
103 * When we encounter an undefined variable, we do not immediately signal
\r
104 * an error, in case we are defining this variable, e.g., "$a = [a-z];".
\r
105 * Instead, we save the name of the undefined variable, and substitute
\r
106 * in the placeholder char variableLimit - 1, and decrement
\r
109 private String undefinedVariableName;
\r
112 * The stand-in character for the 'dot' set, represented by '.' in
\r
113 * patterns. This is allocated the first time it is needed, and
\r
114 * reused thereafter.
\r
116 private int dotStandIn = -1;
\r
118 //----------------------------------------------------------------------
\r
120 //----------------------------------------------------------------------
\r
122 // Indicator for ID blocks
\r
123 private static final String ID_TOKEN = "::";
\r
124 private static final int ID_TOKEN_LEN = 2;
\r
127 (reserved for future expansion)
\r
128 // markers for beginning and end of rule groups
\r
129 private static final String BEGIN_TOKEN = "BEGIN";
\r
130 private static final String END_TOKEN = "END";
\r
134 private static final char VARIABLE_DEF_OP = '=';
\r
135 private static final char FORWARD_RULE_OP = '>';
\r
136 private static final char REVERSE_RULE_OP = '<';
\r
137 private static final char FWDREV_RULE_OP = '~'; // internal rep of <> op
\r
139 private static final String OPERATORS = "=><\u2190\u2192\u2194";
\r
140 private static final String HALF_ENDERS = "=><\u2190\u2192\u2194;";
\r
142 // Other special characters
\r
143 private static final char QUOTE = '\'';
\r
144 private static final char ESCAPE = '\\';
\r
145 private static final char END_OF_RULE = ';';
\r
146 private static final char RULE_COMMENT_CHAR = '#';
\r
148 private static final char CONTEXT_ANTE = '{'; // ante{key
\r
149 private static final char CONTEXT_POST = '}'; // key}post
\r
150 private static final char CURSOR_POS = '|';
\r
151 private static final char CURSOR_OFFSET = '@';
\r
152 private static final char ANCHOR_START = '^';
\r
154 private static final char KLEENE_STAR = '*';
\r
155 private static final char ONE_OR_MORE = '+';
\r
156 private static final char ZERO_OR_ONE = '?';
\r
158 private static final char DOT = '.';
\r
159 private static final String DOT_SET = "[^[:Zp:][:Zl:]\\r\\n$]";
\r
161 // By definition, the ANCHOR_END special character is a
\r
162 // trailing SymbolTable.SYMBOL_REF character.
\r
163 // private static final char ANCHOR_END = '$';
\r
165 // Segments of the input string are delimited by "(" and ")". In the
\r
166 // output string these segments are referenced as "$1", "$2", etc.
\r
167 private static final char SEGMENT_OPEN = '(';
\r
168 private static final char SEGMENT_CLOSE = ')';
\r
170 // A function is denoted &Source-Target/Variant(text)
\r
171 private static final char FUNCTION = '&';
\r
173 // Aliases for some of the syntax characters. These are provided so
\r
174 // transliteration rules can be expressed in XML without clashing with
\r
175 // XML syntax characters '<', '>', and '&'.
\r
176 private static final char ALT_REVERSE_RULE_OP = '\u2190'; // Left Arrow
\r
177 private static final char ALT_FORWARD_RULE_OP = '\u2192'; // Right Arrow
\r
178 private static final char ALT_FWDREV_RULE_OP = '\u2194'; // Left Right Arrow
\r
179 private static final char ALT_FUNCTION = '\u2206'; // Increment (~Greek Capital Delta)
\r
181 // Special characters disallowed at the top level
\r
182 private static UnicodeSet ILLEGAL_TOP = new UnicodeSet("[\\)]");
\r
184 // Special characters disallowed within a segment
\r
185 private static UnicodeSet ILLEGAL_SEG = new UnicodeSet("[\\{\\}\\|\\@]");
\r
187 // Special characters disallowed within a function argument
\r
188 private static UnicodeSet ILLEGAL_FUNC = new UnicodeSet("[\\^\\(\\.\\*\\+\\?\\{\\}\\|\\@]");
\r
190 //----------------------------------------------------------------------
\r
192 //----------------------------------------------------------------------
\r
195 * This class implements the SymbolTable interface. It is used
\r
196 * during parsing to give UnicodeSet access to variables that
\r
197 * have been defined so far. Note that it uses variablesVector,
\r
198 * _not_ data.variables.
\r
200 private class ParseData implements SymbolTable {
\r
203 * Implement SymbolTable API.
\r
205 public char[] lookup(String name) {
\r
206 return variableNames.get(name);
\r
210 * Implement SymbolTable API.
\r
212 public UnicodeMatcher lookupMatcher(int ch) {
\r
213 // Note that we cannot use data.lookup() because the
\r
214 // set array has not been constructed yet.
\r
215 int i = ch - curData.variablesBase;
\r
216 if (i >= 0 && i < variablesVector.size()) {
\r
217 return (UnicodeMatcher) variablesVector.elementAt(i);
\r
223 * Implement SymbolTable API. Parse out a symbol reference
\r
226 public String parseReference(String text, ParsePosition pos, int limit) {
\r
227 int start = pos.getIndex();
\r
229 while (i < limit) {
\r
230 char c = text.charAt(i);
\r
231 if ((i==start && !UCharacter.isUnicodeIdentifierStart(c)) ||
\r
232 !UCharacter.isUnicodeIdentifierPart(c)) {
\r
237 if (i == start) { // No valid name chars
\r
241 return text.substring(start, i);
\r
245 * Return true if the given character is a matcher standin or a plain
\r
246 * character (non standin).
\r
248 public boolean isMatcher(int ch) {
\r
249 // Note that we cannot use data.lookup() because the
\r
250 // set array has not been constructed yet.
\r
251 int i = ch - curData.variablesBase;
\r
252 if (i >= 0 && i < variablesVector.size()) {
\r
253 return variablesVector.elementAt(i) instanceof UnicodeMatcher;
\r
259 * Return true if the given character is a replacer standin or a plain
\r
260 * character (non standin).
\r
262 public boolean isReplacer(int ch) {
\r
263 // Note that we cannot use data.lookup() because the
\r
264 // set array has not been constructed yet.
\r
265 int i = ch - curData.variablesBase;
\r
266 if (i >= 0 && i < variablesVector.size()) {
\r
267 return variablesVector.elementAt(i) instanceof UnicodeReplacer;
\r
273 //----------------------------------------------------------------------
\r
274 // classes RuleBody, RuleArray, and RuleReader
\r
275 //----------------------------------------------------------------------
\r
278 * A private abstract class representing the interface to rule
\r
279 * source code that is broken up into lines. Handles the
\r
280 * folding of lines terminated by a backslash. This folding
\r
281 * is limited; it does not account for comments, quotes, or
\r
282 * escapes, so its use should be limited.
\r
284 private static abstract class RuleBody {
\r
287 * Retrieve the next line of the source, or return null if
\r
288 * none. Folds lines terminated by a backslash into the
\r
289 * next line, without regard for comments, quotes, or
\r
292 String nextLine() {
\r
293 String s = handleNextLine();
\r
296 s.charAt(s.length() - 1) == '\\') {
\r
297 StringBuilder b = new StringBuilder(s);
\r
299 b.deleteCharAt(b.length()-1);
\r
300 s = handleNextLine();
\r
305 } while (s.length() > 0 &&
\r
306 s.charAt(s.length() - 1) == '\\');
\r
313 * Reset to the first line of the source.
\r
315 abstract void reset();
\r
318 * Subclass method to return the next line of the source.
\r
320 abstract String handleNextLine();
\r
324 * RuleBody subclass for a String[] array.
\r
326 private static class RuleArray extends RuleBody {
\r
329 public RuleArray(String[] array) { this.array = array; i = 0; }
\r
330 public String handleNextLine() {
\r
331 return (i < array.length) ? array[i++] : null;
\r
333 public void reset() {
\r
339 * RuleBody subclass for a ResourceReader.
\r
341 /* private static class RuleReader extends RuleBody {
\r
342 ResourceReader reader;
\r
343 public RuleReader(ResourceReader reader) { this.reader = reader; }
\r
344 public String handleNextLine() {
\r
346 return reader.readLine();
\r
347 } catch (java.io.IOException e) {}
\r
350 public void reset() {
\r
355 //----------------------------------------------------------------------
\r
357 //----------------------------------------------------------------------
\r
360 * A class representing one side of a rule. This class knows how to
\r
361 * parse half of a rule. It is tightly coupled to the method
\r
362 * TransliteratorParser.parseRule().
\r
364 private static class RuleHalf {
\r
366 public String text;
\r
368 public int cursor = -1; // position of cursor in text
\r
369 public int ante = -1; // position of ante context marker '{' in text
\r
370 public int post = -1; // position of post context marker '}' in text
\r
372 // Record the offset to the cursor either to the left or to the
\r
373 // right of the key. This is indicated by characters on the output
\r
374 // side that allow the cursor to be positioned arbitrarily within
\r
375 // the matching text. For example, abc{def} > | @@@ xyz; changes
\r
376 // def to xyz and moves the cursor to before abc. Offset characters
\r
377 // must be at the start or end, and they cannot move the cursor past
\r
378 // the ante- or postcontext text. Placeholders are only valid in
\r
379 // output text. The length of the ante and post context is
\r
380 // determined at runtime, because of supplementals and quantifiers.
\r
381 public int cursorOffset = 0; // only nonzero on output side
\r
383 // Position of first CURSOR_OFFSET on _right_. This will be -1
\r
384 // for |@, -2 for |@@, etc., and 1 for @|, 2 for @@|, etc.
\r
385 private int cursorOffsetPos = 0;
\r
387 public boolean anchorStart = false;
\r
388 public boolean anchorEnd = false;
\r
391 * The segment number from 1..n of the next '(' we see
\r
392 * during parsing; 1-based.
\r
394 private int nextSegmentNumber = 1;
\r
397 * Parse one side of a rule, stopping at either the limit,
\r
398 * the END_OF_RULE character, or an operator.
\r
399 * @return the index after the terminating character, or
\r
400 * if limit was reached, limit
\r
402 public int parse(String rule, int pos, int limit,
\r
403 TransliteratorParser parser) {
\r
405 StringBuffer buf = new StringBuffer();
\r
406 pos = parseSection(rule, pos, limit, parser, buf, ILLEGAL_TOP, false);
\r
407 text = buf.toString();
\r
409 if (cursorOffset > 0 && cursor != cursorOffsetPos) {
\r
410 syntaxError("Misplaced " + CURSOR_POS, rule, start);
\r
417 * Parse a section of one side of a rule, stopping at either
\r
418 * the limit, the END_OF_RULE character, an operator, or a
\r
419 * segment close character. This method parses both a
\r
420 * top-level rule half and a segment within such a rule half.
\r
421 * It calls itself recursively to parse segments and nested
\r
423 * @param buf buffer into which to accumulate the rule pattern
\r
424 * characters, either literal characters from the rule or
\r
425 * standins for UnicodeMatcher objects including segments.
\r
426 * @param illegal the set of special characters that is illegal during
\r
428 * @param isSegment if true, then we've already seen a '(' and
\r
429 * pos on entry points right after it. Accumulate everything
\r
430 * up to the closing ')', put it in a segment matcher object,
\r
431 * generate a standin for it, and add the standin to buf. As
\r
432 * a side effect, update the segments vector with a reference
\r
433 * to the segment matcher. This works recursively for nested
\r
434 * segments. If isSegment is false, just accumulate
\r
435 * characters into buf.
\r
436 * @return the index after the terminating character, or
\r
437 * if limit was reached, limit
\r
439 private int parseSection(String rule, int pos, int limit,
\r
440 TransliteratorParser parser,
\r
442 UnicodeSet illegal,
\r
443 boolean isSegment) {
\r
445 ParsePosition pp = null;
\r
446 int quoteStart = -1; // Most recent 'single quoted string'
\r
447 int quoteLimit = -1;
\r
448 int varStart = -1; // Most recent $variableReference
\r
450 int[] iref = new int[1];
\r
451 int bufStart = buf.length();
\r
454 while (pos < limit) {
\r
455 // Since all syntax characters are in the BMP, fetching
\r
456 // 16-bit code units suffices here.
\r
457 char c = rule.charAt(pos++);
\r
458 if (UCharacterProperty.isRuleWhiteSpace(c)) {
\r
461 // HALF_ENDERS is all chars that end a rule half: "=><;" plus the arrow aliases U+2190, U+2192, U+2194
\r
462 if (HALF_ENDERS.indexOf(c) >= 0) {
\r
464 // isSegment is always false
\r
466 syntaxError("Unclosed segment", rule, start);
\r
472 // Text after a presumed end anchor is a syntax err
\r
473 syntaxError("Malformed variable reference", rule, start);
\r
475 if (UnicodeSet.resemblesPattern(rule, pos-1)) {
\r
477 pp = new ParsePosition(0);
\r
479 pp.setIndex(pos-1); // Backup to opening '['
\r
480 buf.append(parser.parseSet(rule, pp));
\r
481 pos = pp.getIndex();
\r
486 if (pos == limit) {
\r
487 syntaxError("Trailing backslash", rule, start);
\r
490 int escaped = Utility.unescapeAt(rule, iref);
\r
492 if (escaped == -1) {
\r
493 syntaxError("Malformed escape", rule, start);
\r
495 parser.checkVariableRange(escaped, rule, start);
\r
496 UTF16.append(buf, escaped);
\r
499 // Handle quoted matter
\r
501 int iq = rule.indexOf(QUOTE, pos);
\r
503 buf.append(c); // Parse [''] outside quotes as [']
\r
506 /* This loop picks up a run of quoted text of the
\r
507 * form 'aaaa' each time through. If this run
\r
508 * hasn't really ended ('aaaa''bbbb') then it keeps
\r
509 * looping, each time adding on a new run. When it
\r
510 * reaches the final quote it breaks.
\r
512 quoteStart = buf.length();
\r
515 syntaxError("Unterminated quote", rule, start);
\r
517 buf.append(rule.substring(pos, iq));
\r
519 if (pos < limit && rule.charAt(pos) == QUOTE) {
\r
520 // Parse [''] inside quotes as [']
\r
521 iq = rule.indexOf(QUOTE, pos+1);
\r
522 // Continue looping
\r
527 quoteLimit = buf.length();
\r
529 for (iq=quoteStart; iq<quoteLimit; ++iq) {
\r
530 parser.checkVariableRange(buf.charAt(iq), rule, start);
\r
536 parser.checkVariableRange(c, rule, start);
\r
538 if (illegal.contains(c)) {
\r
539 syntaxError("Illegal character '" + c + '\'', rule, start);
\r
544 //------------------------------------------------------
\r
545 // Elements allowed within and out of segments
\r
546 //------------------------------------------------------
\r
548 if (buf.length() == 0 && !anchorStart) {
\r
549 anchorStart = true;
\r
551 syntaxError("Misplaced anchor start",
\r
557 // bufSegStart is the offset in buf to the first
\r
558 // character of the segment we are parsing.
\r
559 int bufSegStart = buf.length();
\r
561 // Record segment number now, since nextSegmentNumber
\r
562 // will be incremented during the call to parseSection
\r
563 // if there are nested segments.
\r
564 int segmentNumber = nextSegmentNumber++; // 1-based
\r
566 // Parse the segment
\r
567 pos = parseSection(rule, pos, limit, parser, buf, ILLEGAL_SEG, true);
\r
569 // After parsing a segment, the relevant characters are
\r
570 // in buf, starting at offset bufSegStart. Extract them
\r
571 // into a string matcher, and replace them with a
\r
572 // standin for that matcher.
\r
574 new StringMatcher(buf.substring(bufSegStart),
\r
575 segmentNumber, parser.curData);
\r
577 // Record and associate object and segment number
\r
578 parser.setSegmentObject(segmentNumber, m);
\r
579 buf.setLength(bufSegStart);
\r
580 buf.append(parser.getSegmentStandin(segmentNumber));
\r
587 TransliteratorIDParser.SingleID single = TransliteratorIDParser.parseFilterID(rule, iref);
\r
588 // The next character MUST be a segment open
\r
589 if (single == null ||
\r
590 !Utility.parseChar(rule, iref, SEGMENT_OPEN)) {
\r
591 syntaxError("Invalid function", rule, start);
\r
594 Transliterator t = single.getInstance();
\r
596 syntaxError("Invalid function ID", rule, start);
\r
599 // bufSegStart is the offset in buf to the first
\r
600 // character of the segment we are parsing.
\r
601 int bufSegStart = buf.length();
\r
603 // Parse the segment
\r
604 pos = parseSection(rule, iref[0], limit, parser, buf, ILLEGAL_FUNC, true);
\r
606 // After parsing a segment, the relevant characters are
\r
607 // in buf, starting at offset bufSegStart.
\r
608 FunctionReplacer r =
\r
609 new FunctionReplacer(t,
\r
610 new StringReplacer(buf.substring(bufSegStart), parser.curData));
\r
612 // Replace the buffer contents with a stand-in
\r
613 buf.setLength(bufSegStart);
\r
614 buf.append(parser.generateStandInFor(r));
\r
617 case SymbolTable.SYMBOL_REF:
\r
618 // Handle variable references and segment references "$1" .. "$9"
\r
620 // A variable reference must be followed immediately
\r
621 // by a Unicode identifier start and zero or more
\r
622 // Unicode identifier part characters, or by a digit
\r
623 // 1..9 if it is a segment reference.
\r
624 if (pos == limit) {
\r
625 // A variable ref character at the end acts as
\r
626 // an anchor to the context limit, as in perl.
\r
630 // Parse "$1" "$2" .. "$9" .. (no upper limit)
\r
631 c = rule.charAt(pos);
\r
632 int r = UCharacter.digit(c, 10);
\r
633 if (r >= 1 && r <= 9) {
\r
635 r = Utility.parseNumber(rule, iref, 10);
\r
637 syntaxError("Undefined segment reference",
\r
641 buf.append(parser.getSegmentStandin(r));
\r
643 if (pp == null) { // Lazy create
\r
644 pp = new ParsePosition(0);
\r
647 String name = parser.parseData.
\r
648 parseReference(rule, pp, limit);
\r
649 if (name == null) {
\r
650 // This means the '$' was not followed by a
\r
651 // valid name. Try to interpret it as an
\r
652 // end anchor then. If this also doesn't work
\r
653 // (if we see a following character) then signal
\r
658 pos = pp.getIndex();
\r
659 // If this is a variable definition statement,
\r
660 // then the LHS variable will be undefined. In
\r
661 // that case appendVariableDef() will append the
\r
662 // special placeholder char variableLimit-1.
\r
663 varStart = buf.length();
\r
664 parser.appendVariableDef(name, buf);
\r
665 varLimit = buf.length();
\r
670 buf.append(parser.getDotStandIn());
\r
675 // Quantifiers. We handle single characters, quoted strings,
\r
676 // variable references, and segments.
\r
678 // 'foo'+ matches foofoofoo
\r
679 // $v+ matches xyxyxy if $v == xy
\r
680 // (seg)+ matches segsegseg
\r
683 // isSegment is always false
\r
684 if (isSegment && buf.length() == bufStart) {
\r
685 // The */+ immediately follows '('
\r
686 syntaxError("Misplaced quantifier", rule, start);
\r
691 int qstart, qlimit;
\r
692 // The */+ follows an isolated character or quote
\r
693 // or variable reference
\r
694 if (buf.length() == quoteLimit) {
\r
695 // The */+ follows a 'quoted string'
\r
696 qstart = quoteStart;
\r
697 qlimit = quoteLimit;
\r
698 } else if (buf.length() == varLimit) {
\r
699 // The */+ follows a $variableReference
\r
703 // The */+ follows a single character, possibly
\r
704 // a segment standin
\r
705 qstart = buf.length() - 1;
\r
706 qlimit = qstart + 1;
\r
711 m = new StringMatcher(buf.toString(), qstart, qlimit,
\r
712 0, parser.curData);
\r
713 } catch (RuntimeException e) {
\r
714 final String precontext = pos < 50 ? rule.substring(0, pos) : "..." + rule.substring(pos - 50, pos);
\r
715 final String postContext = limit-pos <= 50 ? rule.substring(pos, limit) : rule.substring(pos, pos+50) + "...";
\r
716 throw (RuntimeException)
\r
717 new IllegalIcuArgumentException("Failure in rule: " + precontext + "$$$"
\r
718 + postContext).initCause(e);
\r
721 int max = Quantifier.MAX;
\r
730 // case KLEENE_STAR:
\r
731 // do nothing -- min, max already set
\r
733 m = new Quantifier(m, min, max);
\r
734 buf.setLength(qstart);
\r
735 buf.append(parser.generateStandInFor(m));
\r
739 //------------------------------------------------------
\r
740 // Elements allowed ONLY WITHIN segments
\r
741 //------------------------------------------------------
\r
742 case SEGMENT_CLOSE:
\r
743 // assert(isSegment);
\r
744 // We're done parsing a segment.
\r
747 //------------------------------------------------------
\r
748 // Elements allowed ONLY OUTSIDE segments
\r
749 //------------------------------------------------------
\r
752 syntaxError("Multiple ante contexts", rule, start);
\r
754 ante = buf.length();
\r
758 syntaxError("Multiple post contexts", rule, start);
\r
760 post = buf.length();
\r
764 syntaxError("Multiple cursors", rule, start);
\r
766 cursor = buf.length();
\r
768 case CURSOR_OFFSET:
\r
769 if (cursorOffset < 0) {
\r
770 if (buf.length() > 0) {
\r
771 syntaxError("Misplaced " + c, rule, start);
\r
774 } else if (cursorOffset > 0) {
\r
775 if (buf.length() != cursorOffsetPos || cursor >= 0) {
\r
776 syntaxError("Misplaced " + c, rule, start);
\r
780 if (cursor == 0 && buf.length() == 0) {
\r
782 } else if (cursor < 0) {
\r
783 cursorOffsetPos = buf.length();
\r
786 syntaxError("Misplaced " + c, rule, start);
\r
791 //------------------------------------------------------
\r
792 // Non-special characters
\r
793 //------------------------------------------------------
\r
795 // Disallow unquoted characters other than [0-9A-Za-z]
\r
796 // in the printable ASCII range. These characters are
\r
797 // reserved for possible future use.
\r
798 if (c >= 0x0021 && c <= 0x007E &&
\r
799 !((c >= '0' && c <= '9') ||
\r
800 (c >= 'A' && c <= 'Z') ||
\r
801 (c >= 'a' && c <= 'z'))) {
\r
802 syntaxError("Unquoted " + c, rule, start);
\r
814 void removeContext() {
\r
815 text = text.substring(ante < 0 ? 0 : ante,
\r
816 post < 0 ? text.length() : post);
\r
818 anchorStart = anchorEnd = false;
\r
822 * Return true if this half looks like valid output, that is, does not
\r
823 * contain quantifiers or other special input-only elements.
\r
825 public boolean isValidOutput(TransliteratorParser parser) {
\r
826 for (int i=0; i<text.length(); ) {
\r
827 int c = UTF16.charAt(text, i);
\r
828 i += UTF16.getCharCount(c);
\r
829 if (!parser.parseData.isReplacer(c)) {
\r
837 * Return true if this half looks like valid input, that is, does not
\r
838 * contain functions or other special output-only elements.
\r
840 public boolean isValidInput(TransliteratorParser parser) {
\r
841 for (int i=0; i<text.length(); ) {
\r
842 int c = UTF16.charAt(text, i);
\r
843 i += UTF16.getCharCount(c);
\r
844 if (!parser.parseData.isMatcher(c)) {
\r
852 //----------------------------------------------------------------------
\r
854 //----------------------------------------------------------------------
\r
859 public TransliteratorParser() {
\r
863 * Parse a set of rules. After the parse completes, examine the public
\r
864 * data members for results.
\r
866 public void parse(String rules, int dir) {
\r
867 parseRules(new RuleArray(new String[] { rules }), dir);
\r
871 * Parse a set of rules. After the parse completes, examine the public
\r
872 * data members for results.
\r
874 /* public void parse(ResourceReader rules, int direction) {
\r
875 parseRules(new RuleReader(rules), direction);
\r
878 //----------------------------------------------------------------------
\r
880 //----------------------------------------------------------------------
\r
883 * Parse an array of zero or more rules. The strings in the array are
\r
884 * treated as if they were concatenated together, with rule terminators
\r
885 * inserted between array elements if not present already.
\r
887 * Any previous rules are discarded. Typically this method is called exactly
\r
888 * once, during construction.
\r
890 * The member this.data will be set to null if there are no rules.
\r
892 * @exception IllegalIcuArgumentException if there is a syntax error in the
\r
895 void parseRules(RuleBody ruleArray, int dir) {
\r
896 boolean parsingIDs = true;
\r
899 dataVector = new Vector<Data>();
\r
900 idBlockVector = new Vector<String>();
\r
903 compoundFilter = null;
\r
904 variablesVector = new Vector<Object>();
\r
905 variableNames = new Hashtable<String, char[]>();
\r
906 parseData = new ParseData();
\r
908 List<RuntimeException> errors = new ArrayList<RuntimeException>();
\r
909 int errorCount = 0;
\r
913 StringBuilder idBlockResult = new StringBuilder();
\r
915 // The compound filter offset is an index into idBlockResult.
\r
916 // If it is 0, then the compound filter occurred at the start,
\r
917 // and it is the offset to the _start_ of the compound filter
\r
918 // pattern. Otherwise it is the offset to the _limit_ of the
\r
919 // compound filter pattern within idBlockResult.
\r
920 this.compoundFilter = null;
\r
921 int compoundFilterOffset = -1;
\r
925 String rule = ruleArray.nextLine();
\r
926 if (rule == null) {
\r
930 int limit = rule.length();
\r
931 while (pos < limit) {
\r
932 char c = rule.charAt(pos++);
\r
933 if (UCharacterProperty.isRuleWhiteSpace(c)) {
\r
936 // Skip lines starting with the comment character
\r
937 if (c == RULE_COMMENT_CHAR) {
\r
938 pos = rule.indexOf("\n", pos) + 1;
\r
940 break; // No "\n" found; rest of rule is a comment
\r
942 continue; // Either fall out or restart with next line
\r
945 // skip empty rules
\r
946 if (c == END_OF_RULE)
\r
949 // Often a rule file contains multiple errors. It's
\r
950 // convenient to the rule author if these are all reported
\r
951 // at once. We keep parsing rules even after a failure, up
\r
952 // to a specified limit, and report all errors at once.
\r
956 // We've found the start of a rule or ID. c is its first
\r
957 // character, and pos points past c.
\r
959 // Look for an ID token. Must have at least ID_TOKEN_LEN + 1
\r
961 if ((pos + ID_TOKEN_LEN + 1) <= limit &&
\r
962 rule.regionMatches(pos, ID_TOKEN, 0, ID_TOKEN_LEN)) {
\r
963 pos += ID_TOKEN_LEN;
\r
964 c = rule.charAt(pos);
\r
965 while (UCharacterProperty.isRuleWhiteSpace(c) && pos < limit) {
\r
967 c = rule.charAt(pos);
\r
969 int[] p = new int[] { pos };
\r
972 if (curData != null) {
\r
973 if (direction == Transliterator.FORWARD)
\r
974 dataVector.add(curData);
\r
976 dataVector.insertElementAt(curData, 0);
\r
982 TransliteratorIDParser.SingleID id =
\r
983 TransliteratorIDParser.parseSingleID(
\r
984 rule, p, direction);
\r
985 if (p[0] != pos && Utility.parseChar(rule, p, END_OF_RULE)) {
\r
986 // Successful ::ID parse.
\r
988 if (direction == Transliterator.FORWARD) {
\r
989 idBlockResult.append(id.canonID).append(END_OF_RULE);
\r
991 idBlockResult.insert(0, id.canonID + END_OF_RULE);
\r
995 // Couldn't parse an ID. Try to parse a global filter
\r
996 int[] withParens = new int[] { -1 };
\r
997 UnicodeSet f = TransliteratorIDParser.parseGlobalFilter(rule, p, direction, withParens, null);
\r
998 if (f != null && Utility.parseChar(rule, p, END_OF_RULE)) {
\r
999 if ((direction == Transliterator.FORWARD) ==
\r
1000 (withParens[0] == 0)) {
\r
1001 if (compoundFilter != null) {
\r
1002 // Multiple compound filters
\r
1003 syntaxError("Multiple global filters", rule, pos);
\r
1005 compoundFilter = f;
\r
1006 compoundFilterOffset = ruleCount;
\r
1010 // Can be parsed as neither an ID nor a global filter
\r
1011 syntaxError("Invalid ::ID", rule, pos);
\r
1018 if (direction == Transliterator.FORWARD)
\r
1019 idBlockVector.add(idBlockResult.toString());
\r
1021 idBlockVector.insertElementAt(idBlockResult.toString(), 0);
\r
1022 idBlockResult.delete(0, idBlockResult.length());
\r
1023 parsingIDs = false;
\r
1024 curData = new RuleBasedTransliterator.Data();
\r
1026 // By default, rules use part of the private use area
\r
1027 // E000..F8FF for variables and other stand-ins. Currently
\r
1028 // the range F000..F8FF is typically sufficient. The 'use
\r
1029 // variable range' pragma allows rule sets to modify this.
\r
1030 setVariableRange(0xF000, 0xF8FF);
\r
1033 if (resemblesPragma(rule, pos, limit)) {
\r
1034 int ppp = parsePragma(rule, pos, limit);
\r
1036 syntaxError("Unrecognized pragma", rule, pos);
\r
1041 pos = parseRule(rule, pos, limit);
\r
1044 } catch (IllegalArgumentException e) {
\r
1045 if (errorCount == 30) {
\r
1046 IllegalIcuArgumentException icuEx = new IllegalIcuArgumentException("\nMore than 30 errors; further messages squelched");
\r
1047 icuEx.initCause(e);
\r
1048 errors.add(icuEx);
\r
1051 e.fillInStackTrace();
\r
1054 pos = ruleEnd(rule, pos, limit) + 1; // +1 advances past ';'
\r
1058 if (parsingIDs && idBlockResult.length() > 0) {
\r
1059 if (direction == Transliterator.FORWARD)
\r
1060 idBlockVector.add(idBlockResult.toString());
\r
1062 idBlockVector.insertElementAt(idBlockResult.toString(), 0);
\r
1064 else if (!parsingIDs && curData != null) {
\r
1065 if (direction == Transliterator.FORWARD)
\r
1066 dataVector.add(curData);
\r
1068 dataVector.insertElementAt(curData, 0);
\r
1071 // Convert the set vector to an array
\r
1072 for (int i = 0; i < dataVector.size(); i++) {
\r
1073 Data data = dataVector.get(i);
\r
1074 data.variables = new Object[variablesVector.size()];
\r
1075 variablesVector.copyInto(data.variables);
\r
1076 data.variableNames = new Hashtable<String, char[]>();
\r
1077 data.variableNames.putAll(variableNames);
\r
1079 variablesVector = null;
\r
1081 // Do more syntax checking and index the rules
\r
1083 if (compoundFilter != null) {
\r
1084 if ((direction == Transliterator.FORWARD &&
\r
1085 compoundFilterOffset != 1) ||
\r
1086 (direction == Transliterator.REVERSE &&
\r
1087 compoundFilterOffset != ruleCount)) {
\r
1088 throw new IllegalIcuArgumentException("Compound filters misplaced");
\r
1092 for (int i = 0; i < dataVector.size(); i++) {
\r
1093 Data data = dataVector.get(i);
\r
1094 data.ruleSet.freeze();
\r
1097 if (idBlockVector.size() == 1 && (idBlockVector.get(0)).length() == 0)
\r
1098 idBlockVector.remove(0);
\r
1100 } catch (IllegalArgumentException e) {
\r
1101 e.fillInStackTrace();
\r
1105 if (errors.size() != 0) {
\r
1106 for (int i = errors.size()-1; i > 0; --i) {
\r
1107 RuntimeException previous = errors.get(i-1);
\r
1108 while (previous.getCause() != null) {
\r
1109 previous = (RuntimeException) previous.getCause(); // chain specially
\r
1111 previous.initCause(errors.get(i));
\r
1113 throw errors.get(0);
\r
1114 // if initCause not supported: throw new IllegalArgumentException(errors.toString());
\r
1119 * MAIN PARSER. Parse the next rule in the given rule string, starting
\r
1120 * at pos. Return the index after the last character parsed. Do not
\r
1121 * parse characters at or after limit.
\r
1123 * Important: The character at pos must be a non-whitespace character
\r
1124 * that is not the comment character.
\r
1126 * This method handles quoting, escaping, and whitespace removal. It
\r
1127 * parses the end-of-rule character. It recognizes context and cursor
\r
1128 * indicators. Once it does a lexical breakdown of the rule at pos, it
\r
1129 * creates a rule object and adds it to our rule list.
\r
1131 * This method is tightly coupled to the inner class RuleHalf.
\r
1133 private int parseRule(String rule, int pos, int limit) {
\r
1134 // Locate the left side, operator, and right side
\r
1136 char operator = 0;
\r
1138 // Set up segments data
\r
1139 segmentStandins = new StringBuffer();
\r
1140 segmentObjects = new Vector<StringMatcher>();
\r
1142 RuleHalf left = new RuleHalf();
\r
1143 RuleHalf right = new RuleHalf();
\r
1145 undefinedVariableName = null;
\r
1146 pos = left.parse(rule, pos, limit, this);
\r
1148 if (pos == limit ||
\r
1149 OPERATORS.indexOf(operator = rule.charAt(--pos)) < 0) {
\r
1150 syntaxError("No operator pos=" + pos, rule, start);
\r
1154 // Found an operator char. Check for forward-reverse operator.
\r
1155 if (operator == REVERSE_RULE_OP &&
\r
1156 (pos < limit && rule.charAt(pos) == FORWARD_RULE_OP)) {
\r
1158 operator = FWDREV_RULE_OP;
\r
1161 // Translate alternate op characters.
\r
1162 switch (operator) {
\r
1163 case ALT_FORWARD_RULE_OP:
\r
1164 operator = FORWARD_RULE_OP;
\r
1166 case ALT_REVERSE_RULE_OP:
\r
1167 operator = REVERSE_RULE_OP;
\r
1169 case ALT_FWDREV_RULE_OP:
\r
1170 operator = FWDREV_RULE_OP;
\r
1174 pos = right.parse(rule, pos, limit, this);
\r
1176 if (pos < limit) {
\r
1177 if (rule.charAt(--pos) == END_OF_RULE) {
\r
1180 // RuleHalf parser must have terminated at an operator
\r
1181 syntaxError("Unquoted operator", rule, start);
\r
1185 if (operator == VARIABLE_DEF_OP) {
\r
1186 // LHS is the name. RHS is a single character, either a literal
\r
1187 // or a set (already parsed). If RHS is longer than one
\r
1188 // character, it is either a multi-character string, or multiple
\r
1189 // sets, or a mixture of chars and sets -- syntax error.
\r
1191 // We expect to see a single undefined variable (the one being
\r
1193 if (undefinedVariableName == null) {
\r
1194 syntaxError("Missing '$' or duplicate definition", rule, start);
\r
1196 if (left.text.length() != 1 || left.text.charAt(0) != variableLimit) {
\r
1197 syntaxError("Malformed LHS", rule, start);
\r
1199 if (left.anchorStart || left.anchorEnd ||
\r
1200 right.anchorStart || right.anchorEnd) {
\r
1201 syntaxError("Malformed variable def", rule, start);
\r
1203 // We allow anything on the right, including an empty string.
\r
1204 int n = right.text.length();
\r
1205 char[] value = new char[n];
\r
1206 right.text.getChars(0, n, value, 0);
\r
1207 variableNames.put(undefinedVariableName, value);
\r
1213 // If this is not a variable definition rule, we shouldn't have
\r
1214 // any undefined variable names.
\r
1215 if (undefinedVariableName != null) {
\r
1216 syntaxError("Undefined variable $" + undefinedVariableName,
\r
1220 // Verify segments
\r
1221 if (segmentStandins.length() > segmentObjects.size()) {
\r
1222 syntaxError("Undefined segment reference", rule, start);
\r
1224 for (int i=0; i<segmentStandins.length(); ++i) {
\r
1225 if (segmentStandins.charAt(i) == 0) {
\r
1226 syntaxError("Internal error", rule, start); // will never happen
\r
1229 for (int i=0; i<segmentObjects.size(); ++i) {
\r
1230 if (segmentObjects.elementAt(i) == null) {
\r
1231 syntaxError("Internal error", rule, start); // will never happen
\r
1235 // If the direction we want doesn't match the rule
\r
1236 // direction, do nothing.
\r
1237 if (operator != FWDREV_RULE_OP &&
\r
1238 ((direction == Transliterator.FORWARD) != (operator == FORWARD_RULE_OP))) {
\r
1242 // Transform the rule into a forward rule by swapping the
\r
1243 // sides if necessary.
\r
1244 if (direction == Transliterator.REVERSE) {
\r
1245 RuleHalf temp = left;
\r
1250 // Remove non-applicable elements in forward-reverse
\r
1251 // rules. Bidirectional rules ignore elements that do not
\r
1253 if (operator == FWDREV_RULE_OP) {
\r
1254 right.removeContext();
\r
1256 left.cursorOffset = 0;
\r
1259 // Normalize context
\r
1260 if (left.ante < 0) {
\r
1263 if (left.post < 0) {
\r
1264 left.post = left.text.length();
\r
1267 // Context is only allowed on the input side. Cursors are only
\r
1268 // allowed on the output side. Segment delimiters can only appear
\r
1269 // on the left, and references on the right. Cursor offset
\r
1270 // cannot appear without an explicit cursor. Cursor offset
\r
1271 // cannot place the cursor outside the limits of the context.
\r
1272 // Anchors are only allowed on the input side.
\r
1273 if (right.ante >= 0 || right.post >= 0 || left.cursor >= 0 ||
\r
1274 (right.cursorOffset != 0 && right.cursor < 0) ||
\r
1275 // - The following two checks were used to ensure that the
\r
1276 // - the cursor offset stayed within the ante- or postcontext.
\r
1277 // - However, with the addition of quantifiers, we have to
\r
1278 // - allow arbitrary cursor offsets and do runtime checking.
\r
1279 //(right.cursorOffset > (left.text.length() - left.post)) ||
\r
1280 //(-right.cursorOffset > left.ante) ||
\r
1281 right.anchorStart || right.anchorEnd ||
\r
1282 !left.isValidInput(this) || !right.isValidOutput(this) ||
\r
1283 left.ante > left.post) {
\r
1284 syntaxError("Malformed rule", rule, start);
\r
1287 // Flatten segment objects vector to an array
\r
1288 UnicodeMatcher[] segmentsArray = null;
\r
1289 if (segmentObjects.size() > 0) {
\r
1290 segmentsArray = new UnicodeMatcher[segmentObjects.size()];
\r
1291 segmentObjects.toArray(segmentsArray);
\r
1294 curData.ruleSet.addRule(new TransliterationRule(
\r
1295 left.text, left.ante, left.post,
\r
1296 right.text, right.cursor, right.cursorOffset,
\r
1298 left.anchorStart, left.anchorEnd,
\r
1305 * Set the variable range to [start, end] (inclusive).
\r
1307 private void setVariableRange(int start, int end) {
\r
1308 if (start > end || start < 0 || end > 0xFFFF) {
\r
1309 throw new IllegalIcuArgumentException("Invalid variable range " + start + ", " + end);
\r
1312 curData.variablesBase = (char) start; // first private use
\r
1314 if (dataVector.size() == 0) {
\r
1315 variableNext = (char) start;
\r
1316 variableLimit = (char) (end + 1);
\r
1321 * Assert that the given character is NOT within the variable range.
\r
1322 * If it is, signal an error. This is neccesary to ensure that the
\r
1323 * variable range does not overlap characters used in a rule.
\r
1325 private void checkVariableRange(int ch, String rule, int start) {
\r
1326 if (ch >= curData.variablesBase && ch < variableLimit) {
\r
1327 syntaxError("Variable range character in rule", rule, start);
\r
1331 // (The following method is part of an unimplemented feature.
\r
1332 // Remove this clover pragma after the feature is implemented.
\r
1333 // 2003-06-11 ICU 2.6 Alan)
\r
1336 * Set the maximum backup to 'backup', in response to a pragma
\r
1339 private void pragmaMaximumBackup(int backup) {
\r
1341 throw new IllegalIcuArgumentException("use maximum backup pragma not implemented yet");
\r
1345 // (The following method is part of an unimplemented feature.
\r
1346 // Remove this clover pragma after the feature is implemented.
\r
1347 // 2003-06-11 ICU 2.6 Alan)
\r
1350 * Begin normalizing all rules using the given mode, in response
\r
1351 * to a pragma statement.
\r
1353 private void pragmaNormalizeRules(Normalizer.Mode mode) {
\r
1355 throw new IllegalIcuArgumentException("use normalize rules pragma not implemented yet");
\r
1360 * Return true if the given rule looks like a pragma.
\r
1361 * @param pos offset to the first non-whitespace character
\r
1363 * @param limit pointer past the last character of the rule.
\r
1365 static boolean resemblesPragma(String rule, int pos, int limit) {
\r
1366 // Must start with /use\s/i
\r
1367 return Utility.parsePattern(rule, pos, limit, "use ", null) >= 0;
\r
1371 * Parse a pragma. This method assumes resemblesPragma() has
\r
1372 * already returned true.
\r
1373 * @param pos offset to the first non-whitespace character
\r
1375 * @param limit pointer past the last character of the rule.
\r
1376 * @return the position index after the final ';' of the pragma,
\r
1377 * or -1 on failure.
\r
1379 private int parsePragma(String rule, int pos, int limit) {
\r
1380 int[] array = new int[2];
\r
1382 // resemblesPragma() has already returned true, so we
\r
1383 // know that pos points to /use\s/i; we can skip 4 characters
\r
1387 // Here are the pragmas we recognize:
\r
1388 // use variable range 0xE000 0xEFFF;
\r
1389 // use maximum backup 16;
\r
1391 int p = Utility.parsePattern(rule, pos, limit, "~variable range # #~;", array);
\r
1393 setVariableRange(array[0], array[1]);
\r
1397 p = Utility.parsePattern(rule, pos, limit, "~maximum backup #~;", array);
\r
1399 pragmaMaximumBackup(array[0]);
\r
1403 p = Utility.parsePattern(rule, pos, limit, "~nfd rules~;", null);
\r
1405 pragmaNormalizeRules(Normalizer.NFD);
\r
1409 p = Utility.parsePattern(rule, pos, limit, "~nfc rules~;", null);
\r
1411 pragmaNormalizeRules(Normalizer.NFC);
\r
1415 // Syntax error: unable to parse pragma
\r
1420 * Throw an exception indicating a syntax error. Search the rule string
\r
1421 * for the probable end of the rule. Of course, if the error is that
\r
1422 * the end of rule marker is missing, then the rule end will not be found.
\r
1423 * In any case the rule start will be correctly reported.
\r
1424 * @param msg error description
\r
1425 * @param rule pattern string
\r
1426 * @param start position of first character of current rule
\r
1428 static final void syntaxError(String msg, String rule, int start) {
\r
1429 int end = ruleEnd(rule, start, rule.length());
\r
1430 throw new IllegalIcuArgumentException(msg + " in \"" +
\r
1431 Utility.escape(rule.substring(start, end)) + '"');
\r
1434 static final int ruleEnd(String rule, int start, int limit) {
\r
1435 int end = Utility.quotedIndexOf(rule, start, limit, ";");
\r
1443 * Parse a UnicodeSet out, store it, and return the stand-in character
\r
1444 * used to represent it.
\r
1446 private final char parseSet(String rule, ParsePosition pos) {
\r
1447 UnicodeSet set = new UnicodeSet(rule, pos, parseData);
\r
1448 if (variableNext >= variableLimit) {
\r
1449 throw new RuntimeException("Private use variables exhausted");
\r
1452 return generateStandInFor(set);
\r
1456 * Generate and return a stand-in for a new UnicodeMatcher or UnicodeReplacer.
\r
1457 * Store the object.
\r
1459 char generateStandInFor(Object obj) {
\r
1460 // assert(obj != null);
\r
1462 // Look up previous stand-in, if any. This is a short list
\r
1463 // (typical n is 0, 1, or 2); linear search is optimal.
\r
1464 for (int i=0; i<variablesVector.size(); ++i) {
\r
1465 if (variablesVector.elementAt(i) == obj) { // [sic] pointer comparison
\r
1466 return (char) (curData.variablesBase + i);
\r
1470 if (variableNext >= variableLimit) {
\r
1471 throw new RuntimeException("Variable range exhausted");
\r
1473 variablesVector.addElement(obj);
\r
1474 return variableNext++;
\r
1478 * Return the standin for segment seg (1-based).
\r
1480 public char getSegmentStandin(int seg) {
\r
1481 if (segmentStandins.length() < seg) {
\r
1482 segmentStandins.setLength(seg);
\r
1484 char c = segmentStandins.charAt(seg-1);
\r
1486 if (variableNext >= variableLimit) {
\r
1487 throw new RuntimeException("Variable range exhausted");
\r
1489 c = variableNext++;
\r
1490 // Set a placeholder in the master variables vector that will be
\r
1491 // filled in later by setSegmentObject(). We know that we will get
\r
1492 // called first because setSegmentObject() will call us.
\r
1493 variablesVector.addElement(null);
\r
1494 segmentStandins.setCharAt(seg-1, c);
\r
1500 * Set the object for segment seg (1-based).
\r
1502 public void setSegmentObject(int seg, StringMatcher obj) {
\r
1503 // Since we call parseSection() recursively, nested
\r
1504 // segments will result in segment i+1 getting parsed
\r
1505 // and stored before segment i; be careful with the
\r
1506 // vector handling here.
\r
1507 if (segmentObjects.size() < seg) {
\r
1508 segmentObjects.setSize(seg);
\r
1510 int index = getSegmentStandin(seg) - curData.variablesBase;
\r
1511 if (segmentObjects.elementAt(seg-1) != null ||
\r
1512 variablesVector.elementAt(index) != null) {
\r
1513 throw new RuntimeException(); // should never happen
\r
1515 segmentObjects.setElementAt(obj, seg-1);
\r
1516 variablesVector.setElementAt(obj, index);
\r
1520 * Return the stand-in for the dot set. It is allocated the first
\r
1521 * time and reused thereafter.
\r
1523 char getDotStandIn() {
\r
1524 if (dotStandIn == -1) {
\r
1525 dotStandIn = generateStandInFor(new UnicodeSet(DOT_SET));
\r
1527 return (char) dotStandIn;
\r
1531 * Append the value of the given variable name to the given
\r
1533 * @exception IllegalIcuArgumentException if the name is unknown.
\r
1535 private void appendVariableDef(String name, StringBuffer buf) {
\r
1536 char[] ch = variableNames.get(name);
\r
1538 // We allow one undefined variable so that variable definition
\r
1539 // statements work. For the first undefined variable we return
\r
1540 // the special placeholder variableLimit-1, and save the variable
\r
1542 if (undefinedVariableName == null) {
\r
1543 undefinedVariableName = name;
\r
1544 if (variableNext >= variableLimit) {
\r
1545 throw new RuntimeException("Private use variables exhausted");
\r
1547 buf.append(--variableLimit);
\r
1549 throw new IllegalIcuArgumentException("Undefined variable $"
\r