2 ***************************************************************************
\r
3 * Copyright (C) 2002-2006 International Business Machines Corporation *
\r
4 * and others. All rights reserved. *
\r
5 ***************************************************************************
\r
7 package com.ibm.icu.text;
\r
9 import java.util.HashMap;
\r
10 import java.util.Collection;
\r
12 import java.text.ParsePosition;
\r
13 import com.ibm.icu.lang.UCharacter;
\r
15 class RBBISymbolTable implements SymbolTable{
\r
19 RBBIRuleScanner fRuleScanner;
\r
21 // These next two fields are part of the mechanism for passing references to
\r
22 // already-constructed UnicodeSets back to the UnicodeSet constructor
\r
23 // when the pattern includes $variable references.
\r
25 UnicodeSet fCachedSetLookup;
\r
29 static class RBBISymbolTableEntry {
\r
35 RBBISymbolTable(RBBIRuleScanner rs, String rules) {
\r
38 fHashTable = new HashMap();
\r
39 ffffString = "\uffff";
\r
43 // RBBISymbolTable::lookup This function from the abstract symbol table interface
\r
44 // looks up a variable name and returns a char array
\r
45 // containing the substitution text.
\r
47 // The variable name does NOT include the leading $.
\r
49 public char[] lookup(String s) {
\r
50 RBBISymbolTableEntry el;
\r
51 RBBINode varRefNode;
\r
57 el = (RBBISymbolTableEntry) fHashTable.get(s);
\r
62 // Walk through any chain of variable assignments that ultimately resolve to a Set Ref.
\r
63 varRefNode = el.val;
\r
64 while (varRefNode.fLeftChild.fType == RBBINode.varRef) {
\r
65 varRefNode = varRefNode.fLeftChild;
\r
68 exprNode = varRefNode.fLeftChild; // Root node of expression for variable
\r
69 if (exprNode.fType == RBBINode.setRef) {
\r
70 // The $variable refers to a single UnicodeSet
\r
71 // return the ffffString, which will subsequently be interpreted as a
\r
72 // stand-in character for the set by RBBISymbolTable::lookupMatcher()
\r
73 usetNode = exprNode.fLeftChild;
\r
74 fCachedSetLookup = usetNode.fInputSet;
\r
75 retString = ffffString;
\r
77 // The variable refers to something other than just a set.
\r
78 // This is an error in the rules being compiled. $Variables inside of UnicodeSets
\r
79 // must refer only to another set, not to some random non-set expression.
\r
80 // Note: single characters are represented as sets, so they are ok.
\r
81 fRuleScanner.error(RBBIRuleBuilder.U_BRK_MALFORMED_SET);
\r
82 retString = exprNode.fText;
\r
83 fCachedSetLookup = null;
\r
85 return retString.toCharArray();
\r
89 // RBBISymbolTable::lookupMatcher This function from the abstract symbol table
\r
90 // interface maps a single stand-in character to a
\r
91 // pointer to a Unicode Set. The Unicode Set code uses this
\r
92 // mechanism to get all references to the same $variable
\r
93 // name to refer to a single common Unicode Set instance.
\r
95 // This implementation cheats a little, and does not maintain a map of stand-in chars
\r
96 // to sets. Instead, it takes advantage of the fact that the UnicodeSet
\r
97 // constructor will always call this function right after calling lookup(),
\r
98 // and we just need to remember what set to return between these two calls.
\r
99 public UnicodeMatcher lookupMatcher(int ch) {
\r
100 UnicodeSet retVal = null;
\r
101 if (ch == 0xffff) {
\r
102 retVal = fCachedSetLookup;
\r
103 fCachedSetLookup = null;
\r
109 // RBBISymbolTable::parseReference This function from the abstract symbol table interface
\r
110 // looks for a $variable name in the source text.
\r
111 // It does not look it up, only scans for it.
\r
112 // It is used by the UnicodeSet parser.
\r
114 public String parseReference(String text, ParsePosition pos, int limit) {
\r
115 int start = pos.getIndex();
\r
117 String result = "";
\r
118 while (i < limit) {
\r
119 int c = UTF16.charAt(text, i);
\r
120 if ((i == start && !UCharacter.isUnicodeIdentifierStart(c))
\r
121 || !UCharacter.isUnicodeIdentifierPart(c)) {
\r
124 i += UTF16.getCharCount(c);
\r
126 if (i == start) { // No valid name chars
\r
127 return result; // Indicate failure with empty string
\r
130 result = text.substring(start, i);
\r
135 // RBBISymbolTable::lookupNode Given a key (a variable name), return the
\r
136 // corresponding RBBI Node. If there is no entry
\r
137 // in the table for this name, return null.
\r
139 RBBINode lookupNode(String key) {
\r
141 RBBINode retNode = null;
\r
142 RBBISymbolTableEntry el;
\r
144 el = (RBBISymbolTableEntry) fHashTable.get(key);
\r
152 // RBBISymbolTable::addEntry Add a new entry to the symbol table.
\r
153 // Indicate an error if the name already exists -
\r
154 // this will only occur in the case of duplicate
\r
155 // variable assignments.
\r
157 void addEntry(String key, RBBINode val) {
\r
158 RBBISymbolTableEntry e;
\r
159 e = (RBBISymbolTableEntry) fHashTable.get(key);
\r
161 fRuleScanner.error(RBBIRuleBuilder.U_BRK_VARIABLE_REDFINITION);
\r
165 e = new RBBISymbolTableEntry();
\r
168 fHashTable.put(e.key, e);
\r
172 // RBBISymbolTable::rbbiSymtablePrint Debugging function, dump out the symbol table contents.
\r
175 void rbbiSymtablePrint() {
\r
177 .print("Variable Definitions\n"
\r
178 + "Name Node Val String Val\n"
\r
179 + "----------------------------------------------------------------------\n");
\r
181 RBBISymbolTableEntry[] syms = new RBBISymbolTableEntry[0];
\r
182 Collection t = fHashTable.values();
\r
183 syms = (RBBISymbolTableEntry[]) t.toArray(syms);
\r
185 for (int i = 0; i < syms.length; i++) {
\r
186 RBBISymbolTableEntry s = syms[i];
\r
188 System.out.print(" " + s.key + " "); // TODO: format output into columns.
\r
189 System.out.print(" " + s.val + " ");
\r
190 System.out.print(s.val.fLeftChild.fText);
\r
191 System.out.print("\n");
\r
194 System.out.println("\nParsed Variable Definitions\n");
\r
195 for (int i = 0; i < syms.length; i++) {
\r
196 RBBISymbolTableEntry s = syms[i];
\r
197 System.out.print(s.key);
\r
198 s.val.fLeftChild.printTree(true);
\r
199 System.out.print("\n");
\r