2 ***************************************************************************
\r
3 * Copyright (C) 2002-2009 International Business Machines Corporation *
\r
4 * and others. All rights reserved. *
\r
5 ***************************************************************************
\r
7 package com.ibm.icu.text;
\r
9 import java.text.ParsePosition;
\r
10 import java.util.HashMap;
\r
12 import com.ibm.icu.lang.UCharacter;
\r
14 class RBBISymbolTable implements SymbolTable{
\r
17 HashMap<String, RBBISymbolTableEntry> fHashTable;
\r
18 RBBIRuleScanner fRuleScanner;
\r
20 // These next two fields are part of the mechanism for passing references to
\r
21 // already-constructed UnicodeSets back to the UnicodeSet constructor
\r
22 // when the pattern includes $variable references.
\r
24 UnicodeSet fCachedSetLookup;
\r
28 static class RBBISymbolTableEntry {
\r
34 RBBISymbolTable(RBBIRuleScanner rs, String rules) {
\r
37 fHashTable = new HashMap<String, RBBISymbolTableEntry>();
\r
38 ffffString = "\uffff";
\r
42 // RBBISymbolTable::lookup This function from the abstract symbol table interface
\r
43 // looks up a variable name and returns a char array
\r
44 // containing the substitution text.
\r
46 // The variable name does NOT include the leading $.
\r
48 public char[] lookup(String s) {
\r
49 RBBISymbolTableEntry el;
\r
50 RBBINode varRefNode;
\r
56 el = fHashTable.get(s);
\r
61 // Walk through any chain of variable assignments that ultimately resolve to a Set Ref.
\r
62 varRefNode = el.val;
\r
63 while (varRefNode.fLeftChild.fType == RBBINode.varRef) {
\r
64 varRefNode = varRefNode.fLeftChild;
\r
67 exprNode = varRefNode.fLeftChild; // Root node of expression for variable
\r
68 if (exprNode.fType == RBBINode.setRef) {
\r
69 // The $variable refers to a single UnicodeSet
\r
70 // return the ffffString, which will subsequently be interpreted as a
\r
71 // stand-in character for the set by RBBISymbolTable::lookupMatcher()
\r
72 usetNode = exprNode.fLeftChild;
\r
73 fCachedSetLookup = usetNode.fInputSet;
\r
74 retString = ffffString;
\r
76 // The variable refers to something other than just a set.
\r
77 // This is an error in the rules being compiled. $Variables inside of UnicodeSets
\r
78 // must refer only to another set, not to some random non-set expression.
\r
79 // Note: single characters are represented as sets, so they are ok.
\r
80 fRuleScanner.error(RBBIRuleBuilder.U_BRK_MALFORMED_SET);
\r
81 retString = exprNode.fText;
\r
82 fCachedSetLookup = null;
\r
84 return retString.toCharArray();
\r
88 // RBBISymbolTable::lookupMatcher This function from the abstract symbol table
\r
89 // interface maps a single stand-in character to a
\r
90 // pointer to a Unicode Set. The Unicode Set code uses this
\r
91 // mechanism to get all references to the same $variable
\r
92 // name to refer to a single common Unicode Set instance.
\r
94 // This implementation cheats a little, and does not maintain a map of stand-in chars
\r
95 // to sets. Instead, it takes advantage of the fact that the UnicodeSet
\r
96 // constructor will always call this function right after calling lookup(),
\r
97 // and we just need to remember what set to return between these two calls.
\r
98 public UnicodeMatcher lookupMatcher(int ch) {
\r
99 UnicodeSet retVal = null;
\r
100 if (ch == 0xffff) {
\r
101 retVal = fCachedSetLookup;
\r
102 fCachedSetLookup = null;
\r
108 // RBBISymbolTable::parseReference This function from the abstract symbol table interface
\r
109 // looks for a $variable name in the source text.
\r
110 // It does not look it up, only scans for it.
\r
111 // It is used by the UnicodeSet parser.
\r
113 public String parseReference(String text, ParsePosition pos, int limit) {
\r
114 int start = pos.getIndex();
\r
116 String result = "";
\r
117 while (i < limit) {
\r
118 int c = UTF16.charAt(text, i);
\r
119 if ((i == start && !UCharacter.isUnicodeIdentifierStart(c))
\r
120 || !UCharacter.isUnicodeIdentifierPart(c)) {
\r
123 i += UTF16.getCharCount(c);
\r
125 if (i == start) { // No valid name chars
\r
126 return result; // Indicate failure with empty string
\r
129 result = text.substring(start, i);
\r
134 // RBBISymbolTable::lookupNode Given a key (a variable name), return the
\r
135 // corresponding RBBI Node. If there is no entry
\r
136 // in the table for this name, return null.
\r
138 RBBINode lookupNode(String key) {
\r
140 RBBINode retNode = null;
\r
141 RBBISymbolTableEntry el;
\r
143 el = fHashTable.get(key);
\r
151 // RBBISymbolTable::addEntry Add a new entry to the symbol table.
\r
152 // Indicate an error if the name already exists -
\r
153 // this will only occur in the case of duplicate
\r
154 // variable assignments.
\r
156 void addEntry(String key, RBBINode val) {
\r
157 RBBISymbolTableEntry e;
\r
158 e = fHashTable.get(key);
\r
160 fRuleScanner.error(RBBIRuleBuilder.U_BRK_VARIABLE_REDFINITION);
\r
164 e = new RBBISymbolTableEntry();
\r
167 fHashTable.put(e.key, e);
\r
171 // RBBISymbolTable::print Debugging function, dump out the symbol table contents.
\r
174 void rbbiSymtablePrint() {
\r
176 .print("Variable Definitions\n"
\r
177 + "Name Node Val String Val\n"
\r
178 + "----------------------------------------------------------------------\n");
\r
180 RBBISymbolTableEntry[] syms = fHashTable.values().toArray(new RBBISymbolTableEntry[0]);
\r
182 for (int i = 0; i < syms.length; i++) {
\r
183 RBBISymbolTableEntry s = syms[i];
\r
185 System.out.print(" " + s.key + " "); // TODO: format output into columns.
\r
186 System.out.print(" " + s.val + " ");
\r
187 System.out.print(s.val.fLeftChild.fText);
\r
188 System.out.print("\n");
\r
191 System.out.println("\nParsed Variable Definitions\n");
\r
192 for (int i = 0; i < syms.length; i++) {
\r
193 RBBISymbolTableEntry s = syms[i];
\r
194 System.out.print(s.key);
\r
195 s.val.fLeftChild.printTree(true);
\r
196 System.out.print("\n");
\r