1 /********************************************************************
\r
3 * Copyright (c) 2001-2010, International Business Machines Corporation and
\r
4 * others. All Rights Reserved.
\r
5 ********************************************************************/
\r
7 package com.ibm.icu.text;
\r
9 import java.util.HashSet;
\r
10 import java.util.List;
\r
11 import java.util.Set;
\r
13 import com.ibm.icu.impl.Assert;
\r
16 * This class represents a node in the parse tree created by the RBBI Rule compiler.
\r
22 static final int setRef = 0;
\r
23 static final int uset = 1;
\r
24 static final int varRef = 2;
\r
25 static final int leafChar = 3;
\r
26 static final int lookAhead = 4;
\r
27 static final int tag = 5;
\r
28 static final int endMark = 6;
\r
29 static final int opStart = 7;
\r
30 static final int opCat = 8;
\r
31 static final int opOr = 9;
\r
32 static final int opStar = 10;
\r
33 static final int opPlus = 11;
\r
34 static final int opQuestion = 12;
\r
35 static final int opBreak = 13;
\r
36 static final int opReverse = 14;
\r
37 static final int opLParen = 15;
\r
38 static final int nodeTypeLimit = 16; // For Assertion checking only.
\r
40 static final String [] nodeTypeNames = {
\r
59 // enum OpPrecedence {
\r
60 static final int precZero = 0;
\r
61 static final int precStart = 1;
\r
62 static final int precLParen = 2;
\r
63 static final int precOpOr = 3;
\r
64 static final int precOpCat = 4;
\r
66 int fType; // enum NodeType
\r
68 RBBINode fLeftChild;
\r
69 RBBINode fRightChild;
\r
70 UnicodeSet fInputSet; // For uset nodes only.
\r
71 int fPrecedence = precZero; // enum OpPrecedence, For binary ops only.
\r
73 String fText; // Text corresponding to this node.
\r
74 // May be lazily evaluated when (if) needed
\r
75 // for some node types.
\r
76 int fFirstPos; // Position in the rule source string of the
\r
77 // first text associated with the node.
\r
78 // If there's a left child, this will be the same
\r
79 // as that child's left pos.
\r
80 int fLastPos; // Last position in the rule source string
\r
81 // of any text associated with this node.
\r
82 // If there's a right child, this will be the same
\r
83 // as that child's last postion.
\r
85 boolean fNullable; // See Aho DFA table generation algorithm
\r
86 int fVal; // For leafChar nodes, the value.
\r
87 // Values are the character category,
\r
88 // corresponds to columns in the final
\r
89 // state transition table.
\r
91 boolean fLookAheadEnd; // For endMark nodes, set TRUE if
\r
92 // marking the end of a look-ahead rule.
\r
94 Set<RBBINode> fFirstPosSet; // See Aho DFA table generation algorithm
\r
95 Set<RBBINode> fLastPosSet; // See Aho.
\r
96 Set<RBBINode> fFollowPos; // See Aho.
\r
98 int fSerialNum; // Debugging aids. Each node gets a unique serial number.
\r
99 static int gLastSerial;
\r
102 Assert.assrt(t < nodeTypeLimit);
\r
103 fSerialNum = ++gLastSerial;
\r
106 fFirstPosSet = new HashSet<RBBINode>();
\r
107 fLastPosSet = new HashSet<RBBINode>();
\r
108 fFollowPos = new HashSet<RBBINode>();
\r
110 fPrecedence = precOpCat;
\r
111 } else if (t == opOr) {
\r
112 fPrecedence = precOpOr;
\r
113 } else if (t == opStart) {
\r
114 fPrecedence = precStart;
\r
115 } else if (t == opLParen) {
\r
116 fPrecedence = precLParen;
\r
118 fPrecedence = precZero;
\r
122 RBBINode(RBBINode other) {
\r
123 fSerialNum = ++gLastSerial;
\r
124 fType = other.fType;
\r
125 fInputSet = other.fInputSet;
\r
126 fPrecedence = other.fPrecedence;
\r
127 fText = other.fText;
\r
128 fFirstPos = other.fFirstPos;
\r
129 fLastPos = other.fLastPos;
\r
130 fNullable = other.fNullable;
\r
132 fFirstPosSet = new HashSet<RBBINode>(other.fFirstPosSet);
\r
133 fLastPosSet = new HashSet<RBBINode>(other.fLastPosSet);
\r
134 fFollowPos = new HashSet<RBBINode>(other.fFollowPos);
\r
137 //-------------------------------------------------------------------------
\r
139 // cloneTree Make a copy of the subtree rooted at this node.
\r
140 // Discard any variable references encountered along the way,
\r
141 // and replace with copies of the variable's definitions.
\r
142 // Used to replicate the expression underneath variable
\r
143 // references in preparation for generating the DFA tables.
\r
145 //-------------------------------------------------------------------------
\r
146 RBBINode cloneTree() {
\r
149 if (fType == RBBINode.varRef) {
\r
150 // If the current node is a variable reference, skip over it
\r
151 // and clone the definition of the variable instead.
\r
152 n = fLeftChild.cloneTree();
\r
153 } else if (fType == RBBINode.uset) {
\r
156 n = new RBBINode(this);
\r
157 if (fLeftChild != null) {
\r
158 n.fLeftChild = fLeftChild.cloneTree();
\r
159 n.fLeftChild.fParent = n;
\r
161 if (fRightChild != null) {
\r
162 n.fRightChild = fRightChild.cloneTree();
\r
163 n.fRightChild.fParent = n;
\r
171 //-------------------------------------------------------------------------
\r
173 // flattenVariables Walk a parse tree, replacing any variable
\r
174 // references with a copy of the variable's definition.
\r
175 // Aside from variables, the tree is not changed.
\r
177 // Return the root of the tree. If the root was not a variable
\r
178 // reference, it remains unchanged - the root we started with
\r
179 // is the root we return. If, however, the root was a variable
\r
180 // reference, the root of the newly cloned replacement tree will
\r
181 // be returned, and the original tree deleted.
\r
183 // This function works by recursively walking the tree
\r
184 // without doing anything until a variable reference is
\r
185 // found, then calling cloneTree() at that point. Any
\r
186 // nested references are handled by cloneTree(), not here.
\r
188 //-------------------------------------------------------------------------
\r
189 RBBINode flattenVariables() {
\r
190 if (fType == varRef) {
\r
191 RBBINode retNode = fLeftChild.cloneTree();
\r
196 if (fLeftChild != null) {
\r
197 fLeftChild = fLeftChild.flattenVariables();
\r
198 fLeftChild.fParent = this;
\r
200 if (fRightChild != null) {
\r
201 fRightChild = fRightChild.flattenVariables();
\r
202 fRightChild.fParent = this;
\r
207 //-------------------------------------------------------------------------
\r
209 // flattenSets Walk the parse tree, replacing any nodes of type setRef
\r
210 // with a copy of the expression tree for the set. A set's
\r
211 // equivalent expression tree is precomputed and saved as
\r
212 // the left child of the uset node.
\r
214 //-------------------------------------------------------------------------
\r
215 void flattenSets() {
\r
216 Assert.assrt(fType != setRef);
\r
218 if (fLeftChild != null) {
\r
219 if (fLeftChild.fType == setRef) {
\r
220 RBBINode setRefNode = fLeftChild;
\r
221 RBBINode usetNode = setRefNode.fLeftChild;
\r
222 RBBINode replTree = usetNode.fLeftChild;
\r
223 fLeftChild = replTree.cloneTree();
\r
224 fLeftChild.fParent = this;
\r
226 fLeftChild.flattenSets();
\r
230 if (fRightChild != null) {
\r
231 if (fRightChild.fType == setRef) {
\r
232 RBBINode setRefNode = fRightChild;
\r
233 RBBINode usetNode = setRefNode.fLeftChild;
\r
234 RBBINode replTree = usetNode.fLeftChild;
\r
235 fRightChild = replTree.cloneTree();
\r
236 fRightChild.fParent = this;
\r
237 // delete setRefNode;
\r
239 fRightChild.flattenSets();
\r
244 //-------------------------------------------------------------------------
\r
246 // findNodes() Locate all the nodes of the specified type, starting
\r
247 // at the specified root.
\r
249 //-------------------------------------------------------------------------
\r
250 void findNodes(List<RBBINode> dest, int kind) {
\r
251 if (fType == kind) {
\r
254 if (fLeftChild != null) {
\r
255 fLeftChild.findNodes(dest, kind);
\r
257 if (fRightChild != null) {
\r
258 fRightChild.findNodes(dest, kind);
\r
264 //-------------------------------------------------------------------------
\r
266 // print. Print out a single node, for debugging.
\r
268 //-------------------------------------------------------------------------
\r
270 static void printNode(RBBINode n) {
\r
273 System.out.print (" -- null --\n");
\r
275 RBBINode.printInt( n.fSerialNum, 10);
\r
276 RBBINode.printString(nodeTypeNames[n.fType], 11);
\r
277 RBBINode.printInt(n.fParent==null? 0 : n.fParent.fSerialNum, 11);
\r
278 RBBINode.printInt(n.fLeftChild==null? 0 : n.fLeftChild.fSerialNum, 11);
\r
279 RBBINode.printInt(n.fRightChild==null? 0 : n.fRightChild.fSerialNum, 12);
\r
280 RBBINode.printInt(n.fFirstPos, 12);
\r
281 RBBINode.printInt(n.fVal, 7);
\r
283 if (n.fType == varRef) {
\r
284 System.out.print(" " + n.fText);
\r
287 System.out.println("");
\r
292 // Print a String in a fixed field size.
\r
293 // Debugging function.
\r
295 static void printString(String s, int minWidth) {
\r
296 for (int i = minWidth; i < 0; i++) {
\r
297 // negative width means pad leading spaces, not fixed width.
\r
298 System.out.print(' ');
\r
300 for (int i = s.length(); i < minWidth; i++) {
\r
301 System.out.print(' ');
\r
303 System.out.print(s);
\r
308 // Print an int in a fixed size field.
\r
309 // Debugging function.
\r
312 static void printInt(int i, int minWidth) {
\r
313 String s = Integer.toString(i);
\r
314 printString(s, Math.max(minWidth, s.length() + 1));
\r
319 static void printHex(int i, int minWidth) {
\r
320 String s = Integer.toString(i, 16);
\r
321 String leadingZeroes = "00000"
\r
322 .substring(0, Math.max(0, 5 - s.length()));
\r
323 s = leadingZeroes + s;
\r
324 printString(s, minWidth);
\r
329 // -------------------------------------------------------------------------
\r
331 // print. Print out the tree of nodes rooted at "this"
\r
333 // -------------------------------------------------------------------------
\r
335 void printTree(boolean printHeading) {
\r
336 if (printHeading) {
\r
337 System.out.println( "-------------------------------------------------------------------");
\r
338 System.out.println(" Serial type Parent LeftChild RightChild position value");
\r
341 // Only dump the definition under a variable reference if asked to.
\r
342 // Unconditinally dump children of all other node types.
\r
343 if (fType != varRef) {
\r
344 if (fLeftChild != null) {
\r
345 fLeftChild.printTree(false);
\r
348 if (fRightChild != null) {
\r
349 fRightChild.printTree(false);
\r