2 **********************************************************************
3 * Copyright (c) 2002-2007, International Business Machines Corporation
4 * and others. All Rights Reserved.
5 **********************************************************************
6 * Date Name Description
7 * 01/14/2002 aliu Creation.
8 **********************************************************************
11 package com.ibm.icu.text;
12 import com.ibm.icu.impl.Utility;
15 * A replacer that produces static text as its output. The text may
16 * contain transliterator stand-in characters that represent nested
17 * UnicodeReplacer objects, making it possible to encode a tree of
18 * replacers in a StringReplacer. A StringReplacer that contains such
19 * stand-ins is called a <em>complex</em> StringReplacer. A complex
20 * StringReplacer has a slower processing loop than a non-complex one.
23 class StringReplacer implements UnicodeReplacer {
26 * Output text, possibly containing stand-in characters that
27 * represent nested UnicodeReplacers.
29 private String output;
32 * Cursor position. Value is ignored if hasCursor is false.
34 private int cursorPos;
37 * True if this object outputs a cursor position.
39 private boolean hasCursor;
42 * A complex object contains nested replacers and requires more
43 * complex processing. StringReplacers are initially assumed to
44 * be complex. If no nested replacers are seen during processing,
45 * then isComplex is set to false, and future replacements are
46 * short circuited for better performance.
48 private boolean isComplex;
51 * Object that translates stand-in characters in 'output' to
52 * UnicodeReplacer objects.
54 private final RuleBasedTransliterator.Data data;
57 * Construct a StringReplacer that emits the given output
58 * text and sets the cursor to the given position.
59 * @param theOutput text that will replace input text when the
60 * replace() method is called. May contain stand-in characters
61 * that represent nested replacers.
62 * @param theCursorPos cursor position that will be returned by
63 * the replace() method
64 * @param theData transliterator context object that translates
65 * stand-in characters to UnicodeReplacer objects
// Constructor taking the output text, a cursor position, and the
// transliterator data object that maps stand-in characters to replacers.
// NOTE(review): only part of this constructor is visible here; the
// theCursorPos parameter declaration and any other field assignments
// should be confirmed against the complete file.
67 public StringReplacer(String theOutput,
69 RuleBasedTransliterator.Data theData) {
// Remember the caller-supplied cursor position.
71 cursorPos = theCursorPos;
78 * Construct a StringReplacer that emits the given output
79 * text and does not modify the cursor.
80 * @param theOutput text that will replace input text when the
81 * replace() method is called. May contain stand-in characters
82 * that represent nested replacers.
83 * @param theData transliterator context object that translates
84 * stand-in characters to UnicodeReplacer objects
// Overload without a cursor position argument: takes only the output
// text and the transliterator data object.
// NOTE(review): the constructor body is not visible here; confirm how
// the cursor-related fields (cursorPos, hasCursor) are initialized.
86 public StringReplacer(String theOutput,
87 RuleBasedTransliterator.Data theData) {
95 //= public static UnicodeReplacer valueOf(String output,
97 //= RuleBasedTransliterator.Data data) {
98 //= if (output.length() == 1) {
99 //= char c = output.charAt(0);
100 //= UnicodeReplacer r = data.lookupReplacer(c);
105 //= return new StringReplacer(output, cursorPos, data);
109 * UnicodeReplacer API
// Writes this replacer's output into text and records the resulting
// cursor position in cursor[0]. Two paths are visible: a simple one that
// replaces start..limit with the literal output, and a complex one that
// builds the result in a temporary area at the end of text, expanding
// stand-in characters through data.lookupReplacer().
// NOTE(review): the remaining parameters, the condition selecting between
// the two paths, and the return statement are not visible in this
// excerpt; outLen holds the output length computed by either path.
111 public int replace(Replaceable text,
118 // NOTE: It should be possible to _always_ run the complex
119 // processing code; just slower. If not, then there is a bug
120 // in the complex processing code.
122 // Simple (no nested replacers) Processing Code :
// Overwrite the range start..limit directly with the literal output.
124 text.replace(start, limit, output);
125 outLen = output.length();
127 // Setup default cursor position (for cursorPos within output)
128 newStart = cursorPos;
131 // Complex (nested replacers) Processing Code :
133 /* When there are segments to be copied, use the Replaceable.copy()
134 * API in order to retain out-of-band data. Copy everything to the
135 * end of the string, then copy them back over the key. This preserves
136 * the integrity of indices into the key and surrounding context while
137 * generating the output text.
// buf collects literal (non-stand-in) code points until they are flushed
// into text at destLimit.
139 StringBuffer buf = new StringBuffer();
140 int oOutput; // offset into 'output'
143 // The temporary buffer starts at tempStart, and extends
144 // to destLimit + tempExtra. The start of the buffer has a single
145 // character from before the key. This provides style
146 // data when additional characters are filled into the
147 // temporary buffer. If there is nothing to the left, use
148 // the non-character U+FFFF, which Replaceable subclasses
149 // should treat specially as a "no-style character."
150 // destStart points to the point after the style context
151 // character, so it is tempStart+1 or tempStart+2.
152 int tempStart = text.length(); // start of temp buffer
153 int destStart = tempStart; // copy new text to here
// Style context: copy the code point just before start (one or two code
// units, per UTF16.getCharCount) to the start of the temporary buffer.
155 int len = UTF16.getCharCount(text.char32At(start-1));
156 text.copy(start-len, start, tempStart);
// Fallback style context: insert the U+FFFF placeholder instead.
// NOTE(review): the condition choosing between these two cases and the
// matching destStart adjustment are not visible here.
159 text.replace(tempStart, tempStart, "\uFFFF");
162 int destLimit = destStart;
163 int tempExtra = 0; // temp chars after destLimit
// Walk output one code point at a time.
// NOTE(review): the statement advancing oOutput is not visible here.
165 for (oOutput=0; oOutput<output.length(); ) {
166 if (oOutput == cursorPos) {
167 // Record the position of the cursor
168 newStart = buf.length() + destLimit - destStart; // relative to start
169 // the buf.length() was inserted for bug 5789
170 // the problem is that if we are accumulating into a buffer (when r == null below)
171 // then the actual length of the text at that point needs to add the buf length.
172 // there was an alternative suggested in #5789, but that looks like it won't work
173 // if we have accumulated some stuff in the dest part AND have a non-zero buffer.
175 int c = UTF16.charAt(output, oOutput);
177 // When we are at the last position copy the right style
178 // context character into the temporary buffer. We don't
179 // do this before because it will provide an incorrect
180 // right context for previous replace() operations.
181 int nextIndex = oOutput + UTF16.getCharCount(c);
182 if (nextIndex == output.length()) {
183 tempExtra = UTF16.getCharCount(text.char32At(limit));
184 text.copy(limit, limit+tempExtra, destLimit);
// Look up whether c is a stand-in for a nested replacer.
// NOTE(review): the branch test is not visible here; per the r == null
// remark above, literal text is accumulated when no replacer is found.
187 UnicodeReplacer r = data.lookupReplacer(c);
189 // Accumulate straight (non-segment) text.
190 UTF16.append(buf, c);
194 // Insert any accumulated straight text.
195 if (buf.length() > 0) {
196 text.replace(destLimit, destLimit, buf.toString());
197 destLimit += buf.length();
201 // Delegate output generation to replacer object
// The nested replacer is given the empty range destLimit..destLimit, so
// it inserts its output at the current end of the generated text.
// NOTE(review): how the returned len is applied is not visible here.
202 int len = r.replace(text, destLimit, destLimit, cursor);
207 // Insert any accumulated straight text.
208 if (buf.length() > 0) {
209 text.replace(destLimit, destLimit, buf.toString());
210 destLimit += buf.length();
// Cursor located exactly at the end of output (checked after the loop).
212 if (oOutput == cursorPos) {
213 // Record the position of the cursor
214 newStart = destLimit - destStart; // relative to start
217 outLen = destLimit - destStart;
219 // Copy new text to start, and delete it
220 text.copy(destStart, destLimit, start);
// The temporary buffer indices are offset by outLen here, accounting for
// the outLen code units just copied in at start.
221 text.replace(tempStart + outLen, destLimit + tempExtra + outLen, "");
223 // Delete the old text (the key)
224 text.replace(start + outLen, limit + outLen, "");
228 // Adjust the cursor for positions outside the key. These
229 // refer to code points rather than code units. If cursorPos
230 // is within the output string, then use newStart, which has
231 // already been set above.
235 // Outside the output string, cursorPos counts code points
// Step newStart back one code point (one or two code units) per pass,
// stopping at the beginning of text.
// NOTE(review): the initialization and update of n for this branch are
// not visible here.
236 while (n < 0 && newStart > 0) {
237 newStart -= UTF16.getCharCount(text.char32At(newStart-1));
241 } else if (cursorPos > output.length()) {
// Cursor lies past the output: start from the end of the new text and
// move forward n code points, stopping at the end of text.
242 newStart = start + outLen;
243 int n = cursorPos - output.length();
244 // Outside the output string, cursorPos counts code points
245 while (n > 0 && newStart < text.length()) {
246 newStart += UTF16.getCharCount(text.char32At(newStart));
251 // Cursor is within output string. It has been set up above
252 // to be relative to start.
// Publish the computed cursor position to the caller.
256 cursor[0] = newStart;
263 * UnicodeReplacer API
// Builds and returns a rule-pattern string for this replacer. Characters
// of output are appended through Utility.appendToRule; stand-in
// characters are expanded by calling toReplacerPattern on the replacer
// returned by data.lookupReplacer. When hasCursor is set, at-sign (@)
// placeholders and a vertical bar (|) are emitted to mark the cursor.
// escapeUnprintable is passed through unchanged to every appendToRule
// call and to nested replacers.
265 public String toReplacerPattern(boolean escapeUnprintable) {
266 StringBuffer rule = new StringBuffer();
// quoteBuf is the scratch buffer shared by all appendToRule calls; it is
// flushed into rule at the end.
267 StringBuffer quoteBuf = new StringBuffer();
// Work on a local copy so the cursorPos field is left untouched.
269 int cursor = cursorPos;
271 // Handle a cursor preceding the output
272 if (hasCursor && cursor < 0) {
// Emits one at-sign per position the cursor lies before the output.
// NOTE(review): because the loop test post-increments, cursor is 1 (not
// 0) when this loop exits, so the i == cursor check below would match
// index 1 unless a line not visible here resets it -- confirm intended.
273 while (cursor++ < 0) {
274 Utility.appendToRule(rule, '@', true, escapeUnprintable, quoteBuf);
276 // Fall through and append '|' below
279 for (int i=0; i<output.length(); ++i) {
// Emit the cursor marker just before the character it precedes.
280 if (hasCursor && i == cursor) {
281 Utility.appendToRule(rule, '|', true, escapeUnprintable, quoteBuf);
283 char c = output.charAt(i); // Ok to use 16-bits here
285 UnicodeReplacer r = data.lookupReplacer(c);
// Literal character: appended as-is.
// NOTE(review): the test selecting between the literal and nested cases
// is not visible here.
287 Utility.appendToRule(rule, c, false, escapeUnprintable, quoteBuf);
// Nested replacer: its pattern is built in a buffer that begins with a
// single space, then appended to rule as a string.
289 StringBuffer buf = new StringBuffer(" ");
290 buf.append(r.toReplacerPattern(escapeUnprintable));
292 Utility.appendToRule(rule, buf.toString(),
293 true, escapeUnprintable, quoteBuf);
297 // Handle a cursor after the output. Use > rather than >= because
298 // if cursor == output.length() it is at the end of the output,
299 // which is the default position, so we need not emit it.
300 if (hasCursor && cursor > output.length()) {
// Convert to a count of positions past the end, then emit that many
// at-signs followed by the cursor marker.
301 cursor -= output.length();
302 while (cursor-- > 0) {
303 Utility.appendToRule(rule, '@', true, escapeUnprintable, quoteBuf);
305 Utility.appendToRule(rule, '|', true, escapeUnprintable, quoteBuf);
307 // Flush quoteBuf out to result
308 Utility.appendToRule(rule, -1,
309 true, escapeUnprintable, quoteBuf);
311 return rule.toString();
315 * Union the set of all characters that may be output by this object
316 * into the given set.
317 * @param toUnionTo the set into which to union the output characters
// Walks output one code point at a time (UTF16.charAt reads the code
// point, UTF16.getCharCount supplies the step) and looks up a nested
// replacer for each; a nested replacer adds its own replacement set to
// toUnionTo.
// NOTE(review): the declaration of ch and the branch taken when no
// replacer is found are not visible here; presumably ch itself is added
// to toUnionTo in that case -- confirm against the complete file.
319 public void addReplacementSetTo(UnicodeSet toUnionTo) {
321 for (int i=0; i<output.length(); i+=UTF16.getCharCount(ch)) {
322 ch = UTF16.charAt(output, i);
323 UnicodeReplacer r = data.lookupReplacer(ch);
// Delegate to the nested replacer so its possible output is included.
327 r.addReplacementSetTo(toUnionTo);