/* ********************************************************************** * Copyright (c) 2002-2007, International Business Machines Corporation * and others. All Rights Reserved. ********************************************************************** * Date Name Description * 01/14/2002 aliu Creation. ********************************************************************** */ package com.ibm.icu.text; import com.ibm.icu.impl.Utility; /** * A replacer that produces static text as its output. The text may * contain transliterator stand-in characters that represent nested * UnicodeReplacer objects, making it possible to encode a tree of * replacers in a StringReplacer. A StringReplacer that contains such * stand-ins is called a complex StringReplacer. A complex * StringReplacer has a slower processing loop than a non-complex one. * @author Alan Liu */ class StringReplacer implements UnicodeReplacer { /** * Output text, possibly containing stand-in characters that * represent nested UnicodeReplacers. */ private String output; /** * Cursor position. Value is ignored if hasCursor is false. Values * from 0 to output.length() are offsets into the output; values * outside that range count code points before or after it. */ private int cursorPos; /** * True if this object outputs a cursor position. */ private boolean hasCursor; /** * A complex object contains nested replacers and requires more * complex processing. StringReplacers are initially assumed to * be complex. If no nested replacers are seen during processing, * then isComplex is set to false, and future replacements are * short circuited for better performance. */ private boolean isComplex; /** * Object that translates stand-in characters in 'output' to * UnicodeReplacer objects. */ private final RuleBasedTransliterator.Data data; /** * Construct a StringReplacer that emits the given output * text and sets the cursor to the given position. * @param theOutput text that will replace input text when the * replace() method is called. May contain stand-in characters * that represent nested replacers. 
* @param theCursorPos cursor position that the replace() method * will report through its cursor argument * @param theData transliterator context object that translates * stand-in characters to UnicodeReplacer objects */ public StringReplacer(String theOutput, int theCursorPos, RuleBasedTransliterator.Data theData) { output = theOutput; cursorPos = theCursorPos; hasCursor = true; data = theData; isComplex = true; } /** * Construct a StringReplacer that emits the given output * text and does not modify the cursor. * @param theOutput text that will replace input text when the * replace() method is called. May contain stand-in characters * that represent nested replacers. * @param theData transliterator context object that translates * stand-in characters to UnicodeReplacer objects */ public StringReplacer(String theOutput, RuleBasedTransliterator.Data theData) { output = theOutput; cursorPos = 0; hasCursor = false; data = theData; isComplex = true; } //= public static UnicodeReplacer valueOf(String output, //= int cursorPos, //= RuleBasedTransliterator.Data data) { //= if (output.length() == 1) { //= char c = output.charAt(0); //= UnicodeReplacer r = data.lookupReplacer(c); //= if (r != null) { //= return r; //= } //= } //= return new StringReplacer(output, cursorPos, data); //= } /** * UnicodeReplacer API. Replaces the text between start and limit * with this object's output and returns the length of the text * that was put in its place. If this object has a cursor, the new * cursor position is stored in cursor[0]; otherwise cursor is not * written by this method itself. */ public int replace(Replaceable text, int start, int limit, int[] cursor) { int outLen; int newStart = 0; // NOTE: It should be possible to _always_ run the complex // processing code; just slower. If not, then there is a bug // in the complex processing code. 
// Simple (no nested replacers) Processing Code : if (!isComplex) { text.replace(start, limit, output); outLen = output.length(); // Setup default cursor position (for cursorPos within output) newStart = cursorPos; } // Complex (nested replacers) Processing Code : else { /* When there are segments to be copied, use the Replaceable.copy() * API in order to retain out-of-band data. 
Copy everything to the * end of the string, then copy them back over the key. This preserves * the integrity of indices into the key and surrounding context while * generating the output text. */ StringBuffer buf = new StringBuffer(); int oOutput; // offset into 'output' isComplex = false; // The temporary buffer starts at tempStart, and extends // to destLimit + tempExtra. The start of the buffer has a single // character from before the key. This provides style // data when additional characters are filled into the // temporary buffer. If there is nothing to the left, use // the non-character U+FFFF, which Replaceable subclasses // should treat specially as a "no-style character." // destStart points to the point after the style context // character, so it is tempStart+1 or tempStart+2. int tempStart = text.length(); // start of temp buffer int destStart = tempStart; // copy new text to here if (start > 0) { int len = UTF16.getCharCount(text.char32At(start-1)); text.copy(start-len, start, tempStart); destStart += len; } else { text.replace(tempStart, tempStart, "\uFFFF"); destStart++; } int destLimit = destStart; int tempExtra = 0; // temp chars after destLimit /* NOTE(review): source text appears to have been lost in the loop below, between "oOutput" and "0) {"; the loop condition and the declarations of nextIndex and r are not visible. Restore from version control before building. */ for (oOutput=0; oOutput 0) { text.replace(destLimit, destLimit, buf.toString()); destLimit += buf.length(); buf.setLength(0); } // Delegate output generation to replacer object int len = r.replace(text, destLimit, destLimit, cursor); destLimit += len; } oOutput = nextIndex; } // Insert any accumulated straight text. 
if (buf.length() > 0) { text.replace(destLimit, destLimit, buf.toString()); destLimit += buf.length(); } if (oOutput == cursorPos) { // Record the position of the cursor newStart = destLimit - destStart; // relative to start } outLen = destLimit - destStart; // Copy new text to start, then delete the temporary buffer. The // buffer's indices are offset by outLen because the copy inserted // outLen code units at start, ahead of the buffer. text.copy(destStart, destLimit, start); text.replace(tempStart + outLen, destLimit + tempExtra + outLen, ""); // Delete the old text (the key) text.replace(start + outLen, limit + outLen, ""); } if (hasCursor) { // Adjust the cursor for positions outside the key. These // refer to code points rather than code units. If cursorPos // is within the output string, then use newStart, which has // already been set above. if (cursorPos < 0) { newStart = start; int n = cursorPos; // Outside the output string, cursorPos counts code points while (n < 0 && newStart > 0) { newStart -= UTF16.getCharCount(text.char32At(newStart-1)); ++n; } /* Apply any count left over after reaching the start of the text. */ newStart += n; } else if (cursorPos > output.length()) { newStart = start + outLen; int n = cursorPos - output.length(); // Outside the output string, cursorPos counts code points while (n > 0 && newStart < text.length()) { newStart += UTF16.getCharCount(text.char32At(newStart)); --n; } /* Apply any count left over after reaching the end of the text. */ newStart += n; } else { // Cursor is within output string. It has been set up above // to be relative to start. newStart += start; } cursor[0] = newStart; } return outLen; } /** * UnicodeReplacer API. Builds and returns the rule pattern string * for this replacer using Utility.appendToRule(). A cursor lying * outside the output is written as a run of '@' characters * together with '|'. 
* @param escapeUnprintable passed through to Utility.appendToRule() */ public String toReplacerPattern(boolean escapeUnprintable) { StringBuffer rule = new StringBuffer(); StringBuffer quoteBuf = new StringBuffer(); int cursor = cursorPos; // Handle a cursor preceding the output if (hasCursor && cursor < 0) { while (cursor++ < 0) { Utility.appendToRule(rule, '@', true, escapeUnprintable, quoteBuf); } // Fall through and append '|' below } /* NOTE(review): source text appears to have been lost in the loop below, after "for (int i=0; i"; the loop over 'output' and the start of the following comment are not visible. Restore from version control before building. */ for (int i=0; i rather than >= because // if cursor == output.length() it is at the end of the output, // which is the default position, so we need not emit it. 
if (hasCursor && cursor > output.length()) { cursor -= output.length(); while (cursor-- > 0) { Utility.appendToRule(rule, '@', true, escapeUnprintable, quoteBuf); } Utility.appendToRule(rule, '|', true, escapeUnprintable, quoteBuf); } // Flush quoteBuf out to result Utility.appendToRule(rule, -1, true, escapeUnprintable, quoteBuf); return rule.toString(); } /** * Union the set of all characters that may be output by this object * into the given set. * @param toUnionTo the set into which to union the output characters */ public void addReplacementSetTo(UnicodeSet toUnionTo) { int ch; for (int i=0; i