2 **********************************************************************
3 * Copyright (c) 2001-2011, International Business Machines
4 * Corporation and others. All Rights Reserved.
5 **********************************************************************
6 * Date Name Description
7 * 11/19/2001 aliu Creation.
8 **********************************************************************
10 package com.ibm.icu.text;
11 import com.ibm.icu.impl.Utility;
14 * A transliterator that converts Unicode characters to an escape
15 * form. Examples of escape forms are "U+4E01" and "&#x10FFFF;".
16 * Escape forms have a prefix and suffix, either of which may be
17 * empty, a radix, typically 16 or 10, a minimum digit count,
18 * typically 1, 4, or 8, and a boolean that specifies whether
19 * supplemental characters are handled as 32-bit code points or as two
20 * 16-bit code units. Most escape forms handle 32-bit code points,
21 * but some, such as the Java form, intentionally break each of them into
22 * a surrogate pair (two 16-bit code units), for backward compatibility.
24 * <p>Some escape forms actually have two different patterns, one for
25 * BMP characters (0..FFFF) and one for supplements (>FFFF). To
26 * handle this, a second EscapeTransliterator may be defined that
27 * specifies the pattern to be produced for supplementals. An example
28 * of a form that requires this is the C form, which uses "\\uFFFF"
29 * for BMP characters and "\\U0010FFFF" for supplementals.
31 * <p>This class is package private. It registers several standard
32 * variants with the system which are then accessed via their IDs.
36 class EscapeTransliterator extends Transliterator {
39 * The prefix of the escape form; may be empty, but usually isn't.
42 private String prefix;
45 * The suffix of the escape form; often empty. May not be null.
47 private String suffix;
50 * The radix to display the number in. Typically 16 or 10. Must
51 * be in the range 2 to 36.
56 * The minimum number of digits. Typically 1, 4, or 8. Values
57 * less than 1 are equivalent to 1.
59 private int minDigits;
62 * If true, supplementals are handled as 32-bit code points. If
63 * false, they are handled as two 16-bit code units.
65 private boolean grokSupplementals;
68 * The form to be used for supplementals. If this is null then
69 * the same form is used for BMP characters and supplementals. If
70 * this is not null and if grokSupplementals is true then the
71 * prefix, suffix, radix, and minDigits of this object are used for supplementals.
74 private EscapeTransliterator supplementalHandler;
77 * Registers standard variants with the system. Called by
78 * Transliterator during initialization.
80 static void register() {
// Registers one factory per built-in escape variant. Every factory ignores the
// ID argument it receives and returns a new EscapeTransliterator built with a
// hard-coded ID; the remaining constructor arguments are, in order: prefix,
// suffix, radix, minDigits, grokSupplementals, supplementalHandler.
81 // Unicode: "U+10FFFF" hex, min=4, max=6
82 Transliterator.registerFactory("Any-Hex/Unicode", new Transliterator.Factory() {
83 public Transliterator getInstance(String ID) {
84 return new EscapeTransliterator("Any-Hex/Unicode",
85 "U+", "", 16, 4, true, null);
89 // Java: "\\uFFFF" hex, min=4, max=4
// grokSupplementals is false here, so text is processed one 16-bit unit at a time.
90 Transliterator.registerFactory("Any-Hex/Java", new Transliterator.Factory() {
91 public Transliterator getInstance(String ID) {
92 return new EscapeTransliterator("Any-Hex/Java",
93 "\\u", "", 16, 4, false, null);
97 // C: "\\uFFFF" hex, min=4, max=4; \\U0010FFFF hex, min=8, max=8
// The nested instance (empty ID) is passed as supplementalHandler and carries
// the second pattern: prefix "\\U", no suffix, radix 16, at least 8 digits.
98 Transliterator.registerFactory("Any-Hex/C", new Transliterator.Factory() {
99 public Transliterator getInstance(String ID) {
100 return new EscapeTransliterator("Any-Hex/C",
101 "\\u", "", 16, 4, true,
102 new EscapeTransliterator("", "\\U", "", 16, 8, true, null));
106 // XML: "&#x10FFFF;" hex, min=1, max=6
107 Transliterator.registerFactory("Any-Hex/XML", new Transliterator.Factory() {
108 public Transliterator getInstance(String ID) {
109 return new EscapeTransliterator("Any-Hex/XML",
110 "&#x", ";", 16, 1, true, null);
114 // XML10: "&#1114111;" dec, min=1, max=7 (not really "Any-Hex")
115 Transliterator.registerFactory("Any-Hex/XML10", new Transliterator.Factory() {
116 public Transliterator getInstance(String ID) {
117 return new EscapeTransliterator("Any-Hex/XML10",
118 "&#", ";", 10, 1, true, null);
122 // Perl: "\\x{263A}" hex, min=1, max=6
123 Transliterator.registerFactory("Any-Hex/Perl", new Transliterator.Factory() {
124 public Transliterator getInstance(String ID) {
125 return new EscapeTransliterator("Any-Hex/Perl",
126 "\\x{", "}", 16, 1, true, null);
130 // Plain: "FFFF" hex, min=4, max=6
131 Transliterator.registerFactory("Any-Hex/Plain", new Transliterator.Factory() {
132 public Transliterator getInstance(String ID) {
133 return new EscapeTransliterator("Any-Hex/Plain",
134 "", "", 16, 4, true, null);
// Generic "Any-Hex": constructed with exactly the same arguments as the
// "Any-Hex/Java" variant above; only the ID differs.
139 Transliterator.registerFactory("Any-Hex", new Transliterator.Factory() {
140 public Transliterator getInstance(String ID) {
141 return new EscapeTransliterator("Any-Hex",
142 "\\u", "", 16, 4, false, null);
148 * Constructs an escape transliterator with the given ID and
149 * parameters. See the class member documentation for details.
151 EscapeTransliterator(String ID, String prefix, String suffix,
152 int radix, int minDigits,
153 boolean grokSupplementals,
154 EscapeTransliterator supplementalHandler) {
// Copies the escape-form parameters into the fields of this instance.
// supplementalHandler may be null; register() passes null for every variant
// except "Any-Hex/C".
// NOTE(review): no use of the ID parameter and no assignment of the radix
// parameter is visible among these lines, yet handleTransliterate reads
// this.radix. The embedded numbering skips 155 and 158, so statements may be
// missing from this view -- confirm against the complete file.
156 this.prefix = prefix;
157 this.suffix = suffix;
159 this.minDigits = minDigits;
160 this.grokSupplementals = grokSupplementals;
161 this.supplementalHandler = supplementalHandler;
165 * Implements {@link Transliterator#handleTransliterate}.
167 protected void handleTransliterate(Replaceable text,
168 Position pos, boolean incremental) {
// Walks text from pos.start up to pos.limit and replaces each character, in
// place, with its escape form. The incremental flag is not read in any line
// visible here.
169 int start = pos.start;
170 int limit = pos.limit;
// buf is seeded with this object's prefix; prefixLen is kept so that buf can
// later be cut back to just that prefix instead of being rebuilt.
172 StringBuilder buf = new StringBuilder(prefix);
173 int prefixLen = prefix.length();
// NOTE(review): redoPrefix is neither read nor written in the lines visible
// here -- see the note about missing statements below.
174 boolean redoPrefix = false;
176 while (start < limit) {
// With grokSupplementals set, read a whole code point (char32At) and take its
// length in 16-bit units from UTF16.getCharCount; otherwise read exactly one
// 16-bit unit.
177 int c = grokSupplementals ? text.char32At(start) : text.charAt(start);
178 int charLen = grokSupplementals ? UTF16.getCharCount(c) : 1;
// Any bit set above the low 16 means c > 0xFFFF. Such a character is written
// with the supplemental handler's prefix, radix, minDigits and suffix when a
// handler is configured.
180 if ((c & 0xFFFF0000) != 0 && supplementalHandler != null) {
182 buf.append(supplementalHandler.prefix);
183 Utility.appendNumber(buf, c, supplementalHandler.radix,
184 supplementalHandler.minDigits);
185 buf.append(supplementalHandler.suffix);
// NOTE(review): the embedded numbering jumps 180->182, 185->193 and 195->199,
// so statements appear to be missing from this view (for example the end of
// the supplemental branch, and any append of this object's suffix). Confirm
// against the complete file before relying on the control flow shown here.
// Cut buf back to the prefix, then append c in this object's radix
// (presumably zero-padded to minDigits; that behavior lives in
// Utility.appendNumber).
193 buf.setLength(prefixLen);
195 Utility.appendNumber(buf, c, radix, minDigits);
// Swap the charLen source units for the escape text, step past the inserted
// text, and move limit by the resulting change in length.
199 text.replace(start, start + charLen, buf.toString());
200 start += buf.length();
201 limit += buf.length() - charLen;
// Shift contextLimit by the net change in length accumulated in limit.
// NOTE(review): no write-back of start or limit into pos is visible here
// (numbering jumps 204->210) -- confirm against the complete file.
204 pos.contextLimit += limit - pos.limit;
210 * @see com.ibm.icu.text.Transliterator#addSourceTargetSet(com.ibm.icu.text.UnicodeSet, com.ibm.icu.text.UnicodeSet, com.ibm.icu.text.UnicodeSet)
213 public void addSourceTargetSet(UnicodeSet inputFilter, UnicodeSet sourceSet, UnicodeSet targetSet) {
// Source side: whatever getFilterAsUnicodeSet(inputFilter) returns is added to
// sourceSet.
214 sourceSet.addAll(getFilterAsUnicodeSet(inputFilter));
// Target side: visit this transliterator and then each supplementalHandler in
// the chain, contributing the prefix, suffix and digit text of each one.
215 for (EscapeTransliterator it = this; it != null ; it = it.supplementalHandler) {
// No visible statement in the loop modifies inputFilter, so this test has the
// same outcome on every pass.
216 if (inputFilter.size() != 0) {
217 targetSet.addAll(it.prefix);
218 targetSet.addAll(it.suffix);
// Format every value 0..radix-1 into one buffer so that it holds each digit
// this form can emit (presumably plus padding characters when minDigits > 1),
// then hand the text to targetSet in a single call.
219 StringBuilder buffer = new StringBuilder();
220 for (int i = 0; i < it.radix; ++i) {
221 Utility.appendNumber(buffer, i, it.radix, it.minDigits);
223 targetSet.addAll(buffer.toString()); // TODO drop once String is changed to CharSequence in UnicodeSet