2 **********************************************************************
\r
3 * Copyright (c) 2001-2010, International Business Machines
\r
4 * Corporation and others. All Rights Reserved.
\r
5 **********************************************************************
\r
6 * Date Name Description
\r
7 * 11/19/2001 aliu Creation.
\r
8 **********************************************************************
\r
10 package com.ibm.icu.text;
\r
11 import com.ibm.icu.impl.Utility;
\r
14 * A transliterator that converts Unicode characters to an escape
\r
15 * form. Examples of escape forms are "U+4E01" and "&#x10FFFF;".
\r
16 * Escape forms have a prefix and suffix, either of which may be
\r
17 * empty, a radix, typically 16 or 10, a minimum digit count,
\r
18 * typically 1, 4, or 8, and a boolean that specifies whether
\r
19 * supplemental characters are handled as 32-bit code points or as two
\r
20 * 16-bit code units. Most escape forms handle 32-bit code points,
\r
21 * but some, such as the Java form, intentionally break them into two
\r
22 * surrogate code units, for backward compatibility.
\r
24 * <p>Some escape forms actually have two different patterns, one for
\r
25 * BMP characters (0..FFFF) and one for supplementals (>FFFF). To
\r
26 * handle this, a second EscapeTransliterator may be defined that
\r
27 * specifies the pattern to be produced for supplementals. An example
\r
28 * of a form that requires this is the C form, which uses "\\uFFFF"
\r
29 * for BMP characters and "\\U0010FFFF" for supplementals.
\r
31 * <p>This class is package private. It registers several standard
\r
32 * variants with the system which are then accessed via their IDs.
\r
36 class EscapeTransliterator extends Transliterator {
\r
39 * The prefix of the escape form; may be empty, but usually isn't.
\r
42 private String prefix;
\r
45 * The suffix of the escape form; often empty. May not be null.
\r
47 private String suffix;
\r
50 * The radix to display the number in. Typically 16 or 10. Must
\r
51 * be in the range 2 to 36.
\r
56 * The minimum number of digits. Typically 1, 4, or 8. Values
\r
57 * less than 1 are equivalent to 1.
\r
59 private int minDigits;
\r
62 * If true, supplementals are handled as 32-bit code points. If
\r
63 * false, they are handled as two 16-bit code units.
\r
65 private boolean grokSupplementals;
\r
68 * The form to be used for supplementals. If this is null then
\r
69 * the same form is used for BMP characters and supplementals. If
\r
70 * this is not null and if grokSupplementals is true then the
\r
71 * prefix, suffix, radix, and minDigits of this object are used
\r
72 * for supplementals.
\r
74 private EscapeTransliterator supplementalHandler;
\r
77 * Registers standard variants with the system. Called by
\r
78 * Transliterator during initialization.
\r
80 static void register() {
\r
81 // Unicode: "U+10FFFF" hex, min=4, max=6
\r
82 Transliterator.registerFactory("Any-Hex/Unicode", new Transliterator.Factory() {
\r
83 public Transliterator getInstance(String ID) {
\r
84 return new EscapeTransliterator("Any-Hex/Unicode",
\r
85 "U+", "", 16, 4, true, null);
\r
89 // Java: "\\uFFFF" hex, min=4, max=4
\r
90 Transliterator.registerFactory("Any-Hex/Java", new Transliterator.Factory() {
\r
91 public Transliterator getInstance(String ID) {
\r
92 return new EscapeTransliterator("Any-Hex/Java",
\r
93 "\\u", "", 16, 4, false, null);
\r
97 // C: "\\uFFFF" hex, min=4, max=4; "\\U0010FFFF" hex, min=8, max=8
\r
98 Transliterator.registerFactory("Any-Hex/C", new Transliterator.Factory() {
\r
99 public Transliterator getInstance(String ID) {
\r
100 return new EscapeTransliterator("Any-Hex/C",
\r
101 "\\u", "", 16, 4, true,
\r
102 new EscapeTransliterator("", "\\U", "", 16, 8, true, null));
\r
106 // XML: "&#x10FFFF;" hex, min=1, max=6
\r
107 Transliterator.registerFactory("Any-Hex/XML", new Transliterator.Factory() {
\r
108 public Transliterator getInstance(String ID) {
\r
109 return new EscapeTransliterator("Any-Hex/XML",
\r
110 "&#x", ";", 16, 1, true, null);
\r
114 // XML10: "&#1114111;" dec, min=1, max=7 (not really "Any-Hex")
\r
115 Transliterator.registerFactory("Any-Hex/XML10", new Transliterator.Factory() {
\r
116 public Transliterator getInstance(String ID) {
\r
117 return new EscapeTransliterator("Any-Hex/XML10",
\r
118 "&#", ";", 10, 1, true, null);
\r
122 // Perl: "\\x{263A}" hex, min=1, max=6
\r
123 Transliterator.registerFactory("Any-Hex/Perl", new Transliterator.Factory() {
\r
124 public Transliterator getInstance(String ID) {
\r
125 return new EscapeTransliterator("Any-Hex/Perl",
\r
126 "\\x{", "}", 16, 1, true, null);
\r
131 Transliterator.registerFactory("Any-Hex", new Transliterator.Factory() {
\r
132 public Transliterator getInstance(String ID) {
\r
133 return new EscapeTransliterator("Any-Hex",
\r
134 "\\u", "", 16, 4, false, null);
\r
140 * Constructs an escape transliterator with the given ID and
\r
141 * parameters. See the class member documentation for details.
\r
143 EscapeTransliterator(String ID, String prefix, String suffix,
\r
144 int radix, int minDigits,
\r
145 boolean grokSupplementals,
\r
146 EscapeTransliterator supplementalHandler) {
\r
148 this.prefix = prefix;
\r
149 this.suffix = suffix;
\r
150 this.radix = radix;
\r
151 this.minDigits = minDigits;
\r
152 this.grokSupplementals = grokSupplementals;
\r
153 this.supplementalHandler = supplementalHandler;
\r
157 * Implements {@link Transliterator#handleTransliterate}.
\r
159 protected void handleTransliterate(Replaceable text,
\r
160 Position pos, boolean incremental) {
\r
161 int start = pos.start;
\r
162 int limit = pos.limit;
\r
164 StringBuilder buf = new StringBuilder(prefix);
\r
165 int prefixLen = prefix.length();
\r
166 boolean redoPrefix = false;
\r
168 while (start < limit) {
\r
169 int c = grokSupplementals ? text.char32At(start) : text.charAt(start);
\r
170 int charLen = grokSupplementals ? UTF16.getCharCount(c) : 1;
\r
172 if ((c & 0xFFFF0000) != 0 && supplementalHandler != null) {
\r
174 buf.append(supplementalHandler.prefix);
\r
175 Utility.appendNumber(buf, c, supplementalHandler.radix,
\r
176 supplementalHandler.minDigits);
\r
177 buf.append(supplementalHandler.suffix);
\r
182 buf.append(prefix);
\r
183 redoPrefix = false;
\r
185 buf.setLength(prefixLen);
\r
187 Utility.appendNumber(buf, c, radix, minDigits);
\r
188 buf.append(suffix);
\r
191 text.replace(start, start + charLen, buf.toString());
\r
192 start += buf.length();
\r
193 limit += buf.length() - charLen;
\r
196 pos.contextLimit += limit - pos.limit;
\r