2 *******************************************************************************
\r
3 * Copyright (C) 2006-2010, International Business Machines Corporation and *
\r
4 * others. All Rights Reserved. *
\r
5 *******************************************************************************
\r
7 *******************************************************************************
\r
10 package com.ibm.icu.charset;
\r
12 import java.nio.ByteBuffer;
\r
13 import java.nio.CharBuffer;
\r
14 import java.nio.IntBuffer;
\r
15 import java.nio.charset.CoderResult;
\r
18 * <h2> Callback API for CharsetICU API </h2>
\r
20 * CharsetCallback class defines some error behaviour functions called
\r
21 * by CharsetDecoderICU and CharsetEncoderICU. The class also provides
\r
22 * the facility by which clients can write their own callbacks.
\r
24 * These functions, although public, should NEVER be called directly.
\r
25 * They should be used as parameters to the onUnmappableCharacter() and
\r
26 * onMalformedInput() methods, to set the behaviour of a converter
\r
27 * when it encounters UNMAPPED/INVALID sequences.
\r
28 * Currently the only way to set callbacks is by using CodingErrorAction.
\r
29 * In the future we will provide set methods on CharsetEncoder and CharsetDecoder
\r
30 * that will accept CharsetCallback fields.
\r
35 public class CharsetCallback {
\r
37 * FROM_U, TO_U context options for sub callback
\r
39 private static final String SUB_STOP_ON_ILLEGAL = "i";
\r
42 // * FROM_U, TO_U context options for skip callback
\r
44 // private static final String SKIP_STOP_ON_ILLEGAL = "i";
\r
47 // * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to ICU (%UXXXX)
\r
49 // private static final String ESCAPE_ICU = null;
\r
52 * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to JAVA (\\uXXXX)
\r
54 private static final String ESCAPE_JAVA = "J";
\r
57 * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to C (\\uXXXX \\UXXXXXXXX)
\r
58 * TO_U_CALLBACK_ESCAPE context option to escape the character value according to C (\\xXXXX)
\r
60 private static final String ESCAPE_C = "C";
\r
63 * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to XML Decimal escape \htmlonly(&#DDDD;)\endhtmlonly
\r
64 * TO_U_CALLBACK_ESCAPE context option to escape the character value according to XML Decimal escape \htmlonly(&#DDDD;)\endhtmlonly
\r
66 private static final String ESCAPE_XML_DEC = "D";
\r
69 * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to XML Hex escape \htmlonly(&#xXXXX;)\endhtmlonly
\r
70 * TO_U_CALLBACK_ESCAPE context option to escape the character value according to XML Hex escape \htmlonly(&#xXXXX;)\endhtmlonly
\r
72 private static final String ESCAPE_XML_HEX = "X";
\r
75 * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to Unicode ({U+XXXX})
\r
77 private static final String ESCAPE_UNICODE = "U";
\r
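80 * FROM_U_CALLBACK_ESCAPE context option to escape the code point according to CSS2 conventions (\\HH..H&lt;space&gt;, that is, a backslash, the hex digits of the code point, and a terminating space)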
\r
82 private static final String ESCAPE_CSS2 = "S";
\r
85 * Decoder Callback interface
\r
88 public interface Decoder {
\r
90 * This function is called when the bytes in the source cannot be handled,
\r
91 * and this function is meant to handle or fix the error if possible.
\r
93 * @return Result of decoding action. This returned object is set to an error
\r
94 * if this function could not handle the conversion.
\r
97 public CoderResult call(CharsetDecoderICU decoder, Object context,
\r
98 ByteBuffer source, CharBuffer target, IntBuffer offsets,
\r
99 char[] buffer, int length, CoderResult cr);
\r
102 * Encoder Callback interface
\r
105 public interface Encoder {
\r
107 * This function is called when the Unicode characters in the source cannot be handled,
\r
108 * and this function is meant to handle or fix the error if possible.
\r
109 * @return Result of encoding action. This returned object is set to an error
\r
110 * if this function could not handle the conversion.
\r
113 public CoderResult call(CharsetEncoderICU encoder, Object context,
\r
114 CharBuffer source, ByteBuffer target, IntBuffer offsets,
\r
115 char[] buffer, int length, int cp, CoderResult cr);
\r
121 public static final Encoder FROM_U_CALLBACK_SKIP = new Encoder() {
\r
122 public CoderResult call(CharsetEncoderICU encoder, Object context,
\r
123 CharBuffer source, ByteBuffer target, IntBuffer offsets,
\r
124 char[] buffer, int length, int cp, CoderResult cr){
\r
126 return CoderResult.UNDERFLOW;
\r
127 }else if(((String)context).equals(SUB_STOP_ON_ILLEGAL)){
\r
128 if(!cr.isUnmappable()){
\r
131 return CoderResult.UNDERFLOW;
\r
141 public static final Decoder TO_U_CALLBACK_SKIP = new Decoder() {
\r
142 public CoderResult call(CharsetDecoderICU decoder, Object context,
\r
143 ByteBuffer source, CharBuffer target, IntBuffer offsets,
\r
144 char[] buffer, int length, CoderResult cr){
\r
146 return CoderResult.UNDERFLOW;
\r
147 }else if(((String)context).equals(SUB_STOP_ON_ILLEGAL)){
\r
148 if(!cr.isUnmappable()){
\r
151 return CoderResult.UNDERFLOW;
\r
158 * Write substitute callback
\r
161 public static final Encoder FROM_U_CALLBACK_SUBSTITUTE = new Encoder(){
\r
162 public CoderResult call(CharsetEncoderICU encoder, Object context,
\r
163 CharBuffer source, ByteBuffer target, IntBuffer offsets,
\r
164 char[] buffer, int length, int cp, CoderResult cr){
\r
166 return encoder.cbFromUWriteSub(encoder, source, target, offsets);
\r
167 }else if(((String)context).equals(SUB_STOP_ON_ILLEGAL)){
\r
168 if(!cr.isUnmappable()){
\r
171 return encoder.cbFromUWriteSub(encoder, source, target, offsets);
\r
177 private static final char[] kSubstituteChar1 = new char[]{0x1A};
\r
178 private static final char[] kSubstituteChar = new char[] {0xFFFD};
\r
180 * Write substitute callback
\r
183 public static final Decoder TO_U_CALLBACK_SUBSTITUTE = new Decoder() {
\r
184 public CoderResult call(CharsetDecoderICU decoder, Object context,
\r
185 ByteBuffer source, CharBuffer target, IntBuffer offsets,
\r
186 char[] buffer, int length, CoderResult cr){
\r
188 CharsetICU cs = (CharsetICU) decoder.charset();
\r
189 /* could optimize this case, just one uchar */
\r
190 if(decoder.invalidCharLength == 1 && cs.subChar1 != 0) {
\r
191 return CharsetDecoderICU.toUWriteUChars(decoder, kSubstituteChar1, 0, 1, target, offsets, source.position());
\r
193 return CharsetDecoderICU.toUWriteUChars(decoder, kSubstituteChar, 0, 1, target, offsets, source.position());
\r
201 public static final Encoder FROM_U_CALLBACK_STOP = new Encoder() {
\r
202 public CoderResult call(CharsetEncoderICU encoder, Object context,
\r
203 CharBuffer source, ByteBuffer target, IntBuffer offsets,
\r
204 char[] buffer, int length, int cp, CoderResult cr){
\r
212 public static final Decoder TO_U_CALLBACK_STOP = new Decoder() {
\r
213 public CoderResult call(CharsetDecoderICU decoder, Object context,
\r
214 ByteBuffer source, CharBuffer target, IntBuffer offsets,
\r
215 char[] buffer, int length, CoderResult cr){
\r
219 private static final int VALUE_STRING_LENGTH = 32;
\r
220 private static final char UNICODE_PERCENT_SIGN_CODEPOINT = 0x0025;
\r
221 private static final char UNICODE_U_CODEPOINT = 0x0055;
\r
222 private static final char UNICODE_X_CODEPOINT = 0x0058;
\r
223 private static final char UNICODE_RS_CODEPOINT = 0x005C;
\r
224 private static final char UNICODE_U_LOW_CODEPOINT = 0x0075;
\r
225 private static final char UNICODE_X_LOW_CODEPOINT = 0x0078;
\r
226 private static final char UNICODE_AMP_CODEPOINT = 0x0026;
\r
227 private static final char UNICODE_HASH_CODEPOINT = 0x0023;
\r
228 private static final char UNICODE_SEMICOLON_CODEPOINT = 0x003B;
\r
229 private static final char UNICODE_PLUS_CODEPOINT = 0x002B;
\r
230 private static final char UNICODE_LEFT_CURLY_CODEPOINT = 0x007B;
\r
231 private static final char UNICODE_RIGHT_CURLY_CODEPOINT = 0x007D;
\r
232 private static final char UNICODE_SPACE_CODEPOINT = 0x0020;
\r
234 * Write escape callback
\r
237 public static final Encoder FROM_U_CALLBACK_ESCAPE = new Encoder() {
\r
238 public CoderResult call(CharsetEncoderICU encoder, Object context,
\r
239 CharBuffer source, ByteBuffer target, IntBuffer offsets,
\r
240 char[] buffer, int length, int cp, CoderResult cr){
\r
241 char[] valueString = new char[VALUE_STRING_LENGTH];
\r
242 int valueStringLength = 0;
\r
245 cr = CoderResult.UNDERFLOW;
\r
247 if (context == null || !(context instanceof String)) {
\r
248 while (i < length) {
\r
249 valueString[valueStringLength++] = UNICODE_PERCENT_SIGN_CODEPOINT; /* adding % */
\r
250 valueString[valueStringLength++] = UNICODE_U_CODEPOINT; /* adding U */
\r
251 valueStringLength += itou(valueString, valueStringLength, (int)buffer[i++] & UConverterConstants.UNSIGNED_SHORT_MASK, 16, 4);
\r
254 if (((String)context).equals(ESCAPE_JAVA)) {
\r
255 while (i < length) {
\r
256 valueString[valueStringLength++] = UNICODE_RS_CODEPOINT; /* adding \ */
\r
257 valueString[valueStringLength++] = UNICODE_U_LOW_CODEPOINT; /* adding u */
\r
258 valueStringLength += itou(valueString, valueStringLength, (int)buffer[i++] & UConverterConstants.UNSIGNED_SHORT_MASK, 16, 4);
\r
260 } else if (((String)context).equals(ESCAPE_C)) {
\r
261 valueString[valueStringLength++] = UNICODE_RS_CODEPOINT; /* adding \ */
\r
264 valueString[valueStringLength++] = UNICODE_U_CODEPOINT; /* adding U */
\r
265 valueStringLength += itou(valueString, valueStringLength, cp, 16, 8); /* accumulate, as at every other itou call site; plain assignment discarded the two prefix chars (\ and U) already counted, truncating the escape */
\r
267 valueString[valueStringLength++] = UNICODE_U_LOW_CODEPOINT; /* adding u */
\r
268 valueStringLength += itou(valueString, valueStringLength, (int)buffer[0] & UConverterConstants.UNSIGNED_SHORT_MASK, 16, 4);
\r
270 } else if (((String)context).equals(ESCAPE_XML_DEC)) {
\r
271 valueString[valueStringLength++] = UNICODE_AMP_CODEPOINT; /* adding & */
\r
272 valueString[valueStringLength++] = UNICODE_HASH_CODEPOINT; /* adding # */
\r
274 valueStringLength += itou(valueString, valueStringLength, cp, 10, 0);
\r
276 valueStringLength += itou(valueString, valueStringLength, (int)buffer[0] & UConverterConstants.UNSIGNED_SHORT_MASK, 10, 0);
\r
278 valueString[valueStringLength++] = UNICODE_SEMICOLON_CODEPOINT; /* adding ; */
\r
279 } else if (((String)context).equals(ESCAPE_XML_HEX)) {
\r
280 valueString[valueStringLength++] = UNICODE_AMP_CODEPOINT; /* adding & */
\r
281 valueString[valueStringLength++] = UNICODE_HASH_CODEPOINT; /* adding # */
\r
282 valueString[valueStringLength++] = UNICODE_X_LOW_CODEPOINT; /* adding x */
\r
284 valueStringLength += itou(valueString, valueStringLength, cp, 16, 0);
\r
286 valueStringLength += itou(valueString, valueStringLength, (int)buffer[0] & UConverterConstants.UNSIGNED_SHORT_MASK, 16, 0);
\r
288 valueString[valueStringLength++] = UNICODE_SEMICOLON_CODEPOINT; /* adding ; */
\r
289 } else if (((String)context).equals(ESCAPE_UNICODE)) {
\r
290 valueString[valueStringLength++] = UNICODE_LEFT_CURLY_CODEPOINT; /* adding { */
\r
291 valueString[valueStringLength++] = UNICODE_U_CODEPOINT; /* adding U */
\r
292 valueString[valueStringLength++] = UNICODE_PLUS_CODEPOINT; /* adding + */
\r
294 valueStringLength += itou(valueString, valueStringLength,cp, 16, 4);
\r
296 valueStringLength += itou(valueString, valueStringLength, (int)buffer[0] & UConverterConstants.UNSIGNED_SHORT_MASK, 16, 4);
\r
298 valueString[valueStringLength++] = UNICODE_RIGHT_CURLY_CODEPOINT; /* adding } */
\r
299 } else if (((String)context).equals(ESCAPE_CSS2)) {
\r
300 valueString[valueStringLength++] = UNICODE_RS_CODEPOINT; /* adding \ */
\r
301 valueStringLength += itou(valueString, valueStringLength, cp, 16, 0);
\r
302 /* Always add space character, because the next character might be whitespace,
\r
303 which would erroneously be considered the termination of the escape sequence. */
\r
304 valueString[valueStringLength++] = UNICODE_SPACE_CODEPOINT;
\r
306 while (i < length) {
\r
307 valueString[valueStringLength++] = UNICODE_PERCENT_SIGN_CODEPOINT; /* adding % */
\r
308 valueString[valueStringLength++] = UNICODE_U_CODEPOINT; /* adding U */
\r
309 valueStringLength += itou(valueString, valueStringLength, (int)buffer[i++] & UConverterConstants.UNSIGNED_SHORT_MASK, 16, 4);
\r
314 cr = encoder.cbFromUWriteUChars(encoder, CharBuffer.wrap(valueString, 0, valueStringLength), target, offsets);
\r
319 * Write escape callback
\r
322 public static final Decoder TO_U_CALLBACK_ESCAPE = new Decoder() {
\r
323 public CoderResult call(CharsetDecoderICU decoder, Object context,
\r
324 ByteBuffer source, CharBuffer target, IntBuffer offsets,
\r
325 char[] buffer, int length, CoderResult cr){
\r
326 char[] uniValueString = new char[VALUE_STRING_LENGTH];
\r
327 int valueStringLength = 0;
\r
330 if (context == null || !(context instanceof String)) {
\r
331 while (i < length) {
\r
332 uniValueString[valueStringLength++] = UNICODE_PERCENT_SIGN_CODEPOINT; /* adding % */
\r
333 uniValueString[valueStringLength++] = UNICODE_X_CODEPOINT; /* adding U */
\r
334 valueStringLength += itou(uniValueString, valueStringLength, buffer[i++] & UConverterConstants.UNSIGNED_BYTE_MASK, 16, 2);
\r
337 if (((String)context).equals(ESCAPE_XML_DEC)) {
\r
338 while (i < length) {
\r
339 uniValueString[valueStringLength++] = UNICODE_AMP_CODEPOINT; /* adding & */
\r
340 uniValueString[valueStringLength++] = UNICODE_HASH_CODEPOINT; /* adding # */
\r
341 valueStringLength += itou(uniValueString, valueStringLength, buffer[i++] & UConverterConstants.UNSIGNED_BYTE_MASK, 10, 0);
\r
342 uniValueString[valueStringLength++] = UNICODE_SEMICOLON_CODEPOINT; /* adding ; */
\r
344 } else if (((String)context).equals(ESCAPE_XML_HEX)) {
\r
345 while (i < length) {
\r
346 uniValueString[valueStringLength++] = UNICODE_AMP_CODEPOINT; /* adding & */
\r
347 uniValueString[valueStringLength++] = UNICODE_HASH_CODEPOINT; /* adding # */
\r
348 uniValueString[valueStringLength++] = UNICODE_X_LOW_CODEPOINT; /* adding x */
\r
349 valueStringLength += itou(uniValueString, valueStringLength, buffer[i++] & UConverterConstants.UNSIGNED_BYTE_MASK, 16, 0);
\r
350 uniValueString[valueStringLength++] = UNICODE_SEMICOLON_CODEPOINT; /* adding ; */
\r
352 } else if (((String)context).equals(ESCAPE_C)) {
\r
353 while (i < length) {
\r
354 uniValueString[valueStringLength++] = UNICODE_RS_CODEPOINT; /* adding \ */
\r
355 uniValueString[valueStringLength++] = UNICODE_X_LOW_CODEPOINT; /* adding x */
\r
356 valueStringLength += itou(uniValueString, valueStringLength, buffer[i++] & UConverterConstants.UNSIGNED_BYTE_MASK, 16, 2);
\r
359 while (i < length) {
\r
360 uniValueString[valueStringLength++] = UNICODE_PERCENT_SIGN_CODEPOINT; /* adding % */
\r
361 uniValueString[valueStringLength++] = UNICODE_X_CODEPOINT; /* adding X */
\r
362 itou(uniValueString, valueStringLength, buffer[i++] & UConverterConstants.UNSIGNED_BYTE_MASK, 16, 2);
\r
363 valueStringLength += 2;
\r
368 cr = CharsetDecoderICU.toUWriteUChars(decoder, uniValueString, 0, valueStringLength, target, offsets, 0);
\r
374 * Java port of uprv_itou() in ICU4C used by TO_U_CALLBACK_ESCAPE and FROM_U_CALLBACK_ESCAPE.
\r
375 * Fills in a char string with the radix-based representation of a number padded with zeroes
\r
378 private static final int itou(char[] buffer, int sourceIndex, int i, int radix, int minwidth) {
\r
386 buffer[sourceIndex + length++] = (char)(digit <= 9 ? (0x0030+digit) : (0x0030+digit+7));
\r
388 } while (i != 0 && (sourceIndex + length) < buffer.length);
\r
390 while (length < minwidth) {
\r
391 buffer[sourceIndex + length++] = (char)0x0030; /* zero padding */
\r
393 /* reverses the string */
\r
394 for (j = 0; j < (length / 2); j++) {
\r
395 temp = buffer[(sourceIndex + length - 1) - j];
\r
396 buffer[(sourceIndex + length-1) -j] = buffer[sourceIndex + j];
\r
397 buffer[sourceIndex + j] = temp;
\r
404 * No need to create an instance
\r
406 private CharsetCallback() {
\r