2 *******************************************************************************
3 * Copyright (C) 2006-2011, International Business Machines Corporation and *
4 * others. All Rights Reserved. *
5 *******************************************************************************
7 *******************************************************************************
10 package com.ibm.icu.charset;
12 import java.nio.ByteBuffer;
13 import java.nio.CharBuffer;
14 import java.nio.IntBuffer;
15 import java.nio.charset.CoderResult;
18 * <h2> Callback API for CharsetICU API </h2>
20 * CharsetCallback class defines some error behaviour functions called
21 * by CharsetDecoderICU and CharsetEncoderICU. The class also provides
22 * the facility by which clients can write their own callbacks.
24 * These functions, although public, should NEVER be called directly.
25 * They should be used as parameters to the onUnmappableCharacter() and
26 * onMalformedInput() methods, to set the behaviour of a converter
27 * when it encounters UNMAPPED/INVALID sequences.
28 * Currently the only way to set callbacks is by using CodingErrorAction.
29 * In the future we will provide set methods on CharsetEncoder and CharsetDecoder
30 * that will accept CharsetCallback fields.
35 public class CharsetCallback {
37 * FROM_U, TO_U context options for sub callback
// Context value checked by the skip and substitute callbacks: when the
// callback context equals this string they test cr.isUnmappable() before
// acting.
39 private static final String SUB_STOP_ON_ILLEGAL = "i";
42 // * FROM_U, TO_U context options for skip callback
// The dedicated skip option below is disabled; the skip callbacks compare the
// context against SUB_STOP_ON_ILLEGAL, which carries the same value "i".
44 // private static final String SKIP_STOP_ON_ILLEGAL = "i";
47 // * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to ICU (%UXXXX)
// Disabled as well: the escape callbacks produce the ICU form whenever the
// context is null or not a String, so no named constant is needed for it.
49 // private static final String ESCAPE_ICU = null;
52 * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to JAVA (\\uXXXX)
// Compared against the context by FROM_U_CALLBACK_ESCAPE; no branch for it is
// visible in TO_U_CALLBACK_ESCAPE.
54 private static final String ESCAPE_JAVA = "J";
57 * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to C (\\uXXXX \\UXXXXXXXX)
58 * TO_U_CALLBACK_ESCAPE option to escape the character value according to C (\\xXXXX)
// Compared against the callback context by both FROM_U_CALLBACK_ESCAPE and
// TO_U_CALLBACK_ESCAPE.
60 private static final String ESCAPE_C = "C";
63 * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to XML Decimal escape \htmlonly(&#DDDD;)\endhtmlonly
64 * TO_U_CALLBACK_ESCAPE context option to escape the character value according to XML Decimal escape \htmlonly(&#DDDD;)\endhtmlonly
// Selects the decimal numeric character reference form ("&#" digits ";") in
// both escape callbacks.
66 private static final String ESCAPE_XML_DEC = "D";
69 * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to XML Hex escape \htmlonly(&#xXXXX;)\endhtmlonly
70 * TO_U_CALLBACK_ESCAPE context option to escape the character value according to XML Hex escape \htmlonly(&#xXXXX;)\endhtmlonly
// Selects the hexadecimal numeric character reference form ("&#x" digits ";")
// in both escape callbacks.
72 private static final String ESCAPE_XML_HEX = "X";
75 * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to Unicode ({U+XXXX})
// Checked by FROM_U_CALLBACK_ESCAPE only, which wraps the hex value in
// "{U+" ... "}" for this context.
77 private static final String ESCAPE_UNICODE = "U";
80 * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to CSS2 conventions (\\HH..H followed by a space)
// Checked by FROM_U_CALLBACK_ESCAPE only, which writes a backslash, the code
// point in hex without padding, and a terminating space for this context.
82 private static final String ESCAPE_CSS2 = "S";
85 * Decoder Callback interface
88 public interface Decoder {
90 * This function is called when the bytes in the source cannot be handled,
91 * and this function is meant to handle or fix the error if possible.
93 * @return Result of decoding action. This returned object is set to an error
94 * if this function could not handle the conversion.
// Parameter notes, based on how the built-in decoder callbacks in this class
// use them:
//   decoder - the converter that hit the problem; handed to
//             CharsetDecoderICU.toUWriteUChars when replacement text is written
//   context - option object; the built-ins compare it with the single-letter
//             option strings declared in this class
//   source  - input buffer; the substitute callback reads source.position()
//   target, offsets - passed on unchanged to the write helper
//   buffer, length - the escape callback reads buffer[i] masked with
//             UNSIGNED_BYTE_MASK, so each char presumably holds one offending
//             byte and length is their count (TODO confirm against the caller)
//   cr      - incoming result describing the problem; tested with
//             isUnmappable() by the skip callback
97 public CoderResult call(CharsetDecoderICU decoder, Object context,
98 ByteBuffer source, CharBuffer target, IntBuffer offsets,
99 char[] buffer, int length, CoderResult cr);
102 * Encoder Callback interface
105 public interface Encoder {
107 * This function is called when the Unicode characters in the source cannot be handled,
108 * and this function is meant to handle or fix the error if possible.
109 * @return Result of encoding action. This returned object is set to an error
110 * if this function could not handle the conversion.
// Parameter notes, based on how the built-in encoder callbacks in this class
// use them:
//   encoder - the converter that hit the problem; its cbFromUWriteSub and
//             cbFromUWriteUChars helpers are used to emit output
//   context - option object; the built-ins compare it with the single-letter
//             option strings declared in this class
//   source, target, offsets - passed on to the encoder's write helpers
//   buffer, length - the escape callback reads buffer[i] masked with
//             UNSIGNED_SHORT_MASK, so these are presumably the offending
//             UTF-16 code units and their count (TODO confirm against the caller)
//   cp      - numeric value the escape callback formats in several branches;
//             presumably the offending code point
//   cr      - incoming result describing the problem; tested with
//             isUnmappable() by the skip and substitute callbacks
113 public CoderResult call(CharsetEncoderICU encoder, Object context,
114 CharBuffer source, ByteBuffer target, IntBuffer offsets,
115 char[] buffer, int length, int cp, CoderResult cr);
// Encoder-side skip callback. Both returns visible here yield
// CoderResult.UNDERFLOW instead of the incoming result cr.
121 public static final Encoder FROM_U_CALLBACK_SKIP = new Encoder() {
122 public CoderResult call(CharsetEncoderICU encoder, Object context,
123 CharBuffer source, ByteBuffer target, IntBuffer offsets,
124 char[] buffer, int length, int cp, CoderResult cr){
// NOTE(review): the `if` that opens this chain is not visible in this extract.
126 return CoderResult.UNDERFLOW;
// NOTE(review): unchecked cast - unless the hidden condition above already
// rules it out, a non-String context raises ClassCastException here. The
// escape callbacks test `context instanceof String` before casting.
127 }else if(((String)context).equals(SUB_STOP_ON_ILLEGAL)){
// With the "i" option the outcome depends on whether cr is an
// unmappable-character result; the statement for the other case is not shown.
128 if(!cr.isUnmappable()){
131 return CoderResult.UNDERFLOW;
// Decoder-side skip callback; mirrors FROM_U_CALLBACK_SKIP line for line.
// Both returns visible here yield CoderResult.UNDERFLOW instead of cr.
141 public static final Decoder TO_U_CALLBACK_SKIP = new Decoder() {
142 public CoderResult call(CharsetDecoderICU decoder, Object context,
143 ByteBuffer source, CharBuffer target, IntBuffer offsets,
144 char[] buffer, int length, CoderResult cr){
// NOTE(review): the `if` that opens this chain is not visible in this extract.
146 return CoderResult.UNDERFLOW;
// NOTE(review): unchecked cast - unless the hidden condition above already
// rules it out, a non-String context raises ClassCastException here.
// The option constant is SUB_STOP_ON_ILLEGAL because the skip-specific
// constant is commented out.
147 }else if(((String)context).equals(SUB_STOP_ON_ILLEGAL)){
// With the "i" option the outcome depends on whether cr is an
// unmappable-character result; the statement for the other case is not shown.
148 if(!cr.isUnmappable()){
151 return CoderResult.UNDERFLOW;
158 * Write substitute callback
// Encoder-side substitute callback. Both returns visible here delegate to
// encoder.cbFromUWriteSub and return whatever result that helper produces.
161 public static final Encoder FROM_U_CALLBACK_SUBSTITUTE = new Encoder(){
162 public CoderResult call(CharsetEncoderICU encoder, Object context,
163 CharBuffer source, ByteBuffer target, IntBuffer offsets,
164 char[] buffer, int length, int cp, CoderResult cr){
// NOTE(review): the `if` that opens this chain is not visible in this extract.
166 return encoder.cbFromUWriteSub(encoder, source, target, offsets);
// NOTE(review): unchecked cast - unless the hidden condition above already
// rules it out, a non-String context raises ClassCastException here.
167 }else if(((String)context).equals(SUB_STOP_ON_ILLEGAL)){
// With the "i" option the outcome depends on whether cr is an
// unmappable-character result; the statement for the other case is not shown.
168 if(!cr.isUnmappable()){
171 return encoder.cbFromUWriteSub(encoder, source, target, offsets);
// Default substitution output of TO_U_CALLBACK_SUBSTITUTE.
// U+001A (SUBSTITUTE control character): chosen when the invalid sequence is
// one unit long and the charset's subChar1 is non-zero.
177 private static final char[] kSubstituteChar1 = new char[]{0x1A};
// U+FFFD (REPLACEMENT CHARACTER): chosen in every other case.
178 private static final char[] kSubstituteChar = new char[] {0xFFFD};
180 * Write substitute callback
// Decoder-side substitute callback: writes either the decoder's configured
// replacement string or one of the two built-in substitution characters to
// the target via CharsetDecoderICU.toUWriteUChars.
// NOTE(review): any context / cr checks that precede the lines below are not
// visible in this extract.
183 public static final Decoder TO_U_CALLBACK_SUBSTITUTE = new Decoder() {
184 public CoderResult call(CharsetDecoderICU decoder, Object context,
185 ByteBuffer source, CharBuffer target, IntBuffer offsets,
186 char[] buffer, int length, CoderResult cr){
// Needed for the charset-specific subChar1 setting read below.
188 CharsetICU cs = (CharsetICU) decoder.charset();
189 /* Use the specified replacement character if it is different than the default one. */
190 boolean useReplacement = true;
191 char [] replacementChar = decoder.replacement().toCharArray();
// A single-char replacement equal to U+001A or U+FFFD counts as "the default",
// so the built-in constants are used instead of the decoder's replacement.
192 if (replacementChar.length == 1 && (replacementChar[0] == kSubstituteChar1[0] || replacementChar[0] == kSubstituteChar[0])) {
193 useReplacement = false;
196 /* could optimize this case, just one uchar */
// One-unit invalid sequence and a non-zero subChar1: the fallback is U+001A.
197 if(decoder.invalidCharLength == 1 && cs.subChar1 != 0) {
198 return CharsetDecoderICU.toUWriteUChars(decoder, useReplacement ? replacementChar : kSubstituteChar1, 0, useReplacement ? replacementChar.length : 1, target, offsets, source.position());
// Every other case: the fallback is U+FFFD. Both calls pass source.position()
// as the final argument (presumably the source index recorded in offsets).
200 return CharsetDecoderICU.toUWriteUChars(decoder, useReplacement ? replacementChar : kSubstituteChar, 0, useReplacement ? replacementChar.length : 1, target, offsets, source.position());
// Encoder-side stop callback.
// NOTE(review): the method body is not visible in this extract, so its exact
// return value cannot be documented here; presumably it hands cr back
// unchanged so the caller stops on the error - confirm against the full file.
208 public static final Encoder FROM_U_CALLBACK_STOP = new Encoder() {
209 public CoderResult call(CharsetEncoderICU encoder, Object context,
210 CharBuffer source, ByteBuffer target, IntBuffer offsets,
211 char[] buffer, int length, int cp, CoderResult cr){
// Decoder-side stop callback.
// NOTE(review): the method body is not visible in this extract, so its exact
// return value cannot be documented here; presumably it hands cr back
// unchanged so the caller stops on the error - confirm against the full file.
219 public static final Decoder TO_U_CALLBACK_STOP = new Decoder() {
220 public CoderResult call(CharsetDecoderICU decoder, Object context,
221 ByteBuffer source, CharBuffer target, IntBuffer offsets,
222 char[] buffer, int length, CoderResult cr){
// Size of the scratch char array each escape callback allocates per call.
226 private static final int VALUE_STRING_LENGTH = 32;
// Literal characters used to assemble the escape sequences.
227 private static final char UNICODE_PERCENT_SIGN_CODEPOINT = 0x0025; // '%'
228 private static final char UNICODE_U_CODEPOINT = 0x0055; // 'U'
229 private static final char UNICODE_X_CODEPOINT = 0x0058; // 'X'
230 private static final char UNICODE_RS_CODEPOINT = 0x005C; // backslash (reverse solidus)
231 private static final char UNICODE_U_LOW_CODEPOINT = 0x0075; // 'u'
232 private static final char UNICODE_X_LOW_CODEPOINT = 0x0078; // 'x'
233 private static final char UNICODE_AMP_CODEPOINT = 0x0026; // '&'
234 private static final char UNICODE_HASH_CODEPOINT = 0x0023; // '#'
235 private static final char UNICODE_SEMICOLON_CODEPOINT = 0x003B; // ';'
236 private static final char UNICODE_PLUS_CODEPOINT = 0x002B; // '+'
237 private static final char UNICODE_LEFT_CURLY_CODEPOINT = 0x007B; // '{'
238 private static final char UNICODE_RIGHT_CURLY_CODEPOINT = 0x007D; // '}'
239 private static final char UNICODE_SPACE_CODEPOINT = 0x0020; // ' '
241 * Write escape callback
// Encoder-side escape callback: builds a textual escape for the offending
// input in a scratch array and writes it through encoder.cbFromUWriteUChars.
// The escape syntax is chosen by the context object (see the ESCAPE_*
// constants); a null or non-String context gets the "%U" + 4 hex digits form.
// NOTE(review): the declaration of `i`, the loop headers and the conditions
// that choose between the cp-based and buffer[0]-based lines are not visible
// in this extract.
244 public static final Encoder FROM_U_CALLBACK_ESCAPE = new Encoder() {
245 public CoderResult call(CharsetEncoderICU encoder, Object context,
246 CharBuffer source, ByteBuffer target, IntBuffer offsets,
247 char[] buffer, int length, int cp, CoderResult cr){
// Scratch output and the number of chars written to it so far.
248 char[] valueString = new char[VALUE_STRING_LENGTH];
249 int valueStringLength = 0;
// The incoming error result is discarded here; cr is reassigned from the
// write helper on the last visible line.
252 cr = CoderResult.UNDERFLOW;
// Default form: '%', 'U', then the code unit as 4 zero-padded hex digits.
254 if (context == null || !(context instanceof String)) {
256 valueString[valueStringLength++] = UNICODE_PERCENT_SIGN_CODEPOINT; /* adding % */
257 valueString[valueStringLength++] = UNICODE_U_CODEPOINT; /* adding U */
258 valueStringLength += itou(valueString, valueStringLength, (int)buffer[i++] & UConverterConstants.UNSIGNED_SHORT_MASK, 16, 4);
// Java form: backslash, 'u', then the code unit as 4 hex digits.
261 if (((String)context).equals(ESCAPE_JAVA)) {
263 valueString[valueStringLength++] = UNICODE_RS_CODEPOINT; /* adding \ */
264 valueString[valueStringLength++] = UNICODE_U_LOW_CODEPOINT; /* adding u */
265 valueStringLength += itou(valueString, valueStringLength, (int)buffer[i++] & UConverterConstants.UNSIGNED_SHORT_MASK, 16, 4);
// C form: backslash, then either 'U' + 8 hex digits of cp or 'u' + 4 hex
// digits of buffer[0].
267 } else if (((String)context).equals(ESCAPE_C)) {
268 valueString[valueStringLength++] = UNICODE_RS_CODEPOINT; /* adding \ */
271 valueString[valueStringLength++] = UNICODE_U_CODEPOINT; /* adding U */
// NOTE(review): likely bug - this is the only itou call site that uses `=`
// instead of `+=`. Assuming itou returns the number of chars it wrote (as
// every other call site implies), the two prefix chars already stored are
// dropped from the count, so the final wrap(valueString, 0, valueStringLength)
// would cut off the last two hex digits. Expected: `valueStringLength += ...`.
272 valueStringLength = itou(valueString, valueStringLength, cp, 16, 8);
274 valueString[valueStringLength++] = UNICODE_U_LOW_CODEPOINT; /* adding u */
275 valueStringLength += itou(valueString, valueStringLength, (int)buffer[0] & UConverterConstants.UNSIGNED_SHORT_MASK, 16, 4);
// XML decimal form: "&#", the value in base 10 without padding, then ';'.
277 } else if (((String)context).equals(ESCAPE_XML_DEC)) {
278 valueString[valueStringLength++] = UNICODE_AMP_CODEPOINT; /* adding & */
279 valueString[valueStringLength++] = UNICODE_HASH_CODEPOINT; /* adding # */
281 valueStringLength += itou(valueString, valueStringLength, cp, 10, 0);
283 valueStringLength += itou(valueString, valueStringLength, (int)buffer[0] & UConverterConstants.UNSIGNED_SHORT_MASK, 10, 0);
285 valueString[valueStringLength++] = UNICODE_SEMICOLON_CODEPOINT; /* adding ; */
// XML hex form: "&#x", the value in base 16 without padding, then ';'.
286 } else if (((String)context).equals(ESCAPE_XML_HEX)) {
287 valueString[valueStringLength++] = UNICODE_AMP_CODEPOINT; /* adding & */
288 valueString[valueStringLength++] = UNICODE_HASH_CODEPOINT; /* adding # */
289 valueString[valueStringLength++] = UNICODE_X_LOW_CODEPOINT; /* adding x */
291 valueStringLength += itou(valueString, valueStringLength, cp, 16, 0);
293 valueStringLength += itou(valueString, valueStringLength, (int)buffer[0] & UConverterConstants.UNSIGNED_SHORT_MASK, 16, 0);
295 valueString[valueStringLength++] = UNICODE_SEMICOLON_CODEPOINT; /* adding ; */
// Unicode form: "{U+", at least 4 hex digits, then '}'.
296 } else if (((String)context).equals(ESCAPE_UNICODE)) {
297 valueString[valueStringLength++] = UNICODE_LEFT_CURLY_CODEPOINT; /* adding { */
298 valueString[valueStringLength++] = UNICODE_U_CODEPOINT; /* adding U */
299 valueString[valueStringLength++] = UNICODE_PLUS_CODEPOINT; /* adding + */
301 valueStringLength += itou(valueString, valueStringLength,cp, 16, 4);
303 valueStringLength += itou(valueString, valueStringLength, (int)buffer[0] & UConverterConstants.UNSIGNED_SHORT_MASK, 16, 4);
305 valueString[valueStringLength++] = UNICODE_RIGHT_CURLY_CODEPOINT; /* adding } */
// CSS2 form: backslash, cp in hex without padding, then a space.
306 } else if (((String)context).equals(ESCAPE_CSS2)) {
307 valueString[valueStringLength++] = UNICODE_RS_CODEPOINT; /* adding \ */
308 valueStringLength += itou(valueString, valueStringLength, cp, 16, 0);
309 /* Always add space character, because the next character might be whitespace,
310 which would erroneously be considered the termination of the escape sequence. */
311 valueString[valueStringLength++] = UNICODE_SPACE_CODEPOINT;
// Unrecognised String context: same "%U" + 4 hex digits form as the default.
314 valueString[valueStringLength++] = UNICODE_PERCENT_SIGN_CODEPOINT; /* adding % */
315 valueString[valueStringLength++] = UNICODE_U_CODEPOINT; /* adding U */
316 valueStringLength += itou(valueString, valueStringLength, (int)buffer[i++] & UConverterConstants.UNSIGNED_SHORT_MASK, 16, 4);
// Hand the first valueStringLength chars of the scratch array to the encoder.
321 cr = encoder.cbFromUWriteUChars(encoder, CharBuffer.wrap(valueString, 0, valueStringLength), target, offsets);
326 * Write escape callback
// Decoder-side escape callback: builds a textual escape for the offending
// input in a scratch array and writes it with CharsetDecoderICU.toUWriteUChars.
// Visible branches cover a null / non-String context, ESCAPE_XML_DEC,
// ESCAPE_XML_HEX, ESCAPE_C and a fallback for any other String.
// NOTE(review): the declaration of `i` and the loop headers are not visible in
// this extract. Nothing visible checks the scratch capacity
// (VALUE_STRING_LENGTH chars) against length; the longest form shown takes up
// to 6 chars per element - confirm the caller bounds length.
329 public static final Decoder TO_U_CALLBACK_ESCAPE = new Decoder() {
330 public CoderResult call(CharsetDecoderICU decoder, Object context,
331 ByteBuffer source, CharBuffer target, IntBuffer offsets,
332 char[] buffer, int length, CoderResult cr){
// Scratch output and the number of chars written to it so far.
333 char[] uniValueString = new char[VALUE_STRING_LENGTH];
334 int valueStringLength = 0;
// Default form: '%', 'X', then the masked value as 2 zero-padded hex digits.
337 if (context == null || !(context instanceof String)) {
339 uniValueString[valueStringLength++] = UNICODE_PERCENT_SIGN_CODEPOINT; /* adding % */
340 uniValueString[valueStringLength++] = UNICODE_X_CODEPOINT; /* adding X */
341 valueStringLength += itou(uniValueString, valueStringLength, buffer[i++] & UConverterConstants.UNSIGNED_BYTE_MASK, 16, 2);
// XML decimal form: "&#", the value in base 10 without padding, then ';'.
344 if (((String)context).equals(ESCAPE_XML_DEC)) {
346 uniValueString[valueStringLength++] = UNICODE_AMP_CODEPOINT; /* adding & */
347 uniValueString[valueStringLength++] = UNICODE_HASH_CODEPOINT; /* adding # */
348 valueStringLength += itou(uniValueString, valueStringLength, buffer[i++] & UConverterConstants.UNSIGNED_BYTE_MASK, 10, 0);
349 uniValueString[valueStringLength++] = UNICODE_SEMICOLON_CODEPOINT; /* adding ; */
// XML hex form: "&#x", the value in base 16 without padding, then ';'.
351 } else if (((String)context).equals(ESCAPE_XML_HEX)) {
353 uniValueString[valueStringLength++] = UNICODE_AMP_CODEPOINT; /* adding & */
354 uniValueString[valueStringLength++] = UNICODE_HASH_CODEPOINT; /* adding # */
355 uniValueString[valueStringLength++] = UNICODE_X_LOW_CODEPOINT; /* adding x */
356 valueStringLength += itou(uniValueString, valueStringLength, buffer[i++] & UConverterConstants.UNSIGNED_BYTE_MASK, 16, 0);
357 uniValueString[valueStringLength++] = UNICODE_SEMICOLON_CODEPOINT; /* adding ; */
// C form: backslash, 'x', then 2 zero-padded hex digits.
359 } else if (((String)context).equals(ESCAPE_C)) {
361 uniValueString[valueStringLength++] = UNICODE_RS_CODEPOINT; /* adding \ */
362 uniValueString[valueStringLength++] = UNICODE_X_LOW_CODEPOINT; /* adding x */
363 valueStringLength += itou(uniValueString, valueStringLength, buffer[i++] & UConverterConstants.UNSIGNED_BYTE_MASK, 16, 2);
// Unrecognised String context: same "%X" + 2 hex digits form as the default.
367 uniValueString[valueStringLength++] = UNICODE_PERCENT_SIGN_CODEPOINT; /* adding % */
368 uniValueString[valueStringLength++] = UNICODE_X_CODEPOINT; /* adding X */
// Unlike the other branches, itou's result is ignored and a fixed 2 is added.
// That matches as long as the masked value fits in two hex digits (presumably
// UNSIGNED_BYTE_MASK is 0xFF - TODO confirm), since minwidth is 2.
369 itou(uniValueString, valueStringLength, buffer[i++] & UConverterConstants.UNSIGNED_BYTE_MASK, 16, 2);
370 valueStringLength += 2;
// NOTE(review): the final argument is a literal 0 here, whereas
// TO_U_CALLBACK_SUBSTITUTE passes source.position() in the same slot; confirm
// whether the offsets written for escaped output are meant to be 0.
375 cr = CharsetDecoderICU.toUWriteUChars(decoder, uniValueString, 0, valueStringLength, target, offsets, 0);
381 * Java port of uprv_itou() in ICU4C used by TO_U_CALLBACK_ESCAPE and FROM_U_CALLBACK_ESCAPE.
382 * Fills in a char string with the radix-based representation of a number padded with zeroes
// Writes the digits of i, in the given radix, into buffer starting at
// sourceIndex, left-padded with '0' up to minwidth digits.
//   buffer      - destination array, modified in place
//   sourceIndex - index of the first char to write
//   i           - value to format
//   radix       - numeric base; callers in this class pass 10 or 16
//   minwidth    - minimum digit count; 0 means no padding
// NOTE(review): the local declarations, the lines that derive `digit` and
// advance `i`, and the return statement are not visible in this extract.
// Callers add the result to their running length, so the return value is
// presumably the number of chars written.
385 private static final int itou(char[] buffer, int sourceIndex, int i, int radix, int minwidth) {
// Digits 0-9 map to '0'..'9'; larger digits map to 0x30 + digit + 7, i.e.
// 10 becomes 'A' (upper-case letters).
393 buffer[sourceIndex + length++] = (char)(digit <= 9 ? (0x0030+digit) : (0x0030+digit+7));
// Stops when the value is exhausted or the end of buffer is reached.
395 } while (i != 0 && (sourceIndex + length) < buffer.length);
// NOTE(review): unlike the loop above, padding is not bounds-checked against
// buffer.length; it relies on callers leaving enough room.
397 while (length < minwidth) {
398 buffer[sourceIndex + length++] = (char)0x0030; /* zero padding */
400 /* reverses the string */
// In-place swap of the written range [sourceIndex, sourceIndex + length).
401 for (j = 0; j < (length / 2); j++) {
402 temp = buffer[(sourceIndex + length - 1) - j];
403 buffer[(sourceIndex + length-1) -j] = buffer[sourceIndex + j];
404 buffer[sourceIndex + j] = temp;
411 * No need to create an instance
// Private constructor: the members visible in this file are all static or
// nested types, so the class is not meant to be instantiated.
413 private CharsetCallback() {