2 *******************************************************************************
\r
3 * Copyright (C) 2006-2009, International Business Machines Corporation and *
\r
4 * others. All Rights Reserved. *
\r
5 *******************************************************************************
\r
7 *******************************************************************************
\r
10 package com.ibm.icu.charset;
\r
12 import java.nio.BufferOverflowException;
\r
13 import java.nio.ByteBuffer;
\r
14 import java.nio.CharBuffer;
\r
15 import java.nio.IntBuffer;
\r
16 import java.nio.charset.CharsetEncoder;
\r
17 import java.nio.charset.CoderResult;
\r
18 import java.nio.charset.CodingErrorAction;
\r
20 import com.ibm.icu.impl.Assert;
\r
21 import com.ibm.icu.lang.UCharacter;
\r
22 import com.ibm.icu.text.UTF16;
\r
25 * An abstract class that provides framework methods of encoding operations for concrete
\r
27 * In the future this class will contain API that will implement converter semantics of ICU4C.
\r
30 public abstract class CharsetEncoderICU extends CharsetEncoder {
\r
32 /* this is used in fromUnicode DBCS tables as an "unassigned" marker */
\r
33 static final char MISSING_CHAR_MARKER = '\uFFFF';
\r
35 byte[] errorBuffer = new byte[30];
\r
37 int errorBufferLength = 0;
\r
39 /** these are for encodeLoopICU */
\r
40 int fromUnicodeStatus;
\r
44 boolean useSubChar1;
\r
46 boolean useFallback;
\r
48 /* maximum number of indexed UChars */
\r
49 static final int EXT_MAX_UCHARS = 19;
\r
51 /* store previous UChars/chars to continue partial matches */
\r
52 int preFromUFirstCP; /* >=0: partial match */
\r
54 char[] preFromUArray = new char[EXT_MAX_UCHARS];
\r
58 int preFromULength; /* negative: replay */
\r
60 char[] invalidUCharBuffer = new char[2];
\r
62 int invalidUCharLength;
\r
64 Object fromUContext;
\r
66 private CharsetCallback.Encoder onUnmappableInput = CharsetCallback.FROM_U_CALLBACK_STOP;
\r
68 private CharsetCallback.Encoder onMalformedInput = CharsetCallback.FROM_U_CALLBACK_STOP;
\r
70 CharsetCallback.Encoder fromCharErrorBehaviour = new CharsetCallback.Encoder() {
\r
71 public CoderResult call(CharsetEncoderICU encoder, Object context,
\r
72 CharBuffer source, ByteBuffer target, IntBuffer offsets,
\r
73 char[] buffer, int length, int cp, CoderResult cr) {
\r
74 if (cr.isUnmappable()) {
\r
75 return onUnmappableInput.call(encoder, context, source, target,
\r
76 offsets, buffer, length, cp, cr);
\r
77 } else /* if (cr.isMalformed()) */ {
\r
78 return onMalformedInput.call(encoder, context, source, target,
\r
79 offsets, buffer, length, cp, cr);
\r
81 // return CharsetCallback.FROM_U_CALLBACK_STOP.call(encoder, context, source, target, offsets, buffer, length, cp, cr);
\r
/**
 * Constructs a new encoder for the given charset.
 *
 * @param cs
 *            the charset for which the encoder is created
 * @param replacement
 *            the substitution bytes
 */
CharsetEncoderICU(CharsetICU cs, byte[] replacement) {
    // Average bytes per char is taken as the midpoint of the charset's minimum and maximum.
    super(cs,
            (cs.minBytesPerChar + cs.maxBytesPerChar) / 2,
            cs.maxBytesPerChar,
            replacement);
}
\r
100 * Is this Encoder allowed to use fallbacks? A fallback mapping is a mapping
\r
101 * that will convert a Unicode codepoint sequence to a byte sequence, but
\r
102 * the encoded byte sequence will round trip convert to a different
\r
103 * Unicode codepoint sequence.
\r
104 * @return true if the converter uses fallback, false otherwise.
\r
107 public boolean isFallbackUsed() {
\r
108 return useFallback;
\r
112 * Sets whether this Encoder can use fallbacks?
\r
113 * @param usesFallback true if the user wants the converter to take
\r
114 * advantage of the fallback mapping, false otherwise.
\r
117 public void setFallbackUsed(boolean usesFallback) {
\r
118 useFallback = usesFallback;
\r
122 * Use fallbacks from Unicode to codepage when useFallback or for private-use code points
\r
123 * @param c A codepoint
\r
125 final boolean isFromUUseFallback(int c) {
\r
126 return (useFallback)
\r
127 || (UCharacter.getType(c) == UCharacter.PRIVATE_USE);
\r
131 * Use fallbacks from Unicode to codepage when useFallback or for private-use code points
\r
133 static final boolean isFromUUseFallback(boolean iUseFallback, int c) {
\r
134 return (iUseFallback)
\r
135 || (UCharacter.getType(c) == UCharacter.PRIVATE_USE);
\r
139 * Sets the action to be taken if an illegal sequence is encountered
\r
142 * action to be taken
\r
143 * @exception IllegalArgumentException
\r
146 protected void implOnMalformedInput(CodingErrorAction newAction) {
\r
147 onMalformedInput = getCallback(newAction);
\r
151 * Sets the action to be taken if an illegal sequence is encountered
\r
154 * action to be taken
\r
155 * @exception IllegalArgumentException
\r
158 protected void implOnUnmappableCharacter(CodingErrorAction newAction) {
\r
159 onUnmappableInput = getCallback(newAction);
\r
163 * Sets the callback encoder method and context to be used if an illegal sequence is encountered.
\r
164 * You would normally call this twice to set both the malform and unmappable error. In this case,
\r
165 * newContext should remain the same since using a different newContext each time will negate the last
\r
167 * @param err CoderResult
\r
168 * @param newCallback CharsetCallback.Encoder
\r
169 * @param newContext Object
\r
172 public final void setFromUCallback(CoderResult err, CharsetCallback.Encoder newCallback, Object newContext) {
\r
173 if (err.isMalformed()) {
\r
174 onMalformedInput = newCallback;
\r
175 } else if (err.isUnmappable()) {
\r
176 onUnmappableInput = newCallback;
\r
178 /* Error: Only malformed and unmappable are handled. */
\r
181 if (fromUContext == null || !fromUContext.equals(newContext)) {
\r
182 setFromUContext(newContext);
\r
187 * Sets fromUContext used in callbacks.
\r
189 * @param newContext Object
\r
190 * @exception IllegalArgumentException The object is an illegal argument for UContext.
\r
193 public final void setFromUContext(Object newContext) {
\r
194 fromUContext = newContext;
\r
197 private static CharsetCallback.Encoder getCallback(CodingErrorAction action) {
\r
198 if (action == CodingErrorAction.REPLACE) {
\r
199 return CharsetCallback.FROM_U_CALLBACK_SUBSTITUTE;
\r
200 } else if (action == CodingErrorAction.IGNORE) {
\r
201 return CharsetCallback.FROM_U_CALLBACK_SKIP;
\r
202 } else /* if (action == CodingErrorAction.REPORT) */ {
\r
203 return CharsetCallback.FROM_U_CALLBACK_STOP;
\r
207 private static final CharBuffer EMPTY = CharBuffer.allocate(0);
\r
210 * Flushes any characters saved in the converter's internal buffer and
\r
211 * resets the converter.
\r
212 * @param out action to be taken
\r
213 * @return result of flushing action and completes the decoding all input.
\r
214 * Returns CoderResult.UNDERFLOW if the action succeeds.
\r
217 protected CoderResult implFlush(ByteBuffer out) {
\r
218 return encode(EMPTY, out, null, true);
\r
222 * Resets the from Unicode mode of converter
\r
225 protected void implReset() {
\r
226 errorBufferLength = 0;
\r
227 fromUnicodeStatus = 0;
\r
229 fromUnicodeReset();
\r
232 private void fromUnicodeReset() {
\r
234 preFromUFirstCP = UConverterConstants.U_SENTINEL;
\r
235 preFromULength = 0;
\r
239 * Encodes one or more chars. The default behaviour of the
\r
240 * converter is stop and report if an error in input stream is encountered.
\r
241 * To set different behaviour use @see CharsetEncoder.onMalformedInput()
\r
242 * @param in buffer to encode
\r
243 * @param out buffer to populate with encoded result
\r
244 * @return result of encoding action. Returns CoderResult.UNDERFLOW if the encoding
\r
245 * action succeeds or more input is needed for completing the encoding action.
\r
248 protected CoderResult encodeLoop(CharBuffer in, ByteBuffer out) {
\r
249 if (!in.hasRemaining() && this.errorBufferLength == 0) { // make sure the errorBuffer is empty
\r
250 // The Java framework should have already substituted what was left.
\r
252 //fromUnicodeReset();
\r
253 return CoderResult.UNDERFLOW;
\r
255 in.position(in.position() + fromUCountPending());
\r
256 /* do the conversion */
\r
257 CoderResult ret = encode(in, out, null, false);
\r
258 setSourcePosition(in);
\r
259 /* No need to reset to keep the proper state of the encoder.
\r
260 if (ret.isUnderflow() && in.hasRemaining()) {
\r
261 // The Java framework is going to substitute what is left.
\r
262 //fromUnicodeReset();
\r
268 * Implements ICU semantics of buffer management
\r
272 * @return A CoderResult object that contains the error result when an error occurs.
\r
274 abstract CoderResult encodeLoop(CharBuffer source, ByteBuffer target,
\r
275 IntBuffer offsets, boolean flush);
\r
278 * Implements ICU semantics for encoding the buffer
\r
279 * @param source The input character buffer
\r
280 * @param target The output byte buffer
\r
282 * @param flush true if, and only if, the invoker can provide no
\r
283 * additional input chars beyond those in the given buffer.
\r
284 * @return A CoderResult object that contains the error result when an error occurs.
\r
286 final CoderResult encode(CharBuffer source, ByteBuffer target,
\r
287 IntBuffer offsets, boolean flush) {
\r
289 /* check parameters */
\r
290 if (target == null || source == null) {
\r
291 throw new IllegalArgumentException();
\r
295 * Make sure that the buffer sizes do not exceed the number range for
\r
296 * int32_t because some functions use the size (in units or bytes)
\r
297 * rather than comparing pointers, and because offsets are int32_t values.
\r
299 * size_t is guaranteed to be unsigned and large enough for the job.
\r
301 * Return with an error instead of adjusting the limits because we would
\r
302 * not be able to maintain the semantics that either the source must be
\r
303 * consumed or the target filled (unless an error occurs).
\r
304 * An adjustment would be targetLimit=t+0x7fffffff; for example.
\r
307 /* flush the target overflow buffer */
\r
308 if (errorBufferLength > 0) {
\r
309 byte[] overflowArray;
\r
312 overflowArray = errorBuffer;
\r
313 length = errorBufferLength;
\r
316 if (target.remaining() == 0) {
\r
317 /* the overflow buffer contains too much, keep the rest */
\r
321 overflowArray[j++] = overflowArray[i++];
\r
322 } while (i < length);
\r
324 errorBufferLength = (byte) j;
\r
325 return CoderResult.OVERFLOW;
\r
328 /* copy the overflow contents to the target */
\r
329 target.put(overflowArray[i++]);
\r
330 if (offsets != null) {
\r
331 offsets.put(-1); /* no source index available for old output */
\r
333 } while (i < length);
\r
335 /* the overflow buffer is completely copied to the target */
\r
336 errorBufferLength = 0;
\r
339 if (!flush && source.remaining() == 0 && preFromULength >= 0) {
\r
340 /* the overflow buffer is emptied and there is no new input: we are done */
\r
341 return CoderResult.UNDERFLOW;
\r
345 * Do not simply return with a buffer overflow error if
\r
346 * !flush && t==targetLimit
\r
347 * because it is possible that the source will not generate any output.
\r
348 * For example, the skip callback may be called;
\r
349 * it does not output anything.
\r
352 return fromUnicodeWithCallback(source, target, offsets, flush);
\r
357 * Implementation note for m:n conversions
\r
359 * While collecting source units to find the longest match for m:n conversion,
\r
360 * some source units may need to be stored for a partial match.
\r
361 * When a second buffer does not yield a match on all of the previously stored
\r
362 * source units, then they must be "replayed", i.e., fed back into the converter.
\r
364 * The code relies on the fact that replaying will not nest -
\r
365 * converting a replay buffer will not result in a replay.
\r
366 * This is because a replay is necessary only after the _continuation_ of a
\r
367 * partial match failed, but a replay buffer is converted as a whole.
\r
368 * It may result in some of its units being stored again for a partial match,
\r
369 * but there will not be a continuation _during_ the replay which could fail.
\r
371 * It is conceivable that a callback function could call the converter
\r
372 * recursively in a way that causes another replay to be stored, but that
\r
373 * would be an error in the callback function.
\r
374 * Such violations will cause assertion failures in a debug build,
\r
375 * and wrong output, but they will not cause a crash.
\r
377 final CoderResult fromUnicodeWithCallback(CharBuffer source,
\r
378 ByteBuffer target, IntBuffer offsets, boolean flush) {
\r
381 int errorInputLength;
\r
382 boolean converterSawEndOfInput, calledCallback;
\r
384 /* variables for m:n conversion */
\r
385 CharBuffer replayArray = CharBuffer.allocate(EXT_MAX_UCHARS);
\r
386 int replayArrayIndex = 0;
\r
387 CharBuffer realSource;
\r
390 CoderResult cr = CoderResult.UNDERFLOW;
\r
392 /* get the converter implementation function */
\r
395 if (preFromULength >= 0) {
\r
401 * Previous m:n conversion stored source units from a partial match
\r
402 * and failed to consume all of them.
\r
403 * We need to "replay" them from a temporary buffer and convert them first.
\r
405 realSource = source;
\r
408 //UConverterUtility.uprv_memcpy(replayArray, replayArrayIndex, preFromUArray, 0, -preFromULength*UMachine.U_SIZEOF_UCHAR);
\r
409 replayArray.put(preFromUArray, 0, -preFromULength);
\r
410 source = replayArray;
\r
411 source.position(replayArrayIndex);
\r
412 source.limit(replayArrayIndex - preFromULength); //preFromULength is negative, see declaration
\r
415 preFromULength = 0;
\r
419 * loop for conversion and error handling
\r
425 * handle end of input
\r
426 * handle errors/call callback
\r
432 cr = encodeLoop(source, target, offsets, flush);
\r
434 * set a flag for whether the converter
\r
435 * successfully processed the end of the input
\r
437 * need not check cnv.preFromULength==0 because a replay (<0) will cause
\r
438 * s<sourceLimit before converterSawEndOfInput is checked
\r
440 converterSawEndOfInput = (cr.isUnderflow() && flush
\r
441 && source.remaining() == 0 && fromUChar32 == 0);
\r
443 /* no callback called yet for this iteration */
\r
444 calledCallback = false;
\r
446 /* no sourceIndex adjustment for conversion, only for callback output */
\r
447 errorInputLength = 0;
\r
450 * loop for offsets and error handling
\r
452 * iterates at most 3 times:
\r
453 * 1. to clean up after the conversion function
\r
454 * 2. after the callback
\r
455 * 3. after the callback again if there was truncated input
\r
458 /* update offsets if we write any */
\r
459 /* Currently offsets are not being used in ICU4J */
\r
460 /* if (offsets != null) {
\r
461 int length = target.remaining();
\r
465 * if a converter handles offsets and updates the offsets
\r
466 * pointer at the end, then offset should not change
\r
468 * however, some converters do not handle offsets at all
\r
469 * (sourceIndex<0) or may not update the offsets pointer
\r
471 /* offsets.position(offsets.position() + length);
\r
474 if (sourceIndex >= 0) {
\r
475 sourceIndex += (int) (source.position());
\r
479 if (preFromULength < 0) {
\r
481 * switch the source to new replay units (cannot occur while replaying)
\r
482 * after offset handling and before end-of-input and callback handling
\r
484 if (realSource == null) {
\r
485 realSource = source;
\r
488 //UConverterUtility.uprv_memcpy(replayArray, replayArrayIndex, preFromUArray, 0, -preFromULength*UMachine.U_SIZEOF_UCHAR);
\r
489 replayArray.put(preFromUArray, 0, -preFromULength);
\r
491 source = replayArray;
\r
492 source.position(replayArrayIndex);
\r
493 source.limit(replayArrayIndex - preFromULength);
\r
495 if ((sourceIndex += preFromULength) < 0) {
\r
499 preFromULength = 0;
\r
501 /* see implementation note before _fromUnicodeWithCallback() */
\r
502 //agljport:todo U_ASSERT(realSource==NULL);
\r
503 Assert.assrt(realSource == null);
\r
507 /* update pointers */
\r
508 sBufferIndex = source.position();
\r
509 if (cr.isUnderflow()) {
\r
510 if (sBufferIndex < source.limit()) {
\r
512 * continue with the conversion loop while there is still input left
\r
513 * (continue converting by breaking out of only the inner loop)
\r
516 } else if (realSource != null) {
\r
517 /* switch back from replaying to the real source and continue */
\r
518 source = realSource;
\r
520 sourceIndex = source.position();
\r
523 } else if (flush && fromUChar32 != 0) {
\r
525 * the entire input stream is consumed
\r
526 * and there is a partial, truncated input sequence left
\r
529 /* inject an error and continue with callback handling */
\r
530 //err[0]=ErrorCode.U_TRUNCATED_CHAR_FOUND;
\r
531 cr = CoderResult.malformedForLength(1);
\r
532 calledCallback = false; /* new error condition */
\r
534 /* input consumed */
\r
537 * return to the conversion loop once more if the flush
\r
538 * flag is set and the conversion function has not
\r
539 * successfully processed the end of the input yet
\r
541 * (continue converting by breaking out of only the inner loop)
\r
543 if (!converterSawEndOfInput) {
\r
547 /* reset the converter without calling the callback function */
\r
551 /* done successfully */
\r
556 /*U_FAILURE(*err) */
\r
559 if (calledCallback || cr.isOverflow()
\r
560 || (!cr.isMalformed() && !cr.isUnmappable())) {
\r
562 * the callback did not or cannot resolve the error:
\r
563 * set output pointers and return
\r
565 * the check for buffer overflow is redundant but it is
\r
566 * a high-runner case and hopefully documents the intent
\r
569 * if we were replaying, then the replay buffer must be
\r
570 * copied back into the UConverter
\r
571 * and the real arguments must be restored
\r
573 if (realSource != null) {
\r
576 //agljport:todo U_ASSERT(cnv.preFromULength==0);
\r
578 length = source.remaining();
\r
580 //UConverterUtility.uprv_memcpy(preFromUArray, 0, sourceArray, pArgs.sourceBegin, length*UMachine.U_SIZEOF_UCHAR);
\r
581 source.get(preFromUArray, 0, length);
\r
582 preFromULength = (byte) -length;
\r
584 source = realSource;
\r
591 /* callback handling */
\r
595 /* get and write the code point */
\r
596 codePoint = fromUChar32;
\r
597 errorInputLength = UTF16.append(invalidUCharBuffer, 0,
\r
599 invalidUCharLength = errorInputLength;
\r
601 /* set the converter state to deal with the next character */
\r
604 /* call the callback function */
\r
605 cr = fromCharErrorBehaviour.call(this, fromUContext,
\r
606 source, target, offsets, invalidUCharBuffer,
\r
607 invalidUCharLength, codePoint, cr);
\r
611 * loop back to the offset handling
\r
613 * this flag will indicate after offset handling
\r
614 * that a callback was called;
\r
615 * if the callback did not resolve the error, then we return
\r
617 calledCallback = true;
\r
623 * Ascertains if a given Unicode code point (32bit value for handling surrogates)
\r
624 * can be converted to the target encoding. If the caller wants to test if a
\r
625 * surrogate pair can be converted to target encoding then the
\r
626 * responsibility of assembling the int value lies with the caller.
\r
627 * For assembling a code point the caller can use UTF16 class of ICU4J and do something like:
\r
629 * while(i<mySource.length){
\r
630 * if(UTF16.isLeadSurrogate(mySource[i])&& i+1< mySource.length){
\r
631 * if(UTF16.isTrailSurrogate(mySource[i+1])){
\r
632 * int temp = UTF16.charAt(mySource,i,i+1,0);
\r
633 * if(!((CharsetEncoderICU) myConv).canEncode(temp)){
\r
644 * String src = new String(mySource);
\r
646 * boolean passed = false;
\r
647 * while(i<src.length()){
\r
648 * codepoint = UTF16.charAt(src,i);
\r
649 * i+= (codepoint>0xffff)? 2:1;
\r
650 * if(!((CharsetEncoderICU) myConv).canEncode(codepoint)){
\r
656 * @param codepoint Unicode code point as int value
\r
657 * @return true if a character can be converted
\r
659 /* TODO This is different from Java's canEncode(char) API.
\r
660 * ICU's API should implement getUnicodeSet,
\r
661 * and override canEncode(char) which queries getUnicodeSet.
\r
662 * The getUnicodeSet should return a frozen UnicodeSet or use a fillin parameter, like ICU4C.
\r
664 /*public boolean canEncode(int codepoint) {
\r
668 * Overrides super class method
\r
671 public boolean isLegalReplacement(byte[] repl) {
\r
676 * Writes out the specified output bytes to the target byte buffer or to converter internal buffers.
\r
678 * @param bytesArray
\r
679 * @param bytesBegin
\r
680 * @param bytesLength
\r
683 * @param sourceIndex
\r
684 * @return A CoderResult object that contains the error result when an error occurs.
\r
686 static final CoderResult fromUWriteBytes(CharsetEncoderICU cnv,
\r
687 byte[] bytesArray, int bytesBegin, int bytesLength, ByteBuffer out,
\r
688 IntBuffer offsets, int sourceIndex) {
\r
691 int obl = bytesLength;
\r
692 CoderResult cr = CoderResult.UNDERFLOW;
\r
693 int bytesLimit = bytesBegin + bytesLength;
\r
695 for (; bytesBegin < bytesLimit;) {
\r
696 out.put(bytesArray[bytesBegin]);
\r
701 } catch (BufferOverflowException ex) {
\r
702 cr = CoderResult.OVERFLOW;
\r
705 if (offsets != null) {
\r
706 while (obl > bytesLength) {
\r
707 offsets.put(sourceIndex);
\r
712 cnv.errorBufferLength = bytesLimit - bytesBegin;
\r
713 if (cnv.errorBufferLength > 0) {
\r
715 while (bytesBegin < bytesLimit) {
\r
716 cnv.errorBuffer[index++] = bytesArray[bytesBegin++];
\r
718 cr = CoderResult.OVERFLOW;
\r
724 * Returns the number of chars held in the converter's internal state
\r
725 * because more input is needed for completing the conversion. This function is
\r
726 * useful for mapping semantics of ICU's converter interface to those of iconv,
\r
727 * and this information is not needed for normal conversion.
\r
728 * @return The number of chars in the state. -1 if an error is encountered.
\r
730 /*public*/int fromUCountPending() {
\r
731 if (preFromULength > 0) {
\r
732 return UTF16.getCharCount(preFromUFirstCP) + preFromULength;
\r
733 } else if (preFromULength < 0) {
\r
734 return -preFromULength;
\r
735 } else if (fromUChar32 > 0) {
\r
737 } else if (preFromUFirstCP > 0) {
\r
738 return UTF16.getCharCount(preFromUFirstCP);
\r
747 private final void setSourcePosition(CharBuffer source) {
\r
749 // ok was there input held in the previous invocation of encodeLoop
\r
750 // that resulted in output in this invocation?
\r
751 source.position(source.position() - fromUCountPending());
\r
755 * Write the codepage substitution character.
\r
756 * Subclasses to override this method.
\r
757 * For stateful converters, it is typically necessary to handle this
\r
758 * specificially for the converter in order to properly maintain the state.
\r
759 * @param source The input character buffer
\r
760 * @param target The output byte buffer
\r
762 * @return A CoderResult object that contains the error result when an error occurs.
\r
764 CoderResult cbFromUWriteSub(CharsetEncoderICU encoder, CharBuffer source,
\r
765 ByteBuffer target, IntBuffer offsets) {
\r
766 CharsetICU cs = (CharsetICU) encoder.charset();
\r
767 byte[] sub = encoder.replacement();
\r
768 if (cs.subChar1 != 0 && encoder.invalidUCharBuffer[0] <= 0xff) {
\r
769 return CharsetEncoderICU.fromUWriteBytes(encoder,
\r
770 new byte[] { cs.subChar1 }, 0, 1, target, offsets, source
\r
773 return CharsetEncoderICU.fromUWriteBytes(encoder, sub, 0,
\r
774 sub.length, target, offsets, source.position());
\r
779 * Write the characters to target.
\r
780 * @param source The input character buffer
\r
781 * @param target The output byte buffer
\r
783 * @return A CoderResult object that contains the error result when an error occurs.
\r
785 CoderResult cbFromUWriteUChars(CharsetEncoderICU encoder,
\r
786 CharBuffer source, ByteBuffer target, IntBuffer offsets) {
\r
787 CoderResult cr = CoderResult.UNDERFLOW;
\r
789 /* This is a fun one. Recursion can occur - we're basically going to
\r
790 * just retry shoving data through the same converter. Note, if you got
\r
791 * here through some kind of invalid sequence, you maybe should emit a
\r
792 * reset sequence of some kind. Since this IS an actual conversion,
\r
793 * take care that you've changed the callback or the data, or you'll
\r
794 * get an infinite loop.
\r
797 int oldTargetPosition = target.position();
\r
798 int offsetIndex = source.position();
\r
800 cr = encoder.encode(source, target, null, false); /* no offsets and no flush */
\r
802 if (offsets != null) {
\r
803 while (target.position() != oldTargetPosition) {
\r
804 offsets.put(offsetIndex);
\r
805 oldTargetPosition++;
\r
809 /* Note, if you did something like used a stop subcallback, things would get interesting.
\r
810 * In fact, here's where we want to return the partially consumed in-source!
\r
812 if (cr.isOverflow()) {
\r
813 /* Overflowed target. Now, we'll write into the charErrorBuffer.
\r
814 * It's a fixed size. If we overflow it...Hm
\r
817 /* start the new target at the first free slot in the error buffer */
\r
818 int errBuffLen = encoder.errorBufferLength;
\r
819 ByteBuffer newTarget = ByteBuffer.wrap(encoder.errorBuffer);
\r
820 newTarget.position(errBuffLen); /* set the position at the end of the error buffer */
\r
821 encoder.errorBufferLength = 0;
\r
823 encoder.encode(source, newTarget, null, false);
\r
825 encoder.errorBuffer = newTarget.array();
\r
826 encoder.errorBufferLength = newTarget.position();
\r
834 * Handles a common situation where a character has been read and it may be
\r
835 * a lead surrogate followed by a trail surrogate. This method can change
\r
836 * the source position and will modify fromUChar32.
\r
840 * If <code>null</code> is returned, then there was success in reading a
\r
841 * surrogate pair, the codepoint is stored in <code>fromUChar32</code> and
\r
842 * <code>fromUChar32</code> should be reset (to 0) after being read.
\r
846 * The encoding source.
\r
848 * A character that may be the first in a surrogate pair.
\r
849 * @return <code>CoderResult.malformedForLength(1)</code> or
\r
850 * <code>CoderResult.UNDERFLOW</code> if there is a problem, or
\r
851 * <code>null</code> if there isn't.
\r
852 * @see #handleSurrogates(CharBuffer, char)
\r
853 * @see #handleSurrogates(CharBuffer, int, char)
\r
854 * @see #handleSurrogates(char[], int, int, char)
\r
856 final CoderResult handleSurrogates(CharBuffer source, char lead) {
\r
857 if (!UTF16.isLeadSurrogate(lead)) {
\r
858 fromUChar32 = lead;
\r
859 return CoderResult.malformedForLength(1);
\r
862 if (!source.hasRemaining()) {
\r
863 fromUChar32 = lead;
\r
864 return CoderResult.UNDERFLOW;
\r
867 char trail = source.get();
\r
869 if (!UTF16.isTrailSurrogate(trail)) {
\r
870 fromUChar32 = lead;
\r
871 source.position(source.position() - 1);
\r
872 return CoderResult.malformedForLength(1);
\r
875 fromUChar32 = UCharacter.getCodePoint(lead, trail);
\r
881 * Same as <code>handleSurrogates(CharBuffer, char)</code>, but with arrays. As an added
\r
882 * requirement, the calling method must also increment the index if this method returns
\r
883 * <code>null</code>.
\r
888 * The encoding source.
\r
890 * A character that may be the first in a surrogate pair.
\r
891 * @return <code>CoderResult.malformedForLength(1)</code> or
\r
892 * <code>CoderResult.UNDERFLOW</code> if there is a problem, or <code>null</code> if
\r
894 * @see #handleSurrogates(CharBuffer, char)
\r
895 * @see #handleSurrogates(CharBuffer, int, char)
\r
896 * @see #handleSurrogates(char[], int, int, char)
\r
898 final CoderResult handleSurrogates(char[] sourceArray, int sourceIndex,
\r
899 int sourceLimit, char lead) {
\r
900 if (!UTF16.isLeadSurrogate(lead)) {
\r
901 fromUChar32 = lead;
\r
902 return CoderResult.malformedForLength(1);
\r
905 if (sourceIndex >= sourceLimit) {
\r
906 fromUChar32 = lead;
\r
907 return CoderResult.UNDERFLOW;
\r
910 char trail = sourceArray[sourceIndex];
\r
912 if (!UTF16.isTrailSurrogate(trail)) {
\r
913 fromUChar32 = lead;
\r
914 return CoderResult.malformedForLength(1);
\r
917 fromUChar32 = UCharacter.getCodePoint(lead, trail);
\r