2 *******************************************************************************
3 * Copyright (C) 2006-2013, International Business Machines Corporation and *
4 * others. All Rights Reserved. *
5 *******************************************************************************
7 *******************************************************************************
10 package com.ibm.icu.charset;
12 import java.nio.BufferOverflowException;
13 import java.nio.ByteBuffer;
14 import java.nio.CharBuffer;
15 import java.nio.IntBuffer;
16 import java.nio.charset.CharsetEncoder;
17 import java.nio.charset.CoderResult;
18 import java.nio.charset.CodingErrorAction;
20 import com.ibm.icu.impl.Assert;
21 import com.ibm.icu.lang.UCharacter;
22 import com.ibm.icu.text.UTF16;
25 * An abstract class that provides framework methods of encoding operations for concrete
27 * In the future this class will contain API that will implement converter semantics of ICU4C.
30 public abstract class CharsetEncoderICU extends CharsetEncoder {
// Converter state shared by the encode loop, the overflow handling and the error callbacks.
// NOTE(review): the listing skips source line numbers between these declarations, and fields
// referenced elsewhere in this file (useFallback, fromUChar32, fromUContext) are not shown here.
32 /* this is used in fromUnicode DBCS tables as an "unassigned" marker */
33 static final char MISSING_CHAR_MARKER = '\uFFFF';
// output bytes that did not fit into the target; encode() writes them out first on the next call
35 byte[] errorBuffer = new byte[30];
// number of valid bytes currently held in errorBuffer
37 int errorBufferLength = 0;
39 /** these are for encodeLoopICU */
40 int fromUnicodeStatus;
48 /* maximum number of indexed UChars */
49 static final int EXT_MAX_UCHARS = 19;
51 /* store previous UChars/chars to continue partial matches */
52 int preFromUFirstCP; /* >=0: partial match */
54 char[] preFromUArray = new char[EXT_MAX_UCHARS];
58 int preFromULength; /* negative: replay */
// UTF-16 form of the code point handed to the error callback, and its length in chars
60 char[] invalidUCharBuffer = new char[2];
62 int invalidUCharLength;
// handlers used by fromCharErrorBehaviour; both start out as the STOP callback
66 private CharsetCallback.Encoder onUnmappableInput = CharsetCallback.FROM_U_CALLBACK_STOP;
68 private CharsetCallback.Encoder onMalformedInput = CharsetCallback.FROM_U_CALLBACK_STOP;
70 CharsetCallback.Encoder fromCharErrorBehaviour = new CharsetCallback.Encoder() {
71 public CoderResult call(CharsetEncoderICU encoder, Object context,
72 CharBuffer source, ByteBuffer target, IntBuffer offsets,
73 char[] buffer, int length, int cp, CoderResult cr) {
74 if (cr.isUnmappable()) {
75 return onUnmappableInput.call(encoder, context, source, target,
76 offsets, buffer, length, cp, cr);
77 } else /* if (cr.isMalformed()) */ {
78 return onMalformedInput.call(encoder, context, source, target,
79 offsets, buffer, length, cp, cr);
81 // return CharsetCallback.FROM_U_CALLBACK_STOP.call(encoder, context, source, target, offsets, buffer, length, cp, cr);
/**
 * Constructs a new encoder for the given charset.
 *
 * @param cs
 *            the charset for which the encoder is created
 * @param replacement
 *            the substitution bytes
 */
CharsetEncoderICU(CharsetICU cs, byte[] replacement) {
    // The average bytes-per-char passed to the superclass is the midpoint
    // of the charset's minimum and maximum bytes-per-char.
    super(cs,
            (cs.minBytesPerChar + cs.maxBytesPerChar) / 2,
            cs.maxBytesPerChar,
            replacement);
}
100 * Is this Encoder allowed to use fallbacks? A fallback mapping is a mapping
101 * that will convert a Unicode codepoint sequence to a byte sequence, but
102 * the encoded byte sequence will round trip convert to a different
103 * Unicode codepoint sequence.
104 * @return true if the converter uses fallback, false otherwise.
107 public boolean isFallbackUsed() {
112 * Sets whether this Encoder can use fallbacks?
113 * @param usesFallback true if the user wants the converter to take
114 * advantage of the fallback mapping, false otherwise.
117 public void setFallbackUsed(boolean usesFallback) {
118 useFallback = usesFallback;
122 * Use fallbacks from Unicode to codepage when useFallback or for private-use code points
123 * @param c A codepoint
125 final boolean isFromUUseFallback(int c) {
126 return (useFallback) || isUnicodePrivateUse(c);
130 * Use fallbacks from Unicode to codepage when useFallback or for private-use code points
132 static final boolean isFromUUseFallback(boolean iUseFallback, int c) {
133 return (iUseFallback) || isUnicodePrivateUse(c);
136 private static final boolean isUnicodePrivateUse(int c) {
137 // First test for U+E000 to optimize for the most common characters.
138 return c >= 0xE000 && (c <= 0xF8FF ||
139 c >= 0xF0000 && (c <= 0xFFFFD ||
140 (c >= 0x100000 && c <= 0x10FFFD)));
144 * Sets the action to be taken if an illegal sequence is encountered
148 * @exception IllegalArgumentException
151 protected void implOnMalformedInput(CodingErrorAction newAction) {
152 onMalformedInput = getCallback(newAction);
156 * Sets the action to be taken if an illegal sequence is encountered
160 * @exception IllegalArgumentException
163 protected void implOnUnmappableCharacter(CodingErrorAction newAction) {
164 onUnmappableInput = getCallback(newAction);
168 * Sets the callback encoder method and context to be used if an illegal sequence is encountered.
169 * You would normally call this twice to set both the malform and unmappable error. In this case,
170 * newContext should remain the same since using a different newContext each time will negate the last
172 * @param err CoderResult
173 * @param newCallback CharsetCallback.Encoder
174 * @param newContext Object
177 public final void setFromUCallback(CoderResult err, CharsetCallback.Encoder newCallback, Object newContext) {
178 if (err.isMalformed()) {
179 onMalformedInput = newCallback;
180 } else if (err.isUnmappable()) {
181 onUnmappableInput = newCallback;
183 /* Error: Only malformed and unmappable are handled. */
186 if (fromUContext == null || !fromUContext.equals(newContext)) {
187 setFromUContext(newContext);
192 * Sets fromUContext used in callbacks.
194 * @param newContext Object
195 * @exception IllegalArgumentException The object is an illegal argument for UContext.
198 public final void setFromUContext(Object newContext) {
199 fromUContext = newContext;
202 private static CharsetCallback.Encoder getCallback(CodingErrorAction action) {
203 if (action == CodingErrorAction.REPLACE) {
204 return CharsetCallback.FROM_U_CALLBACK_SUBSTITUTE;
205 } else if (action == CodingErrorAction.IGNORE) {
206 return CharsetCallback.FROM_U_CALLBACK_SKIP;
207 } else /* if (action == CodingErrorAction.REPORT) */ {
208 return CharsetCallback.FROM_U_CALLBACK_STOP;
212 private static final CharBuffer EMPTY = CharBuffer.allocate(0);
215 * Flushes any characters saved in the converter's internal buffer and
216 * resets the converter.
217 * @param out action to be taken
218 * @return result of flushing action and completes the decoding all input.
219 * Returns CoderResult.UNDERFLOW if the action succeeds.
222 protected CoderResult implFlush(ByteBuffer out) {
223 return encode(EMPTY, out, null, true);
227 * Resets the from Unicode mode of converter
// Resets the from-Unicode state of this encoder.
230 protected void implReset() {
// discard output bytes still waiting in the overflow buffer (encode() only flushes it when the length is > 0)
231 errorBufferLength = 0;
232 fromUnicodeStatus = 0;
// NOTE(review): the listing skips source lines after this statement, so further
// reset statements may be absent here — confirm against the complete file.
// Clears the stored partial-match state used by the from-Unicode conversion.
// NOTE(review): the listing skips the source lines directly before and after the
// assignment below, so other statements may be absent — confirm against the complete file.
237 private void fromUnicodeReset() {
// no first code point of a partial match is stored any more
239 preFromUFirstCP = UConverterConstants.U_SENTINEL;
244 * Encodes one or more chars. The default behaviour of the
245 * converter is stop and report if an error in input stream is encountered.
246 * To set different behaviour use @see CharsetEncoder.onMalformedInput()
247 * @param in buffer to encode
248 * @param out buffer to populate with encoded result
249 * @return result of encoding action. Returns CoderResult.UNDERFLOW if the encoding
250 * action succeeds or more input is needed for completing the encoding action.
// Framework entry point: converts the chars of "in" into bytes in "out" by delegating to encode()
// with no offsets and flush == false.
// NOTE(review): the listing skips source lines in this method (including its end), so the
// final return statement is not visible here — confirm against the complete file.
253 protected CoderResult encodeLoop(CharBuffer in, ByteBuffer out) {
254 if (!in.hasRemaining() && this.errorBufferLength == 0) { // make sure the errorBuffer is empty
255 // The Java framework should have already substituted what was left.
257 //fromUnicodeReset();
258 return CoderResult.UNDERFLOW;
// step over the chars that fromUCountPending() reports as already held in converter state
260 in.position(in.position() + fromUCountPending());
261 /* do the conversion */
262 CoderResult ret = encode(in, out, null, false);
// move the position back by the number of chars still held in converter state after the call
263 setSourcePosition(in);
264 /* No need to reset to keep the proper state of the encoder.
265 if (ret.isUnderflow() && in.hasRemaining()) {
266 // The Java framework is going to substitute what is left.
267 //fromUnicodeReset();
273 * Implements ICU semantics of buffer management
277 * @return A CoderResult object that contains the error result when an error occurs.
279 abstract CoderResult encodeLoop(CharBuffer source, ByteBuffer target,
280 IntBuffer offsets, boolean flush);
283 * Implements ICU semantics for encoding the buffer
284 * @param source The input character buffer
285 * @param target The output byte buffer
287 * @param flush true if, and only if, the invoker can provide no
288 * additional input bytes beyond those in the given buffer.
289 * @return A CoderResult object that contains the error result when an error occurs.
// Encodes "source" into "target" with ICU semantics: bytes left in the overflow buffer by an
// earlier call are written out first, then the work is handed to fromUnicodeWithCallback().
// Throws IllegalArgumentException when source or target is null.
// NOTE(review): the listing skips source lines in this method (local declarations, loop
// headers and closing braces are not visible) — confirm details against the complete file.
291 final CoderResult encode(CharBuffer source, ByteBuffer target,
292 IntBuffer offsets, boolean flush) {
294 /* check parameters */
295 if (target == null || source == null) {
296 throw new IllegalArgumentException();
300 * Make sure that the buffer sizes do not exceed the number range for
301 * int32_t because some functions use the size (in units or bytes)
302 * rather than comparing pointers, and because offsets are int32_t values.
304 * size_t is guaranteed to be unsigned and large enough for the job.
306 * Return with an error instead of adjusting the limits because we would
307 * not be able to maintain the semantics that either the source must be
308 * consumed or the target filled (unless an error occurs).
309 * An adjustment would be targetLimit=t+0x7fffffff; for example.
312 /* flush the target overflow buffer */
313 if (errorBufferLength > 0) {
314 byte[] overflowArray;
317 overflowArray = errorBuffer;
318 length = errorBufferLength;
321 if (target.remaining() == 0) {
322 /* the overflow buffer contains too much, keep the rest */
// the bytes not yet written are moved to the front of the overflow buffer
326 overflowArray[j++] = overflowArray[i++];
327 } while (i < length);
// j is the number of bytes that were kept
329 errorBufferLength = (byte) j;
330 return CoderResult.OVERFLOW;
333 /* copy the overflow contents to the target */
334 target.put(overflowArray[i++]);
335 if (offsets != null) {
336 offsets.put(-1); /* no source index available for old output */
338 } while (i < length);
340 /* the overflow buffer is completely copied to the target */
341 errorBufferLength = 0;
// preFromULength >= 0 means there are no stored chars waiting to be replayed
344 if (!flush && source.remaining() == 0 && preFromULength >= 0) {
345 /* the overflow buffer is emptied and there is no new input: we are done */
346 return CoderResult.UNDERFLOW;
350 * Do not simply return with a buffer overflow error if
351 * !flush && t==targetLimit
352 * because it is possible that the source will not generate any output.
353 * For example, the skip callback may be called;
354 * it does not output anything.
357 return fromUnicodeWithCallback(source, target, offsets, flush);
362 * Implementation note for m:n conversions
364 * While collecting source units to find the longest match for m:n conversion,
365 * some source units may need to be stored for a partial match.
366 * When a second buffer does not yield a match on all of the previously stored
367 * source units, then they must be "replayed", i.e., fed back into the converter.
369 * The code relies on the fact that replaying will not nest -
370 * converting a replay buffer will not result in a replay.
371 * This is because a replay is necessary only after the _continuation_ of a
372 * partial match failed, but a replay buffer is converted as a whole.
373 * It may result in some of its units being stored again for a partial match,
374 * but there will not be a continuation _during_ the replay which could fail.
376 * It is conceivable that a callback function could call the converter
377 * recursively in a way that causes another replay to be stored, but that
378 * would be an error in the callback function.
379 * Such violations will cause assertion failures in a debug build,
380 * and wrong output, but they will not cause a crash.
// Drives the conversion: calls the converter-specific encodeLoop(), replays chars stored from
// an unfinished m:n match through a temporary buffer, and passes malformed or unmappable
// results to fromCharErrorBehaviour until the input is consumed or an error remains.
// NOTE(review): the listing skips many source lines in this method (loop headers, break and
// continue statements, closing braces, short assignments), so the control flow cannot be read
// off these lines alone — confirm against the complete file.
382 final CoderResult fromUnicodeWithCallback(CharBuffer source,
383 ByteBuffer target, IntBuffer offsets, boolean flush) {
386 int errorInputLength;
387 boolean converterSawEndOfInput, calledCallback;
389 /* variables for m:n conversion */
// temporary buffer for replayed chars; it has the same capacity as preFromUArray
390 CharBuffer replayArray = CharBuffer.allocate(EXT_MAX_UCHARS);
391 int replayArrayIndex = 0;
392 CharBuffer realSource;
395 CoderResult cr = CoderResult.UNDERFLOW;
397 /* get the converter implementation function */
400 if (preFromULength >= 0) {
406 * Previous m:n conversion stored source units from a partial match
407 * and failed to consume all of them.
408 * We need to "replay" them from a temporary buffer and convert them first.
413 //UConverterUtility.uprv_memcpy(replayArray, replayArrayIndex, preFromUArray, 0, -preFromULength*UMachine.U_SIZEOF_UCHAR);
414 replayArray.put(preFromUArray, 0, -preFromULength);
415 source = replayArray;
416 source.position(replayArrayIndex);
417 source.limit(replayArrayIndex - preFromULength); //preFromULength is negative, see declaration
424 * loop for conversion and error handling
430 * handle end of input
431 * handle errors/call callback
437 cr = encodeLoop(source, target, offsets, flush);
439 * set a flag for whether the converter
440 * successfully processed the end of the input
442 * need not check cnv.preFromULength==0 because a replay (<0) will cause
443 * s<sourceLimit before converterSawEndOfInput is checked
445 converterSawEndOfInput = (cr.isUnderflow() && flush
446 && source.remaining() == 0 && fromUChar32 == 0);
448 /* no callback called yet for this iteration */
449 calledCallback = false;
451 /* no sourceIndex adjustment for conversion, only for callback output */
452 errorInputLength = 0;
455 * loop for offsets and error handling
457 * iterates at most 3 times:
458 * 1. to clean up after the conversion function
459 * 2. after the callback
460 * 3. after the callback again if there was truncated input
463 /* update offsets if we write any */
464 /* Currently offsets are not being used in ICU4J */
465 /* if (offsets != null) {
466 int length = target.remaining();
470 * if a converter handles offsets and updates the offsets
471 * pointer at the end, then offset should not change
473 * however, some converters do not handle offsets at all
474 * (sourceIndex<0) or may not update the offsets pointer
476 /* offsets.position(offsets.position() + length);
479 if (sourceIndex >= 0) {
480 sourceIndex += (int) (source.position());
484 if (preFromULength < 0) {
486 * switch the source to new replay units (cannot occur while replaying)
487 * after offset handling and before end-of-input and callback handling
489 if (realSource == null) {
493 //UConverterUtility.uprv_memcpy(replayArray, replayArrayIndex, preFromUArray, 0, -preFromULength*UMachine.U_SIZEOF_UCHAR);
494 replayArray.put(preFromUArray, 0, -preFromULength);
496 source = replayArray;
497 source.position(replayArrayIndex);
498 source.limit(replayArrayIndex - preFromULength);
500 if ((sourceIndex += preFromULength) < 0) {
506 /* see implementation note before _fromUnicodeWithCallback() */
507 //agljport:todo U_ASSERT(realSource==NULL);
508 Assert.assrt(realSource == null);
512 /* update pointers */
513 sBufferIndex = source.position();
514 if (cr.isUnderflow()) {
515 if (sBufferIndex < source.limit()) {
517 * continue with the conversion loop while there is still input left
518 * (continue converting by breaking out of only the inner loop)
521 } else if (realSource != null) {
522 /* switch back from replaying to the real source and continue */
525 sourceIndex = source.position();
528 } else if (flush && fromUChar32 != 0) {
530 * the entire input stream is consumed
531 * and there is a partial, truncated input sequence left
534 /* inject an error and continue with callback handling */
535 //err[0]=ErrorCode.U_TRUNCATED_CHAR_FOUND;
// the truncated sequence is reported as malformed input of length 1
536 cr = CoderResult.malformedForLength(1);
537 calledCallback = false; /* new error condition */
542 * return to the conversion loop once more if the flush
543 * flag is set and the conversion function has not
544 * successfully processed the end of the input yet
546 * (continue converting by breaking out of only the inner loop)
548 if (!converterSawEndOfInput) {
552 /* reset the converter without calling the callback function */
556 /* done successfully */
564 if (calledCallback || cr.isOverflow()
565 || (!cr.isMalformed() && !cr.isUnmappable())) {
567 * the callback did not or cannot resolve the error:
568 * set output pointers and return
570 * the check for buffer overflow is redundant but it is
571 * a high-runner case and hopefully documents the intent
574 * if we were replaying, then the replay buffer must be
575 * copied back into the UConverter
576 * and the real arguments must be restored
578 if (realSource != null) {
581 //agljport:todo U_ASSERT(cnv.preFromULength==0);
583 length = source.remaining();
585 //UConverterUtility.uprv_memcpy(preFromUArray, 0, sourceArray, pArgs.sourceBegin, length*UMachine.U_SIZEOF_UCHAR);
586 source.get(preFromUArray, 0, length);
// stored as a negative count, which marks these chars as a replay (see the preFromULength declaration)
587 preFromULength = (byte) -length;
594 /* callback handling */
598 /* get and write the code point */
599 codePoint = fromUChar32;
// the result of UTF16.append on invalidUCharBuffer becomes the length handed to the callback
600 errorInputLength = UTF16.append(invalidUCharBuffer, 0,
602 invalidUCharLength = errorInputLength;
604 /* set the converter state to deal with the next character */
607 /* call the callback function */
608 cr = fromCharErrorBehaviour.call(this, fromUContext,
609 source, target, offsets, invalidUCharBuffer,
610 invalidUCharLength, codePoint, cr);
614 * loop back to the offset handling
616 * this flag will indicate after offset handling
617 * that a callback was called;
618 * if the callback did not resolve the error, then we return
620 calledCallback = true;
626 * Ascertains if a given Unicode code point (32bit value for handling surrogates)
627 * can be converted to the target encoding. If the caller wants to test if a
628 * surrogate pair can be converted to target encoding then the
629 * responsibility of assembling the int value lies with the caller.
630 * For assembling a code point the caller can use UTF16 class of ICU4J and do something like:
632 * while(i<mySource.length){
633 * if(UTF16.isLeadSurrogate(mySource[i])&& i+1< mySource.length){
634 * if(UTF16.isTrailSurrogate(mySource[i+1])){
635 * int temp = UTF16.charAt(mySource,i,i+1,0);
636 * if(!((CharsetEncoderICU) myConv).canEncode(temp)){
647 * String src = new String(mySource);
649 * boolean passed = false;
650 * while(i<src.length()){
651 * codepoint = UTF16.charAt(src,i);
652 * i+= (codepoint>0xffff)? 2:1;
653 * if(!((CharsetEncoderICU) myConv).canEncode(codepoint)){
659 * @param codepoint Unicode code point as int value
660 * @return true if a character can be converted
662 /* TODO This is different from Java's canEncode(char) API.
663 * ICU's API should implement getUnicodeSet,
664 * and override canEncode(char) which queries getUnicodeSet.
665 * The getUnicodeSet should return a frozen UnicodeSet or use a fillin parameter, like ICU4C.
667 /*public boolean canEncode(int codepoint) {
671 * Overrides super class method
// Takes the candidate replacement bytes in repl and reports whether they are accepted.
// NOTE(review): only the signature is visible in this listing; the body's source lines
// are skipped, so the acceptance rule must be confirmed against the complete file.
674 public boolean isLegalReplacement(byte[] repl) {
679 * Writes out the specified output bytes to the target byte buffer or to converter internal buffers.
687 * @return A CoderResult object that contains the error result when an error occurs.
// Writes bytesLength bytes of bytesArray, starting at bytesBegin, to "out". Bytes that do not
// fit are copied into cnv.errorBuffer and the result is OVERFLOW; otherwise UNDERFLOW.
// NOTE(review): the listing skips source lines in this method (the try header, counter
// updates, closing braces and the return are not visible) — confirm against the complete file.
689 static final CoderResult fromUWriteBytes(CharsetEncoderICU cnv,
690 byte[] bytesArray, int bytesBegin, int bytesLength, ByteBuffer out,
691 IntBuffer offsets, int sourceIndex) {
// remembers the requested length; compared with bytesLength below when offsets are written
694 int obl = bytesLength;
695 CoderResult cr = CoderResult.UNDERFLOW;
696 int bytesLimit = bytesBegin + bytesLength;
698 for (; bytesBegin < bytesLimit;) {
699 out.put(bytesArray[bytesBegin]);
// a full target buffer ends the copy and is reported as OVERFLOW
704 } catch (BufferOverflowException ex) {
705 cr = CoderResult.OVERFLOW;
708 if (offsets != null) {
709 while (obl > bytesLength) {
710 offsets.put(sourceIndex);
// whatever was not written is saved in the converter's overflow buffer
715 cnv.errorBufferLength = bytesLimit - bytesBegin;
716 if (cnv.errorBufferLength > 0) {
718 while (bytesBegin < bytesLimit) {
719 cnv.errorBuffer[index++] = bytesArray[bytesBegin++];
721 cr = CoderResult.OVERFLOW;
727 * Returns the number of chars held in the converter's internal state
728 * because more input is needed for completing the conversion. This function is
729 * useful for mapping semantics of ICU's converter interface to those of iconv,
730 * and this information is not needed for normal conversion.
731 * @return The number of chars in the state. -1 if an error is encountered.
// Computes the number of chars held in the converter's internal state from
// preFromULength, preFromUFirstCP and fromUChar32.
// NOTE(review): the listing skips source lines in this method, including the value returned
// in the fromUChar32 branch and the final return — confirm against the complete file.
733 /*public*/int fromUCountPending() {
734 if (preFromULength > 0) {
// partial match: chars of the first code point plus the stored follow-up chars
735 return UTF16.getCharCount(preFromUFirstCP) + preFromULength;
736 } else if (preFromULength < 0) {
// replay: the count is stored as a negative number
737 return -preFromULength;
738 } else if (fromUChar32 > 0) {
740 } else if (preFromUFirstCP > 0) {
741 return UTF16.getCharCount(preFromUFirstCP);
750 private final void setSourcePosition(CharBuffer source) {
752 // ok was there input held in the previous invocation of encodeLoop
753 // that resulted in output in this invocation?
754 source.position(source.position() - fromUCountPending());
758 * Write the codepage substitution character.
759 * Subclasses to override this method.
760 * For stateful converters, it is typically necessary to handle this
761 * specificially for the converter in order to properly maintain the state.
762 * @param source The input character buffer
763 * @param target The output byte buffer
765 * @return A CoderResult object that contains the error result when an error occurs.
767 CoderResult cbFromUWriteSub(CharsetEncoderICU encoder, CharBuffer source,
768 ByteBuffer target, IntBuffer offsets) {
769 CharsetICU cs = (CharsetICU) encoder.charset();
770 byte[] sub = encoder.replacement();
771 if (cs.subChar1 != 0 && encoder.invalidUCharBuffer[0] <= 0xff) {
772 return CharsetEncoderICU.fromUWriteBytes(encoder,
773 new byte[] { cs.subChar1 }, 0, 1, target, offsets, source
776 return CharsetEncoderICU.fromUWriteBytes(encoder, sub, 0,
777 sub.length, target, offsets, source.position());
782 * Write the characters to target.
783 * @param source The input character buffer
784 * @param target The output byte buffer
786 * @return A CoderResult object that contains the error result when an error occurs.
// Encodes the chars of "source" into "target" through the given encoder. When the target
// overflows, the conversion is continued into the encoder's own overflow buffer.
// NOTE(review): the listing skips source lines in this method (closing braces, short
// statements and the return are not visible) — confirm against the complete file.
788 CoderResult cbFromUWriteUChars(CharsetEncoderICU encoder,
789 CharBuffer source, ByteBuffer target, IntBuffer offsets) {
790 CoderResult cr = CoderResult.UNDERFLOW;
792 /* This is a fun one. Recursion can occur - we're basically going to
793 * just retry shoving data through the same converter. Note, if you got
794 * here through some kind of invalid sequence, you maybe should emit a
795 * reset sequence of some kind. Since this IS an actual conversion,
796 * take care that you've changed the callback or the data, or you'll
797 * get an infinite loop.
// both positions are recorded before converting; they are used for the offsets below
800 int oldTargetPosition = target.position();
801 int offsetIndex = source.position();
803 cr = encoder.encode(source, target, null, false); /* no offsets and no flush */
805 if (offsets != null) {
806 while (target.position() != oldTargetPosition) {
807 offsets.put(offsetIndex);
812 /* Note, if you did something like used a stop subcallback, things would get interesting.
813 * In fact, here's where we want to return the partially consumed in-source!
815 if (cr.isOverflow()) {
816 /* Overflowed target. Now, we'll write into the charErrorBuffer.
817 * It's a fixed size. If we overflow it...Hm
820 /* start the new target at the first free slot in the error buffer */
821 int errBuffLen = encoder.errorBufferLength;
822 ByteBuffer newTarget = ByteBuffer.wrap(encoder.errorBuffer);
823 newTarget.position(errBuffLen); /* set the position at the end of the error buffer */
// the length is zeroed so that the nested encode() does not first flush the overflow buffer
824 encoder.errorBufferLength = 0;
826 encoder.encode(source, newTarget, null, false);
// the wrapped array is the error buffer itself; its new fill level is the wrapper's position
828 encoder.errorBuffer = newTarget.array();
829 encoder.errorBufferLength = newTarget.position();
837 * Handles a common situation where a character has been read and it may be
838 * a lead surrogate followed by a trail surrogate. This method can change
839 * the source position and will modify fromUChar32.
843 * If <code>null</code> is returned, then there was success in reading a
844 * surrogate pair, the codepoint is stored in <code>fromUChar32</code> and
845 * <code>fromUChar32</code> should be reset (to 0) after being read.
849 * The encoding source.
851 * A character that may be the first in a surrogate pair.
852 * @return <code>CoderResult.malformedForLength(1)</code> or
853 * <code>CoderResult.UNDERFLOW</code> if there is a problem, or
854 * <code>null</code> if there isn't.
855 * @see #handleSurrogates(CharBuffer, char)
856 * @see #handleSurrogates(char[], int, int, char)
// Tries to combine "lead" with the next char of "source" into a supplementary code point,
// which is stored in fromUChar32 on success.
// NOTE(review): the listing skips source lines in every branch and at the end of this method,
// so further statements and the success return value are not visible — confirm against the
// complete file.
858 final CoderResult handleSurrogates(CharBuffer source, char lead) {
859 if (!UTF16.isLeadSurrogate(lead)) {
861 return CoderResult.malformedForLength(1);
// no char is available to pair with the lead surrogate
864 if (!source.hasRemaining()) {
866 return CoderResult.UNDERFLOW;
869 char trail = source.get();
871 if (!UTF16.isTrailSurrogate(trail)) {
// un-read the char that turned out not to be a trail surrogate
873 source.position(source.position() - 1);
874 return CoderResult.malformedForLength(1);
877 fromUChar32 = UCharacter.getCodePoint(lead, trail);
883 * Same as <code>handleSurrogates(CharBuffer, char)</code>, but with arrays. As an added
884 * requirement, the calling method must also increment the index if this method returns
890 * The encoding source.
892 * A character that may be the first in a surrogate pair.
893 * @return <code>CoderResult.malformedForLength(1)</code> or
894 * <code>CoderResult.UNDERFLOW</code> if there is a problem, or <code>null</code> if
896 * @see #handleSurrogates(CharBuffer, char)
897 * @see #handleSurrogates(char[], int, int, char)
// Array variant: tries to combine "lead" with sourceArray[sourceIndex] into a supplementary
// code point, which is stored in fromUChar32 on success. sourceIndex is not advanced here.
// NOTE(review): the listing skips source lines in every branch and at the end of this method,
// so further statements and the success return value are not visible — confirm against the
// complete file.
899 final CoderResult handleSurrogates(char[] sourceArray, int sourceIndex,
900 int sourceLimit, char lead) {
901 if (!UTF16.isLeadSurrogate(lead)) {
903 return CoderResult.malformedForLength(1);
// no char is available to pair with the lead surrogate
906 if (sourceIndex >= sourceLimit) {
908 return CoderResult.UNDERFLOW;
911 char trail = sourceArray[sourceIndex];
913 if (!UTF16.isTrailSurrogate(trail)) {
915 return CoderResult.malformedForLength(1);
918 fromUChar32 = UCharacter.getCodePoint(lead, trail);
923 * Returns the maxCharsPerByte value for the Charset that created this encoder.
924 * @return maxCharsPerByte
927 public final float maxCharsPerByte() {
928 return ((CharsetICU)(this.charset())).maxCharsPerByte;
932 * Calculates the size of a buffer for conversion from Unicode to a charset.
933 * The calculated size is guaranteed to be sufficient for this conversion.
935 * It takes into account initial and final non-character bytes that are output
936 * by some converters.
937 * It does not take into account callbacks which output more than one charset
938 * character sequence per call, like escape callbacks.
939 * The default (substitution) callback only outputs one charset character sequence.
941 * @param length Number of chars to be converted.
942 * @param maxCharSize Return value from maxBytesPerChar for the converter
944 * @return Size of a buffer that will be large enough to hold the output of bytes
948 public static int getMaxBytesForString(int length, int maxCharSize) {
949 return ((length + 10) * maxCharSize);