jars/icu4j-52_1/main/classes/charset/src/com/ibm/icu/charset/CharsetEncoderICU.java

   1 /**
   2  *******************************************************************************
   3  * Copyright (C) 2006-2013, International Business Machines Corporation and    *
   4  * others. All Rights Reserved.                                                *
   5  *******************************************************************************
   6  *
   7  *******************************************************************************
   8  */
   9
  10 package com.ibm.icu.charset;
  11
  12 import java.nio.BufferOverflowException;
  13 import java.nio.ByteBuffer;
  14 import java.nio.CharBuffer;
  15 import java.nio.IntBuffer;
  16 import java.nio.charset.CharsetEncoder;
  17 import java.nio.charset.CoderResult;
  18 import java.nio.charset.CodingErrorAction;
  19
  20 import com.ibm.icu.impl.Assert;
  21 import com.ibm.icu.lang.UCharacter;
  22 import com.ibm.icu.text.UTF16;
  23
  24 /**
   25  * An abstract class that provides framework methods of encoding operations for concrete
  26  * subclasses.
  27  * In the future this class will contain API that will implement converter semantics of ICU4C.
  28  * @stable ICU 3.6
  29  */
  30 public abstract class CharsetEncoderICU extends CharsetEncoder {
  31
  32     /* this is used in fromUnicode DBCS tables as an "unassigned" marker */
  33     static final char MISSING_CHAR_MARKER = '\uFFFF';
  34
  35     byte[] errorBuffer = new byte[30];
  36
  37     int errorBufferLength = 0;
  38
  39     /** these are for encodeLoopICU */
  40     int fromUnicodeStatus;
  41
  42     int fromUChar32;
  43
  44     boolean useSubChar1;
  45
  46     boolean useFallback;
  47
  48     /* maximum number of indexed UChars */
  49     static final int EXT_MAX_UCHARS = 19;
  50
  51     /* store previous UChars/chars to continue partial matches */
  52     int preFromUFirstCP; /* >=0: partial match */
  53
  54     char[] preFromUArray = new char[EXT_MAX_UCHARS];
  55
  56     int preFromUBegin;
  57
  58     int preFromULength; /* negative: replay */
  59
  60     char[] invalidUCharBuffer = new char[2];
  61
  62     int invalidUCharLength;
  63
  64     Object fromUContext;
  65
  66     private CharsetCallback.Encoder onUnmappableInput = CharsetCallback.FROM_U_CALLBACK_STOP;
  67
  68     private CharsetCallback.Encoder onMalformedInput = CharsetCallback.FROM_U_CALLBACK_STOP;
  69
  70     CharsetCallback.Encoder fromCharErrorBehaviour = new CharsetCallback.Encoder() {
  71         public CoderResult call(CharsetEncoderICU encoder, Object context,
  72                 CharBuffer source, ByteBuffer target, IntBuffer offsets,
  73                 char[] buffer, int length, int cp, CoderResult cr) {
  74             if (cr.isUnmappable()) {
  75                 return onUnmappableInput.call(encoder, context, source, target,
  76                         offsets, buffer, length, cp, cr);
  77             } else /* if (cr.isMalformed()) */ {
  78                 return onMalformedInput.call(encoder, context, source, target,
  79                         offsets, buffer, length, cp, cr);
  80             }
  81             // return CharsetCallback.FROM_U_CALLBACK_STOP.call(encoder, context, source, target, offsets, buffer, length, cp, cr);
  82
  83         }
  84     };
  85
  86     /*
  87      * Construcs a new encoder for the given charset
  88      *
  89      * @param cs
  90      *            for which the decoder is created
  91      * @param replacement
  92      *            the substitution bytes
  93      */
  94     CharsetEncoderICU(CharsetICU cs, byte[] replacement) {
  95         super(cs, (cs.minBytesPerChar + cs.maxBytesPerChar) / 2,
  96                 cs.maxBytesPerChar, replacement);
  97     }
  98
  99     /**
 100      * Is this Encoder allowed to use fallbacks? A fallback mapping is a mapping
 101      * that will convert a Unicode codepoint sequence to a byte sequence, but
 102      * the encoded byte sequence will round trip convert to a different
 103      * Unicode codepoint sequence.
 104      * @return true if the converter uses fallback, false otherwise.
 105      * @stable ICU 3.8
 106      */
 107     public boolean isFallbackUsed() {
 108         return useFallback;
 109     }
 110
 111     /**
 112      * Sets whether this Encoder can use fallbacks?
 113      * @param usesFallback true if the user wants the converter to take
 114      *  advantage of the fallback mapping, false otherwise.
 115      * @stable ICU 3.8
 116      */
 117     public void setFallbackUsed(boolean usesFallback) {
 118         useFallback = usesFallback;
 119     }
 120
 121     /*
 122      * Use fallbacks from Unicode to codepage when useFallback or for private-use code points
 123      * @param c A codepoint
 124      */
 125     final boolean isFromUUseFallback(int c) {
 126         return (useFallback) || isUnicodePrivateUse(c);
 127     }
 128
 129     /**
 130      * Use fallbacks from Unicode to codepage when useFallback or for private-use code points
 131      */
 132     static final boolean isFromUUseFallback(boolean iUseFallback, int c) {
 133         return (iUseFallback) || isUnicodePrivateUse(c);
 134     }
 135
 136     private static final boolean isUnicodePrivateUse(int c) {
 137         // First test for U+E000 to optimize for the most common characters.
 138         return c >= 0xE000 && (c <= 0xF8FF ||
 139                 c >= 0xF0000 && (c <= 0xFFFFD ||
 140                 (c >= 0x100000 && c <= 0x10FFFD)));
 141     }
 142
 143     /**
 144      * Sets the action to be taken if an illegal sequence is encountered
 145      *
 146      * @param newAction
 147      *            action to be taken
 148      * @exception IllegalArgumentException
 149      * @stable ICU 3.6
 150      */
 151     protected void implOnMalformedInput(CodingErrorAction newAction) {
 152         onMalformedInput = getCallback(newAction);
 153     }
 154
 155     /**
 156      * Sets the action to be taken if an illegal sequence is encountered
 157      *
 158      * @param newAction
 159      *            action to be taken
 160      * @exception IllegalArgumentException
 161      * @stable ICU 3.6
 162      */
 163     protected void implOnUnmappableCharacter(CodingErrorAction newAction) {
 164         onUnmappableInput = getCallback(newAction);
 165     }
 166
 167     /**
 168      * Sets the callback encoder method and context to be used if an illegal sequence is encountered.
 169      * You would normally call this twice to set both the malform and unmappable error. In this case,
 170      * newContext should remain the same since using a different newContext each time will negate the last
 171      * one used.
 172      * @param err CoderResult
 173      * @param newCallback CharsetCallback.Encoder
 174      * @param newContext Object
 175      * @stable ICU 4.0
 176      */
 177     public final void setFromUCallback(CoderResult err, CharsetCallback.Encoder newCallback, Object newContext) {
 178         if (err.isMalformed()) {
 179             onMalformedInput = newCallback;
 180         } else if (err.isUnmappable()) {
 181             onUnmappableInput = newCallback;
 182         } else {
 183             /* Error: Only malformed and unmappable are handled. */
 184         }
 185
 186         if (fromUContext == null || !fromUContext.equals(newContext)) {
 187             setFromUContext(newContext);
 188         }
 189     }
 190
 191     /**
 192      * Sets fromUContext used in callbacks.
 193      *
 194      * @param newContext Object
 195      * @exception IllegalArgumentException The object is an illegal argument for UContext.
 196      * @stable ICU 4.0
 197      */
 198     public final void setFromUContext(Object newContext) {
 199         fromUContext = newContext;
 200     }
 201
 202     private static CharsetCallback.Encoder getCallback(CodingErrorAction action) {
 203         if (action == CodingErrorAction.REPLACE) {
 204             return CharsetCallback.FROM_U_CALLBACK_SUBSTITUTE;
 205         } else if (action == CodingErrorAction.IGNORE) {
 206             return CharsetCallback.FROM_U_CALLBACK_SKIP;
 207         } else /* if (action == CodingErrorAction.REPORT) */ {
 208             return CharsetCallback.FROM_U_CALLBACK_STOP;
 209         }
 210     }
 211
 212     private static final CharBuffer EMPTY = CharBuffer.allocate(0);
 213
 214     /**
 215      * Flushes any characters saved in the converter's internal buffer and
 216      * resets the converter.
 217      * @param out action to be taken
 218      * @return result of flushing action and completes the decoding all input.
 219      *         Returns CoderResult.UNDERFLOW if the action succeeds.
 220      * @stable ICU 3.6
 221      */
 222     protected CoderResult implFlush(ByteBuffer out) {
 223         return encode(EMPTY, out, null, true);
 224     }
 225
 226     /**
 227      * Resets the from Unicode mode of converter
 228      * @stable ICU 3.6
 229      */
 230     protected void implReset() {
 231         errorBufferLength = 0;
 232         fromUnicodeStatus = 0;
 233         fromUChar32 = 0;
 234         fromUnicodeReset();
 235     }
 236
 237     private void fromUnicodeReset() {
 238         preFromUBegin = 0;
 239         preFromUFirstCP = UConverterConstants.U_SENTINEL;
 240         preFromULength = 0;
 241     }
 242
 243     /**
 244      * Encodes one or more chars. The default behaviour of the
 245      * converter is stop and report if an error in input stream is encountered.
 246      * To set different behaviour use @see CharsetEncoder.onMalformedInput()
 247      * @param in buffer to decode
 248      * @param out buffer to populate with decoded result
 249      * @return result of decoding action. Returns CoderResult.UNDERFLOW if the decoding
 250      *         action succeeds or more input is needed for completing the decoding action.
 251      * @stable ICU 3.6
 252      */
 253     protected CoderResult encodeLoop(CharBuffer in, ByteBuffer out) {
 254         if (!in.hasRemaining() && this.errorBufferLength == 0) { // make sure the errorBuffer is empty
 255             // The Java framework should have already substituted what was left.
 256             fromUChar32 = 0;
 257             //fromUnicodeReset();
 258             return CoderResult.UNDERFLOW;
 259         }
 260         in.position(in.position() + fromUCountPending());
 261         /* do the conversion */
 262         CoderResult ret = encode(in, out, null, false);
 263         setSourcePosition(in);
 264         /* No need to reset to keep the proper state of the encoder.
 265          if (ret.isUnderflow() && in.hasRemaining()) {
 266             // The Java framework is going to substitute what is left.
 267             //fromUnicodeReset();
 268         } */
 269         return ret;
 270     }
 271
 272     /*
 273      * Implements ICU semantics of buffer management
 274      * @param source
 275      * @param target
 276      * @param offsets
 277      * @return A CoderResult object that contains the error result when an error occurs.
 278      */
 279     abstract CoderResult encodeLoop(CharBuffer source, ByteBuffer target,
 280             IntBuffer offsets, boolean flush);
 281
 282     /*
 283      * Implements ICU semantics for encoding the buffer
 284      * @param source The input character buffer
 285      * @param target The output byte buffer
 286      * @param offsets
 287      * @param flush true if, and only if, the invoker can provide no
 288      *  additional input bytes beyond those in the given buffer.
 289      * @return A CoderResult object that contains the error result when an error occurs.
 290      */
 291     final CoderResult encode(CharBuffer source, ByteBuffer target,
 292             IntBuffer offsets, boolean flush) {
 293
 294         /* check parameters */
 295         if (target == null || source == null) {
 296             throw new IllegalArgumentException();
 297         }
 298
 299         /*
 300          * Make sure that the buffer sizes do not exceed the number range for
 301          * int32_t because some functions use the size (in units or bytes)
 302          * rather than comparing pointers, and because offsets are int32_t values.
 303          *
 304          * size_t is guaranteed to be unsigned and large enough for the job.
 305          *
 306          * Return with an error instead of adjusting the limits because we would
 307          * not be able to maintain the semantics that either the source must be
 308          * consumed or the target filled (unless an error occurs).
 309          * An adjustment would be targetLimit=t+0x7fffffff; for example.
 310          */
 311
 312         /* flush the target overflow buffer */
 313         if (errorBufferLength > 0) {
 314             byte[] overflowArray;
 315             int i, length;
 316
 317             overflowArray = errorBuffer;
 318             length = errorBufferLength;
 319             i = 0;
 320             do {
 321                 if (target.remaining() == 0) {
 322                     /* the overflow buffer contains too much, keep the rest */
 323                     int j = 0;
 324
 325                     do {
 326                         overflowArray[j++] = overflowArray[i++];
 327                     } while (i < length);
 328
 329                     errorBufferLength = (byte) j;
 330                     return CoderResult.OVERFLOW;
 331                 }
 332
 333                 /* copy the overflow contents to the target */
 334                 target.put(overflowArray[i++]);
 335                 if (offsets != null) {
 336                     offsets.put(-1); /* no source index available for old output */
 337                 }
 338             } while (i < length);
 339
 340             /* the overflow buffer is completely copied to the target */
 341             errorBufferLength = 0;
 342         }
 343
 344         if (!flush && source.remaining() == 0 && preFromULength >= 0) {
 345             /* the overflow buffer is emptied and there is no new input: we are done */
 346             return CoderResult.UNDERFLOW;
 347         }
 348
 349         /*
 350          * Do not simply return with a buffer overflow error if
 351          * !flush && t==targetLimit
 352          * because it is possible that the source will not generate any output.
 353          * For example, the skip callback may be called;
 354          * it does not output anything.
 355          */
 356
 357         return fromUnicodeWithCallback(source, target, offsets, flush);
 358
 359     }
 360
 361     /*
 362      * Implementation note for m:n conversions
 363      *
 364      * While collecting source units to find the longest match for m:n conversion,
 365      * some source units may need to be stored for a partial match.
 366      * When a second buffer does not yield a match on all of the previously stored
 367      * source units, then they must be "replayed", i.e., fed back into the converter.
 368      *
 369      * The code relies on the fact that replaying will not nest -
 370      * converting a replay buffer will not result in a replay.
 371      * This is because a replay is necessary only after the _continuation_ of a
 372      * partial match failed, but a replay buffer is converted as a whole.
 373      * It may result in some of its units being stored again for a partial match,
 374      * but there will not be a continuation _during_ the replay which could fail.
 375      *
 376      * It is conceivable that a callback function could call the converter
 377      * recursively in a way that causes another replay to be stored, but that
 378      * would be an error in the callback function.
 379      * Such violations will cause assertion failures in a debug build,
 380      * and wrong output, but they will not cause a crash.
 381      */
 382     final CoderResult fromUnicodeWithCallback(CharBuffer source,
 383             ByteBuffer target, IntBuffer offsets, boolean flush) {
 384         int sBufferIndex;
 385         int sourceIndex;
 386         int errorInputLength;
 387         boolean converterSawEndOfInput, calledCallback;
 388
 389         /* variables for m:n conversion */
 390         CharBuffer replayArray = CharBuffer.allocate(EXT_MAX_UCHARS);
 391         int replayArrayIndex = 0;
 392         CharBuffer realSource;
 393         boolean realFlush;
 394
 395         CoderResult cr = CoderResult.UNDERFLOW;
 396
 397         /* get the converter implementation function */
 398         sourceIndex = 0;
 399
 400         if (preFromULength >= 0) {
 401             /* normal mode */
 402             realSource = null;
 403             realFlush = false;
 404         } else {
 405             /*
 406              * Previous m:n conversion stored source units from a partial match
 407              * and failed to consume all of them.
 408              * We need to "replay" them from a temporary buffer and convert them first.
 409              */
 410             realSource = source;
 411             realFlush = flush;
 412
 413             //UConverterUtility.uprv_memcpy(replayArray, replayArrayIndex, preFromUArray, 0, -preFromULength*UMachine.U_SIZEOF_UCHAR);
 414             replayArray.put(preFromUArray, 0, -preFromULength);
 415             source = replayArray;
 416             source.position(replayArrayIndex);
 417             source.limit(replayArrayIndex - preFromULength); //preFromULength is negative, see declaration
 418             flush = false;
 419
 420             preFromULength = 0;
 421         }
 422
 423         /*
 424          * loop for conversion and error handling
 425          *
 426          * loop {
 427          *   convert
 428          *   loop {
 429          *     update offsets
 430          *     handle end of input
 431          *     handle errors/call callback
 432          *   }
 433          * }
 434          */
 435         for (;;) {
 436             /* convert */
 437             cr = encodeLoop(source, target, offsets, flush);
 438             /*
 439              * set a flag for whether the converter
 440              * successfully processed the end of the input
 441              *
 442              * need not check cnv.preFromULength==0 because a replay (<0) will cause
 443              * s<sourceLimit before converterSawEndOfInput is checked
 444              */
 445             converterSawEndOfInput = (cr.isUnderflow() && flush
 446                     && source.remaining() == 0 && fromUChar32 == 0);
 447
 448             /* no callback called yet for this iteration */
 449             calledCallback = false;
 450
 451             /* no sourceIndex adjustment for conversion, only for callback output */
 452             errorInputLength = 0;
 453
 454             /*
 455              * loop for offsets and error handling
 456              *
 457              * iterates at most 3 times:
 458              * 1. to clean up after the conversion function
 459              * 2. after the callback
 460              * 3. after the callback again if there was truncated input
 461              */
 462             for (;;) {
 463                 /* update offsets if we write any */
 464                 /* Currently offsets are not being used in ICU4J */
 465                 /* if (offsets != null) {
 466                     int length = target.remaining();
 467                     if (length > 0) {
 468
 469                         /*
 470                          * if a converter handles offsets and updates the offsets
 471                          * pointer at the end, then offset should not change
 472                          * here;
 473                          * however, some converters do not handle offsets at all
 474                          * (sourceIndex<0) or may not update the offsets pointer
 475                          */
 476                  /*       offsets.position(offsets.position() + length);
 477                     }
 478
 479                     if (sourceIndex >= 0) {
 480                         sourceIndex += (int) (source.position());
 481                     }
 482                 } */
 483
 484                 if (preFromULength < 0) {
 485                     /*
 486                      * switch the source to new replay units (cannot occur while replaying)
 487                      * after offset handling and before end-of-input and callback handling
 488                      */
 489                     if (realSource == null) {
 490                         realSource = source;
 491                         realFlush = flush;
 492
 493                         //UConverterUtility.uprv_memcpy(replayArray, replayArrayIndex, preFromUArray, 0, -preFromULength*UMachine.U_SIZEOF_UCHAR);
 494                         replayArray.put(preFromUArray, 0, -preFromULength);
 495
 496                         source = replayArray;
 497                         source.position(replayArrayIndex);
 498                         source.limit(replayArrayIndex - preFromULength);
 499                         flush = false;
 500                         if ((sourceIndex += preFromULength) < 0) {
 501                             sourceIndex = -1;
 502                         }
 503
 504                         preFromULength = 0;
 505                     } else {
 506                         /* see implementation note before _fromUnicodeWithCallback() */
 507                         //agljport:todo U_ASSERT(realSource==NULL);
 508                         Assert.assrt(realSource == null);
 509                     }
 510                 }
 511
 512                 /* update pointers */
 513                 sBufferIndex = source.position();
 514                 if (cr.isUnderflow()) {
 515                     if (sBufferIndex < source.limit()) {
 516                         /*
 517                          * continue with the conversion loop while there is still input left
 518                          * (continue converting by breaking out of only the inner loop)
 519                          */
 520                         break;
 521                     } else if (realSource != null) {
 522                         /* switch back from replaying to the real source and continue */
 523                         source = realSource;
 524                         flush = realFlush;
 525                         sourceIndex = source.position();
 526                         realSource = null;
 527                         break;
 528                     } else if (flush && fromUChar32 != 0) {
 529                         /*
 530                          * the entire input stream is consumed
 531                          * and there is a partial, truncated input sequence left
 532                          */
 533
 534                         /* inject an error and continue with callback handling */
 535                         //err[0]=ErrorCode.U_TRUNCATED_CHAR_FOUND;
 536                         cr = CoderResult.malformedForLength(1);
 537                         calledCallback = false; /* new error condition */
 538                     } else {
 539                         /* input consumed */
 540                         if (flush) {
 541                             /*
 542                              * return to the conversion loop once more if the flush
 543                              * flag is set and the conversion function has not
 544                              * successfully processed the end of the input yet
 545                              *
 546                              * (continue converting by breaking out of only the inner loop)
 547                              */
 548                             if (!converterSawEndOfInput) {
 549                                 break;
 550                             }
 551
 552                             /* reset the converter without calling the callback function */
 553                             implReset();
 554                         }
 555
 556                         /* done successfully */
 557                         return cr;
 558                     }
 559                 }
 560
 561                 /*U_FAILURE(*err) */
 562                 {
 563
 564                     if (calledCallback || cr.isOverflow()
 565                             || (!cr.isMalformed() && !cr.isUnmappable())) {
 566                         /*
 567                          * the callback did not or cannot resolve the error:
 568                          * set output pointers and return
 569                          *
 570                          * the check for buffer overflow is redundant but it is
 571                          * a high-runner case and hopefully documents the intent
 572                          * well
 573                          *
 574                          * if we were replaying, then the replay buffer must be
 575                          * copied back into the UConverter
 576                          * and the real arguments must be restored
 577                          */
 578                         if (realSource != null) {
 579                             int length;
 580
 581                             //agljport:todo U_ASSERT(cnv.preFromULength==0);
 582
 583                             length = source.remaining();
 584                             if (length > 0) {
 585                                 //UConverterUtility.uprv_memcpy(preFromUArray, 0, sourceArray, pArgs.sourceBegin, length*UMachine.U_SIZEOF_UCHAR);
 586                                 source.get(preFromUArray, 0, length);
 587                                 preFromULength = (byte) -length;
 588                             }
 589                         }
 590                         return cr;
 591                     }
 592                 }
 593
 594                 /* callback handling */
 595                 {
 596                     int codePoint;
 597
 598                     /* get and write the code point */
 599                     codePoint = fromUChar32;
 600                     errorInputLength = UTF16.append(invalidUCharBuffer, 0,
 601                             fromUChar32);
 602                     invalidUCharLength = errorInputLength;
 603
 604                     /* set the converter state to deal with the next character */
 605                     fromUChar32 = 0;
 606
 607                     /* call the callback function */
 608                     cr = fromCharErrorBehaviour.call(this, fromUContext,
 609                             source, target, offsets, invalidUCharBuffer,
 610                             invalidUCharLength, codePoint, cr);
 611                 }
 612
 613                 /*
 614                  * loop back to the offset handling
 615                  *
 616                  * this flag will indicate after offset handling
 617                  * that a callback was called;
 618                  * if the callback did not resolve the error, then we return
 619                  */
 620                 calledCallback = true;
 621             }
 622         }
 623     }
 624
 625     /*
 626      * Ascertains if a given Unicode code point (32bit value for handling surrogates)
 627      * can be converted to the target encoding. If the caller wants to test if a
 628      * surrogate pair can be converted to target encoding then the
 629      * responsibility of assembling the int value lies with the caller.
 630      * For assembling a code point the caller can use UTF16 class of ICU4J and do something like:
 631      * <pre>
 632      *  while(i<mySource.length){
 633      *      if(UTF16.isLeadSurrogate(mySource[i])&& i+1< mySource.length){
 634      *          if(UTF16.isTrailSurrogate(mySource[i+1])){
 635      *              int temp = UTF16.charAt(mySource,i,i+1,0);
 636      *              if(!((CharsetEncoderICU) myConv).canEncode(temp)){
 637      *                  passed=false;
 638      *              }
 639      *              i++;
 640      *              i++;
 641      *          }
 642      *      }
 643      *  }
 644      * </pre>
 645      * or
 646      * <pre>
 647      *  String src = new String(mySource);
  648      *  int i = 0, codepoint;
  649      *  boolean passed = true;
  650      *  while(i<src.length()){
  651      *      codepoint = UTF16.charAt(src,i);
  652      *      i+= (codepoint>0xffff)? 2:1;
  653      *      if(!((CharsetEncoderICU) myConv).canEncode(codepoint)){
 654      *          passed = false;
 655      *      }
 656      *  }
 657      * </pre>
 658      *
 659      * @param codepoint Unicode code point as int value
 660      * @return true if a character can be converted
 661      */
 662     /* TODO This is different from Java's canEncode(char) API.
 663      * ICU's API should implement getUnicodeSet,
 664      * and override canEncode(char) which queries getUnicodeSet.
 665      * The getUnicodeSet should return a frozen UnicodeSet or use a fillin parameter, like ICU4C.
 666      */
 667     /*public boolean canEncode(int codepoint) {
 668         return true;
 669     }*/
 670     /**
 671      * Overrides super class method
 672      * @stable ICU 3.6
 673      */
 674     public boolean isLegalReplacement(byte[] repl) {
 675         return true;
 676     }
 677
 678     /*
 679      * Writes out the specified output bytes to the target byte buffer or to converter internal buffers.
 680      * @param cnv
 681      * @param bytesArray
 682      * @param bytesBegin
 683      * @param bytesLength
 684      * @param out
 685      * @param offsets
 686      * @param sourceIndex
 687      * @return A CoderResult object that contains the error result when an error occurs.
 688      */
 689     static final CoderResult fromUWriteBytes(CharsetEncoderICU cnv,
 690             byte[] bytesArray, int bytesBegin, int bytesLength, ByteBuffer out,
 691             IntBuffer offsets, int sourceIndex) {
 692
 693         //write bytes
 694         int obl = bytesLength;
 695         CoderResult cr = CoderResult.UNDERFLOW;
 696         int bytesLimit = bytesBegin + bytesLength;
 697         try {
 698             for (; bytesBegin < bytesLimit;) {
 699                 out.put(bytesArray[bytesBegin]);
 700                 bytesBegin++;
 701             }
 702             // success
 703             bytesLength = 0;
 704         } catch (BufferOverflowException ex) {
 705             cr = CoderResult.OVERFLOW;
 706         }
 707
 708         if (offsets != null) {
 709             while (obl > bytesLength) {
 710                 offsets.put(sourceIndex);
 711                 --obl;
 712             }
 713         }
 714         //write overflow
 715         cnv.errorBufferLength = bytesLimit - bytesBegin;
 716         if (cnv.errorBufferLength > 0) {
 717             int index = 0;
 718             while (bytesBegin < bytesLimit) {
 719                 cnv.errorBuffer[index++] = bytesArray[bytesBegin++];
 720             }
 721             cr = CoderResult.OVERFLOW;
 722         }
 723         return cr;
 724     }
 725
 726     /*
 727      * Returns the number of chars held in the converter's internal state
 728      * because more input is needed for completing the conversion. This function is
 729      * useful for mapping semantics of ICU's converter interface to those of iconv,
 730      * and this information is not needed for normal conversion.
 731      * @return The number of chars in the state. -1 if an error is encountered.
 732      */
 733     /*public*/int fromUCountPending() {
 734         if (preFromULength > 0) {
 735             return UTF16.getCharCount(preFromUFirstCP) + preFromULength;
 736         } else if (preFromULength < 0) {
 737             return -preFromULength;
 738         } else if (fromUChar32 > 0) {
 739             return 1;
 740         } else if (preFromUFirstCP > 0) {
 741             return UTF16.getCharCount(preFromUFirstCP);
 742         }
 743         return 0;
 744     }
 745
 746     /**
 747      *
 748      * @param source
 749      */
 750     private final void setSourcePosition(CharBuffer source) {
 751
 752         // ok was there input held in the previous invocation of encodeLoop
 753         // that resulted in output in this invocation?
 754         source.position(source.position() - fromUCountPending());
 755     }
 756
 757     /*
 758      * Write the codepage substitution character.
 759      * Subclasses to override this method.
 760      * For stateful converters, it is typically necessary to handle this
 761      * specificially for the converter in order to properly maintain the state.
 762      * @param source The input character buffer
 763      * @param target The output byte buffer
 764      * @param offsets
 765      * @return A CoderResult object that contains the error result when an error occurs.
 766      */
 767     CoderResult cbFromUWriteSub(CharsetEncoderICU encoder, CharBuffer source,
 768             ByteBuffer target, IntBuffer offsets) {
 769         CharsetICU cs = (CharsetICU) encoder.charset();
 770         byte[] sub = encoder.replacement();
 771         if (cs.subChar1 != 0 && encoder.invalidUCharBuffer[0] <= 0xff) {
 772             return CharsetEncoderICU.fromUWriteBytes(encoder,
 773                     new byte[] { cs.subChar1 }, 0, 1, target, offsets, source
 774                             .position());
 775         } else {
 776             return CharsetEncoderICU.fromUWriteBytes(encoder, sub, 0,
 777                     sub.length, target, offsets, source.position());
 778         }
 779     }
 780
 781     /*
 782      * Write the characters to target.
 783      * @param source The input character buffer
 784      * @param target The output byte buffer
 785      * @param offsets
 786      * @return A CoderResult object that contains the error result when an error occurs.
 787      */
 788     CoderResult cbFromUWriteUChars(CharsetEncoderICU encoder,
 789             CharBuffer source, ByteBuffer target, IntBuffer offsets) {
 790         CoderResult cr = CoderResult.UNDERFLOW;
 791
 792         /* This is a fun one.  Recursion can occur - we're basically going to
 793          * just retry shoving data through the same converter. Note, if you got
 794          * here through some kind of invalid sequence, you maybe should emit a
 795          * reset sequence of some kind. Since this IS an actual conversion,
 796          * take care that you've changed the callback or the data, or you'll
 797          * get an infinite loop.
 798          */
 799
 800         int oldTargetPosition = target.position();
 801         int offsetIndex = source.position();
 802
 803         cr = encoder.encode(source, target, null, false); /* no offsets and no flush */
 804
 805         if (offsets != null) {
 806             while (target.position() != oldTargetPosition) {
 807                 offsets.put(offsetIndex);
 808                 oldTargetPosition++;
 809             }
 810         }
 811
 812         /* Note, if you did something like used a stop subcallback, things would get interesting.
 813          * In fact, here's where we want to return the partially consumed in-source!
 814          */
 815         if (cr.isOverflow()) {
 816             /* Overflowed target. Now, we'll write into the charErrorBuffer.
 817              * It's a fixed size. If we overflow it...Hm
 818              */
 819
 820             /* start the new target at the first free slot in the error buffer */
 821             int errBuffLen = encoder.errorBufferLength;
 822             ByteBuffer newTarget = ByteBuffer.wrap(encoder.errorBuffer);
 823             newTarget.position(errBuffLen); /* set the position at the end of the error buffer */
 824             encoder.errorBufferLength = 0;
 825
 826             encoder.encode(source, newTarget, null, false);
 827
 828             encoder.errorBuffer = newTarget.array();
 829             encoder.errorBufferLength = newTarget.position();
 830         }
 831
 832         return cr;
 833     }
 834
 835     /**
 836      * <p>
 837      * Handles a common situation where a character has been read and it may be
 838      * a lead surrogate followed by a trail surrogate. This method can change
 839      * the source position and will modify fromUChar32.
 840      * </p>
 841      *
 842      * <p>
 843      * If <code>null</code> is returned, then there was success in reading a
 844      * surrogate pair, the codepoint is stored in <code>fromUChar32</code> and
 845      * <code>fromUChar32</code> should be reset (to 0) after being read.
 846      * </p>
 847      *
 848      * @param source
 849      *            The encoding source.
 850      * @param lead
 851      *            A character that may be the first in a surrogate pair.
 852      * @return <code>CoderResult.malformedForLength(1)</code> or
 853      *         <code>CoderResult.UNDERFLOW</code> if there is a problem, or
 854      *         <code>null</code> if there isn't.
 855      * @see #handleSurrogates(CharBuffer, char)
 856      * @see #handleSurrogates(char[], int, int, char)
 857      */
 858     final CoderResult handleSurrogates(CharBuffer source, char lead) {
 859         if (!UTF16.isLeadSurrogate(lead)) {
 860             fromUChar32 = lead;
 861             return CoderResult.malformedForLength(1);
 862         }
 863
 864         if (!source.hasRemaining()) {
 865             fromUChar32 = lead;
 866             return CoderResult.UNDERFLOW;
 867         }
 868
 869         char trail = source.get();
 870
 871         if (!UTF16.isTrailSurrogate(trail)) {
 872             fromUChar32 = lead;
 873             source.position(source.position() - 1);
 874             return CoderResult.malformedForLength(1);
 875         }
 876
 877         fromUChar32 = UCharacter.getCodePoint(lead, trail);
 878         return null;
 879     }
 880
 881     /**
 882      * <p>
 883      * Same as <code>handleSurrogates(CharBuffer, char)</code>, but with arrays. As an added
 884      * requirement, the calling method must also increment the index if this method returns
 885      * <code>null</code>.
 886      * </p>
 887      *
 888      *
 889      * @param source
 890      *            The encoding source.
 891      * @param lead
 892      *            A character that may be the first in a surrogate pair.
 893      * @return <code>CoderResult.malformedForLength(1)</code> or
 894      *         <code>CoderResult.UNDERFLOW</code> if there is a problem, or <code>null</code> if
 895      *         there isn't.
 896      * @see #handleSurrogates(CharBuffer, char)
 897      * @see #handleSurrogates(char[], int, int, char)
 898      */
 899     final CoderResult handleSurrogates(char[] sourceArray, int sourceIndex,
 900             int sourceLimit, char lead) {
 901         if (!UTF16.isLeadSurrogate(lead)) {
 902             fromUChar32 = lead;
 903             return CoderResult.malformedForLength(1);
 904         }
 905
 906         if (sourceIndex >= sourceLimit) {
 907             fromUChar32 = lead;
 908             return CoderResult.UNDERFLOW;
 909         }
 910
 911         char trail = sourceArray[sourceIndex];
 912
 913         if (!UTF16.isTrailSurrogate(trail)) {
 914             fromUChar32 = lead;
 915             return CoderResult.malformedForLength(1);
 916         }
 917
 918         fromUChar32 = UCharacter.getCodePoint(lead, trail);
 919         return null;
 920     }
 921
 922     /**
 923      * Returns the maxCharsPerByte value for the Charset that created this encoder.
 924      * @return maxCharsPerByte
 925      * @stable ICU 4.8
 926      */
 927     public final float maxCharsPerByte() {
 928         return ((CharsetICU)(this.charset())).maxCharsPerByte;
 929     }
 930
 931     /**
 932      * Calculates the size of a buffer for conversion from Unicode to a charset.
 933      * The calculated size is guaranteed to be sufficient for this conversion.
 934      *
 935      * It takes into account initial and final non-character bytes that are output
 936      * by some converters.
 937      * It does not take into account callbacks which output more than one charset
 938      * character sequence per call, like escape callbacks.
 939      * The default (substitution) callback only outputs one charset character sequence.
 940      *
 941      * @param length Number of chars to be converted.
 942      * @param maxCharSize Return value from maxBytesPerChar for the converter
 943      *                    that will be used.
 944      * @return Size of a buffer that will be large enough to hold the output of bytes
 945      *
 946      * @stable ICU 49
 947      */
 948     public static int getMaxBytesForString(int length, int maxCharSize) {
 949         return ((length + 10) * maxCharSize);
 950     }
 951
 952 }