jars/icu4j-4_2_1-src/src/com/ibm/icu/charset/CharsetDecoderICU.java

   1 /**\r
   2 *******************************************************************************\r
   3 * Copyright (C) 2006-2009, International Business Machines Corporation and    *\r
   4 * others. All Rights Reserved.                                                *\r
   5 *******************************************************************************\r
   6 *\r
   7 *******************************************************************************\r
   8 */ \r
   9 \r
  10 package com.ibm.icu.charset;\r
  11 \r
  12 import java.nio.ByteBuffer;\r
  13 import java.nio.CharBuffer;\r
  14 import java.nio.IntBuffer;\r
  15 import java.nio.charset.CharsetDecoder;\r
  16 import java.nio.charset.CoderResult;\r
  17 import java.nio.charset.CodingErrorAction;\r
  18 \r
  19 import com.ibm.icu.impl.Assert;\r
  20 \r
/**
 * An abstract class that provides framework methods of decoding operations for concrete
 * subclasses.
 * In the future this class will contain API that will implement converter semantics of ICU4C.
 * @stable ICU 3.6
 */
  27 public abstract class CharsetDecoderICU extends CharsetDecoder{ \r
  28 \r
    /* Converter status word; cleared by implReset(). NOTE(review): its meaning is presumably converter-specific - confirm in subclasses. */
    int    toUnicodeStatus;
    /* Input bytes of the sequence currently being handled; the first toULength bytes are
     * copied into invalidCharBuffer before an error callback is invoked. */
    byte[] toUBytesArray = new byte[128];
    /* Not referenced by the code visible in this class. */
    int    toUBytesBegin = 0;
    /* Number of valid bytes in toUBytesArray; a value > 0 at end of input is reported as malformed input. */
    int    toULength;
    /* Output chars that did not fit into the target buffer; decode() drains them before converting new input. */
    char[] charErrorBufferArray = new char[128];
    /* Number of pending chars in charErrorBufferArray. */
    int    charErrorBufferLength;
    /* Cleared by implReset(); not otherwise used by the code visible in this class. */
    int    charErrorBufferBegin;
    /* The offending input sequence (one byte per char) handed to the error callback. */
    char[] invalidCharBuffer = new char[128];
    /* Number of valid entries in invalidCharBuffer. */
    int    invalidCharLength;
    
    /* maximum number of indexed bytes */
    private static final int EXT_MAX_BYTES = 0x1f;

    /* stored source bytes of a partial match, kept so the match can be continued or replayed */
    byte[] preToUArray = new byte[EXT_MAX_BYTES];
    /* Start offset used when bytes are saved back into preToUArray; cleared by implReset(). */
    int    preToUBegin;
    int    preToULength;       /* negative: replay */
    int    preToUFirstLength;  /* length of first character */
    /* Cleared by implReset(). NOTE(review): presumably a converter-specific mode/state - confirm in subclasses. */
    int mode;
    
    /* Context object passed to the error callbacks; replaced by setToUCallback(). */
    Object toUContext = null;
    /* Callbacks for unmappable and malformed input; both default to the STOP callback. */
    private CharsetCallback.Decoder onUnmappableCharacter = CharsetCallback.TO_U_CALLBACK_STOP;
    private CharsetCallback.Decoder onMalformedInput = CharsetCallback.TO_U_CALLBACK_STOP;
  52     CharsetCallback.Decoder toCharErrorBehaviour = new CharsetCallback.Decoder() {\r
  53         public CoderResult call(CharsetDecoderICU decoder, Object context, ByteBuffer source,\r
  54                 CharBuffer target, IntBuffer offsets, char[] buffer, int length, CoderResult cr) {\r
  55             if (cr.isUnmappable()) {\r
  56                 return onUnmappableCharacter.call(decoder, context, source, target, offsets, buffer,\r
  57                         length, cr);\r
  58             } else /* if (cr.isMalformed()) */ {\r
  59                 return onMalformedInput.call(decoder, context, source, target, offsets, buffer,\r
  60                         length, cr);\r
  61             }\r
  62             // return CharsetCallback.TO_U_CALLBACK_STOP.call(decoder, context, source, target, offsets, buffer, length, cr);\r
  63         }\r
  64     };\r
  65                                               \r
    // Re-entrancy guards: implOnMalformedInput and implOnUnmappableCharacter call the
    // superclass setters (super.onMalformedInput / super.onUnmappableCharacter) while the
    // matching flag is set, and return immediately if they are entered again during that
    // call, which keeps them from recursing without end.
    private boolean malformedInputCalled = false;
    private boolean unmappableCharacterCalled = false;
  69     \r
  70     /*\r
  71      * Construct a CharsetDecorderICU based on the information provided from a CharsetICU object.\r
  72      * \r
  73      * @param cs The CharsetICU object containing information about how to charset to decode.\r
  74      */\r
  75     CharsetDecoderICU(CharsetICU cs) {\r
  76         super(cs, (float) (1/(float)cs.maxCharsPerByte), cs.maxCharsPerByte);\r
  77     }\r
  78 \r
  79     /*\r
  80      * Is this Decoder allowed to use fallbacks? A fallback mapping is a mapping\r
  81      * that will convert a byte sequence to a Unicode codepoint sequence, but\r
  82      * the encoded Unicode codepoint sequence will round trip convert to a different\r
  83      * byte sequence. In ICU, this is can be called a reverse fallback.\r
  84      * @return A boolean\r
  85      */\r
  86     final boolean isFallbackUsed() {\r
  87         return true;\r
  88     }\r
  89     \r
  90     /**\r
  91      * Fallback is currently always used by icu4j decoders.\r
  92      */\r
  93     static final boolean isToUUseFallback() {\r
  94         return isToUUseFallback(true);\r
  95     }\r
  96     \r
  97     /**\r
  98      * Fallback is currently always used by icu4j decoders.\r
  99      */\r
 100     static final boolean isToUUseFallback(boolean iUseFallback) {\r
 101         return true;\r
 102     }\r
 103     \r
 104     /**\r
 105      * Sets the action to be taken if an illegal sequence is encountered\r
 106      * \r
 107      * @param newAction action to be taken\r
 108      * @exception IllegalArgumentException\r
 109      * @stable ICU 3.6\r
 110      */\r
 111     protected final void implOnMalformedInput(CodingErrorAction newAction) {\r
 112         // don't run infinitely\r
 113         if (malformedInputCalled)\r
 114             return;\r
 115         \r
 116         // if we get a replace, do not let the nio replace\r
 117         if (newAction == CodingErrorAction.REPLACE) {\r
 118             malformedInputCalled = true;\r
 119             super.onMalformedInput(CodingErrorAction.IGNORE);\r
 120             malformedInputCalled = false;\r
 121         }\r
 122         \r
 123         onMalformedInput = getCallback(newAction);\r
 124     }\r
 125     \r
 126     /**\r
 127      * Sets the action to be taken if an illegal sequence is encountered\r
 128      * \r
 129      * @param newAction action to be taken\r
 130      * @exception IllegalArgumentException\r
 131      * @stable ICU 3.6\r
 132      */\r
 133     protected final void implOnUnmappableCharacter(CodingErrorAction newAction) {\r
 134         // dont run infinitely\r
 135         if (unmappableCharacterCalled)\r
 136             return;\r
 137         \r
 138         // if we get a replace, do not let the nio replace\r
 139         if (newAction == CodingErrorAction.REPLACE) {\r
 140             unmappableCharacterCalled = true;\r
 141             super.onUnmappableCharacter(CodingErrorAction.IGNORE);\r
 142             unmappableCharacterCalled = false;\r
 143         }\r
 144         \r
 145         onUnmappableCharacter = getCallback(newAction);\r
 146     }\r
 147     \r
 148     /**\r
 149      * Sets the callback encoder method and context to be used if an illegal sequence is encounterd.\r
 150      * You would normally call this twice to set both the malform and unmappable error. In this case,\r
 151      * newContext should remain the same since using a different newContext each time will negate the last\r
 152      * one used.\r
 153      * @param err CoderResult\r
 154      * @param newCallback CharsetCallback.Encoder\r
 155      * @param newContext Object\r
 156      * @stable ICU 4.0\r
 157      */\r
 158     public final void setToUCallback(CoderResult err, CharsetCallback.Decoder newCallback, Object newContext) {\r
 159         if (err.isMalformed()) {\r
 160             onMalformedInput = newCallback;\r
 161         } else if (err.isUnmappable()) {\r
 162             onUnmappableCharacter = newCallback;\r
 163         } else {\r
 164             /* Error: Only malformed and unmappable are handled. */\r
 165         }\r
 166         \r
 167         if (toUContext == null || !toUContext.equals(newContext)) {\r
 168             toUContext = newContext;\r
 169         }\r
 170     }\r
 171     \r
 172     private static CharsetCallback.Decoder getCallback(CodingErrorAction action){\r
 173         if(action==CodingErrorAction.REPLACE){\r
 174             return CharsetCallback.TO_U_CALLBACK_SUBSTITUTE;\r
 175         }else if(action==CodingErrorAction.IGNORE){\r
 176             return CharsetCallback.TO_U_CALLBACK_SKIP;\r
 177         }else /* if(action==CodingErrorAction.REPORT) */ {\r
 178             return CharsetCallback.TO_U_CALLBACK_STOP;\r
 179         }\r
 180     }\r
 181     private final ByteBuffer EMPTY = ByteBuffer.allocate(0);\r
 182     /**\r
 183      * Flushes any characters saved in the converter's internal buffer and\r
 184      * resets the converter.\r
 185      * @param out action to be taken\r
 186      * @return result of flushing action and completes the decoding all input. \r
 187      *         Returns CoderResult.UNDERFLOW if the action succeeds.\r
 188      * @stable ICU 3.6\r
 189      */\r
 190     protected final CoderResult implFlush(CharBuffer out) {\r
 191         return decode(EMPTY, out, null, true);\r
 192     }\r
 193     \r
 194     /**\r
 195      * Resets the to Unicode mode of converter\r
 196      * @stable ICU 3.6\r
 197      */\r
 198     protected void implReset() {\r
 199         toUnicodeStatus = 0 ;\r
 200         toULength = 0;\r
 201         charErrorBufferLength = 0;\r
 202         charErrorBufferBegin = 0;\r
 203         \r
 204         /* store previous UChars/chars to continue partial matches */\r
 205         preToUBegin = 0;\r
 206         preToULength = 0;       /* negative: replay */\r
 207         preToUFirstLength = 0; \r
 208 \r
 209         mode = 0;\r
 210     }\r
 211       \r
 212     /**\r
 213      * Decodes one or more bytes. The default behaviour of the converter\r
 214      * is stop and report if an error in input stream is encountered. \r
 215      * To set different behaviour use @see CharsetDecoder.onMalformedInput()\r
 216      * This  method allows a buffer by buffer conversion of a data stream.  \r
 217      * The state of the conversion is saved between calls to convert.  \r
 218      * Among other things, this means multibyte input sequences can be \r
 219      * split between calls. If a call to convert results in an Error, the \r
 220      * conversion may be continued by calling convert again with suitably \r
 221      * modified parameters.All conversions should be finished with a call to \r
 222      * the flush method.\r
 223      * @param in buffer to decode\r
 224      * @param out buffer to populate with decoded result\r
 225      * @return Result of decoding action. Returns CoderResult.UNDERFLOW if the decoding\r
 226      *         action succeeds or more input is needed for completing the decoding action.\r
 227      * @stable ICU 3.6\r
 228      */\r
 229     protected CoderResult decodeLoop(ByteBuffer in,CharBuffer out){\r
 230         if(in.remaining() < toUCountPending()){\r
 231             return CoderResult.UNDERFLOW;\r
 232         }\r
 233 //        if (!in.hasRemaining()) {\r
 234 //            toULength = 0;\r
 235 //            return CoderResult.UNDERFLOW;\r
 236 //        }\r
 237         \r
 238         in.position(in.position() + toUCountPending());\r
 239         \r
 240         /* do the conversion */\r
 241         CoderResult ret = decode(in, out, null, false);\r
 242 \r
 243         // ok was there input held in the previous invocation of decodeLoop \r
 244         // that resulted in output in this invocation?\r
 245         in.position(in.position() - toUCountPending());\r
 246         \r
 247         return ret;\r
 248     }\r
 249 \r
    /*
     * Implements the ICU semantic for decode operation. Concrete converters supply
     * the actual byte-to-char conversion here; toUnicodeWithCallback() calls this
     * method repeatedly and handles the results it returns.
     * @param in The input byte buffer
     * @param out The output character buffer
     * @param offsets Offsets buffer as passed by the caller; may be null
     * @param flush The flush flag as passed by the caller (false while stored bytes are replayed)
     * @return Result of decoding action. Returns CoderResult.UNDERFLOW if the decoding
     *         action succeeds or more input is needed for completing the decoding action.
     */
    abstract CoderResult decodeLoop(ByteBuffer in, CharBuffer out, IntBuffer offsets, boolean flush);
 258     \r
 259     /*\r
 260      * Implements the ICU semantic for decode operation\r
 261      * @param source The input byte buffer\r
 262      * @param target The output character buffer\r
 263      * @param offsets\r
 264      * @param flush true if, and only if, the invoker can provide no\r
 265      *  additional input bytes beyond those in the given buffer.\r
 266      * @return Result of decoding action. Returns CoderResult.UNDERFLOW if the decoding\r
 267      *         action succeeds or more input is needed for completing the decoding action.\r
 268      */\r
 269     final CoderResult decode(ByteBuffer source, CharBuffer target, IntBuffer offsets, boolean flush) {\r
 270     \r
 271         /* check parameters */\r
 272         if (target == null || source == null) {\r
 273             throw new IllegalArgumentException();\r
 274         }\r
 275         \r
 276         /*\r
 277          * Make sure that the buffer sizes do not exceed the number range for\r
 278          * int32_t because some functions use the size (in units or bytes)\r
 279          * rather than comparing pointers, and because offsets are int32_t values.\r
 280          *\r
 281          * size_t is guaranteed to be unsigned and large enough for the job.\r
 282          *\r
 283          * Return with an error instead of adjusting the limits because we would\r
 284          * not be able to maintain the semantics that either the source must be\r
 285          * consumed or the target filled (unless an error occurs).\r
 286          * An adjustment would be sourceLimit=t+0x7fffffff; for example.\r
 287          */\r
 288             /*agljport:fix\r
 289         if(\r
 290             ((size_t)(sourceLimit-s)>(size_t)0x7fffffff && sourceLimit>s) ||\r
 291             ((size_t)(targetLimit-t)>(size_t)0x3fffffff && targetLimit>t)\r
 292         ) {\r
 293             *err=U_ILLEGAL_ARGUMENT_ERROR;\r
 294             return;\r
 295         }\r
 296             */\r
 297         \r
 298         /* flush the target overflow buffer */\r
 299         if (charErrorBufferLength > 0) {\r
 300             int i = 0;\r
 301             do {\r
 302                 if (!target.hasRemaining()) {\r
 303                     /* the overflow buffer contains too much, keep the rest */\r
 304                     int j = 0;\r
 305 \r
 306                     do {\r
 307                         charErrorBufferArray[j++] = charErrorBufferArray[i++];\r
 308                     } while (i < charErrorBufferLength);\r
 309 \r
 310                     charErrorBufferLength = (byte) j;\r
 311                     return CoderResult.OVERFLOW;\r
 312                 }\r
 313 \r
 314                 /* copy the overflow contents to the target */\r
 315                 target.put(charErrorBufferArray[i++]);\r
 316                 if (offsets != null) {\r
 317                     offsets.put(-1); /* no source index available for old output */\r
 318                 }\r
 319             } while (i < charErrorBufferLength);\r
 320 \r
 321             /* the overflow buffer is completely copied to the target */\r
 322             charErrorBufferLength = 0;\r
 323         }\r
 324     \r
 325         if (!flush && !source.hasRemaining() && preToULength >= 0) {\r
 326             /* the overflow buffer is emptied and there is no new input: we are done */\r
 327             return CoderResult.UNDERFLOW;\r
 328         }\r
 329     \r
 330         /*\r
 331          * Do not simply return with a buffer overflow error if\r
 332          * !flush && t==targetLimit\r
 333          * because it is possible that the source will not generate any output.\r
 334          * For example, the skip callback may be called;\r
 335          * it does not output anything.\r
 336          */\r
 337         \r
 338         return toUnicodeWithCallback(source, target, offsets, flush);\r
 339     }\r
 340 \r
 341     /* Currently, we are not using offsets in ICU4J. */\r
 342     /* private void updateOffsets(IntBuffer offsets,int length, int sourceIndex, int errorInputLength) {\r
 343         int limit;\r
 344         int delta, offset;\r
 345 \r
 346         if(sourceIndex>=0) {\r
 347             /*\r
 348              * adjust each offset by adding the previous sourceIndex\r
 349              * minus the length of the input sequence that caused an\r
 350              * error, if any\r
 351              */\r
 352        /*     delta=sourceIndex-errorInputLength;\r
 353         } else {\r
 354             /*\r
 355              * set each offset to -1 because this conversion function\r
 356              * does not handle offsets\r
 357              */\r
 358         /*    delta=-1;\r
 359         }\r
 360         limit=offsets.position()+length;\r
 361         if(delta==0) {\r
 362             /* most common case, nothing to do */\r
 363         /* } else if(delta>0) {\r
 364             /* add the delta to each offset (but not if the offset is <0) */\r
 365         /*    while(offsets.position()<limit) {\r
 366                 offset=offsets.get(offsets.position());\r
 367                 if(offset>=0) {\r
 368                     offsets.put(offset+delta);\r
 369                 }\r
 370                 //FIXME: ++offsets;\r
 371             }\r
 372         } else /* delta<0 */ /* {\r
 373             /*\r
 374              * set each offset to -1 because this conversion function\r
 375              * does not handle offsets\r
 376              * or the error input sequence started in a previous buffer\r
 377              */\r
 378         /*    while(offsets.position()<limit) {\r
 379                 offsets.put(-1);\r
 380             }\r
 381         }\r
 382     } */\r
 383     final CoderResult toUnicodeWithCallback(ByteBuffer source, CharBuffer target, IntBuffer offsets, boolean flush){\r
 384         \r
 385         int sourceIndex;\r
 386         int errorInputLength;\r
 387         boolean converterSawEndOfInput, calledCallback;\r
 388         //int t=target.position();\r
 389         int s=source.position();\r
 390         /* variables for m:n conversion */\r
 391         ByteBuffer replayArray = ByteBuffer.allocate(EXT_MAX_BYTES);\r
 392         int replayArrayIndex = 0;\r
 393             \r
 394         ByteBuffer realSource=null;\r
 395         boolean realFlush=false;\r
 396         int realSourceIndex=0;\r
 397     \r
 398 \r
 399         CoderResult cr = CoderResult.UNDERFLOW;\r
 400         \r
 401         /* get the converter implementation function */\r
 402         sourceIndex=0;\r
 403 \r
 404         if(preToULength>=0) {\r
 405             /* normal mode */\r
 406         } else {\r
 407             /*\r
 408              * Previous m:n conversion stored source units from a partial match\r
 409              * and failed to consume all of them.\r
 410              * We need to "replay" them from a temporary buffer and convert them first.\r
 411              */\r
 412             realSource=source;\r
 413             realFlush=flush;\r
 414             realSourceIndex=sourceIndex;\r
 415             //UConverterUtility.uprv_memcpy(replayArray, replayBegin, preToUArray, preToUBegin, -preToULength);\r
 416             replayArray.put(preToUArray,0, -preToULength);\r
 417             source=replayArray;\r
 418             source.position(0);\r
 419             source.limit(replayArrayIndex-preToULength);\r
 420             flush=false;\r
 421             sourceIndex=-1;\r
 422             preToULength=0;\r
 423         }\r
 424     \r
 425         /*\r
 426          * loop for conversion and error handling\r
 427          *\r
 428          * loop {\r
 429          *   convert\r
 430          *   loop {\r
 431          *     update offsets\r
 432          *     handle end of input\r
 433          *     handle errors/call callback\r
 434          *   }\r
 435          * }\r
 436          */\r
 437         for(;;) {\r
 438 \r
 439             /* convert */\r
 440             cr = decodeLoop(source, target, offsets, flush);\r
 441 \r
 442             /*\r
 443              * set a flag for whether the converter\r
 444              * successfully processed the end of the input\r
 445              *\r
 446              * need not check cnv->preToULength==0 because a replay (<0) will cause\r
 447              * s<sourceLimit before converterSawEndOfInput is checked\r
 448              */\r
 449             converterSawEndOfInput= (cr.isUnderflow() && flush && source.remaining()==0 && toULength == 0);\r
 450             \r
 451             /* no callback called yet for this iteration */\r
 452             calledCallback=false;\r
 453     \r
 454             /* no sourceIndex adjustment for conversion, only for callback output */\r
 455             errorInputLength=0;\r
 456     \r
 457             /*\r
 458              * loop for offsets and error handling\r
 459              *\r
 460              * iterates at most 3 times:\r
 461              * 1. to clean up after the conversion function\r
 462              * 2. after the callback\r
 463              * 3. after the callback again if there was truncated input\r
 464              */\r
 465             for(;;) {\r
 466                 /* update offsets if we write any */\r
 467                 /* Currently offsets are not being used in ICU4J */\r
 468                 /* if(offsets!=null) {\r
 469 \r
 470                     int length=(target.position()-t);\r
 471                     if(length>0) {\r
 472                         updateOffsets(offsets, length, sourceIndex, errorInputLength);\r
 473     \r
 474                                             \r
 475                         /*\r
 476                          * if a converter handles offsets and updates the offsets\r
 477                          * pointer at the end, then pArgs->offset should not change\r
 478                          * here;\r
 479                          * however, some converters do not handle offsets at all\r
 480                          * (sourceIndex<0) or may not update the offsets pointer\r
 481                          */\r
 482                         //TODO: pArgs->offsets=offsets+=length;\r
 483                   /*  }\r
 484     \r
 485                     if(sourceIndex>=0) {\r
 486                         sourceIndex+=(source.position()-s);\r
 487                     }\r
 488                                     \r
 489                 } */\r
 490     \r
 491                 if(preToULength<0) {\r
 492                     /*\r
 493                      * switch the source to new replay units (cannot occur while replaying)\r
 494                      * after offset handling and before end-of-input and callback handling\r
 495                      */\r
 496                     if(realSource==null)\r
 497                                     {\r
 498                         realSource=source;\r
 499                         realFlush=flush;\r
 500                         realSourceIndex=sourceIndex;\r
 501     \r
 502                         //UConverterUtility.uprv_memcpy(replayArray, replayBegin, preToUArray, preToUBegin, -preToULength);\r
 503                         replayArray.put(preToUArray,0, -preToULength);\r
 504                         // reset position\r
 505                         replayArray.position(0);\r
 506 \r
 507                         source=replayArray;\r
 508                         source.limit(replayArrayIndex-preToULength);\r
 509                         flush=false;\r
 510                         if((sourceIndex+=preToULength)<0) {\r
 511                             sourceIndex=-1;\r
 512                         }\r
 513     \r
 514                         preToULength=0;\r
 515                     } else {\r
 516                         /* see implementation note before _fromUnicodeWithCallback() */\r
 517                         //agljport:todo U_ASSERT(realSource==NULL);\r
 518                        Assert.assrt(realSource==null);\r
 519                     }\r
 520                 }\r
 521     \r
 522                 /* update pointers */\r
 523                 s=source.position();\r
 524                 //t=target.position();\r
 525     \r
 526                 if(cr.isUnderflow()) {\r
 527                     if(s<source.limit())\r
 528                                     {\r
 529                         /*\r
 530                          * continue with the conversion loop while there is still input left\r
 531                          * (continue converting by breaking out of only the inner loop)\r
 532                          */\r
 533                         break;\r
 534                     } else if(realSource!=null) {\r
 535                         /* switch back from replaying to the real source and continue */\r
 536                         source = realSource;\r
 537                         flush=realFlush;\r
 538                         sourceIndex=realSourceIndex;\r
 539                         realSource=null;\r
 540                         break;\r
 541                     } else if(flush && toULength>0) {\r
 542                         /*\r
 543                          * the entire input stream is consumed\r
 544                          * and there is a partial, truncated input sequence left\r
 545                          */\r
 546     \r
 547                         /* inject an error and continue with callback handling */\r
 548                         cr = CoderResult.malformedForLength(toULength);\r
 549                         calledCallback=false; /* new error condition */\r
 550                     } else {\r
 551                         /* input consumed */\r
 552                         if(flush) {\r
 553                             /*\r
 554                              * return to the conversion loop once more if the flush\r
 555                              * flag is set and the conversion function has not\r
 556                              * successfully processed the end of the input yet\r
 557                              *\r
 558                              * (continue converting by breaking out of only the inner loop)\r
 559                              */\r
 560                             if(!converterSawEndOfInput) {\r
 561                                 break;\r
 562                             }\r
 563     \r
 564                             /* reset the converter without calling the callback function */\r
 565                             implReset();\r
 566                         }\r
 567     \r
 568                         /* done successfully */\r
 569                         return cr;\r
 570                     }\r
 571                 }\r
 572     \r
 573                 /* U_FAILURE(*err) */\r
 574                 {\r
 575     \r
 576                     if( calledCallback || cr.isOverflow() ||\r
 577                         (cr.isMalformed() && cr.isUnmappable())\r
 578                       ) {\r
 579                         /*\r
 580                          * the callback did not or cannot resolve the error:\r
 581                          * set output pointers and return\r
 582                          *\r
 583                          * the check for buffer overflow is redundant but it is\r
 584                          * a high-runner case and hopefully documents the intent\r
 585                          * well\r
 586                          *\r
 587                          * if we were replaying, then the replay buffer must be\r
 588                          * copied back into the UConverter\r
 589                          * and the real arguments must be restored\r
 590                          */\r
 591                         if(realSource!=null) {\r
 592                             int length;\r
 593                             Assert.assrt(preToULength==0);\r
 594                             length=(int)(source.limit()-source.position());\r
 595                             if(length>0) {\r
 596                                 //UConverterUtility.uprv_memcpy(preToUArray, preToUBegin, pArgs.sourceArray, pArgs.sourceBegin, length);\r
 597                                 source.get(preToUArray, preToUBegin, length);\r
 598                                 preToULength=(byte)-length;\r
 599                             }\r
 600     \r
 601                             source=realSource;\r
 602                             flush=realFlush;\r
 603                         }\r
 604                         return cr;\r
 605                     }\r
 606                 }\r
 607     \r
 608                 /* copy toUBytes[] to invalidCharBuffer[] */\r
 609                 errorInputLength=invalidCharLength=toULength;\r
 610                 if(errorInputLength>0) {\r
 611                     copy(toUBytesArray, 0, invalidCharBuffer, 0, errorInputLength);\r
 612                 }\r
 613     \r
 614                 /* set the converter state to deal with the next character */\r
 615                 toULength=0;\r
 616     \r
 617                 /* call the callback function */\r
 618                 cr = toCharErrorBehaviour.call(this, toUContext, source, target, offsets, invalidCharBuffer, errorInputLength, cr);\r
 619                 /*\r
 620                  * loop back to the offset handling\r
 621                  *\r
 622                  * this flag will indicate after offset handling\r
 623                  * that a callback was called;\r
 624                  * if the callback did not resolve the error, then we return\r
 625                  */\r
 626                 calledCallback=true;\r
 627             }\r
 628         }\r
 629     }\r
 630 \r
 631     /*\r
 632      * Returns the number of chars held in the converter's internal state\r
 633      * because more input is needed for completing the conversion. This function is \r
 634      * useful for mapping semantics of ICU's converter interface to those of iconv,\r
 635      * and this information is not needed for normal conversion.\r
 636      * @return The number of chars in the state. -1 if an error is encountered.\r
 637      */\r
 638     /*public*/ int toUCountPending()    {\r
 639         if(preToULength > 0){\r
 640             return preToULength ;\r
 641         } else if(preToULength < 0){\r
 642             return -preToULength;\r
 643         } else if(toULength > 0){\r
 644             return toULength;\r
 645         } else {\r
 646             return 0;\r
 647         }\r
 648     }\r
 649     \r
 650 \r
 651     private void copy(byte[] src, int srcOffset, char[] dst, int dstOffset, int length) {\r
 652         for(int i=srcOffset; i<length; i++){\r
 653             dst[dstOffset++]=(char)(src[srcOffset++] & UConverterConstants.UNSIGNED_BYTE_MASK);\r
 654         }\r
 655     }\r
 656     /*\r
 657      * ONLY used by ToU callback functions.\r
 658      * This function will write out the specified characters to the target\r
 659      * character buffer.\r
 660      * @return A CoderResult object that contains the error result when an error occurs.\r
 661      */\r
 662     static final CoderResult toUWriteUChars( CharsetDecoderICU cnv,\r
 663                                                 char[] ucharsArray, int ucharsBegin, int length,  \r
 664                                                 CharBuffer target, IntBuffer offsets, int sourceIndex) {\r
 665         \r
 666         CoderResult cr = CoderResult.UNDERFLOW;\r
 667         \r
 668         /* write UChars */\r
 669         if(offsets==null) {\r
 670             while(length>0 && target.hasRemaining()) {\r
 671                 target.put(ucharsArray[ucharsBegin++]);\r
 672                 --length;\r
 673             }\r
 674 \r
 675         } else {\r
 676             /* output with offsets */\r
 677             while(length>0 && target.hasRemaining()) {\r
 678                 target.put(ucharsArray[ucharsBegin++]);\r
 679                 offsets.put(sourceIndex);\r
 680                 --length;\r
 681             }\r
 682         }\r
 683         /* write overflow */\r
 684         if(length>0) {        \r
 685             cnv.charErrorBufferLength= 0;\r
 686             cr = CoderResult.OVERFLOW;\r
 687             do {\r
 688                 cnv.charErrorBufferArray[cnv.charErrorBufferLength++]=ucharsArray[ucharsBegin++];\r
 689             } while(--length>0);\r
 690         }\r
 691         return cr;\r
 692     }\r
 693     /*\r
 694      * This function will write out the Unicode substitution character to the\r
 695      * target character buffer.\r
 696      * Sub classes to override this method if required\r
 697      * @param decoder\r
 698      * @param source\r
 699      * @param target\r
 700      * @param offsets\r
 701      * @return A CoderResult object that contains the error result when an error occurs.\r
 702      */\r
 703     /* Note: Currently, this method is not being used because the callback method calls toUWriteUChars with\r
 704      * the substitution characters. Will leave in here for the time being. To be removed later. (4.0)\r
 705      */\r
 706      /*CoderResult cbToUWriteSub(CharsetDecoderICU decoder, \r
 707                                         ByteBuffer source, CharBuffer target, \r
 708                                         IntBuffer offsets){\r
 709         String sub = decoder.replacement();\r
 710         CharsetICU cs = (CharsetICU) decoder.charset();\r
 711         if (decoder.invalidCharLength==1 && cs.subChar1 != 0x00) {\r
 712             char[] subArr = new char[] { 0x1a };\r
 713             return CharsetDecoderICU.toUWriteUChars(decoder, subArr, 0, sub\r
 714                     .length(), target, offsets, source.position());\r
 715         } else {\r
 716             return CharsetDecoderICU.toUWriteUChars(decoder, sub.toCharArray(),\r
 717                     0, sub.length(), target, offsets, source.position());\r
 718             \r
 719         }\r
 720     }*/\r
 721 }\r