jars/icu4j-4_4_2-src/main/classes/charset/src/com/ibm/icu/charset/CharsetMBCS.java

   1 /**\r
   2  *******************************************************************************\r
   3  * Copyright (C) 2006-2010, International Business Machines Corporation and    *\r
   4  * others. All Rights Reserved.                                                *\r
   5  *******************************************************************************\r
   6  *\r
   7  *******************************************************************************\r
   8  */\r
   9 package com.ibm.icu.charset;\r
  10 \r
  11 import java.io.BufferedInputStream;\r
  12 import java.io.IOException;\r
  13 import java.io.InputStream;\r
  14 import java.nio.Buffer;\r
  15 import java.nio.BufferOverflowException;\r
  16 import java.nio.ByteBuffer;\r
  17 import java.nio.CharBuffer;\r
  18 import java.nio.IntBuffer;\r
  19 import java.nio.charset.CharsetDecoder;\r
  20 import java.nio.charset.CharsetEncoder;\r
  21 import java.nio.charset.CoderResult;\r
  22 \r
  23 import com.ibm.icu.charset.UConverterSharedData.UConverterType;\r
  24 import com.ibm.icu.impl.ICUData;\r
  25 import com.ibm.icu.impl.ICUResourceBundle;\r
  26 import com.ibm.icu.impl.InvalidFormatException;\r
  27 import com.ibm.icu.lang.UCharacter;\r
  28 import com.ibm.icu.text.UTF16;\r
  29 import com.ibm.icu.text.UnicodeSet;\r
  30 \r
  31 class CharsetMBCS extends CharsetICU {\r
  32 \r
  33     private byte[] fromUSubstitution = null;\r
  34     UConverterSharedData sharedData = null;\r
  35     private static final int MAX_VERSION_LENGTH = 4;\r
  36     \r
  37     // these variables are used in getUnicodeSet() and may be changed in future\r
  38     // typedef enum UConverterSetFilter {\r
  39       static final int UCNV_SET_FILTER_NONE = 1;\r
  40       static final int UCNV_SET_FILTER_DBCS_ONLY = 2;\r
  41       static final int UCNV_SET_FILTER_2022_CN = 3;\r
  42       static final int UCNV_SET_FILTER_SJIS= 4 ;\r
  43       static final int UCNV_SET_FILTER_GR94DBCS = 5;\r
  44       static final int UCNV_SET_FILTER_HZ = 6;\r
  45       static final int UCNV_SET_FILTER_COUNT = 7;\r
  46    //  } UConverterSetFilter;\r
  47 \r
  48     /**\r
  49      * Fallbacks to Unicode are stored outside the normal state table and code point structures in a vector of items of\r
  50      * this type. They are sorted by offset.\r
  51      */\r
  52     final class MBCSToUFallback {\r
  53         int offset;\r
  54         int codePoint;\r
  55     }\r
  56 \r
  57     /**\r
  58      * This is the MBCS part of the UConverterTable union (a runtime data structure). It keeps all the per-converter\r
  59      * data and points into the loaded mapping tables.\r
  60      */\r
  61     static final class UConverterMBCSTable {\r
  62         /* toUnicode */\r
  63         short countStates;\r
  64         byte dbcsOnlyState;\r
  65         boolean stateTableOwned;\r
  66         int countToUFallbacks;\r
  67 \r
  68         int stateTable[/* countStates */][/* 256 */];\r
  69         int swapLFNLStateTable[/* countStates */][/* 256 */]; /* for swaplfnl */\r
  70         char unicodeCodeUnits[/* countUnicodeResults */];\r
  71         MBCSToUFallback toUFallbacks[/* countToUFallbacks */];\r
  72 \r
  73         /* fromUnicode */\r
  74         char fromUnicodeTable[];\r
  75         byte fromUnicodeBytes[];\r
  76         byte swapLFNLFromUnicodeBytes[]; /* for swaplfnl */\r
  77         int fromUBytesLength;\r
  78         short outputType, unicodeMask;\r
  79 \r
  80         /* converter name for swaplfnl */\r
  81         String swapLFNLName;\r
  82 \r
  83         /* extension data */\r
  84         UConverterSharedData baseSharedData;\r
  85         // int extIndexes[];\r
  86         ByteBuffer extIndexes; // create int[] view etc. as needed\r
  87         \r
  88         CharBuffer mbcsIndex;                     /* for fast conversion from most of BMP to MBCS (utf8Friendly data) */\r
  89         char sbcsIndex[/* SBCS_FAST_LIMIT>>6 */]; /* for fast conversion from low BMP to SBCS (utf8Friendly data) */\r
  90         boolean utf8Friendly;                     /* for utf8Friendly data */\r
  91         char maxFastUChar;                        /* for utf8Friendly data */\r
  92 \r
  93         /* roundtrips */\r
  94         long asciiRoundtrips;\r
  95 \r
  96         UConverterMBCSTable() {\r
  97             utf8Friendly = false;\r
  98             mbcsIndex = null;\r
  99             sbcsIndex = new char[SBCS_FAST_LIMIT>>6];\r
 100         }\r
 101 \r
 102         /*\r
 103          * UConverterMBCSTable(UConverterMBCSTable t) { countStates = t.countStates; dbcsOnlyState = t.dbcsOnlyState;\r
 104          * stateTableOwned = t.stateTableOwned; countToUFallbacks = t.countToUFallbacks; stateTable = t.stateTable;\r
 105          * swapLFNLStateTable = t.swapLFNLStateTable; unicodeCodeUnits = t.unicodeCodeUnits; toUFallbacks =\r
 106          * t.toUFallbacks; fromUnicodeTable = t.fromUnicodeTable; fromUnicodeBytes = t.fromUnicodeBytes;\r
 107          * swapLFNLFromUnicodeBytes = t.swapLFNLFromUnicodeBytes; fromUBytesLength = t.fromUBytesLength; outputType =\r
 108          * t.outputType; unicodeMask = t.unicodeMask; swapLFNLName = t.swapLFNLName; baseSharedData = t.baseSharedData;\r
 109          * extIndexes = t.extIndexes; }\r
 110          */\r
 111     }\r
 112 \r
 113     /* Constants used in MBCS data header */\r
 114     // enum {\r
 115         static final int MBCS_OPT_LENGTH_MASK=0x3f;\r
 116         static final int MBCS_OPT_NO_FROM_U=0x40;\r
 117         /*\r
 118          * If any of the following options bits are set,\r
 119          * then the file must be rejected.\r
 120          */\r
 121         static final int MBCS_OPT_INCOMPATIBLE_MASK=0xffc0;\r
 122         /*\r
 123          * Remove bits from this mask as more options are recognized\r
 124          * by all implementations that use this constant.\r
 125          */\r
 126         static final int MBCS_OPT_UNKNOWN_INCOMPATIBLE_MASK=0xff80;\r
 127     // };\r
 128     /* Constants for fast and UTF-8-friendly conversion. */\r
 129     // enum {\r
 130         static final int SBCS_FAST_MAX=0x0fff;               /* maximum code point with UTF-8-friendly SBCS runtime code, see makeconv SBCS_UTF8_MAX */\r
 131         static final int SBCS_FAST_LIMIT=SBCS_FAST_MAX+1;    /* =0x1000 */\r
 132         static final int MBCS_FAST_MAX=0xd7ff;               /* maximum code point with UTF-8-friendly MBCS runtime code, see makeconv MBCS_UTF8_MAX */\r
 133         static final int MBCS_FAST_LIMIT=MBCS_FAST_MAX+1;    /* =0xd800 */\r
 134     // };\r
 135     /**\r
 136      * MBCS data header. See data format description above.\r
 137      */\r
 138     final class MBCSHeader {\r
 139         byte version[/* U_MAX_VERSION_LENGTH */];\r
 140         int countStates, countToUFallbacks, offsetToUCodeUnits, offsetFromUTable, offsetFromUBytes;\r
 141         int flags;\r
 142         int fromUBytesLength;\r
 143         \r
 144         /* new and required in version 5 */\r
 145         int options;\r
 146 \r
 147         /* new and optional in version 5; used if options&MBCS_OPT_NO_FROM_U */\r
 148         int fullStage2Length;  /* number of 32-bit units */\r
 149 \r
 150         MBCSHeader() {\r
 151             version = new byte[MAX_VERSION_LENGTH];\r
 152         }\r
 153     }\r
 154 \r
 155     public CharsetMBCS(String icuCanonicalName, String javaCanonicalName, String[] aliases, String classPath,\r
 156             ClassLoader loader) throws InvalidFormatException {\r
 157         super(icuCanonicalName, javaCanonicalName, aliases);\r
 158         \r
 159         /* See if the icuCanonicalName contains certain option information. */\r
 160         if (icuCanonicalName.indexOf(UConverterConstants.OPTION_SWAP_LFNL_STRING) > -1) {\r
 161             options = UConverterConstants.OPTION_SWAP_LFNL;\r
 162             icuCanonicalName = icuCanonicalName.substring(0, icuCanonicalName.indexOf(UConverterConstants.OPTION_SWAP_LFNL_STRING));\r
 163             super.icuCanonicalName = icuCanonicalName;\r
 164         }\r
 165         \r
 166         // now try to load the data\r
 167         sharedData = loadConverter(1, icuCanonicalName, classPath, loader);\r
 168 \r
 169         maxBytesPerChar = sharedData.staticData.maxBytesPerChar;\r
 170         minBytesPerChar = sharedData.staticData.minBytesPerChar;\r
 171         maxCharsPerByte = 1;\r
 172         fromUSubstitution = sharedData.staticData.subChar;\r
 173         subChar = sharedData.staticData.subChar;\r
 174         subCharLen = sharedData.staticData.subCharLen;\r
 175         subChar1 = sharedData.staticData.subChar1;\r
 176         fromUSubstitution = new byte[sharedData.staticData.subCharLen];\r
 177         System.arraycopy(sharedData.staticData.subChar, 0, fromUSubstitution, 0, sharedData.staticData.subCharLen);\r
 178         \r
 179         initializeConverter(options);\r
 180     }\r
 181 \r
 182     public CharsetMBCS(String icuCanonicalName, String javaCanonicalName, String[] aliases)\r
 183             throws InvalidFormatException {\r
 184         this(icuCanonicalName, javaCanonicalName, aliases, ICUResourceBundle.ICU_BUNDLE, null);\r
 185     }\r
 186 \r
 187     private UConverterSharedData loadConverter(int nestedLoads, String myName, String classPath, ClassLoader loader)\r
 188             throws InvalidFormatException {\r
 189         boolean noFromU = false;\r
 190         // Read converter data from file\r
 191         UConverterStaticData staticData = new UConverterStaticData();\r
 192         UConverterDataReader reader = null;\r
 193         try {\r
 194             String resourceName = classPath + "/" + myName + "." + UConverterSharedData.DATA_TYPE;\r
 195             InputStream i;\r
 196 \r
 197             if (loader != null) {\r
 198                 i = ICUData.getRequiredStream(loader, resourceName);\r
 199             } else {\r
 200                 i = ICUData.getRequiredStream(resourceName);\r
 201             }\r
 202             BufferedInputStream b = new BufferedInputStream(i, UConverterConstants.CNV_DATA_BUFFER_SIZE);\r
 203             reader = new UConverterDataReader(b);\r
 204             reader.readStaticData(staticData);\r
 205         } catch (IOException e) {\r
 206             throw new InvalidFormatException();\r
 207         } catch (Exception e) {\r
 208             throw new InvalidFormatException();\r
 209         }\r
 210 \r
 211         UConverterSharedData data = null;\r
 212         int type = staticData.conversionType;\r
 213 \r
 214         if (type != UConverterSharedData.UConverterType.MBCS\r
 215                 || staticData.structSize != UConverterStaticData.SIZE_OF_UCONVERTER_STATIC_DATA) {\r
 216             throw new InvalidFormatException();\r
 217         }\r
 218 \r
 219         data = new UConverterSharedData(1, null, false, 0);\r
 220         data.dataReader = reader;\r
 221         data.staticData = staticData;\r
 222         data.sharedDataCached = false;\r
 223 \r
 224         // Load data\r
 225         UConverterMBCSTable mbcsTable = data.mbcs;\r
 226         MBCSHeader header = new MBCSHeader();\r
 227         try {\r
 228             reader.readMBCSHeader(header);\r
 229         } catch (IOException e) {\r
 230             throw new InvalidFormatException();\r
 231         }\r
 232 \r
 233         int offset;\r
 234         // int[] extIndexesArray = null;\r
 235         String baseNameString = null;\r
 236         int[][] stateTableArray = null;\r
 237         MBCSToUFallback[] toUFallbacksArray = null;\r
 238         char[] unicodeCodeUnitsArray = null;\r
 239         char[] fromUnicodeTableArray = null;\r
 240         byte[] fromUnicodeBytesArray = null;\r
 241 \r
 242         if (header.version[0] == 5 && header.version[1] >= 3 && (header.options & MBCS_OPT_UNKNOWN_INCOMPATIBLE_MASK) == 0) {\r
 243             noFromU = ((header.options & MBCS_OPT_NO_FROM_U) != 0);\r
 244         } else if (header.version[0] != 4) {\r
 245             throw new InvalidFormatException();\r
 246         }\r
 247 \r
 248         mbcsTable.outputType = (byte) header.flags;\r
 249 \r
 250         /* extension data, header version 4.2 and higher */\r
 251         offset = header.flags >>> 8;\r
 252         // if(offset!=0 && mbcsTable.outputType == MBCS_OUTPUT_EXT_ONLY) {\r
 253         if (mbcsTable.outputType == MBCS_OUTPUT_EXT_ONLY) {\r
 254             try {\r
 255                 baseNameString = reader.readBaseTableName();\r
 256                 if (offset != 0) {\r
 257                     // agljport:commment subtract 32 for sizeof(_MBCSHeader) and length of baseNameString and 1 null\r
 258                     // terminator byte all already read;\r
 259                     mbcsTable.extIndexes = reader.readExtIndexes(offset\r
 260                             - (reader.bytesRead - reader.staticDataBytesRead));\r
 261                 }\r
 262             } catch (IOException e) {\r
 263                 throw new InvalidFormatException();\r
 264             }\r
 265         }\r
 266 \r
 267         // agljport:add this would be unnecessary if extIndexes were memory mapped\r
 268         /*\r
 269          * if(mbcsTable.extIndexes != null) {\r
 270          * \r
 271          * try { //int nbytes = mbcsTable.extIndexes[UConverterExt.UCNV_EXT_TO_U_LENGTH]*4 +\r
 272          * mbcsTable.extIndexes[UConverterExt.UCNV_EXT_TO_U_UCHARS_LENGTH]*2 +\r
 273          * mbcsTable.extIndexes[UConverterExt.UCNV_EXT_FROM_U_LENGTH]*6 +\r
 274          * mbcsTable.extIndexes[UConverterExt.UCNV_EXT_FROM_U_BYTES_LENGTH] +\r
 275          * mbcsTable.extIndexes[UConverterExt.UCNV_EXT_FROM_U_STAGE_12_LENGTH]*2 +\r
 276          * mbcsTable.extIndexes[UConverterExt.UCNV_EXT_FROM_U_STAGE_3_LENGTH]*2 +\r
 277          * mbcsTable.extIndexes[UConverterExt.UCNV_EXT_FROM_U_STAGE_3B_LENGTH]*4; //int nbytes =\r
 278          * mbcsTable.extIndexes[UConverterExt.UCNV_EXT_SIZE] //byte[] extTables = dataReader.readExtTables(nbytes);\r
 279          * //mbcsTable.extTables = ByteBuffer.wrap(extTables); } catch(IOException e) { System.err.println("Caught\r
 280          * IOException: " + e.getMessage()); pErrorCode[0] = UErrorCode.U_INVALID_FORMAT_ERROR; return; } }\r
 281          */\r
 282         if (mbcsTable.outputType == MBCS_OUTPUT_EXT_ONLY) {\r
 283             UConverterSharedData baseSharedData = null;\r
 284             ByteBuffer extIndexes;\r
 285             String baseName;\r
 286 \r
 287             /* extension-only file, load the base table and set values appropriately */\r
 288             extIndexes = mbcsTable.extIndexes;\r
 289             if (extIndexes == null) {\r
 290                 /* extension-only file without extension */\r
 291                 throw new InvalidFormatException();\r
 292             }\r
 293 \r
 294             if (nestedLoads != 1) {\r
 295                 /* an extension table must not be loaded as a base table */\r
 296                 throw new InvalidFormatException();\r
 297             }\r
 298 \r
 299             /* load the base table */\r
 300             baseName = baseNameString;\r
 301             if (baseName.equals(staticData.name)) {\r
 302                 /* forbid loading this same extension-only file */\r
 303                 throw new InvalidFormatException();\r
 304             }\r
 305 \r
 306             // agljport:fix args.size=sizeof(UConverterLoadArgs);\r
 307             baseSharedData = loadConverter(2, baseName, classPath, loader);\r
 308 \r
 309             if (baseSharedData.staticData.conversionType != UConverterType.MBCS\r
 310                     || baseSharedData.mbcs.baseSharedData != null) {\r
 311                 // agljport:fix ucnv_unload(baseSharedData);\r
 312                 throw new InvalidFormatException();\r
 313             }\r
 314 \r
 315             /* copy the base table data */\r
 316             // agljport:comment deep copy in C changes mbcs through local reference mbcsTable; in java we probably don't\r
 317             // need the deep copy so can just make sure mbcs and its local reference both refer to the same new object\r
 318             mbcsTable = data.mbcs = baseSharedData.mbcs;\r
 319 \r
 320             /* overwrite values with relevant ones for the extension converter */\r
 321             mbcsTable.baseSharedData = baseSharedData;\r
 322             mbcsTable.extIndexes = extIndexes;\r
 323 \r
 324             /*\r
 325              * It would be possible to share the swapLFNL data with a base converter, but the generated name would have\r
 326              * to be different, and the memory would have to be free'd only once. It is easier to just create the data\r
 327              * for the extension converter separately when it is requested.\r
 328              */\r
 329             mbcsTable.swapLFNLStateTable = null;\r
 330             mbcsTable.swapLFNLFromUnicodeBytes = null;\r
 331             mbcsTable.swapLFNLName = null;\r
 332 \r
 333             /*\r
 334              * Set a special, runtime-only outputType if the extension converter is a DBCS version of a base converter\r
 335              * that also maps single bytes.\r
 336              */\r
 337             if (staticData.conversionType == UConverterType.DBCS\r
 338                     || (staticData.conversionType == UConverterType.MBCS && staticData.minBytesPerChar >= 2)) {\r
 339 \r
 340                 if (baseSharedData.mbcs.outputType == MBCS_OUTPUT_2_SISO) {\r
 341                     /* the base converter is SI/SO-stateful */\r
 342                     int entry;\r
 343 \r
 344                     /* get the dbcs state from the state table entry for SO=0x0e */\r
 345                     entry = mbcsTable.stateTable[0][0xe];\r
 346                     if (MBCS_ENTRY_IS_FINAL(entry) && MBCS_ENTRY_FINAL_ACTION(entry) == MBCS_STATE_CHANGE_ONLY\r
 347                             && MBCS_ENTRY_FINAL_STATE(entry) != 0) {\r
 348                         mbcsTable.dbcsOnlyState = (byte) MBCS_ENTRY_FINAL_STATE(entry);\r
 349 \r
 350                         mbcsTable.outputType = MBCS_OUTPUT_DBCS_ONLY;\r
 351                     }\r
 352                 } else if (baseSharedData.staticData.conversionType == UConverterType.MBCS\r
 353                         && baseSharedData.staticData.minBytesPerChar == 1\r
 354                         && baseSharedData.staticData.maxBytesPerChar == 2 && mbcsTable.countStates <= 127) {\r
 355 \r
 356                     /* non-stateful base converter, need to modify the state table */\r
 357                     int newStateTable[][/* 256 */];\r
 358                     int state[]; // this works because java 2-D array is array of references and we can have state =\r
 359                     // newStateTable[i];\r
 360                     int i, count;\r
 361 \r
 362                     /* allocate a new state table and copy the base state table contents */\r
 363                     count = mbcsTable.countStates;\r
 364                     newStateTable = new int[(count + 1) * 1024][256];\r
 365 \r
 366                     for (i = 0; i < mbcsTable.stateTable.length; ++i)\r
 367                         System.arraycopy(mbcsTable.stateTable[i], 0, newStateTable[i], 0,\r
 368                                 mbcsTable.stateTable[i].length);\r
 369 \r
 370                     /* change all final single-byte entries to go to a new all-illegal state */\r
 371                     state = newStateTable[0];\r
 372                     for (i = 0; i < 256; ++i) {\r
 373                         if (MBCS_ENTRY_IS_FINAL(state[i])) {\r
 374                             state[i] = MBCS_ENTRY_TRANSITION(count, 0);\r
 375                         }\r
 376                     }\r
 377 \r
 378                     /* build the new all-illegal state */\r
 379                     state = newStateTable[count];\r
 380                     for (i = 0; i < 256; ++i) {\r
 381                         state[i] = MBCS_ENTRY_FINAL(0, MBCS_STATE_ILLEGAL, 0);\r
 382                     }\r
 383                     mbcsTable.stateTable = newStateTable;\r
 384                     mbcsTable.countStates = (byte) (count + 1);\r
 385                     mbcsTable.stateTableOwned = true;\r
 386 \r
 387                     mbcsTable.outputType = MBCS_OUTPUT_DBCS_ONLY;\r
 388                 }\r
 389             }\r
 390 \r
 391             /*\r
 392              * unlike below for files with base tables, do not get the unicodeMask from the sharedData; instead, use the\r
 393              * base table's unicodeMask, which we copied in the memcpy above; this is necessary because the static data\r
 394              * unicodeMask, especially the UCNV_HAS_SUPPLEMENTARY flag, is part of the base table data\r
 395              */\r
 396         } else {\r
 397             /* conversion file with a base table; an additional extension table is optional */\r
 398             /* make sure that the output type is known */\r
 399             switch (mbcsTable.outputType) {\r
 400             case MBCS_OUTPUT_1:\r
 401             case MBCS_OUTPUT_2:\r
 402             case MBCS_OUTPUT_3:\r
 403             case MBCS_OUTPUT_4:\r
 404             case MBCS_OUTPUT_3_EUC:\r
 405             case MBCS_OUTPUT_4_EUC:\r
 406             case MBCS_OUTPUT_2_SISO:\r
 407                 /* OK */\r
 408                 break;\r
 409             default:\r
 410                 throw new InvalidFormatException();\r
 411             }\r
 412 \r
 413             stateTableArray = new int[header.countStates][256];\r
 414             toUFallbacksArray = new MBCSToUFallback[header.countToUFallbacks];\r
 415             for (int i = 0; i < toUFallbacksArray.length; ++i)\r
 416                 toUFallbacksArray[i] = new MBCSToUFallback();\r
 417             unicodeCodeUnitsArray = new char[(header.offsetFromUTable - header.offsetToUCodeUnits) / 2];\r
 418             fromUnicodeTableArray = new char[(header.offsetFromUBytes - header.offsetFromUTable) / 2];\r
 419             fromUnicodeBytesArray = new byte[header.fromUBytesLength];\r
 420             try {\r
 421                 reader.readMBCSTable(stateTableArray, toUFallbacksArray, unicodeCodeUnitsArray, fromUnicodeTableArray,\r
 422                         fromUnicodeBytesArray);\r
 423             } catch (IOException e) {\r
 424                 throw new InvalidFormatException();\r
 425             }\r
 426 \r
 427             mbcsTable.countStates = (byte) header.countStates;\r
 428             mbcsTable.countToUFallbacks = header.countToUFallbacks;\r
 429             mbcsTable.stateTable = stateTableArray;\r
 430             mbcsTable.toUFallbacks = toUFallbacksArray;\r
 431             mbcsTable.unicodeCodeUnits = unicodeCodeUnitsArray;\r
 432 \r
 433             mbcsTable.fromUnicodeTable = fromUnicodeTableArray;\r
 434             mbcsTable.fromUnicodeBytes = fromUnicodeBytesArray;\r
 435             mbcsTable.fromUBytesLength = header.fromUBytesLength;\r
 436 \r
 437             /*\r
 438              * converter versions 6.1 and up contain a unicodeMask that is used here to select the most efficient\r
 439              * function implementations\r
 440              */\r
 441             // agljport:fix info.size=sizeof(UDataInfo);\r
 442             // agljport:fix udata_getInfo((UDataMemory *)sharedData->dataMemory, &info);\r
 443             // agljport:fix if(info.formatVersion[0]>6 || (info.formatVersion[0]==6 && info.formatVersion[1]>=1)) {\r
 444             /* mask off possible future extensions to be safe */\r
 445             mbcsTable.unicodeMask = (short) (staticData.unicodeMask & 3);\r
 446             // agljport:fix } else {\r
 447             /* for older versions, assume worst case: contains anything possible (prevent over-optimizations) */\r
 448             // agljport:fix mbcsTable->unicodeMask=UCNV_HAS_SUPPLEMENTARY|UCNV_HAS_SURROGATES;\r
 449             // agljport:fix }\r
 450             if (offset != 0) {\r
 451                 try {\r
 452                     // agljport:commment subtract 32 for sizeof(_MBCSHeader) and length of baseNameString and 1 null\r
 453                     // terminator byte all already read;\r
 454                     // int namelen = baseNameString != null? baseNameString.length() + 1: 0;\r
 455                     mbcsTable.extIndexes = reader.readExtIndexes(offset\r
 456                             - (reader.bytesRead - reader.staticDataBytesRead));\r
 457                 } catch (IOException e) {\r
 458                     throw new InvalidFormatException();\r
 459                 }\r
 460             }\r
 461             \r
 462             if (header.version[1] >= 3 && (mbcsTable.unicodeMask & UConverterConstants.HAS_SURROGATES) == 0 &&\r
 463                     (mbcsTable.countStates == 1 ? ((char)header.version[2] >= (SBCS_FAST_MAX>>8)) : ((char)header.version[2] >= (MBCS_FAST_MAX>>8)))) {\r
 464                 mbcsTable.utf8Friendly = true;\r
 465                 \r
 466                 if (mbcsTable.countStates == 1) {\r
 467                     /*\r
 468                      * SBCS: Stage 3 is allocated in 64-entry blocks for U+0000..SBCS_FAST_MAX or higher.\r
 469                      * Build a table with indexes to each block, to be used instaed of\r
 470                      * the regular stage 1/2 table.\r
 471                      */\r
 472                     for (int i = 0; i < (SBCS_FAST_LIMIT>>6); ++i) {\r
 473                         mbcsTable.sbcsIndex[i] = mbcsTable.fromUnicodeTable[mbcsTable.fromUnicodeTable[i>>4]+((i<<2)&0x3c)];\r
 474                     }\r
 475                     /* set SBCS_FAST_MAX to reflect the reach of sbcsIndex[] even if header.version[2]>(SBCS_FAST_MAX>>8) */\r
 476                     mbcsTable.maxFastUChar = SBCS_FAST_MAX;\r
 477                 } else {\r
 478                     /*\r
 479                      * MBCS: Stage 3 is allocated in 64-entry blocks for U+0000..MBCS_FAST_MAX or higher.\r
 480                      * The .cnv file is prebuilt with an additional stage table with indexes to each block.\r
 481                      */\r
 482                     if (noFromU) {\r
 483                         mbcsTable.mbcsIndex = ByteBuffer.wrap(mbcsTable.fromUnicodeBytes).asCharBuffer();\r
 484                     }\r
 485                     mbcsTable.maxFastUChar = (char)((header.version[2]<<8) | 0xff);\r
 486                 }\r
 487             }\r
 488             /* calculate a bit set of 4 ASCII characters per bit that round-trip to ASCII bytes */\r
 489             {\r
 490                 long asciiRoundtrips = 0xffffffff;\r
 491                 for (int i = 0; i < 0x80; ++i) {\r
 492                     if (mbcsTable.stateTable[0][i] != MBCS_ENTRY_FINAL(0, MBCS_STATE_VALID_DIRECT_16, i)) {\r
 493                         asciiRoundtrips&=~((long)1<<(i>>2))&UConverterConstants.UNSIGNED_INT_MASK;\r
 494                     }\r
 495                 }\r
 496                 mbcsTable.asciiRoundtrips = asciiRoundtrips&UConverterConstants.UNSIGNED_INT_MASK;\r
 497             }\r
 498             \r
 499             if (noFromU) {\r
 500                 int stage1Length = (mbcsTable.unicodeMask&UConverterConstants.HAS_SUPPLEMENTARY) != 0 ? 0x440 : 0x40;\r
 501                 int stage2Length = (header.offsetFromUBytes - header.offsetFromUTable)/4 - stage1Length/2;\r
 502                 reconstituteData(mbcsTable, stage1Length, stage2Length, header.fullStage2Length);\r
 503             }\r
 504             if (mbcsTable.outputType == MBCS_OUTPUT_DBCS_ONLY || mbcsTable.outputType == MBCS_OUTPUT_2_SISO) {\r
 505                 /*\r
 506                  * MBCS_OUTPUT_DBCS_ONLY: No SBCS mappings, therefore ASCII does not roundtrip.\r
 507                  * MBCS_OUTPUT_2_SISO: Bypass the ASCII fastpath to handle prevLength correctly.\r
 508                  */\r
 509                 mbcsTable.asciiRoundtrips = 0;\r
 510             }\r
 511         }\r
 512         return data;\r
 513     }\r
 514     \r
 515     private static boolean writeStage3Roundtrip(UConverterMBCSTable mbcsTable, long value, int codePoints[]) {\r
 516         char[] table;\r
 517         byte[] bytes;\r
 518         int stage2;\r
 519         int p;\r
 520         int c;\r
 521         int i, st3;\r
 522         long temp;\r
 523 \r
 524         table = mbcsTable.fromUnicodeTable;\r
 525         bytes = mbcsTable.fromUnicodeBytes;\r
 526 \r
 527         /* for EUC outputTypes, modify the value like genmbcs.c's transformEUC() */\r
 528         switch(mbcsTable.outputType) {\r
 529         case MBCS_OUTPUT_3_EUC:\r
 530             if(value<=0xffff) {\r
 531                 /* short sequences are stored directly */\r
 532                 /* code set 0 or 1 */\r
 533             } else if(value<=0x8effff) {\r
 534                 /* code set 2 */\r
 535                 value&=0x7fff;\r
 536             } else /* first byte is 0x8f */ {\r
 537                 /* code set 3 */\r
 538                 value&=0xff7f;\r
 539             }\r
 540             break;\r
 541         case MBCS_OUTPUT_4_EUC:\r
 542             if(value<=0xffffff) {\r
 543                 /* short sequences are stored directly */\r
 544                 /* code set 0 or 1 */\r
 545             } else if(value<=0x8effffff) {\r
 546                 /* code set 2 */\r
 547                 value&=0x7fffff;\r
 548             } else /* first byte is 0x8f */ {\r
 549                 /* code set 3 */\r
 550                 value&=0xff7fff;\r
 551             }\r
 552             break;\r
 553         default:\r
 554             break;\r
 555         }\r
 556 \r
 557         for(i=0; i<=0x1f; ++value, ++i) {\r
 558             c=codePoints[i];\r
 559             if(c<0) {\r
 560                 continue;\r
 561             }\r
 562 \r
 563             /* locate the stage 2 & 3 data */\r
 564             stage2 = table[c>>10] + ((c>>4)&0x3f);\r
 565             st3 = table[stage2*2]<<16|table[stage2*2 + 1];\r
 566             st3 = (int)(char)(st3 * 16 + (c&0xf));\r
 567 \r
 568             /* write the codepage bytes into stage 3 */\r
 569             switch(mbcsTable.outputType) {\r
 570             case MBCS_OUTPUT_3:\r
 571             case MBCS_OUTPUT_4_EUC:\r
 572                 p = st3*3;\r
 573                 bytes[p] = (byte)(value>>16);\r
 574                 bytes[p+1] = (byte)(value>>8);\r
 575                 bytes[p+2] = (byte)value;\r
 576                 break;\r
 577             case MBCS_OUTPUT_4:\r
 578                 bytes[st3*4] = (byte)(value >> 24);\r
 579                 bytes[st3*4 + 1] = (byte)(value >> 16);\r
 580                 bytes[st3*4 + 2] = (byte)(value >> 8);\r
 581                 bytes[st3*4 + 3] = (byte)value;\r
 582                 break;\r
 583             default:\r
 584                 /* 2 bytes per character */\r
 585                 bytes[st3*2] = (byte)(value >> 8);\r
 586                 bytes[st3*2 + 1] = (byte)value;\r
 587                 break;\r
 588             }\r
 589 \r
 590             /* set the roundtrip flag */\r
 591             temp = (1L<<(16+(c&0xf)));\r
 592             table[stage2*2] |= (char)(temp>>16);\r
 593             table[stage2*2 + 1] |= (char)temp;\r
 594         }\r
 595         return true;\r
 596      }\r
 597     \r
 598     private static void reconstituteData(UConverterMBCSTable mbcsTable, int stage1Length, int stage2Length, int fullStage2Length) {\r
 599         int datalength = stage1Length*2+fullStage2Length*4+mbcsTable.fromUBytesLength;\r
 600         int offset = 0;\r
 601         byte[] stage = new byte[datalength];\r
 602         \r
 603         for (int i = 0; i < stage1Length; ++i) {\r
 604             stage[i*2]   = (byte)(mbcsTable.fromUnicodeTable[i]>>8);\r
 605             stage[i*2+1] = (byte)(mbcsTable.fromUnicodeTable[i]);\r
 606         }\r
 607         \r
 608         offset = ((fullStage2Length - stage2Length) * 4) + (stage1Length * 2);\r
 609         for (int i = 0; i < stage2Length; ++i) {\r
 610             stage[offset + i*4]   = (byte)(mbcsTable.fromUnicodeTable[stage1Length + i*2]>>8);\r
 611             stage[offset + i*4+1] = (byte)(mbcsTable.fromUnicodeTable[stage1Length + i*2]);\r
 612             stage[offset + i*4+2] = (byte)(mbcsTable.fromUnicodeTable[stage1Length + i*2+1]>>8);\r
 613             stage[offset + i*4+3] = (byte)(mbcsTable.fromUnicodeTable[stage1Length + i*2+1]);\r
 614         }\r
 615         \r
 616         /* indexes into stage 2 count from the bottom of the fromUnicodeTable */\r
 617         \r
 618         /* reconsitute the initial part of stage 2 from the mbcsIndex */\r
 619         {\r
 620             int stageUTF8Length=(mbcsTable.maxFastUChar+1)>>6;\r
 621             int stageUTF8Index=0;\r
 622             int st1, st2, st3, i;\r
 623             \r
 624             for (st1 = 0; stageUTF8Index < stageUTF8Length; ++st1) {\r
 625                 st2 = ((char)stage[2*st1]<<8) | stage[2*st1+1];\r
 626                 if (st2 != stage1Length/2) {\r
 627                     /* each stage 2 block has 64 entries corresponding to 16 entries in the mbcsIndex */\r
 628                     for (i = 0; i < 16; ++i) {\r
 629                         st3 = mbcsTable.mbcsIndex.get(stageUTF8Index++);\r
 630                         if (st3 != 0) {\r
 631                             /* a stage 2 entry's index is per stage 3 16-block, not per stage 3 entry */\r
 632                             st3>>=4;\r
 633                             /*\r
 634                              * 4 stage 2 entries point to 4 consecutive stage 3 16-blocks which are\r
 635                              * allocated together as a single 64-block for access from the mbcsIndex\r
 636                              */\r
 637                             stage[4*st2] = (byte)(st3>>24); stage[4*st2+1] = (byte)(st3>>16); stage[4*st2+2] = (byte)(st3>>8); stage[4*st2+3] = (byte)(st3); st2++; st3++;\r
 638                             stage[4*st2] = (byte)(st3>>24); stage[4*st2+1] = (byte)(st3>>16); stage[4*st2+2] = (byte)(st3>>8); stage[4*st2+3] = (byte)(st3); st2++; st3++;\r
 639                             stage[4*st2] = (byte)(st3>>24); stage[4*st2+1] = (byte)(st3>>16); stage[4*st2+2] = (byte)(st3>>8); stage[4*st2+3] = (byte)(st3); st2++; st3++;\r
 640                             stage[4*st2] = (byte)(st3>>24); stage[4*st2+1] = (byte)(st3>>16); stage[4*st2+2] = (byte)(st3>>8); stage[4*st2+3] = (byte)(st3);\r
 641                         } else {\r
 642                             /* no stage 3 block, skip */\r
 643                             st2+=4;\r
 644                         }\r
 645                     }\r
 646                 } else {\r
 647                     /* no stage 2 block, skip */\r
 648                     stageUTF8Index+=16;\r
 649                 }\r
 650             }\r
 651         }\r
 652         \r
 653         char[] stage1 = new char[stage.length/2];\r
 654         for (int i = 0; i < stage1.length; ++i) {\r
 655             stage1[i] = (char)(((stage[i*2])<<8)|(stage[i*2+1] & UConverterConstants.UNSIGNED_BYTE_MASK));\r
 656         }\r
 657         byte[] stage2 = new byte[stage.length - ((stage1Length * 2) + (fullStage2Length * 4))];\r
 658         System.arraycopy(stage, ((stage1Length * 2) + (fullStage2Length * 4)), stage2, 0, stage2.length);\r
 659         \r
 660         mbcsTable.fromUnicodeTable = stage1;\r
 661         mbcsTable.fromUnicodeBytes = stage2;\r
 662         \r
 663         /* reconstitute fromUnicodeBytes with roundtrips from toUnicode data */\r
 664         MBCSEnumToUnicode(mbcsTable);\r
 665     }\r
 666     \r
 667     /*\r
 668      * Internal function enumerating the toUnicode data of an MBCS converter.\r
 669      * Currently only used for reconstituting data for a MBCS_OPT_NO_FROM_U\r
 670      * table, but could also be used for a future getUnicodeSet() option\r
 671      * that includes reverse fallbacks (after updating this function's implementation).\r
 672      * Currently only handles roundtrip mappings.\r
 673      * Does not currently handle extensions.\r
 674      */\r
 675     private static void MBCSEnumToUnicode(UConverterMBCSTable mbcsTable) {\r
 676         /*\r
 677          * Properties for each state, to speed up the enumeration.\r
 678          * Ignorable actions are unassigned/illegal/state-change-only:\r
 679          * They do not lead to mappings.\r
 680          * \r
 681          * Bits 7..6\r
 682          * 1 direct/initial state (stateful converters have mulitple)\r
 683          * 0 non-initial state with transitions or with nonignorable result actions\r
 684          * -1 final state with only ignorable actions\r
 685          * \r
 686          * Bits 5..3\r
 687          * The lowest byte value with non-ignorable actions is\r
 688          * value<<5 (rounded down).\r
 689          * \r
 690          * Bits 2..0:\r
 691          * The highest byte value with non-ignorable actions is\r
 692          * (value<<5)&0x1f (rounded up).\r
 693          */\r
 694         byte stateProps[] = new byte[MBCS_MAX_STATE_COUNT];\r
 695         int state;\r
 696         \r
 697         /* recurse from state 0 and set all stateProps */\r
 698         getStateProp(mbcsTable.stateTable, stateProps, 0);\r
 699         \r
 700         for (state = 0; state < mbcsTable.countStates; ++state) {\r
 701             if (stateProps[state] >= 0x40) {\r
 702                 /* start from each direct state */\r
 703                 enumToU(mbcsTable, stateProps, state, 0, 0);\r
 704             }\r
 705         }\r
 706         \r
 707         \r
 708     }\r
 709     \r
 710     private static boolean enumToU(UConverterMBCSTable mbcsTable, byte stateProps[], int state, int offset, int value) {\r
 711         int[] codePoints = new int[32];\r
 712         int[] row;\r
 713         char[] unicodeCodeUnits;\r
 714         int anyCodePoints;\r
 715         int b, limit;\r
 716         \r
 717         row = mbcsTable.stateTable[state];\r
 718         unicodeCodeUnits = mbcsTable.unicodeCodeUnits;\r
 719         \r
 720         value<<=8;\r
 721         anyCodePoints = -1; /* becomes non-negative if there is a mapping */\r
 722         \r
 723         b = (stateProps[state]&0x38)<<2;\r
 724         if (b == 0 && stateProps[state] >= 0x40) {\r
 725             /* skip byte sequences with leading zeros because they are note stored in the fromUnicode table */\r
 726             codePoints[0] = UConverterConstants.U_SENTINEL;\r
 727             b = 1;\r
 728         }\r
 729         limit = ((stateProps[state]&7)+1)<<5;\r
 730         while (b < limit) {\r
 731             int entry = row[b];\r
 732             if (MBCS_ENTRY_IS_TRANSITION(entry)) {\r
 733                 int nextState = MBCS_ENTRY_TRANSITION_STATE(entry);\r
 734                 if (stateProps[nextState] >= 0) {\r
 735                     /* recurse to a state with non-ignorable actions */\r
 736                     if (!enumToU(mbcsTable, stateProps, nextState, offset+MBCS_ENTRY_TRANSITION_OFFSET(entry), value|b)) {\r
 737                         return false;\r
 738                     }\r
 739                 }\r
 740                 codePoints[b&0x1f] = UConverterConstants.U_SENTINEL;\r
 741             } else {\r
 742                 int c;\r
 743                 int action;\r
 744                 \r
 745                 /*\r
 746                  * An if-else-if chain provides more reliable performance for\r
 747                  * the most common cases compared to a switch.\r
 748                  */\r
 749                 action = MBCS_ENTRY_FINAL_ACTION(entry);\r
 750                 if (action == MBCS_STATE_VALID_DIRECT_16) {\r
 751                     /* output BMP code point */\r
 752                     c = MBCS_ENTRY_FINAL_VALUE_16(entry);\r
 753                 } else if (action == MBCS_STATE_VALID_16) {\r
 754                     int finalOffset = offset+MBCS_ENTRY_FINAL_VALUE_16(entry);\r
 755                     c = unicodeCodeUnits[finalOffset];\r
 756                     if (c < 0xfffe) {\r
 757                         /* output BMP code point */\r
 758                     } else {\r
 759                         c = UConverterConstants.U_SENTINEL;\r
 760                     }\r
 761                 } else if (action == MBCS_STATE_VALID_16_PAIR) {\r
 762                     int finalOffset = offset+MBCS_ENTRY_FINAL_VALUE_16(entry);\r
 763                     c = unicodeCodeUnits[finalOffset++];\r
 764                     if (c < 0xd800) {\r
 765                         /* output BMP code point below 0xd800 */\r
 766                     } else if (c <= 0xdbff) {\r
 767                         /* output roundtrip or fallback supplementary code point */\r
 768                         c = ((c&0x3ff)<<10)+unicodeCodeUnits[finalOffset]+(0x10000-0xdc00);\r
 769                     } else if (c == 0xe000) {\r
 770                         /* output roundtrip BMP code point above 0xd800 or fallback BMP code point */\r
 771                         c = unicodeCodeUnits[finalOffset];\r
 772                     } else {\r
 773                         c = UConverterConstants.U_SENTINEL;\r
 774                     }\r
 775                 } else if (action == MBCS_STATE_VALID_DIRECT_20) {\r
 776                     /* output supplementary code point */\r
 777                     c = MBCS_ENTRY_FINAL_VALUE(entry)+0x10000;\r
 778                 } else {\r
 779                     c = UConverterConstants.U_SENTINEL;\r
 780                 }\r
 781                 \r
 782                 codePoints[b&0x1f] = c;\r
 783                 anyCodePoints&=c;\r
 784             }\r
 785             if (((++b)&0x1f) == 0) {\r
 786                 if(anyCodePoints>=0) {\r
 787                     if(!writeStage3Roundtrip(mbcsTable, value|(b-0x20)&UConverterConstants.UNSIGNED_INT_MASK, codePoints)) {\r
 788                         return false;\r
 789                     }\r
 790                     anyCodePoints=-1;\r
 791                 }\r
 792             }\r
 793         }\r
 794         \r
 795         return true;\r
 796     }\r
 797     \r
 798     /*\r
 799      * Only called if stateProps[state]==-1.\r
 800      * A recursive call may do stateProps[state]|=0x40 if this state is the target of an\r
 801      * MBCS_STATE_CHANGE_ONLY.\r
 802      */\r
 803     private static byte getStateProp(int stateTable[][], byte stateProps[], int state) {\r
 804         int[] row;\r
 805         int min, max, entry, nextState;\r
 806         \r
 807         row = stateTable[state];\r
 808         stateProps[state] = 0;\r
 809         \r
 810         /* find first non-ignorable state */\r
 811         for (min = 0;;++min) {\r
 812             entry = row[min];\r
 813             nextState = MBCS_ENTRY_STATE(entry);\r
 814             if (stateProps[nextState] == -1) {\r
 815                 getStateProp(stateTable, stateProps, nextState);\r
 816             }\r
 817             if (MBCS_ENTRY_IS_TRANSITION(entry)) {\r
 818                 if (stateProps[nextState] >- 0) {\r
 819                     break;\r
 820                 }\r
 821             } else if (MBCS_ENTRY_FINAL_ACTION(entry) < MBCS_STATE_UNASSIGNED) {\r
 822                 break;\r
 823             }\r
 824             if (min == 0xff) {\r
 825                 stateProps[state] = -0x40;  /* (byte)0xc0 */\r
 826                 return stateProps[state];\r
 827             }\r
 828         }\r
 829         stateProps[state]|=(byte)((min>>5)<<3);\r
 830         \r
 831         /* find last non-ignorable state */\r
 832         for (max = 0xff; min < max; --max) {\r
 833             entry = row[max];\r
 834             nextState = MBCS_ENTRY_STATE(entry);\r
 835             if (stateProps[nextState] == -1) {\r
 836                 getStateProp(stateTable, stateProps, nextState);\r
 837             }\r
 838             if (MBCS_ENTRY_IS_TRANSITION(entry)) {\r
 839                 if (stateProps[nextState] >- 0) {\r
 840                     break;\r
 841                 }\r
 842             } else if (MBCS_ENTRY_FINAL_ACTION(entry) < MBCS_STATE_UNASSIGNED) {\r
 843                 break;\r
 844             }\r
 845         }\r
 846         stateProps[state]|=(byte)(max>>5);\r
 847         \r
 848         /* recurse further and collect direct-state information */\r
 849         while (min <= max) {\r
 850             entry = row[min];\r
 851             nextState = MBCS_ENTRY_STATE(entry);\r
 852             if (stateProps[nextState] == -1) {\r
 853                 getStateProp(stateTable, stateProps, nextState);\r
 854             }\r
 855             if (MBCS_ENTRY_IS_TRANSITION(entry)) {\r
 856                 stateProps[nextState]|=0x40;\r
 857                 if (MBCS_ENTRY_FINAL_ACTION(entry) <= MBCS_STATE_FALLBACK_DIRECT_20) {\r
 858                     stateProps[state]|=0x40;\r
 859                 }\r
 860             }\r
 861             ++min;\r
 862         }\r
 863         return stateProps[state];\r
 864     }\r
 865 \r
 866     protected void initializeConverter(int myOptions) {\r
 867         UConverterMBCSTable mbcsTable;\r
 868         ByteBuffer extIndexes;\r
 869         short outputType;\r
 870         byte maxBytesPerUChar;\r
 871 \r
 872         mbcsTable = sharedData.mbcs;\r
 873         outputType = mbcsTable.outputType;\r
 874 \r
 875         if (outputType == MBCS_OUTPUT_DBCS_ONLY) {\r
 876             /* the swaplfnl option does not apply, remove it */\r
 877             this.options = myOptions &= ~UConverterConstants.OPTION_SWAP_LFNL;\r
 878         }\r
 879 \r
 880         if ((myOptions & UConverterConstants.OPTION_SWAP_LFNL) != 0) {\r
 881             /* do this because double-checked locking is broken */\r
 882             boolean isCached;\r
 883 \r
 884             // agljport:todo umtx_lock(NULL);\r
 885             isCached = mbcsTable.swapLFNLStateTable != null;\r
 886             // agljport:todo umtx_unlock(NULL);\r
 887 \r
 888             if (!isCached) {\r
 889                 try {\r
 890                     if (!EBCDICSwapLFNL()) {\r
 891                         /* this option does not apply, remove it */\r
 892                         this.options = myOptions &= ~UConverterConstants.OPTION_SWAP_LFNL;\r
 893                     }\r
 894                 } catch (Exception e) {\r
 895                     /* something went wrong. */\r
 896                     return;\r
 897                 }\r
 898             }\r
 899         }\r
 900 \r
 901         if (icuCanonicalName.toLowerCase().indexOf("gb18030") >= 0) {\r
 902             /* set a flag for GB 18030 mode, which changes the callback behavior */\r
 903             this.options |= MBCS_OPTION_GB18030;\r
 904         } else if (icuCanonicalName.toLowerCase().indexOf("keis") >= 0) {\r
 905             this.options |= MBCS_OPTION_KEIS;\r
 906         } else if (icuCanonicalName.toLowerCase().indexOf("jef") >= 0) {\r
 907             this.options |= MBCS_OPTION_JEF;\r
 908         } else if (icuCanonicalName.toLowerCase().indexOf("jips") >= 0) {\r
 909             this.options |= MBCS_OPTION_JIPS;\r
 910         }\r
 911 \r
 912         /* fix maxBytesPerUChar depending on outputType and options etc. */\r
 913         if (outputType == MBCS_OUTPUT_2_SISO) {\r
 914             maxBytesPerChar = 3; /* SO+DBCS */\r
 915         }\r
 916 \r
 917         extIndexes = mbcsTable.extIndexes;\r
 918         if (extIndexes != null) {\r
 919             maxBytesPerUChar = (byte) GET_MAX_BYTES_PER_UCHAR(extIndexes);\r
 920             if (outputType == MBCS_OUTPUT_2_SISO) {\r
 921                 ++maxBytesPerUChar; /* SO + multiple DBCS */\r
 922             }\r
 923 \r
 924             if (maxBytesPerUChar > maxBytesPerChar) {\r
 925                 maxBytesPerChar = maxBytesPerUChar;\r
 926             }\r
 927         }\r
 928     }\r
 929      /* EBCDIC swap LF<->NL--------------------------------------------------------------------------------*/\r
 930      /*\r
 931       * This code modifies a standard EBCDIC<->Unicode mapping table for\r
 932       * OS/390 (z/OS) Unix System Services (Open Edition).\r
 933       * The difference is in the mapping of Line Feed and New Line control codes:\r
 934       * Standard EBCDIC maps\r
 935       * \r
 936       * <U000A> \x25 |0\r
 937       * <U0085> \x15 |0\r
 938       * \r
 939       * but OS/390 USS EBCDIC swaps the control codes for LF and NL,\r
 940       * mapping\r
 941       * \r
 942       * <U000A> \x15 |0\r
 943       * <U0085> \x25 |0\r
 944       * \r
 945       * This code modifies a loaded standard EBCDIC<->Unicode mapping table\r
 946       * by copying it into allocated memory and swapping the LF and NL values.\r
 947       * This makes it possible to support the same EBCDIC charset in both versions without\r
 948       * duplicating the entire installed table.\r
 949       */\r
 950     /* standard EBCDIC codes */\r
 951     private static final short EBCDIC_LF = 0x0025;\r
 952     private static final short EBCDIC_NL = 0x0015;\r
 953     \r
 954     /* standard EBCDIC codes with roundtrip flag as stored in Unicode-to-single-byte tables */\r
 955     private static final short EBCDIC_RT_LF = 0x0f25;\r
 956     private static final short EBCDIC_RT_NL = 0x0f15;\r
 957     \r
 958     /* Unicode code points */\r
 959     private static final short U_LF = 0x000A;\r
 960     private static final short U_NL = 0x0085;\r
 961     \r
 962     private boolean EBCDICSwapLFNL() throws Exception {\r
 963         UConverterMBCSTable mbcsTable;\r
 964         \r
 965         char[] table;\r
 966         byte[] results;\r
 967         byte[] bytes;\r
 968         \r
 969         int[][] newStateTable;\r
 970         byte[] newResults;\r
 971         String newName;\r
 972         \r
 973         int stage2Entry;\r
 974 //        int size;\r
 975         int sizeofFromUBytes;\r
 976         \r
 977         mbcsTable = sharedData.mbcs;\r
 978         \r
 979         table = mbcsTable.fromUnicodeTable;\r
 980         bytes = mbcsTable.fromUnicodeBytes;\r
 981         results = bytes;\r
 982         \r
 983         /*\r
 984          * Check that this is an EBCDIC table with SBCS portion -\r
 985          * SBCS or EBCDIC with standard EBCDIC LF and NL mappings.\r
 986          * \r
 987          * If not, ignore the option Options are always ignored if they do not apply.\r
 988          */\r
 989         if (!((mbcsTable.outputType == MBCS_OUTPUT_1 || mbcsTable.outputType == MBCS_OUTPUT_2_SISO) &&\r
 990               mbcsTable.stateTable[0][EBCDIC_LF] == MBCS_ENTRY_FINAL(0, MBCS_STATE_VALID_DIRECT_16, U_LF) &&\r
 991               mbcsTable.stateTable[0][EBCDIC_NL] == MBCS_ENTRY_FINAL(0, MBCS_STATE_VALID_DIRECT_16, U_NL))) {\r
 992             return false;\r
 993         }\r
 994         \r
 995         if (mbcsTable.outputType == MBCS_OUTPUT_1) {\r
 996             if (!(EBCDIC_RT_LF == MBCS_SINGLE_RESULT_FROM_U(table, results, U_LF) &&\r
 997                   EBCDIC_RT_NL == MBCS_SINGLE_RESULT_FROM_U(table, results, U_NL))) {\r
 998                 return false;\r
 999             }\r
1000         } else /* MBCS_OUTPUT_2_SISO */ {\r
1001             stage2Entry = MBCS_STAGE_2_FROM_U(table, U_LF);\r
1002             if (!(MBCS_FROM_U_IS_ROUNDTRIP(stage2Entry, U_LF) &&\r
1003                   EBCDIC_LF == MBCS_VALUE_2_FROM_STAGE_2(bytes, stage2Entry, U_LF))) {\r
1004                 return false;\r
1005             }\r
1006             \r
1007             stage2Entry = MBCS_STAGE_2_FROM_U(table, U_NL);\r
1008             if (!(MBCS_FROM_U_IS_ROUNDTRIP(stage2Entry, U_NL) &&\r
1009                   EBCDIC_NL == MBCS_VALUE_2_FROM_STAGE_2(bytes, stage2Entry, U_NL))) {\r
1010                 return false;\r
1011             }\r
1012         }\r
1013         \r
1014         if (mbcsTable.fromUBytesLength > 0) {\r
1015             /*\r
1016              * We _know_ the number of bytes in the fromUnicodeBytes array\r
1017              * starting with header.version 4.1.\r
1018              */\r
1019             sizeofFromUBytes = mbcsTable.fromUBytesLength;\r
1020         } else {\r
1021             /*\r
1022              * Otherwise:\r
1023              * There used to be code to enumerate the fromUnicode\r
1024              * trie and find the highest entry, but it was removed in ICU 3.2\r
1025              * because it was not tested and caused a low code coverage number.\r
1026              */\r
1027             throw new Exception("U_INVALID_FORMAT_ERROR");\r
1028         }\r
1029         \r
1030         /*\r
1031          * The table has an appropriate format.\r
1032          * Allocate and build\r
1033          * - a modified to-Unicode state table\r
1034          * - a modified from-Unicode output array\r
1035          * - a converter name string with the swap option appended\r
1036          */\r
1037 //        size = mbcsTable.countStates * 1024 + sizeofFromUBytes + UConverterConstants.MAX_CONVERTER_NAME_LENGTH + 20;\r
1038         \r
1039         /* copy and modify the to-Unicode state table */\r
1040         newStateTable = new int[mbcsTable.stateTable.length][mbcsTable.stateTable[0].length];\r
1041         for (int i = 0; i < newStateTable.length; i++) {\r
1042             System.arraycopy(mbcsTable.stateTable[i], 0, newStateTable[i], 0, newStateTable[i].length);\r
1043         }\r
1044         \r
1045         newStateTable[0][EBCDIC_LF] = MBCS_ENTRY_FINAL(0, MBCS_STATE_VALID_DIRECT_16, U_NL);\r
1046         newStateTable[0][EBCDIC_NL] = MBCS_ENTRY_FINAL(0, MBCS_STATE_VALID_DIRECT_16, U_LF);\r
1047         \r
1048         /* copy and modify the from-Unicode result table */\r
1049         newResults = new byte[sizeofFromUBytes];\r
1050         System.arraycopy(bytes, 0, newResults, 0, sizeofFromUBytes);\r
1051         /* conveniently, the table access macros work on the left side of expressions */\r
1052         if (mbcsTable.outputType == MBCS_OUTPUT_1) {\r
1053             MBCS_SINGLE_RESULT_FROM_U_SET(table, newResults, U_LF, EBCDIC_RT_NL);\r
1054             MBCS_SINGLE_RESULT_FROM_U_SET(table, newResults, U_NL, EBCDIC_RT_LF);\r
1055         } else /* MBCS_OUTPUT_2_SISO */ {\r
1056             stage2Entry = MBCS_STAGE_2_FROM_U(table, U_LF);\r
1057             MBCS_VALUE_2_FROM_STAGE_2_SET(newResults, stage2Entry, U_LF, EBCDIC_NL);\r
1058             \r
1059             stage2Entry = MBCS_STAGE_2_FROM_U(table, U_NL);\r
1060             MBCS_VALUE_2_FROM_STAGE_2_SET(newResults, stage2Entry, U_NL, EBCDIC_LF);\r
1061         }\r
1062         \r
1063         /* set the canonical converter name */\r
1064         newName = new String(icuCanonicalName);\r
1065         newName.concat(UConverterConstants.OPTION_SWAP_LFNL_STRING);\r
1066         \r
1067         if (mbcsTable.swapLFNLStateTable == null) {\r
1068             mbcsTable.swapLFNLStateTable = newStateTable;\r
1069             mbcsTable.swapLFNLFromUnicodeBytes = newResults;\r
1070             mbcsTable.swapLFNLName = newName;\r
1071         }\r
1072         return true;\r
1073     }\r
1074 \r
1075     /**\r
1076      * MBCS output types for conversions from Unicode. These per-converter types determine the storage method in stage 3\r
1077      * of the lookup table, mostly how many bytes are stored per entry.\r
1078      */\r
1079     static final int MBCS_OUTPUT_1 = 0; /* 0 */\r
1080     static final int MBCS_OUTPUT_2 = MBCS_OUTPUT_1 + 1; /* 1 */\r
1081     static final int MBCS_OUTPUT_3 = MBCS_OUTPUT_2 + 1; /* 2 */\r
1082     static final int MBCS_OUTPUT_4 = MBCS_OUTPUT_3 + 1; /* 3 */\r
1083     static final int MBCS_OUTPUT_3_EUC = 8; /* 8 */\r
1084     static final int MBCS_OUTPUT_4_EUC = MBCS_OUTPUT_3_EUC + 1; /* 9 */\r
1085     static final int MBCS_OUTPUT_2_SISO = 12; /* c */\r
1086     static final int MBCS_OUTPUT_2_HZ = MBCS_OUTPUT_2_SISO + 1; /* d */\r
1087     static final int MBCS_OUTPUT_EXT_ONLY = MBCS_OUTPUT_2_HZ + 1; /* e */\r
1088     // static final int MBCS_OUTPUT_COUNT = MBCS_OUTPUT_EXT_ONLY + 1;\r
1089     static final int MBCS_OUTPUT_DBCS_ONLY = 0xdb; /* runtime-only type for DBCS-only handling of SISO tables */\r
1090 \r
1091     /* GB 18030 data ------------------------------------------------------------ */\r
1092 \r
1093     /* helper macros for linear values for GB 18030 four-byte sequences */\r
1094     private static long LINEAR_18030(long a, long b, long c, long d) {\r
1095         return ((((a & 0xff) * 10 + (b & 0xff)) * 126L + (c & 0xff)) * 10L + (d & 0xff));\r
1096     }\r
1097 \r
1098     private static long LINEAR_18030_BASE = LINEAR_18030(0x81, 0x30, 0x81, 0x30);\r
1099 \r
1100     private static long LINEAR(long x) {\r
1101         return LINEAR_18030(x >>> 24, (x >>> 16) & 0xff, (x >>> 8) & 0xff, x & 0xff);\r
1102     }\r
1103 \r
1104     /*\r
1105      * Some ranges of GB 18030 where both the Unicode code points and the GB four-byte sequences are contiguous and are\r
1106      * handled algorithmically by the special callback functions below. The values are start & end of Unicode & GB\r
1107      * codes.\r
1108      * \r
1109      * Note that single surrogates are not mapped by GB 18030 as of the re-released mapping tables from 2000-nov-30.\r
1110      */\r
1111     private static final long gb18030Ranges[][] = new long[/* 13 */][/* 4 */] {\r
1112             { 0x10000L, 0x10FFFFL, LINEAR(0x90308130L), LINEAR(0xE3329A35L) },\r
1113             { 0x9FA6L, 0xD7FFL, LINEAR(0x82358F33L), LINEAR(0x8336C738L) },\r
1114             { 0x0452L, 0x200FL, LINEAR(0x8130D330L), LINEAR(0x8136A531L) },\r
1115             { 0xE865L, 0xF92BL, LINEAR(0x8336D030L), LINEAR(0x84308534L) },\r
1116             { 0x2643L, 0x2E80L, LINEAR(0x8137A839L), LINEAR(0x8138FD38L) },\r
1117             { 0xFA2AL, 0xFE2FL, LINEAR(0x84309C38L), LINEAR(0x84318537L) },\r
1118             { 0x3CE1L, 0x4055L, LINEAR(0x8231D438L), LINEAR(0x8232AF32L) },\r
1119             { 0x361BL, 0x3917L, LINEAR(0x8230A633L), LINEAR(0x8230F237L) },\r
1120             { 0x49B8L, 0x4C76L, LINEAR(0x8234A131L), LINEAR(0x8234E733L) },\r
1121             { 0x4160L, 0x4336L, LINEAR(0x8232C937L), LINEAR(0x8232F837L) },\r
1122             { 0x478EL, 0x4946L, LINEAR(0x8233E838L), LINEAR(0x82349638L) },\r
1123             { 0x44D7L, 0x464BL, LINEAR(0x8233A339L), LINEAR(0x8233C931L) },\r
1124             { 0xFFE6L, 0xFFFFL, LINEAR(0x8431A234L), LINEAR(0x8431A439L) } };\r
1125 \r
1126     /* bit flag for UConverter.options indicating GB 18030 special handling */\r
1127     private static final int MBCS_OPTION_GB18030 = 0x8000;\r
1128     \r
1129     /* bit flag for UConverter.options indicating KEIS,JEF,JIF special handling */ \r
1130     private static final int MBCS_OPTION_KEIS = 0x01000;\r
1131     private static final int MBCS_OPTION_JEF = 0x02000;\r
1132     private static final int MBCS_OPTION_JIPS = 0x04000; \r
1133     \r
1134     private static enum SISO_Option {\r
1135         SI,\r
1136         SO\r
1137     }\r
1138     \r
1139     private static final byte[] KEIS_SO_CHAR = { 0x0A, 0x42 };\r
1140     private static final byte[] KEIS_SI_CHAR = { 0x0A, 0x41 };\r
1141     private static final byte JEF_SO_CHAR = 0x28;\r
1142     private static final byte JEF_SI_CHAR = 0x29;\r
1143     private static final byte[] JIPS_SO_CHAR = { 0x1A, 0x70 };\r
1144     private static final byte[] JIPS_SI_CHAR = { 0x1A, 0x71 };\r
1145     \r
1146     private static int getSISOBytes(SISO_Option option, int cnvOption, byte[] value) {\r
1147         int SISOLength = 0;\r
1148 \r
1149         switch (option) {\r
1150             case SI:\r
1151                 if ((cnvOption&MBCS_OPTION_KEIS)!=0) {\r
1152                     value[0] = KEIS_SI_CHAR[0];\r
1153                     value[1] = KEIS_SI_CHAR[1];\r
1154                     SISOLength = 2;\r
1155                 } else if ((cnvOption&MBCS_OPTION_JEF)!=0) {\r
1156                     value[0] = JEF_SI_CHAR;\r
1157                     SISOLength = 1;\r
1158                 } else if ((cnvOption&MBCS_OPTION_JIPS)!=0) {\r
1159                     value[0] = JIPS_SI_CHAR[0];\r
1160                     value[1] = JIPS_SI_CHAR[1];\r
1161                     SISOLength = 2;\r
1162                 } else {\r
1163                     value[0] = UConverterConstants.SI;\r
1164                     SISOLength = 1;\r
1165                 }\r
1166                 break;\r
1167             case SO:\r
1168                 if ((cnvOption&MBCS_OPTION_KEIS)!=0) {\r
1169                     value[0] = KEIS_SO_CHAR[0];\r
1170                     value[1] = KEIS_SO_CHAR[1];\r
1171                     SISOLength = 2;\r
1172                 } else if ((cnvOption&MBCS_OPTION_JEF)!=0) {\r
1173                     value[0] = JEF_SO_CHAR;\r
1174                     SISOLength = 1;\r
1175                 } else if ((cnvOption&MBCS_OPTION_JIPS)!=0) {\r
1176                     value[0] = JIPS_SO_CHAR[0];\r
1177                     value[1] = JIPS_SO_CHAR[1];\r
1178                     SISOLength = 2;\r
1179                 } else {\r
1180                     value[0] = UConverterConstants.SO;\r
1181                     SISOLength = 1;\r
1182                 }\r
1183                 break;\r
1184             default:\r
1185                 /* Should never happen. */\r
1186                 break;\r
1187         }\r
1188 \r
1189         return SISOLength;\r
1190     }\r
1191     // enum {\r
1192         static final int MBCS_MAX_STATE_COUNT = 128;\r
1193     // };\r
1194     /**\r
1195      * MBCS action codes for conversions to Unicode. These values are in bits 23..20 of the state table entries.\r
1196      */\r
1197     static final int MBCS_STATE_VALID_DIRECT_16 = 0;\r
1198     static final int MBCS_STATE_VALID_DIRECT_20 = MBCS_STATE_VALID_DIRECT_16 + 1;\r
1199     static final int MBCS_STATE_FALLBACK_DIRECT_16 = MBCS_STATE_VALID_DIRECT_20 + 1;\r
1200     static final int MBCS_STATE_FALLBACK_DIRECT_20 = MBCS_STATE_FALLBACK_DIRECT_16 + 1;\r
1201     static final int MBCS_STATE_VALID_16 = MBCS_STATE_FALLBACK_DIRECT_20 + 1;\r
1202     static final int MBCS_STATE_VALID_16_PAIR = MBCS_STATE_VALID_16 + 1;\r
1203     static final int MBCS_STATE_UNASSIGNED = MBCS_STATE_VALID_16_PAIR + 1;\r
1204     static final int MBCS_STATE_ILLEGAL = MBCS_STATE_UNASSIGNED + 1;\r
1205     static final int MBCS_STATE_CHANGE_ONLY = MBCS_STATE_ILLEGAL + 1;\r
1206     \r
1207     static int MBCS_ENTRY_SET_STATE(int entry, int state) { \r
1208         return (entry&0x80ffffff)|(state<<24L);\r
1209     }\r
1210 \r
1211     static int MBCS_ENTRY_STATE(int entry) {\r
1212         return (((entry)>>24)&0x7f);\r
1213     }\r
1214 \r
1215     /* Methods for state table entries */\r
1216     static int MBCS_ENTRY_TRANSITION(int state, int offset) {\r
1217         return (state << 24L) | offset;\r
1218     }\r
1219 \r
1220     static int MBCS_ENTRY_FINAL(int state, int action, int value) {\r
1221         return 0x80000000 | (state << 24L) | (action << 20L) | value;\r
1222     }\r
1223 \r
1224     static boolean MBCS_ENTRY_IS_TRANSITION(int entry) {\r
1225         return (entry) >= 0;\r
1226     }\r
1227 \r
1228     static boolean MBCS_ENTRY_IS_FINAL(int entry) {\r
1229         return (entry) < 0;\r
1230     }\r
1231 \r
1232     static int MBCS_ENTRY_TRANSITION_STATE(int entry) {\r
1233         return ((entry) >>> 24);\r
1234     }\r
1235 \r
1236     static int MBCS_ENTRY_TRANSITION_OFFSET(int entry) {\r
1237         return ((entry) & 0xffffff);\r
1238     }\r
1239 \r
1240     static int MBCS_ENTRY_FINAL_STATE(int entry) {\r
1241         return ((entry) >>> 24) & 0x7f;\r
1242     }\r
1243 \r
1244     static boolean MBCS_ENTRY_FINAL_IS_VALID_DIRECT_16(int entry) {\r
1245         return ((entry) < 0x80100000);\r
1246     }\r
1247 \r
1248     static int MBCS_ENTRY_FINAL_ACTION(int entry) {\r
1249         return ((entry) >>> 20) & 0xf;\r
1250     }\r
1251 \r
1252     static int MBCS_ENTRY_FINAL_VALUE(int entry) {\r
1253         return ((entry) & 0xfffff);\r
1254     }\r
1255 \r
1256     static char MBCS_ENTRY_FINAL_VALUE_16(int entry) {\r
1257         return (char) (entry);\r
1258     }\r
1259     \r
1260     static boolean MBCS_IS_ASCII_ROUNDTRIP(int b, long asciiRoundtrips) {\r
1261         return (((asciiRoundtrips) & (1<<((b)>>2)))!=0);\r
1262     }\r
1263     \r
1264     /**\r
1265      * This macro version of _MBCSSingleSimpleGetNextUChar() gets a code point from a byte. It works for single-byte,\r
1266      * single-state codepages that only map to and from BMP code points, and it always returns fallback values.\r
1267      */\r
1268     static char MBCS_SINGLE_SIMPLE_GET_NEXT_BMP(UConverterMBCSTable mbcs, final int b) {\r
1269         return MBCS_ENTRY_FINAL_VALUE_16(mbcs.stateTable[0][b & UConverterConstants.UNSIGNED_BYTE_MASK]);\r
1270     }\r
1271 \r
1272     /* single-byte fromUnicode: get the 16-bit result word */\r
1273     static char MBCS_SINGLE_RESULT_FROM_U(char[] table, byte[] results, int c) {\r
1274         int i1 = table[c >>> 10] + ((c >>> 4) & 0x3f);\r
1275         int i = 2 * (table[i1] + (c & 0xf)); // used as index into byte[] array treated as char[] array\r
1276         return (char) (((results[i] & UConverterConstants.UNSIGNED_BYTE_MASK) << 8) | (results[i + 1] & UConverterConstants.UNSIGNED_BYTE_MASK));\r
1277     }\r
1278     \r
1279     /* single-byte fromUnicode: set the 16-bit result word with newValue*/\r
1280     static void MBCS_SINGLE_RESULT_FROM_U_SET(char[] table, byte[] results, int c, int newValue) {\r
1281         int i1 = table[c >>> 10] + ((c >>> 4) & 0x3f);\r
1282         int i = 2 * (table[i1] + (c & 0xf)); // used as index into byte[] array treated as char[] array\r
1283         results[i] = (byte)((newValue >> 8) & UConverterConstants.UNSIGNED_BYTE_MASK);\r
1284         results[i + 1] =  (byte)(newValue & UConverterConstants.UNSIGNED_BYTE_MASK);\r
1285     }\r
1286 \r
1287     /* multi-byte fromUnicode: get the 32-bit stage 2 entry */\r
1288     static int MBCS_STAGE_2_FROM_U(char[] table, int c) {\r
1289         int i = 2 * (table[(c) >>> 10] + ((c >>> 4) & 0x3f)); // 2x because used as index into char[] array treated as\r
1290         // int[] array\r
1291         return ((table[i] & UConverterConstants.UNSIGNED_SHORT_MASK) << 16)\r
1292                 | (table[i + 1] & UConverterConstants.UNSIGNED_SHORT_MASK);\r
1293     }\r
1294 \r
1295     private static boolean MBCS_FROM_U_IS_ROUNDTRIP(int stage2Entry, int c) {\r
1296         return (((stage2Entry) & (1 << (16 + ((c) & 0xf)))) != 0);\r
1297     }\r
1298 \r
1299     static char MBCS_VALUE_2_FROM_STAGE_2(byte[] bytes, int stage2Entry, int c) {\r
1300         int i = 2 * (16 * ((char) stage2Entry & UConverterConstants.UNSIGNED_SHORT_MASK) + (c & 0xf));\r
1301         return (char) (((bytes[i] & UConverterConstants.UNSIGNED_BYTE_MASK) << 8) | (bytes[i + 1] & UConverterConstants.UNSIGNED_BYTE_MASK));\r
1302     }\r
1303     \r
1304     static void MBCS_VALUE_2_FROM_STAGE_2_SET(byte[] bytes, int stage2Entry, int c, int newValue) {\r
1305         int i = 2 * (16 * ((char) stage2Entry & UConverterConstants.UNSIGNED_SHORT_MASK) + (c & 0xf));\r
1306         bytes[i] = (byte)((newValue >> 8) & UConverterConstants.UNSIGNED_BYTE_MASK);\r
1307         bytes[i + 1] = (byte)(newValue & UConverterConstants.UNSIGNED_BYTE_MASK);\r
1308     }\r
1309 \r
1310     private static int MBCS_VALUE_4_FROM_STAGE_2(byte[] bytes, int stage2Entry, int c) {\r
1311         int i = 4 * (16 * ((char) stage2Entry & UConverterConstants.UNSIGNED_SHORT_MASK) + (c & 0xf));\r
1312         return ((bytes[i] & UConverterConstants.UNSIGNED_BYTE_MASK) << 24)\r
1313                 | ((bytes[i + 1] & UConverterConstants.UNSIGNED_BYTE_MASK) << 16)\r
1314                 | ((bytes[i + 2] & UConverterConstants.UNSIGNED_BYTE_MASK) << 8)\r
1315                 | (bytes[i + 3] & UConverterConstants.UNSIGNED_BYTE_MASK);\r
1316     }\r
1317 \r
1318     static int MBCS_POINTER_3_FROM_STAGE_2(byte[] bytes, int stage2Entry, int c) {\r
1319         return ((16 * ((char) (stage2Entry) & UConverterConstants.UNSIGNED_SHORT_MASK) + ((c) & 0xf)) * 3);\r
1320     }\r
1321 \r
1322     // ------------UConverterExt-------------------------------------------------------\r
1323 \r
    /*
     * Positions of the entries in the extension indexes array. Each constant is the previous one plus 1,
     * so the values run consecutively from 0. ARRAY() and matchToU() read the array at these positions
     * as 32-bit values.
     */
    static final int EXT_INDEXES_LENGTH = 0; /* 0 */

    static final int EXT_TO_U_INDEX = EXT_INDEXES_LENGTH + 1; /* 1 */
    static final int EXT_TO_U_LENGTH = EXT_TO_U_INDEX + 1;
    static final int EXT_TO_U_UCHARS_INDEX = EXT_TO_U_LENGTH + 1;
    static final int EXT_TO_U_UCHARS_LENGTH = EXT_TO_U_UCHARS_INDEX + 1;

    static final int EXT_FROM_U_UCHARS_INDEX = EXT_TO_U_UCHARS_LENGTH + 1; /* 5 */
    static final int EXT_FROM_U_VALUES_INDEX = EXT_FROM_U_UCHARS_INDEX + 1;
    static final int EXT_FROM_U_LENGTH = EXT_FROM_U_VALUES_INDEX + 1;
    static final int EXT_FROM_U_BYTES_INDEX = EXT_FROM_U_LENGTH + 1;
    static final int EXT_FROM_U_BYTES_LENGTH = EXT_FROM_U_BYTES_INDEX + 1;

    static final int EXT_FROM_U_STAGE_12_INDEX = EXT_FROM_U_BYTES_LENGTH + 1; /* 10 */
    static final int EXT_FROM_U_STAGE_1_LENGTH = EXT_FROM_U_STAGE_12_INDEX + 1;
    static final int EXT_FROM_U_STAGE_12_LENGTH = EXT_FROM_U_STAGE_1_LENGTH + 1;
    static final int EXT_FROM_U_STAGE_3_INDEX = EXT_FROM_U_STAGE_12_LENGTH + 1;
    static final int EXT_FROM_U_STAGE_3_LENGTH = EXT_FROM_U_STAGE_3_INDEX + 1;
    static final int EXT_FROM_U_STAGE_3B_INDEX = EXT_FROM_U_STAGE_3_LENGTH + 1;
    static final int EXT_FROM_U_STAGE_3B_LENGTH = EXT_FROM_U_STAGE_3B_INDEX + 1;

    /* read by GET_MAX_BYTES_PER_UCHAR(), which uses only the low 8 bits of this entry */
    private static final int EXT_COUNT_BYTES = EXT_FROM_U_STAGE_3B_LENGTH + 1; /* 17 */
    /* the remaining index positions are currently unused and therefore commented out */
    // private static final int EXT_COUNT_UCHARS = EXT_COUNT_BYTES + 1;
    // private static final int EXT_FLAGS = EXT_COUNT_UCHARS + 1;
    //
    // private static final int EXT_RESERVED_INDEX = EXT_FLAGS + 1; /* 20, moves with additional indexes */
    //
    // private static final int EXT_SIZE=31;
    // private static final int EXT_INDEXES_MIN_LENGTH=32;

    /* NOTE(review): same value as the private FROM_U_MAX_DIRECT_LENGTH below - presumably the same limit; confirm */
    static final int EXT_FROM_U_MAX_DIRECT_LENGTH = 3;
1355 \r
1356     /* toUnicode helpers -------------------------------------------------------- */\r
1357 \r
    private static final int TO_U_BYTE_SHIFT = 24; // the byte of a toU word sits above this bit (TO_U_GET_BYTE, TO_U_MAKE_WORD)
    private static final int TO_U_VALUE_MASK = 0xffffff; // low 24 bits of a toU word (TO_U_GET_VALUE)
    private static final int TO_U_MIN_CODE_POINT = 0x1f0000; // values below this are partial-match indexes (TO_U_IS_PARTIAL)
    private static final int TO_U_MAX_CODE_POINT = 0x2fffff; // values up to this are code points (TO_U_IS_CODE_POINT)
    private static final int TO_U_ROUNDTRIP_FLAG = (1 << 23); // bit 23 of a value (TO_U_IS_ROUNDTRIP)
    private static final int TO_U_INDEX_MASK = 0x3ffff; // low 18 bits of a string value (TO_U_GET_INDEX)
    private static final int TO_U_LENGTH_SHIFT = 18; // the string length field starts at this bit (TO_U_GET_LENGTH)
    private static final int TO_U_LENGTH_OFFSET = 12; // subtracted from the shifted length field (TO_U_GET_LENGTH)

    /* maximum number of indexed UChars */
    static final int MAX_UCHARS = 19;
1369 \r
1370     static int TO_U_GET_BYTE(int word) {\r
1371         return word >>> TO_U_BYTE_SHIFT;\r
1372     }\r
1373 \r
1374     static int TO_U_GET_VALUE(int word) {\r
1375         return word & TO_U_VALUE_MASK;\r
1376     }\r
1377 \r
1378     static boolean TO_U_IS_ROUNDTRIP(int value) {\r
1379         return (value & TO_U_ROUNDTRIP_FLAG) != 0;\r
1380     }\r
1381 \r
1382     static boolean TO_U_IS_PARTIAL(int value) {\r
1383         return (value & UConverterConstants.UNSIGNED_INT_MASK) < TO_U_MIN_CODE_POINT;\r
1384     }\r
1385 \r
1386     static int TO_U_GET_PARTIAL_INDEX(int value) {\r
1387         return value;\r
1388     }\r
1389 \r
1390     static int TO_U_MASK_ROUNDTRIP(int value) {\r
1391         return value & ~TO_U_ROUNDTRIP_FLAG;\r
1392     }\r
1393 \r
1394     private static int TO_U_MAKE_WORD(byte b, int value) {\r
1395         return ((b & UConverterConstants.UNSIGNED_BYTE_MASK) << TO_U_BYTE_SHIFT) | value;\r
1396     }\r
1397 \r
1398     /* use after masking off the roundtrip flag */\r
1399     static boolean TO_U_IS_CODE_POINT(int value) {\r
1400         return (value & UConverterConstants.UNSIGNED_INT_MASK) <= TO_U_MAX_CODE_POINT;\r
1401     }\r
1402 \r
1403     static int TO_U_GET_CODE_POINT(int value) {\r
1404         return (int) ((value & UConverterConstants.UNSIGNED_INT_MASK) - TO_U_MIN_CODE_POINT);\r
1405     }\r
1406 \r
1407     private static int TO_U_GET_INDEX(int value) {\r
1408         return value & TO_U_INDEX_MASK;\r
1409     }\r
1410 \r
1411     private static int TO_U_GET_LENGTH(int value) {\r
1412         return (value >>> TO_U_LENGTH_SHIFT) - TO_U_LENGTH_OFFSET;\r
1413     }\r
1414 \r
1415     /* fromUnicode helpers ------------------------------------------------------ */\r
1416 \r
1417     /* most trie constants are shared with ucnvmbcs.h */\r
    private static final int STAGE_2_LEFT_SHIFT = 2; // FROM_U() shifts each stage 2 entry left by this amount
1419 \r
1420     // private static final int STAGE_3_GRANULARITY = 4;\r
1421 \r
1422     /* trie access, returns the stage 3 value=index to stage 3b; s1Index=c>>10 */\r
1423     static int FROM_U(CharBuffer stage12, CharBuffer stage3, int s1Index, int c) {\r
1424         return stage3.get(((int) stage12.get((stage12.get(s1Index) + ((c >>> 4) & 0x3f))) << STAGE_2_LEFT_SHIFT)\r
1425                 + (c & 0xf));\r
1426     }\r
1427 \r
    private static final int FROM_U_LENGTH_SHIFT = 24; // the length field starts at this bit (FROM_U_GET_LENGTH)
    private static final int FROM_U_ROUNDTRIP_FLAG = 1 << 31; // bit 31 of a value (FROM_U_IS_ROUNDTRIP)
    static final int FROM_U_RESERVED_MASK = 0x60000000; // bits 29 and 30
    private static final int FROM_U_DATA_MASK = 0xffffff; // low 24 bits of a value (FROM_U_GET_DATA)

    /* special value for "no mapping" to <subchar1> (impossible roundtrip to 0 bytes, value 01) */
    static final int FROM_U_SUBCHAR1 = 0x80000001;

    /* at most 3 bytes in the lower part of the value */
    private static final int FROM_U_MAX_DIRECT_LENGTH = 3;

    /* maximum number of indexed bytes */
    static final int MAX_BYTES = 0x1f; // also used as the mask in FROM_U_GET_LENGTH
1441 \r
1442     static boolean FROM_U_IS_PARTIAL(int value) {\r
1443         return (value >>> FROM_U_LENGTH_SHIFT) == 0;\r
1444     }\r
1445 \r
1446     static int FROM_U_GET_PARTIAL_INDEX(int value) {\r
1447         return value;\r
1448     }\r
1449 \r
1450     static boolean FROM_U_IS_ROUNDTRIP(int value) {\r
1451         return (value & FROM_U_ROUNDTRIP_FLAG) != 0;\r
1452     }\r
1453 \r
1454     private static int FROM_U_MASK_ROUNDTRIP(int value) {\r
1455         return value & ~FROM_U_ROUNDTRIP_FLAG;\r
1456     }\r
1457 \r
1458     /* use after masking off the roundtrip flag */\r
1459     static int FROM_U_GET_LENGTH(int value) {\r
1460         return (value >>> FROM_U_LENGTH_SHIFT) & MAX_BYTES;\r
1461     }\r
1462 \r
1463     /* get bytes or bytes index */\r
1464     static int FROM_U_GET_DATA(int value) {\r
1465         return value & FROM_U_DATA_MASK;\r
1466     }\r
1467 \r
1468     /* get the pointer to an extension array from indexes[index] */\r
1469     static Buffer ARRAY(ByteBuffer indexes, int index, Class<?> itemType) {\r
1470         int oldpos = indexes.position();\r
1471         Buffer b;\r
1472 \r
1473         indexes.position(indexes.getInt(index << 2));\r
1474         if (itemType == int.class)\r
1475             b = indexes.asIntBuffer();\r
1476         else if (itemType == char.class)\r
1477             b = indexes.asCharBuffer();\r
1478         else if (itemType == short.class)\r
1479             b = indexes.asShortBuffer();\r
1480         else\r
1481             // default or (itemType == byte.class)\r
1482             b = indexes.slice();\r
1483         indexes.position(oldpos);\r
1484         return b;\r
1485     }\r
1486 \r
1487     private static int GET_MAX_BYTES_PER_UCHAR(ByteBuffer indexes) {\r
1488         indexes.position(0);\r
1489         return indexes.getInt(EXT_COUNT_BYTES) & 0xff;\r
1490     }\r
1491 \r
1492     /*\r
1493      * @return index of the UChar, if found; else <0\r
1494      */\r
1495     static int findFromU(CharBuffer fromUSection, int length, char u) {\r
1496         int i, start, limit;\r
1497 \r
1498         /* binary search */\r
1499         start = 0;\r
1500         limit = length;\r
1501         for (;;) {\r
1502             i = limit - start;\r
1503             if (i <= 1) {\r
1504                 break; /* done */\r
1505             }\r
1506             /* start<limit-1 */\r
1507 \r
1508             if (i <= 4) {\r
1509                 /* linear search for the last part */\r
1510                 if (u <= fromUSection.get(fromUSection.position() + start)) {\r
1511                     break;\r
1512                 }\r
1513                 if (++start < limit && u <= fromUSection.get(fromUSection.position() + start)) {\r
1514                     break;\r
1515                 }\r
1516                 if (++start < limit && u <= fromUSection.get(fromUSection.position() + start)) {\r
1517                     break;\r
1518                 }\r
1519                 /* always break at start==limit-1 */\r
1520                 ++start;\r
1521                 break;\r
1522             }\r
1523 \r
1524             i = (start + limit) / 2;\r
1525             if (u < fromUSection.get(fromUSection.position() + i)) {\r
1526                 limit = i;\r
1527             } else {\r
1528                 start = i;\r
1529             }\r
1530         }\r
1531 \r
1532         /* did we really find it? */\r
1533         if (start < limit && u == fromUSection.get(fromUSection.position() + start)) {\r
1534             return start;\r
1535         } else {\r
1536             return -1; /* not found */\r
1537         }\r
1538     }\r
1539 \r
1540     /*\r
1541      * @return lookup value for the byte, if found; else 0\r
1542      */\r
    static int findToU(IntBuffer toUSection, int length, short byt) {
        /* Searches the length words starting at toUSection's current position for the one whose byte part is byt. */
        long word0, word;
        int i, start, limit;

        /* check the input byte against the lowest and highest section bytes */
        // agljport:comment instead of receiving a start position parameter for toUSection we'll rely on its position
        // property
        /* here start and limit temporarily hold the byte parts of the first and last section words */
        start = TO_U_GET_BYTE(toUSection.get(toUSection.position()));
        limit = TO_U_GET_BYTE(toUSection.get(toUSection.position() + length - 1));
        if (byt < start || limit < byt) {
            return 0; /* the byte is out of range */
        }

        /* as many words as byte values between first and last: the byte value can be used as the offset */
        if (length == ((limit - start) + 1)) {
            /* direct access on a linear array */
            return TO_U_GET_VALUE(toUSection.get(toUSection.position() + byt - start)); /* could be 0 */
        }

        /* word0 is suitable for <=toUSection[] comparison, word for <toUSection[] */
        word0 = TO_U_MAKE_WORD((byte) byt, 0) & UConverterConstants.UNSIGNED_INT_MASK;

        /*
         * Shift byte once instead of each section word and add 0xffffff. We will compare the shifted/added byte
         * (bbffffff) against section words which have byte values in the same bit position. If and only if byte bb <
         * section byte ss then bbffffff<ssvvvvvv for all v=0..f so we need not mask off the lower 24 bits of each
         * section word.
         */
        word = word0 | TO_U_VALUE_MASK;

        /* binary search */
        /* from here on start and limit are word offsets relative to the section's position */
        start = 0;
        limit = length;
        for (;;) {
            i = limit - start;
            if (i <= 1) {
                break; /* done */
            }
            /* start<limit-1 */

            if (i <= 4) {
                /* linear search for the last part */
                /* section words are masked with UNSIGNED_INT_MASK before being compared with the long word0 */
                if (word0 <= (toUSection.get(toUSection.position() + start) & UConverterConstants.UNSIGNED_INT_MASK)) {
                    break;
                }
                if (++start < limit
                        && word0 <= (toUSection.get(toUSection.position() + start) & UConverterConstants.UNSIGNED_INT_MASK)) {
                    break;
                }
                if (++start < limit
                        && word0 <= (toUSection.get(toUSection.position() + start) & UConverterConstants.UNSIGNED_INT_MASK)) {
                    break;
                }
                /* always break at start==limit-1 */
                ++start;
                break;
            }

            /* halve the window: keep the half that can still contain the byte */
            i = (start + limit) / 2;
            if (word < (toUSection.get(toUSection.position() + i) & UConverterConstants.UNSIGNED_INT_MASK)) {
                limit = i;
            } else {
                start = i;
            }
        }

        /* did we really find it? */
        if (start < limit) {
            /* word is reused here to hold the candidate section word */
            word = (toUSection.get(toUSection.position() + start) & UConverterConstants.UNSIGNED_INT_MASK);
            if (byt == TO_U_GET_BYTE((int)word)) {
                return TO_U_GET_VALUE((int) word); /* never 0 */
            }
        } 
        return 0; /* not found */
    }
1617 \r
1618     /*\r
1619      * TRUE if not an SI/SO stateful converter, or if the match length fits with the current converter state\r
1620      */\r
1621     static boolean TO_U_VERIFY_SISO_MATCH(byte sisoState, int match) {\r
1622         return sisoState < 0 || (sisoState == 0) == (match == 1);\r
1623     }\r
1624 \r
1625     /*\r
1626      * get the SI/SO toU state (state 0 is for SBCS, 1 for DBCS), or 1 for DBCS-only, or -1 if the converter is not\r
1627      * SI/SO stateful\r
1628      * \r
1629      * Note: For SI/SO stateful converters getting here, cnv->mode==0 is equivalent to firstLength==1.\r
1630      */\r
1631     private static int SISO_STATE(UConverterSharedData sharedData, int mode) {\r
1632         return sharedData.mbcs.outputType == MBCS_OUTPUT_2_SISO ? (byte) mode\r
1633                 : sharedData.mbcs.outputType == MBCS_OUTPUT_DBCS_ONLY ? 1 : -1;\r
1634     }\r
1635 \r
1636     class CharsetDecoderMBCS extends CharsetDecoderICU {\r
1637 \r
        /**
         * Creates a decoder for the given charset; all initialization is delegated to the superclass constructor.
         *
         * @param cs the charset handed to {@code super}
         */
        CharsetDecoderMBCS(CharsetICU cs) {
            super(cs);
        }
1641 \r
1642         protected CoderResult decodeLoop(ByteBuffer source, CharBuffer target, IntBuffer offsets, boolean flush) {\r
1643         /* Just call cnvMBCSToUnicodeWithOffsets() to remove duplicate code. */\r
1644             return cnvMBCSToUnicodeWithOffsets(source, target, offsets, flush);\r
1645         }\r
1646 \r
        /*
         * Continue a partial match with new input. Never called for simple, single-character conversion.
         */
        private CoderResult continueMatchToU(ByteBuffer source, CharBuffer target, IntBuffer offsets, int srcIndex,
                boolean flush) {
            /*
             * Runs matchToU() over the bytes saved in preToUArray followed by the new source bytes. Depending on
             * the outcome it writes the result (match>0), saves more input (match<0), or reports the first saved
             * character as unmappable (match==0).
             */
            CoderResult cr = CoderResult.UNDERFLOW;

            /* one-element array that receives the matched value from matchToU() */
            int[] value = new int[1];
            int match, length;

            match = matchToU((byte) SISO_STATE(sharedData, mode), preToUArray, preToUBegin, preToULength, source,
                    value, isToUUseFallback(), flush);

            if (match > 0) {
                if (match >= preToULength) {
                    /* advance src pointer for the consumed input */
                    /* only the bytes beyond the saved ones come from source */
                    source.position(source.position() + match - preToULength);
                    preToULength = 0;
                } else {
                    /* the match did not use all of preToU[] - keep the rest for replay */
                    length = preToULength - match;
                    System.arraycopy(preToUArray, preToUBegin + match, preToUArray, preToUBegin, length);
                    /* a negative length marks the remaining bytes for replay */
                    preToULength = (byte) -length;
                }

                /* write result */
                cr = writeToU(value[0], target, offsets, srcIndex);
            } else if (match < 0) {
                /* save state for partial match */
                int j, sArrayIndex;

                /* just _append_ the newly consumed input to preToU[] */
                sArrayIndex = source.position();
                match = -match;
                /* NOTE(review): the index j is used without adding preToUBegin, unlike the copies above and below - confirm preToUBegin is 0 here */
                for (j = preToULength; j < match; ++j) {
                    preToUArray[j] = source.get(sArrayIndex++);
                }
                source.position(sArrayIndex); /* same as *src=srcLimit; because we reached the end of input */
                preToULength = (byte) match;
            } else /* match==0 */{
                /*
                 * no match
                 * 
                 * We need to split the previous input into two parts:
                 * 
                 * 1. The first codepage character is unmappable - that's how we got into trying the extension data in
                 * the first place. We need to move it from the preToU buffer to the error buffer, set an error code,
                 * and prepare the rest of the previous input for 2.
                 * 
                 * 2. The rest of the previous input must be converted once we come back from the callback for the first
                 * character. At that time, we have to try again from scratch to convert these input characters. The
                 * replay will be handled by the ucnv.c conversion code.
                 */

                /* move the first codepage character to the error field */
                System.arraycopy(preToUArray, preToUBegin, toUBytesArray, toUBytesBegin, preToUFirstLength);
                toULength = preToUFirstLength;

                /* move the rest up inside the buffer */
                length = preToULength - preToUFirstLength;
                if (length > 0) {
                    System.arraycopy(preToUArray, preToUBegin + preToUFirstLength, preToUArray, preToUBegin, length);
                }

                /* mark preToU for replay */
                preToULength = (byte) -length;

                /* set the error code for unassigned */
                cr = CoderResult.unmappableForLength(preToUFirstLength);
            }
            return cr;
        }
1719 \r
        /*
         * This works like matchFromU() except:
         * - the first character is in pre
         * - no trie is used
         * - the returned matchLength is not offset by 2
         */
        private int matchToU(byte sisoState, byte[] preArray, int preArrayBegin, int preLength, ByteBuffer source,
                int[] pMatchValue, boolean isUseFallback, boolean flush) {
            /*
             * Matches the bytes preArray[preArrayBegin..] followed by the remaining bytes of source against the
             * toUnicode extension table.
             *
             * Returns 0 if there is no extension data or no match; a positive match length, with the matched
             * value (roundtrip flag cleared) stored in pMatchValue[0]; or the negated number of bytes consumed
             * when the input ran out in the middle of a possible match and flush is false.
             * The position of source is not changed here; its bytes are read with absolute gets.
             */
            ByteBuffer cx = sharedData.mbcs.extIndexes;
            IntBuffer toUTable, toUSection;

            int value, matchValue, srcLength = 0;
            /* i counts the bytes taken from preArray, j the bytes taken from source */
            int i, j, index, length, matchLength;
            short b;

            if (cx == null || cx.asIntBuffer().get(EXT_TO_U_LENGTH) <= 0) {
                return 0; /* no extension data, no match */
            }

            /* initialize */
            toUTable = (IntBuffer) ARRAY(cx, EXT_TO_U_INDEX, int.class);
            index = 0;

            matchValue = 0;
            i = j = matchLength = 0;
            /* a null source contributes no bytes: srcLength stays 0 */
            if (source != null) { 
                srcLength = source.remaining();
            }

            if (sisoState == 0) {
                /* SBCS state of an SI/SO stateful converter, look at only exactly 1 byte */
                if (preLength > 1) {
                    return 0; /* no match of a DBCS sequence in SBCS mode */
                } else if (preLength == 1) {
                    srcLength = 0;
                } else /* preLength==0 */{
                    if (srcLength > 1) {
                        srcLength = 1;
                    }
                }
                /* with flush set, running out of input below never returns a partial match */
                flush = true;
            }

            /* we must not remember fallback matches when not using fallbacks */

            /* match input units until there is a full match or the input is consumed */
            for (;;) {
                /* go to the next section */
                /* slice the table at index, then restore the table's own position */
                int oldpos = toUTable.position();
                toUSection = ((IntBuffer) toUTable.position(index)).slice();
                toUTable.position(oldpos);

                /* read first pair of the section */
                /* the relative get() moves toUSection past this first word; findToU() starts at the new position */
                value = toUSection.get();
                length = TO_U_GET_BYTE(value);
                value = TO_U_GET_VALUE(value);
                if (value != 0 && (TO_U_IS_ROUNDTRIP(value) || isToUUseFallback(isUseFallback))
                        && TO_U_VERIFY_SISO_MATCH(sisoState, i + j)) {
                    /* remember longest match so far */
                    matchValue = value;
                    matchLength = i + j;
                }

                /* match pre[] then src[] */
                if (i < preLength) {
                    b = (short) (preArray[preArrayBegin + i++] & UConverterConstants.UNSIGNED_BYTE_MASK);
                } else if (j < srcLength) {
                    b = (short) (source.get(source.position() + j++) & UConverterConstants.UNSIGNED_BYTE_MASK);
                } else {
                    /* all input consumed, partial match */
                    /* length is reassigned to the number of bytes consumed, but only when flush is false */
                    if (flush || (length = (i + j)) > MAX_BYTES) {
                        /*
                         * end of the entire input stream, stop with the longest match so far or: partial match must not
                         * be longer than UCNV_EXT_MAX_BYTES because it must fit into state buffers
                         */
                        break;
                    } else {
                        /* continue with more input next time */
                        return -length;
                    }
                }

                /* search for the current UChar */
                value = findToU(toUSection, length, b);
                if (value == 0) {
                    /* no match here, stop with the longest match so far */
                    break;
                } else {
                    if (TO_U_IS_PARTIAL(value)) {
                        /* partial match, continue */
                        index = TO_U_GET_PARTIAL_INDEX(value);
                    } else {
                        if ((TO_U_IS_ROUNDTRIP(value) || isToUUseFallback(isUseFallback)) && TO_U_VERIFY_SISO_MATCH(sisoState, i + j)) {
                            /* full match, stop with result */
                            matchValue = value;
                            matchLength = i + j;
                        } else {
                            /* full match on fallback not taken, stop with the longest match so far */
                        }
                        break;
                    }
                }
            }

            if (matchLength == 0) {
                /* no match at all */
                return 0;
            }

            /* return result */
            pMatchValue[0] = TO_U_MASK_ROUNDTRIP(matchValue);
            return matchLength;
        }
1831 \r
1832         private CoderResult writeToU(int value, CharBuffer target, IntBuffer offsets, int srcIndex) {\r
1833             ByteBuffer cx = sharedData.mbcs.extIndexes;\r
1834             /* output the result */\r
1835             if (TO_U_IS_CODE_POINT(value)) {\r
1836                 /* output a single code point */\r
1837                 return toUWriteCodePoint(TO_U_GET_CODE_POINT(value), target, offsets, srcIndex);\r
1838             } else {\r
1839                 /* output a string - with correct data we have resultLength>0 */\r
1840 \r
1841                 char[] a = new char[TO_U_GET_LENGTH(value)];\r
1842                 CharBuffer cb = ((CharBuffer) ARRAY(cx, EXT_TO_U_UCHARS_INDEX, char.class));\r
1843                 cb.position(TO_U_GET_INDEX(value));\r
1844                 cb.get(a, 0, a.length);\r
1845                 return toUWriteUChars(this, a, 0, a.length, target, offsets, srcIndex);\r
1846             }\r
1847         }\r
1848 \r
1849         private CoderResult toUWriteCodePoint(int c, CharBuffer target, IntBuffer offsets, int sourceIndex) {\r
1850             CoderResult cr = CoderResult.UNDERFLOW;\r
1851             int tBeginIndex = target.position();\r
1852 \r
1853             if (target.hasRemaining()) {\r
1854                 if (c <= 0xffff) {\r
1855                     target.put((char) c);\r
1856                     c = UConverterConstants.U_SENTINEL;\r
1857                 } else /* c is a supplementary code point */{\r
1858                     target.put(UTF16.getLeadSurrogate(c));\r
1859                     c = UTF16.getTrailSurrogate(c);\r
1860                     if (target.hasRemaining()) {\r
1861                         target.put((char) c);\r
1862                         c = UConverterConstants.U_SENTINEL;\r
1863                     }\r
1864                 }\r
1865 \r
1866                 /* write offsets */\r
1867                 if (offsets != null) {\r
1868                     offsets.put(sourceIndex);\r
1869                     if ((tBeginIndex + 1) < target.position()) {\r
1870                         offsets.put(sourceIndex);\r
1871                     }\r
1872                 }\r
1873             }\r
1874 \r
1875             /* write overflow from c */\r
1876             if (c >= 0) {\r
1877                 charErrorBufferLength = UTF16.append(charErrorBufferArray, 0, c);\r
1878                 cr = CoderResult.OVERFLOW;\r
1879             }\r
1880 \r
1881             return cr;\r
1882         }\r
1883 \r
        /*
         * Input sequence: cnv->toUBytes[0..length[
         *
         * @return if(U_FAILURE) return the length (toULength, byteIndex) for the input,
         *         else return 0 after output has been written to the target
         */
1888         private int toU(int length, ByteBuffer source, CharBuffer target, IntBuffer offsets, int sourceIndex,\r
1889                 boolean flush, CoderResult[] cr) {\r
1890             // ByteBuffer cx;\r
1891 \r
1892             if (sharedData.mbcs.extIndexes != null\r
1893                     && initialMatchToU(length, source, target, offsets, sourceIndex, flush, cr)) {\r
1894                 return 0; /* an extension mapping handled the input */\r
1895             }\r
1896 \r
1897             /* GB 18030 */\r
1898             if (length == 4 && (options & MBCS_OPTION_GB18030) != 0) {\r
1899                 long[] range;\r
1900                 long linear;\r
1901                 int i;\r
1902 \r
1903                 linear = LINEAR_18030(toUBytesArray[0], toUBytesArray[1], toUBytesArray[2], toUBytesArray[3]);\r
1904                 for (i = 0; i < gb18030Ranges.length; ++i) {\r
1905                     range = gb18030Ranges[i];\r
1906                     if (range[2] <= linear && linear <= range[3]) {\r
1907                         /* found the sequence, output the Unicode code point for it */\r
1908                         cr[0] = CoderResult.UNDERFLOW;\r
1909 \r
1910                         /* add the linear difference between the input and start sequences to the start code point */\r
1911                         linear = range[0] + (linear - range[2]);\r
1912 \r
1913                         /* output this code point */\r
1914                         cr[0] = toUWriteCodePoint((int) linear, target, offsets, sourceIndex);\r
1915 \r
1916                         return 0;\r
1917                     }\r
1918                 }\r
1919             }\r
1920 \r
1921             /* no mapping */\r
1922             cr[0] = CoderResult.unmappableForLength(length);\r
1923             return length;\r
1924         }\r
1925 \r
1926         /*\r
1927          * Runs matchToU() on the firstLength bytes at toUBytesArray[toUBytesBegin..] plus whatever follows in\r
              * source, and acts on the sign of its result:\r
              *   match > 0: complete match. source's position is advanced by (match - firstLength), the result\r
              *              is written with writeToU() and cr[0] receives that call's CoderResult. Returns true.\r
              *   match < 0: partial match of -match bytes. The firstLength bytes and the newly consumed source\r
              *              bytes are saved in preToUArray, preToUFirstLength/preToULength are set, and source's\r
              *              position is moved past the consumed bytes. cr[0] is not modified. Returns true.\r
              *   match == 0: no match. Nothing is modified. Returns false.\r
1928          */\r
1929         private boolean initialMatchToU(int firstLength, ByteBuffer source, CharBuffer target, IntBuffer offsets,\r
1930                 int srcIndex, boolean flush, CoderResult[] cr) {\r
1931             int[] value = new int[1];\r
1932             int match = 0;\r
1933 \r
1934             /* try to match */\r
1935             match = matchToU((byte) SISO_STATE(sharedData, mode), toUBytesArray, toUBytesBegin, firstLength, source,\r
1936                     value, isToUUseFallback(), flush);\r
1937             if (match > 0) {\r
1938                 /* advance src pointer for the consumed input */\r
1939                 source.position(source.position() + match - firstLength);\r
1940 \r
1941                 /* write result to target */\r
1942                 cr[0] = writeToU(value[0], target, offsets, srcIndex);\r
1943                 return true;\r
1944             } else if (match < 0) {\r
1945                 /* save state for partial match */\r
1946                 byte[] sArray;\r
1947                 int sArrayIndex;\r
1948                 int j;\r
1949 \r
1950                 /* copy the first code point */\r
1951                 sArray = toUBytesArray;\r
1952                 sArrayIndex = toUBytesBegin;\r
1953                 preToUFirstLength = (byte) firstLength;\r
1954                 for (j = 0; j < firstLength; ++j) {\r
1955                     preToUArray[j] = sArray[sArrayIndex++];\r
1956                 }\r
1957 \r
1958                 /* now copy the newly consumed input */\r
                     // absolute get(index) is used, so source's position only changes at the position() call below\r
1959                 sArrayIndex = source.position();\r
1960                 match = -match;\r
1961                 for (; j < match; ++j) {\r
1962                     preToUArray[j] = source.get(sArrayIndex++);\r
1963                 }\r
1964                 source.position(sArrayIndex);\r
1965                 preToULength = (byte) match;\r
1966                 return true;\r
1967             } else /* match==0 no match */{\r
1968                 return false;\r
1969             }\r
1970         }\r
1971 \r
1972         private int simpleMatchToU(ByteBuffer source, boolean useFallback) {\r
                 // Looks up ALL remaining bytes of source as one extension mapping to one code point.\r
                 // Returns:\r
                 //   0xffff  - source has no remaining bytes\r
                 //   a code point - matchToU() consumed exactly source.remaining() bytes and the match value is\r
                 //                  a code point (TO_U_IS_CODE_POINT)\r
                 //   0xfffe  - anything else (see the comment before the final return)\r
                 // useFallback is forwarded to matchToU(); flush is always passed as true.\r
                 // source's position is the same on return as on entry.\r
1973             int[] value = new int[1];\r
1974             int match;\r
1975 \r
1976             if (source.remaining() <= 0) {\r
1977                 return 0xffff;\r
1978             }\r
1979 \r
1980             /* try to match */\r
1981             byte[] sourceArray;\r
1982             int sourcePosition, sourceLimit;\r
1983             if (!source.hasArray() || source.arrayOffset() != 0) {\r
1984                 // Either source.array() would throw (read-only or direct buffer), or buffer positions are not\r
                     // array indexes (non-zero arrayOffset(), e.g. a slice): match against a private copy instead.\r
1985                 sourcePosition = source.position();  // saved so the bulk get() below can be undone\r
1986                 sourceArray = new byte[Math.min(source.remaining(), EXT_MAX_BYTES)];\r
1987                 source.get(sourceArray).position(sourcePosition);\r
1988                 sourcePosition = 0;  // relative to sourceArray\r
1989                 sourceLimit = sourceArray.length;\r
1990             } else {\r
                     // accessible backing array with arrayOffset() == 0: buffer positions are array indexes\r
1991                 sourceArray = source.array();\r
1992                 sourcePosition = source.position();\r
1993                 sourceLimit = source.limit();\r
1994             }\r
                 // NOTE(review): initialMatchToU() passes a byte count in the slot that receives sourceLimit here;\r
                 // confirm what matchToU() expects when source.position() != 0 on the array path.\r
1995             match = matchToU((byte) -1, sourceArray, sourcePosition, sourceLimit, null, value, useFallback, true);\r
1996 \r
1997             if (match == source.remaining()) {\r
1998                 /* write result for simple, single-character conversion */\r
1999                 if (TO_U_IS_CODE_POINT(value[0])) {\r
2000                     return TO_U_GET_CODE_POINT(value[0]);\r
2001                 }\r
2002             }\r
2003 \r
2004             /*\r
2005              * return no match because\r
2006              * - match>0 && value points to string: simple conversion cannot handle multiple code points\r
                  * - match>0 && match!=length: not all input consumed, forbidden for this function\r
2007              * - match==0: no match found in the first place\r
2008              * - match<0: partial match, not supported for simple conversion (and flush==TRUE)\r
2009              */\r
2010             return 0xfffe;\r
2011         }\r
2012 \r
2013         CoderResult cnvMBCSToUnicodeWithOffsets(ByteBuffer source, CharBuffer target, IntBuffer offsets, boolean flush) {\r
                 // Converts bytes from source to UTF-16 code units in target by walking the converter's state table.\r
                 // For every code unit written, if offsets is non-null, the index of the byte sequence that produced\r
                 // it is put into offsets; indexes count from source's position on entry, and -1 is used when the\r
                 // sequence began in an earlier buffer.\r
                 // The conversion state (toUnicodeStatus, mode, toULength, contents of toUBytesArray) is read on\r
                 // entry and written back on exit, so a multi-byte sequence may span calls.\r
                 // Result: starts as UNDERFLOW; becomes OVERFLOW when target has no room, malformedForLength(...)\r
                 // for illegal sequences, or whatever toU() reports for sequences the state table leaves without\r
                 // output. source's position is updated to the first unconsumed byte.\r
2014             CoderResult[] cr = { CoderResult.UNDERFLOW };\r
2015 \r
2016             int sourceArrayIndex, sourceArrayIndexStart;\r
2017             int stateTable[][/* 256 */];\r
2018             char[] unicodeCodeUnits;\r
2019 \r
2020             int offset;\r
2021             byte state;\r
2022             int byteIndex;\r
2023             byte[] bytes;\r
2024 \r
2025             int sourceIndex, nextSourceIndex;\r
2026 \r
2027             int entry = 0;\r
2028             char c;\r
2029             byte action;\r
2030 \r
2031             if (preToULength > 0) {\r
2032                 /*\r
2033                  * pass sourceIndex=-1 because we continue from an earlier buffer in the future, this may change with\r
2034                  * continuous offsets\r
2035                  */\r
2036                 cr[0] = continueMatchToU(source, target, offsets, -1, flush);\r
2037 \r
2038                 if (cr[0].isError() || preToULength < 0) {\r
2039                     return cr[0];\r
2040                 }\r
2041             }\r
2042 \r
                 // tables with a single state are handed to the single-byte loops, which do all the work\r
2043             if (sharedData.mbcs.countStates == 1) {\r
2044                 if ((sharedData.mbcs.unicodeMask & UConverterConstants.HAS_SUPPLEMENTARY) == 0) {\r
2045                     cr[0] = cnvMBCSSingleToBMPWithOffsets(source, target, offsets, flush);\r
2046                 } else {\r
2047                     cr[0] = cnvMBCSSingleToUnicodeWithOffsets(source, target, offsets, flush);\r
2048                 }\r
2049                 return cr[0];\r
2050             }\r
2051 \r
2052             /* set up the local pointers */\r
2053             sourceArrayIndex = sourceArrayIndexStart = source.position();\r
2054 \r
2055             if ((options & UConverterConstants.OPTION_SWAP_LFNL) != 0) {\r
2056                 stateTable = sharedData.mbcs.swapLFNLStateTable;\r
2057             } else {\r
2058                 stateTable = sharedData.mbcs.stateTable;\r
2059             }\r
2060             unicodeCodeUnits = sharedData.mbcs.unicodeCodeUnits;\r
2061 \r
2062             /* get the converter state from UConverter */\r
2063             offset = toUnicodeStatus;\r
2064             byteIndex = toULength;\r
2065             bytes = toUBytesArray;\r
2066 \r
2067             /*\r
2068              * if we are in the SBCS state for a DBCS-only converter, then load the DBCS state from the MBCS data\r
2069              * (dbcsOnlyState==0 if it is not a DBCS-only converter)\r
2070              */\r
2071             state = (byte)mode;\r
2072             if (state == 0) {\r
2073                 state = sharedData.mbcs.dbcsOnlyState;\r
2074             }\r
2075 \r
2076             /* sourceIndex=-1 if the current character began in the previous buffer */\r
2077             sourceIndex = byteIndex == 0 ? 0 : -1;\r
2078             nextSourceIndex = 0;\r
2079 \r
2080             /* conversion loop */\r
                 // source is read with absolute get(index); its position is only written back after the loop\r
                 // (and temporarily around the toU() call)\r
2081             while (sourceArrayIndex < source.limit()) {\r
2082                 /*\r
2083                  * This following test is to see if available input would overflow the output. It does not catch output\r
2084                  * of more than one code unit that overflows as a result of a surrogate pair or callback output from the\r
2085                  * last source byte. Therefore, those situations also test for overflows and will then break the loop,\r
2086                  * too.\r
2087                  */\r
2088                 if (!target.hasRemaining()) {\r
2089                     /* target is full */\r
2090                     cr[0] = CoderResult.OVERFLOW;\r
2091                     break;\r
2092                 }\r
2093 \r
2094                 if (byteIndex == 0) {\r
2095                     /* optimized loop for 1/2-byte input and BMP output */\r
2096                     // agljport:todo see ucnvmbcs.c for deleted block\r
2097                     do {\r
2098                         entry = stateTable[state][source.get(sourceArrayIndex)&UConverterConstants.UNSIGNED_BYTE_MASK];\r
2099                         if (MBCS_ENTRY_IS_TRANSITION(entry)) {\r
2100                             state = (byte)MBCS_ENTRY_TRANSITION_STATE(entry);\r
2101                             offset = MBCS_ENTRY_TRANSITION_OFFSET(entry);\r
2102                             ++sourceArrayIndex;\r
2103                             if (sourceArrayIndex < source.limit()\r
2104                                     && MBCS_ENTRY_IS_FINAL(entry = stateTable[state][source.get(sourceArrayIndex)&UConverterConstants.UNSIGNED_BYTE_MASK])\r
2105                                     && MBCS_ENTRY_FINAL_ACTION(entry) == MBCS_STATE_VALID_16\r
2106                                     && (c = unicodeCodeUnits[offset + MBCS_ENTRY_FINAL_VALUE_16(entry)]) < 0xfffe) {\r
2107                                 ++sourceArrayIndex;\r
2108                                 target.put(c);\r
2109                                 if (offsets != null) {\r
2110                                     offsets.put(sourceIndex);\r
2111                                     sourceIndex = (nextSourceIndex += 2);\r
2112                                 }\r
2113                                 state = (byte)MBCS_ENTRY_FINAL_STATE(entry); /* typically 0 */\r
2114                                 offset = 0;\r
2115                             } else {\r
2116                                 /* set the state and leave the optimized loop */\r
                                     // the lead byte was already consumed above; record it so the general code\r
                                     // after this loop continues with the second byte\r
2117                                 ++nextSourceIndex;\r
2118                                 bytes[0] = source.get(sourceArrayIndex - 1);\r
2119                                 byteIndex = 1;\r
2120                                 break;\r
2121                             }\r
2122                         } else {\r
2123                             if (MBCS_ENTRY_FINAL_IS_VALID_DIRECT_16(entry)) {\r
2124                                 /* output BMP code point */\r
2125                                 ++sourceArrayIndex;\r
2126                                 target.put(MBCS_ENTRY_FINAL_VALUE_16(entry));\r
2127                                 if (offsets != null) {\r
2128                                     offsets.put(sourceIndex);\r
2129                                     sourceIndex = ++nextSourceIndex;\r
2130                                 }\r
2131                                 state = (byte)MBCS_ENTRY_FINAL_STATE(entry); /* typically 0 */\r
2132                             } else {\r
2133                                 /* leave the optimized loop */\r
2134                                 break;\r
2135                             }\r
2136                         }\r
2137                     } while (sourceArrayIndex < source.limit() && target.hasRemaining());\r
2138                     /*\r
2139                      * these tests and break statements could be put inside the loop if C had "break outerLoop" like\r
2140                      * Java\r
2141                      */\r
2142                     if (sourceArrayIndex >= source.limit()) {\r
2143                         break;\r
2144                     }\r
2145                     if (!target.hasRemaining()) {\r
2146                         /* target is full */\r
2147                         cr[0] = CoderResult.OVERFLOW;\r
2148                         break;\r
2149                     }\r
2150 \r
                         // consume the byte whose state-table entry is already in entry\r
2151                     ++nextSourceIndex;\r
2152                     bytes[byteIndex++] = source.get(sourceArrayIndex++);\r
2153                 } else /* byteIndex>0 */{\r
2154                     ++nextSourceIndex;\r
2155                     entry = stateTable[state][(bytes[byteIndex++] = source.get(sourceArrayIndex++))\r
2156                             & UConverterConstants.UNSIGNED_BYTE_MASK];\r
2157                 }\r
2158 \r
2159                 if (MBCS_ENTRY_IS_TRANSITION(entry)) {\r
2160                     state = (byte)MBCS_ENTRY_TRANSITION_STATE(entry);\r
2161                     offset += MBCS_ENTRY_TRANSITION_OFFSET(entry);\r
2162                     continue;\r
2163                 }\r
2164 \r
2165                 /* save the previous state for proper extension mapping with SI/SO-stateful converters */\r
2166                 mode = state;\r
2167 \r
2168                 /* set the next state early so that we can reuse the entry variable */\r
2169                 state = (byte)MBCS_ENTRY_FINAL_STATE(entry); /* typically 0 */\r
2170 \r
2171                 /*\r
2172                  * An if-else-if chain provides more reliable performance for the most common cases compared to a\r
2173                  * switch.\r
2174                  */\r
                     // In every branch below, byteIndex = 0 marks the sequence as fully handled. Leaving\r
                     // byteIndex > 0 without an error in cr[0] sends the sequence to toU() further down.\r
2175                 action = (byte)MBCS_ENTRY_FINAL_ACTION(entry);\r
2176                 if (action == MBCS_STATE_VALID_16) {\r
2177                     offset += MBCS_ENTRY_FINAL_VALUE_16(entry);\r
2178                     c = unicodeCodeUnits[offset];\r
2179                     if (c < 0xfffe) {\r
2180                         /* output BMP code point */\r
2181                         target.put(c);\r
2182                         if (offsets != null) {\r
2183                             offsets.put(sourceIndex);\r
2184                         }\r
2185                         byteIndex = 0;\r
2186                     } else if (c == 0xfffe) {\r
2187                         if (isFallbackUsed() && (entry = getFallback(sharedData.mbcs, offset)) != 0xfffe) {\r
2188                             /* output fallback BMP code point */\r
2189                             target.put((char)entry);\r
2190                             if (offsets != null) {\r
2191                                 offsets.put(sourceIndex);\r
2192                             }\r
2193                             byteIndex = 0;\r
2194                         }\r
2195                     } else {\r
2196                         /* callback(illegal) */\r
2197                         cr[0] = CoderResult.malformedForLength(byteIndex);\r
2198                     }\r
2199                 } else if (action == MBCS_STATE_VALID_DIRECT_16) {\r
2200                     /* output BMP code point */\r
2201                     target.put(MBCS_ENTRY_FINAL_VALUE_16(entry));\r
2202                     if (offsets != null) {\r
2203                         offsets.put(sourceIndex);\r
2204                     }\r
2205                     byteIndex = 0;\r
2206                 } else if (action == MBCS_STATE_VALID_16_PAIR) {\r
2207                     offset += MBCS_ENTRY_FINAL_VALUE_16(entry);\r
2208                     c = unicodeCodeUnits[offset++];\r
2209                     if (c < 0xd800) {\r
2210                         /* output BMP code point below 0xd800 */\r
2211                         target.put(c);\r
2212                         if (offsets != null) {\r
2213                             offsets.put(sourceIndex);\r
2214                         }\r
2215                         byteIndex = 0;\r
2216                     } else if (isFallbackUsed() ? c <= 0xdfff : c <= 0xdbff) {\r
2217                         /* output roundtrip or fallback surrogate pair */\r
2218                         target.put((char)(c & 0xdbff));\r
2219                         if (offsets != null) {\r
2220                             offsets.put(sourceIndex);\r
2221                         }\r
2222                         byteIndex = 0;\r
2223                         if (target.hasRemaining()) {\r
2224                             target.put(unicodeCodeUnits[offset]);\r
2225                             if (offsets != null) {\r
2226                                 offsets.put(sourceIndex);\r
2227                             }\r
2228                         } else {\r
2229                             /* target overflow */\r
                                 // the second unit of the pair is parked in charErrorBufferArray\r
2230                             charErrorBufferArray[0] = unicodeCodeUnits[offset];\r
2231                             charErrorBufferLength = 1;\r
2232                             cr[0] = CoderResult.OVERFLOW;\r
2233 \r
2234                             offset = 0;\r
2235                             break;\r
2236                         }\r
2237                     } else if (isFallbackUsed() ? (c & 0xfffe) == 0xe000 : c == 0xe000) {\r
2238                         /* output roundtrip BMP code point above 0xd800 or fallback BMP code point */\r
2239                         target.put(unicodeCodeUnits[offset]);\r
2240                         if (offsets != null) {\r
2241                             offsets.put(sourceIndex);\r
2242                         }\r
2243                         byteIndex = 0;\r
2244                     } else if (c == 0xffff) {\r
2245                         /* callback(illegal) */\r
2246                         cr[0] = CoderResult.malformedForLength(byteIndex);\r
2247                     }\r
2248                 } else if (action == MBCS_STATE_VALID_DIRECT_20\r
2249                         || (action == MBCS_STATE_FALLBACK_DIRECT_20 && isFallbackUsed())) {\r
2250                     entry = MBCS_ENTRY_FINAL_VALUE(entry);\r
2251                     /* output surrogate pair */\r
2252                     target.put((char)(0xd800 | (char)(entry >> 10)));\r
2253                     if (offsets != null) {\r
2254                         offsets.put(sourceIndex);\r
2255                     }\r
2256                     byteIndex = 0;\r
2257                     c = (char)(0xdc00 | (char)(entry & 0x3ff));\r
2258                     if (target.hasRemaining()) {\r
2259                         target.put(c);\r
2260                         if (offsets != null) {\r
2261                             offsets.put(sourceIndex);\r
2262                         }\r
2263                     } else {\r
2264                         /* target overflow */\r
2265                         charErrorBufferArray[0] = c;\r
2266                         charErrorBufferLength = 1;\r
2267                         cr[0] = CoderResult.OVERFLOW;\r
2268 \r
2269                         offset = 0;\r
2270                         break;\r
2271                     }\r
2272                 } else if (action == MBCS_STATE_CHANGE_ONLY) {\r
2273                     /*\r
2274                      * This serves as a state change without any output. It is useful for reading simple stateful\r
2275                      * encodings, for example using just Shift-In/Shift-Out codes. The 21 unused bits may later be used\r
2276                      * for more sophisticated state transitions.\r
2277                      */\r
2278                     if (sharedData.mbcs.dbcsOnlyState == 0) {\r
2279                         byteIndex = 0;\r
2280                     } else {\r
2281                         /* SI/SO are illegal for DBCS-only conversion */\r
2282                         state = (byte)(mode); /* restore the previous state */\r
2283 \r
2284                         /* callback(illegal) */\r
2285                         cr[0] = CoderResult.malformedForLength(byteIndex);\r
2286                     }\r
2287                 } else if (action == MBCS_STATE_FALLBACK_DIRECT_16) {\r
2288                     if (isFallbackUsed()) {\r
2289                         /* output BMP code point */\r
2290                         target.put(MBCS_ENTRY_FINAL_VALUE_16(entry));\r
2291                         if (offsets != null) {\r
2292                             offsets.put(sourceIndex);\r
2293                         }\r
2294                         byteIndex = 0;\r
2295                     }\r
2296                 } else if (action == MBCS_STATE_UNASSIGNED) {\r
2297                     /* just fall through */\r
2298                 } else if (action == MBCS_STATE_ILLEGAL) {\r
2299                     /* callback(illegal) */\r
2300                     cr[0] = CoderResult.malformedForLength(byteIndex);\r
2301                 } else {\r
2302                     /* reserved, must never occur */\r
2303                     byteIndex = 0;\r
2304                 }\r
2305 \r
2306                 /* end of action codes: prepare for a new character */\r
2307                 offset = 0;\r
2308 \r
2309                 if (byteIndex == 0) {\r
2310                     sourceIndex = nextSourceIndex;\r
2311                 } else if (cr[0].isError()) {\r
2312                     /* callback(illegal) */\r
2313                     if (byteIndex > 1) {\r
2314                         /*\r
2315                          * Ticket 5691: consistent illegal sequences:\r
2316                          * - We include at least the first byte in the illegal sequence.\r
2317                          * - If any of the non-initial bytes could be the start of a character,\r
2318                          *   we stop the illegal sequence before the first one of those.\r
2319                          */\r
2320                         boolean isDBCSOnly = (sharedData.mbcs.dbcsOnlyState != 0);\r
2321                         byte i;\r
                             // empty-bodied loop: i ends at the first non-initial byte for which isSingleOrLead()\r
                             // is true, or at byteIndex if there is none\r
2322                         for (i = 1; i < byteIndex && !isSingleOrLead(stateTable, state, isDBCSOnly, (short)(bytes[i] & UConverterConstants.UNSIGNED_BYTE_MASK)); i++) {}\r
2323                         if (i < byteIndex) {\r
2324                             byte backOutDistance = (byte)(byteIndex - i);\r
2325                             int bytesFromThisBuffer = sourceArrayIndex - sourceArrayIndexStart;\r
2326                             byteIndex = i; /* length of reported illegal byte sequence */\r
2327                             if (backOutDistance <= bytesFromThisBuffer) {\r
2328                                 sourceArrayIndex -= backOutDistance;\r
2329                             } else {\r
2330                                 /* Back out bytes from the previous buffer: Need to replay them. */\r
2331                                 this.preToULength = (byte)(bytesFromThisBuffer - backOutDistance);\r
2332                                 /* preToULength is negative! */\r
2333                                 for (int n = 0; n < -this.preToULength; n++) {\r
2334                                     this.preToUArray[n] = bytes[i+n];\r
2335                                 }\r
2336                                 sourceArrayIndex = sourceArrayIndexStart;\r
2337                             }\r
2338                         }\r
2339                     }\r
2340                     break;\r
2341                 } else /* unassigned sequences indicated with byteIndex>0 */{\r
2342                     /* try an extension mapping */\r
                         // toU() works on source's position, so it is synchronized before the call and read\r
                         // back afterwards; bytes that toU() consumed are added to nextSourceIndex\r
2343                     int sourceBeginIndex = sourceArrayIndex;\r
2344                     source.position(sourceArrayIndex);\r
2345                     byteIndex = toU(byteIndex, source, target, offsets, sourceIndex, flush, cr);\r
2346                     sourceArrayIndex = source.position();\r
2347                     sourceIndex = nextSourceIndex += (sourceArrayIndex - sourceBeginIndex);\r
2348 \r
2349                     if (cr[0].isError() || cr[0].isOverflow()) {\r
2350                         /* not mappable or buffer overflow */\r
2351                         break;\r
2352                     }\r
2353                 }\r
2354             }\r
2355 \r
2356             /* set the converter state back into UConverter */\r
2357             toUnicodeStatus = offset;\r
2358             mode = state;\r
2359             toULength = byteIndex;\r
2360 \r
2361             /* write back the updated pointers */\r
2362             source.position(sourceArrayIndex);\r
2363 \r
2364             return cr[0];\r
2365         }\r
2366         /*\r
2367          * This version of cnvMBCSSingleToUnicodeWithOffsets() is optimized for single-byte, single-state codepages that\r
2368          * only map to and from the BMP. In addition to single-byte optimizations, the offset calculations become much\r
2369          * easier.\r
              *\r
              * Every source byte is looked up in stateTable[0]. The loop is bounded by targetCapacity, which starts as\r
              * min(source.remaining(), target.remaining()), is decremented for every char written on the direct\r
              * paths, and is recomputed after each extension lookup through toU().\r
              *\r
              * Offsets (when offsets is non-null) are not written per char on the direct paths. They are written in\r
              * batches of consecutive sourceIndex values: once when a byte leaves the direct paths and once after the\r
              * loop.\r
              *\r
              * Returns UNDERFLOW unless an illegal byte was seen (malformedForLength), toU() reported a result for a\r
              * byte without direct output, or input remains while target is full (OVERFLOW). source's position is\r
              * set to the first unconsumed byte.\r
2370          */\r
2371         private CoderResult cnvMBCSSingleToBMPWithOffsets(ByteBuffer source, CharBuffer target, IntBuffer offsets,\r
2372                 boolean flush) {\r
2373             CoderResult[] cr = { CoderResult.UNDERFLOW };\r
2374 \r
2375             int sourceArrayIndex, lastSource;\r
2376             int targetCapacity, length;\r
2377             int[][] stateTable;\r
2378 \r
2379             int sourceIndex;\r
2380 \r
2381             int entry;\r
2382             byte action;\r
2383 \r
2384             /* set up the local pointers */\r
2385             sourceArrayIndex = source.position();\r
2386             targetCapacity = target.remaining();\r
2387 \r
2388             if ((options & UConverterConstants.OPTION_SWAP_LFNL) != 0) {\r
2389                 stateTable = sharedData.mbcs.swapLFNLStateTable;\r
2390             } else {\r
2391                 stateTable = sharedData.mbcs.stateTable;\r
2392             }\r
2393 \r
2394             /* sourceIndex always starts at 0 here; lastSource marks where the pending offsets batch begins */\r
2395             sourceIndex = 0;\r
2396             lastSource = sourceArrayIndex;\r
2397 \r
2398             /*\r
2399              * since the conversion here is 1:1 UChar:uint8_t, we need only one counter for the minimum of the\r
2400              * sourceLength and targetCapacity\r
2401              */\r
2402             length = source.remaining();\r
2403             if (length < targetCapacity) {\r
2404                 targetCapacity = length;\r
2405             }\r
2406 \r
2407             /* conversion loop */\r
2408             while (targetCapacity > 0 && sourceArrayIndex < source.limit()) {\r
2409                 entry = stateTable[0][source.get(sourceArrayIndex++) & UConverterConstants.UNSIGNED_BYTE_MASK];\r
2410                 /* MBCS_ENTRY_IS_FINAL(entry) */\r
2411 \r
2412                 /* test the most common case first */\r
2413                 if (MBCS_ENTRY_FINAL_IS_VALID_DIRECT_16(entry)) {\r
2414                     /* output BMP code point */\r
2415                     target.put(MBCS_ENTRY_FINAL_VALUE_16(entry));\r
2416                     --targetCapacity;\r
2417                     continue;\r
2418                 }\r
2419 \r
2420                 /*\r
2421                  * An if-else-if chain provides more reliable performance for the most common cases compared to a\r
2422                  * switch.\r
2423                  */\r
2424                 action = (byte) (MBCS_ENTRY_FINAL_ACTION(entry));\r
2425                 if (action == MBCS_STATE_FALLBACK_DIRECT_16) {\r
2426                     if (isFallbackUsed()) {\r
2427                         /* output BMP code point */\r
2428                         target.put(MBCS_ENTRY_FINAL_VALUE_16(entry));\r
2429                         --targetCapacity;\r
2430                         continue;\r
2431                     }\r
                         // fallbacks not in use: continue below, where the byte goes to the extension lookup\r
2432                 } else if (action == MBCS_STATE_UNASSIGNED) {\r
2433                     /* just fall through */\r
2434                 } else if (action == MBCS_STATE_ILLEGAL) {\r
2435                     /* callback(illegal) */\r
2436                     cr[0] = CoderResult.malformedForLength(sourceArrayIndex - lastSource);\r
2437                 } else {\r
2438                     /* reserved, must never occur */\r
                         // the byte is consumed without output and without decrementing targetCapacity\r
2439                     continue;\r
2440                 }\r
2441 \r
2442                 /* set offsets since the start or the last extension */\r
2443                 if (offsets != null) {\r
2444                     int count = sourceArrayIndex - lastSource;\r
2445 \r
2446                     /* predecrement: do not set the offset for the callback-causing character */\r
2447                     while (--count > 0) {\r
2448                         offsets.put(sourceIndex++);\r
2449                     }\r
2450                     /* offset and sourceIndex are now set for the current character */\r
2451                 }\r
2452 \r
2453                 if (cr[0].isError()) {\r
2454                     /* callback(illegal) */\r
2455                     break;\r
2456                 } else /* no error: byte without direct output (unassigned, or fallback not in use) */{\r
2457                     /* try an extension mapping */\r
                         // toU() reads its input from toUBytesArray and works on source's position\r
2458                     lastSource = sourceArrayIndex;\r
2459                     toUBytesArray[0] = source.get(sourceArrayIndex - 1);\r
2460                     source.position(sourceArrayIndex);\r
2461                     toULength = toU((byte) 1, source, target, offsets, sourceIndex, flush, cr);\r
2462                     sourceArrayIndex = source.position();\r
2463                     sourceIndex += 1 + (sourceArrayIndex - lastSource);\r
2464 \r
2465                     if (cr[0].isError()) {\r
2466                         /* not mappable; an OVERFLOW result is not an error and is not tested here */\r
2467                         break;\r
2468                     }\r
2469 \r
2470                     /* recalculate the targetCapacity after an extension mapping */\r
2471                     targetCapacity = target.remaining();\r
2472                     length = source.remaining();\r
2473                     if (length < targetCapacity) {\r
2474                         targetCapacity = length;\r
2475                     }\r
2476                 }\r
2477             }\r
2478 \r
2479             if (!cr[0].isError() && sourceArrayIndex < source.limit() && !target.hasRemaining()) {\r
2480                 /* target is full */\r
2481                 cr[0] = CoderResult.OVERFLOW;\r
2482             }\r
2483 \r
2484             /* set offsets since the start or the last callback */\r
2485             if (offsets != null) {\r
2486                 int count = sourceArrayIndex - lastSource;\r
2487                 while (count > 0) {\r
2488                     offsets.put(sourceIndex++);\r
2489                     --count;\r
2490                 }\r
2491             }\r
2492 \r
2493             /* write back the updated pointers */\r
2494             source.position(sourceArrayIndex);\r
2495 \r
2496             return cr[0];\r
2497         }\r
2498 \r
2499         /* This version of cnvMBCSToUnicodeWithOffsets() is optimized for single-byte, single-state codepages. */\r
2500         private CoderResult cnvMBCSSingleToUnicodeWithOffsets(ByteBuffer source, CharBuffer target, IntBuffer offsets,\r
2501                 boolean flush) {\r
2502             CoderResult[] cr = { CoderResult.UNDERFLOW };\r
2503 \r
2504             int sourceArrayIndex;\r
2505             int[][] stateTable;\r
2506 \r
2507             int sourceIndex;\r
2508 \r
2509             int entry;\r
2510             char c;\r
2511             byte action;\r
2512 \r
2513             /* set up the local pointers */\r
2514             sourceArrayIndex = source.position();\r
2515 \r
2516             if ((options & UConverterConstants.OPTION_SWAP_LFNL) != 0) {\r
2517                 stateTable = sharedData.mbcs.swapLFNLStateTable;\r
2518             } else {\r
2519                 stateTable = sharedData.mbcs.stateTable;\r
2520             }\r
2521 \r
2522             /* sourceIndex=-1 if the current character began in the previous buffer */\r
2523             sourceIndex = 0;\r
2524 \r
2525             /* conversion loop */\r
2526             while (sourceArrayIndex < source.limit()) {\r
2527                 /*\r
2528                  * This following test is to see if available input would overflow the output. It does not catch output\r
2529                  * of more than one code unit that overflows as a result of a surrogate pair or callback output from the\r
2530                  * last source byte. Therefore, those situations also test for overflows and will then break the loop,\r
2531                  * too.\r
2532                  */\r
2533                 if (!target.hasRemaining()) {\r
2534                     /* target is full */\r
2535                     cr[0] = CoderResult.OVERFLOW;\r
2536                     break;\r
2537                 }\r
2538 \r
2539                 entry = stateTable[0][source.get(sourceArrayIndex++) & UConverterConstants.UNSIGNED_BYTE_MASK];\r
2540                 /* MBCS_ENTRY_IS_FINAL(entry) */\r
2541 \r
2542                 /* test the most common case first */\r
2543                 if (MBCS_ENTRY_FINAL_IS_VALID_DIRECT_16(entry)) {\r
2544                     /* output BMP code point */\r
2545                     target.put(MBCS_ENTRY_FINAL_VALUE_16(entry));\r
2546                     if (offsets != null) {\r
2547                         offsets.put(sourceIndex);\r
2548                     }\r
2549 \r
2550                     /* normal end of action codes: prepare for a new character */\r
2551                     ++sourceIndex;\r
2552                     continue;\r
2553                 }\r
2554 \r
2555                 /*\r
2556                  * An if-else-if chain provides more reliable performance for the most common cases compared to a\r
2557                  * switch.\r
2558                  */\r
2559                 action = (byte) (MBCS_ENTRY_FINAL_ACTION(entry));\r
2560                 if (action == MBCS_STATE_VALID_DIRECT_20\r
2561                         || (action == MBCS_STATE_FALLBACK_DIRECT_20 && isFallbackUsed())) {\r
2562 \r
2563                     entry = MBCS_ENTRY_FINAL_VALUE(entry);\r
2564                     /* output surrogate pair */\r
2565                     target.put((char) (0xd800 | (char) (entry >>> 10)));\r
2566                     if (offsets != null) {\r
2567                         offsets.put(sourceIndex);\r
2568                     }\r
2569                     c = (char) (0xdc00 | (char) (entry & 0x3ff));\r
2570                     if (target.hasRemaining()) {\r
2571                         target.put(c);\r
2572                         if (offsets != null) {\r
2573                             offsets.put(sourceIndex);\r
2574                         }\r
2575                     } else {\r
2576                         /* target overflow */\r
2577                         charErrorBufferArray[0] = c;\r
2578                         charErrorBufferLength = 1;\r
2579                         cr[0] = CoderResult.OVERFLOW;\r
2580                         break;\r
2581                     }\r
2582 \r
2583                     ++sourceIndex;\r
2584                     continue;\r
2585                 } else if (action == MBCS_STATE_FALLBACK_DIRECT_16) {\r
2586                     if (isFallbackUsed()) {\r
2587                         /* output BMP code point */\r
2588                         target.put(MBCS_ENTRY_FINAL_VALUE_16(entry));\r
2589                         if (offsets != null) {\r
2590                             offsets.put(sourceIndex);\r
2591                         }\r
2592 \r
2593                         ++sourceIndex;\r
2594                         continue;\r
2595                     }\r
2596                 } else if (action == MBCS_STATE_UNASSIGNED) {\r
2597                     /* just fall through */\r
2598                 } else if (action == MBCS_STATE_ILLEGAL) {\r
2599                     /* callback(illegal) */\r
2600                     cr[0] = CoderResult.malformedForLength(1);\r
2601                 } else {\r
2602                     /* reserved, must never occur */\r
2603                     ++sourceIndex;\r
2604                     continue;\r
2605                 }\r
2606 \r
2607                 if (cr[0].isError()) {\r
2608                     /* callback(illegal) */\r
2609                     break;\r
2610                 } else /* unassigned sequences indicated with byteIndex>0 */{\r
2611                     /* try an extension mapping */\r
2612                     int sourceBeginIndex = sourceArrayIndex;\r
2613                     toUBytesArray[0] = source.get(sourceArrayIndex - 1);\r
2614                     source.position(sourceArrayIndex);\r
2615                     toULength = toU((byte) 1, source, target, offsets, sourceIndex, flush, cr);\r
2616                     sourceArrayIndex = source.position();\r
2617                     sourceIndex += 1 + (sourceArrayIndex - sourceBeginIndex);\r
2618 \r
2619                     if (cr[0].isError()) {\r
2620                         /* not mappable or buffer overflow */\r
2621                         break;\r
2622                     }\r
2623                 }\r
2624             }\r
2625 \r
2626             /* write back the updated pointers */\r
2627             source.position(sourceArrayIndex);\r
2628 \r
2629             return cr[0];\r
2630         }\r
2631 \r
2632         private int getFallback(UConverterMBCSTable mbcsTable, int offset) {\r
2633             MBCSToUFallback[] toUFallbacks;\r
2634             int i, start, limit;\r
2635 \r
2636             limit = mbcsTable.countToUFallbacks;\r
2637             if (limit > 0) {\r
2638                 /* do a binary search for the fallback mapping */\r
2639                 toUFallbacks = mbcsTable.toUFallbacks;\r
2640                 start = 0;\r
2641                 while (start < limit - 1) {\r
2642                     i = (start + limit) / 2;\r
2643                     if (offset < toUFallbacks[i].offset) {\r
2644                         limit = i;\r
2645                     } else {\r
2646                         start = i;\r
2647                     }\r
2648                 }\r
2649 \r
2650                 /* did we really find it? */\r
2651                 if (offset == toUFallbacks[start].offset) {\r
2652                     return toUFallbacks[start].codePoint;\r
2653                 }\r
2654             }\r
2655 \r
2656             return 0xfffe;\r
2657         }\r
2658 \r
2659         /**\r
2660          * This is a simple version of _MBCSGetNextUChar() that is used by other converter implementations. It only\r
2661          * returns an "assigned" result if it consumes the entire input. It does not use state from the converter, nor\r
2662          * error codes. It does not handle the EBCDIC swaplfnl option (set in UConverter). It handles conversion\r
2663          * extensions but not GB 18030.\r
2664          * \r
2665          * @return U+fffe unassigned U+ffff illegal otherwise the Unicode code point\r
2666          */\r
2667         int simpleGetNextUChar(ByteBuffer source, boolean useFallback) {\r
2668 \r
2669             // #if 0\r
2670             // /*\r
2671             // * Code disabled 2002dec09 (ICU 2.4) because it is not currently used in ICU. markus\r
2672             // * TODO In future releases, verify that this function is never called for SBCS\r
2673             // * conversions, i.e., that sharedData->mbcs.countStates==1 is still true.\r
2674             // * Removal improves code coverage.\r
2675             // */\r
2676             // /* use optimized function if possible */\r
2677             // if(sharedData->mbcs.countStates==1) {\r
2678             // if(length==1) {\r
2679             // return ucnv_MBCSSingleSimpleGetNextUChar(sharedData, (uint8_t)*source, useFallback);\r
2680             // } else {\r
2681             // return 0xffff; /* illegal: more than a single byte for an SBCS converter */\r
2682             // }\r
2683             // }\r
2684             // #endif\r
2685 \r
2686             /* set up the local pointers */\r
2687             int[][] stateTable = sharedData.mbcs.stateTable;\r
2688             char[] unicodeCodeUnits = sharedData.mbcs.unicodeCodeUnits;\r
2689 \r
2690             /* converter state */\r
2691             int offset = 0;\r
2692             int state = sharedData.mbcs.dbcsOnlyState;\r
2693 \r
2694             int action;\r
2695             int entry;\r
2696             int c;\r
2697             int i = source.position();\r
2698             int length = source.limit() - i;\r
2699 \r
2700             /* conversion loop */\r
2701             while (true) {\r
2702                 // entry=stateTable[state][(uint8_t)source[i++]];\r
2703                 entry = stateTable[state][source.get(i++) & UConverterConstants.UNSIGNED_BYTE_MASK];\r
2704 \r
2705                 if (MBCS_ENTRY_IS_TRANSITION(entry)) {\r
2706                     state = MBCS_ENTRY_TRANSITION_STATE(entry);\r
2707                     offset += MBCS_ENTRY_TRANSITION_OFFSET(entry);\r
2708 \r
2709                     if (i == source.limit()) {\r
2710                         return 0xffff; /* truncated character */\r
2711                     }\r
2712                 } else {\r
2713                     /*\r
2714                      * An if-else-if chain provides more reliable performance for the most common cases compared to a\r
2715                      * switch.\r
2716                      */\r
2717                     action = MBCS_ENTRY_FINAL_ACTION(entry);\r
2718                     if (action == MBCS_STATE_VALID_16) {\r
2719                         offset += MBCS_ENTRY_FINAL_VALUE_16(entry);\r
2720                         c = unicodeCodeUnits[offset];\r
2721                         if (c != 0xfffe) {\r
2722                             /* done */\r
2723                         } else if (isToUUseFallback()) {\r
2724                             c = getFallback(sharedData.mbcs, offset);\r
2725                         }\r
2726                         /* else done with 0xfffe */\r
2727                     } else if (action == MBCS_STATE_VALID_DIRECT_16) {\r
2728                         // /* output BMP code point */\r
2729                         c = MBCS_ENTRY_FINAL_VALUE_16(entry);\r
2730                     } else if (action == MBCS_STATE_VALID_16_PAIR) {\r
2731                         offset += MBCS_ENTRY_FINAL_VALUE_16(entry);\r
2732                         c = unicodeCodeUnits[offset++];\r
2733                         if (c < 0xd800) {\r
2734                             /* output BMP code point below 0xd800 */\r
2735                         } else if (isToUUseFallback() ? c <= 0xdfff : c <= 0xdbff) {\r
2736                             /* output roundtrip or fallback supplementary code point */\r
2737                             c = (((c & 0x3ff) << 10) + unicodeCodeUnits[offset] + (0x10000 - 0xdc00));\r
2738                         } else if (isToUUseFallback() ? (c & 0xfffe) == 0xe000 : c == 0xe000) {\r
2739                             /* output roundtrip BMP code point above 0xd800 or fallback BMP code point */\r
2740                             c = unicodeCodeUnits[offset];\r
2741                         } else if (c == 0xffff) {\r
2742                             return 0xffff;\r
2743                         } else {\r
2744                             c = 0xfffe;\r
2745                         }\r
2746                     } else if (action == MBCS_STATE_VALID_DIRECT_20) {\r
2747                         /* output supplementary code point */\r
2748                         c = 0x10000 + MBCS_ENTRY_FINAL_VALUE(entry);\r
2749                     } else if (action == MBCS_STATE_FALLBACK_DIRECT_16) {\r
2750                         if (!isToUUseFallback(useFallback)) {\r
2751                             c = 0xfffe;\r
2752                         } else {\r
2753                             /* output BMP code point */\r
2754                             c = MBCS_ENTRY_FINAL_VALUE_16(entry);\r
2755                         }\r
2756                     } else if (action == MBCS_STATE_FALLBACK_DIRECT_20) {\r
2757                         if (!isToUUseFallback(useFallback)) {\r
2758                             c = 0xfffe;\r
2759                         } else {\r
2760                             /* output supplementary code point */\r
2761                             c = 0x10000 + MBCS_ENTRY_FINAL_VALUE(entry);\r
2762                         }\r
2763                     } else if (action == MBCS_STATE_UNASSIGNED) {\r
2764                         c = 0xfffe;\r
2765                     } else {\r
2766                         /*\r
2767                          * forbid MBCS_STATE_CHANGE_ONLY for this function, and MBCS_STATE_ILLEGAL and reserved action\r
2768                          * codes\r
2769                          */\r
2770                         return 0xffff;\r
2771                     }\r
2772                     break;\r
2773                 }\r
2774             }\r
2775 \r
2776             if (i != source.limit()) {\r
2777                 /* illegal for this function: not all input consumed */\r
2778                 return 0xffff;\r
2779             }\r
2780 \r
2781             if (c == 0xfffe) {\r
2782                 /* try an extension mapping */\r
2783                 if (sharedData.mbcs.extIndexes != null) {\r
2784                     /* Increase the limit for proper handling. Used in LMBCS. */\r
2785                     if (source.limit() > i + length) {\r
2786                         source.limit(i + length);\r
2787                     }\r
2788                     return simpleMatchToU(source, useFallback);\r
2789                 }\r
2790             }\r
2791 \r
2792             return c;\r
2793         }\r
2794         private boolean hasValidTrailBytes(int[][] stateTable, short state) {\r
2795             int[] row = stateTable[state];\r
2796             int b, entry;\r
2797             /* First test for final entries in this state for some commonly valid byte values. */\r
2798             entry = row[0xa1];\r
2799             if (!MBCS_ENTRY_IS_TRANSITION(entry) && MBCS_ENTRY_FINAL_ACTION(entry) != MBCS_STATE_ILLEGAL) {\r
2800                 return true;\r
2801             }\r
2802             entry = row[0x41];\r
2803             if (!MBCS_ENTRY_IS_TRANSITION(entry) && MBCS_ENTRY_FINAL_ACTION(entry) != MBCS_STATE_ILLEGAL) {\r
2804                 return true;\r
2805             }\r
2806             /* Then test for final entries in this state. */\r
2807             for (b = 0; b <= 0xff; b++) {\r
2808                 entry = row[b];\r
2809                 if (!MBCS_ENTRY_IS_TRANSITION(entry) && MBCS_ENTRY_FINAL_ACTION(entry) != MBCS_STATE_ILLEGAL) {\r
2810                     return true;\r
2811                 }\r
2812             }\r
2813             /* Then recurse for transition entries. */\r
2814             for (b = 0; b <= 0xff; b++) {\r
2815                 entry = row[b];\r
2816                 if (MBCS_ENTRY_IS_TRANSITION(entry) && \r
2817                         hasValidTrailBytes(stateTable, (short)(MBCS_ENTRY_TRANSITION_STATE(entry) & UConverterConstants.UNSIGNED_BYTE_MASK))) {\r
2818                     return true;\r
2819                 }\r
2820             }\r
2821             return false;\r
2822         }\r
2823         \r
2824         private boolean isSingleOrLead(int[][] stateTable, int state, boolean isDBCSOnly, int b) {\r
2825             int[] row = stateTable[state];\r
2826             int entry = row[b];\r
2827             if (MBCS_ENTRY_IS_TRANSITION(entry)) { /* lead byte */\r
2828                 return hasValidTrailBytes(stateTable, (short)(MBCS_ENTRY_TRANSITION_STATE(entry) & UConverterConstants.UNSIGNED_BYTE_MASK));\r
2829             } else {\r
2830                 short action = (short)(MBCS_ENTRY_FINAL_ACTION(entry) & UConverterConstants.UNSIGNED_BYTE_MASK);\r
2831                 if (action == MBCS_STATE_CHANGE_ONLY && isDBCSOnly) {\r
2832                     return false;   /* SI/SO are illegal for DBCS-only conversion */\r
2833                 } else {\r
2834                     return (action != MBCS_STATE_ILLEGAL);\r
2835                 }\r
2836             }\r
2837         }\r
2838         \r
2839 \r
2840     }\r
2841 \r
2842     class CharsetEncoderMBCS extends CharsetEncoderICU {\r
2843         private boolean allowReplacementChanges = false;\r
2844 \r
2845         CharsetEncoderMBCS(CharsetICU cs) {\r
2846             super(cs, fromUSubstitution);\r
2847             allowReplacementChanges = true; // allow changes in implReplaceWith\r
2848             implReset();\r
2849         }\r
2850 \r
2851         protected void implReset() {\r
2852             super.implReset();\r
2853             preFromUFirstCP = UConverterConstants.U_SENTINEL;\r
2854         }\r
2855 \r
2856         @SuppressWarnings("fallthrough")\r
2857         protected CoderResult encodeLoop(CharBuffer source, ByteBuffer target, IntBuffer offsets, boolean flush) {\r
2858             CoderResult[] cr = { CoderResult.UNDERFLOW };\r
2859             // if (!source.hasRemaining() && fromUChar32 == 0)\r
2860             // return cr[0];\r
2861 \r
2862             int sourceArrayIndex;\r
2863             char[] table;\r
2864             byte[] pArray, bytes;\r
2865             int pArrayIndex, outputType, c;\r
2866             int prevSourceIndex, sourceIndex, nextSourceIndex;\r
2867             int stage2Entry = 0, value = 0, length = 0, prevLength;\r
2868             short uniMask;\r
2869             // long asciiRoundtrips;\r
2870             \r
2871             byte[] si_value = new byte[2];\r
2872             byte[] so_value = new byte[2];\r
2873             int si_value_length = 0, so_value_length = 0;\r
2874 \r
2875             boolean gotoUnassigned = false;\r
2876 \r
2877             try {\r
2878 \r
2879                 if (!flush && preFromUFirstCP >= 0) {\r
2880                     /*\r
2881                      * pass sourceIndex=-1 because we continue from an earlier buffer in the future, this may change\r
2882                      * with continuous offsets\r
2883                      */\r
2884                     cr[0] = continueMatchFromU(source, target, offsets, flush, -1);\r
2885 \r
2886                     if (cr[0].isError() || preFromULength < 0) {\r
2887                         return cr[0];\r
2888                     }\r
2889                 }\r
2890 \r
2891                 /* use optimized function if possible */\r
2892                 outputType = sharedData.mbcs.outputType;\r
2893                 uniMask = sharedData.mbcs.unicodeMask;\r
2894                 if (outputType == MBCS_OUTPUT_1 && (uniMask & UConverterConstants.HAS_SURROGATES) == 0) {\r
2895                     if ((uniMask & UConverterConstants.HAS_SUPPLEMENTARY) == 0) {\r
2896                         cr[0] = cnvMBCSSingleFromBMPWithOffsets(source, target, offsets, flush);\r
2897                     } else {\r
2898                         cr[0] = cnvMBCSSingleFromUnicodeWithOffsets(source, target, offsets, flush);\r
2899                     }\r
2900                     return cr[0];\r
2901                 } else if (outputType == MBCS_OUTPUT_2) {\r
2902                     cr[0] = cnvMBCSDoubleFromUnicodeWithOffsets(source, target, offsets, flush);\r
2903                     return cr[0];\r
2904                 }\r
2905 \r
2906                 table = sharedData.mbcs.fromUnicodeTable;\r
2907                 sourceArrayIndex = source.position();\r
2908 \r
2909                 if ((options & UConverterConstants.OPTION_SWAP_LFNL) != 0) {\r
2910                     bytes = sharedData.mbcs.swapLFNLFromUnicodeBytes;\r
2911                 } else {\r
2912                     bytes = sharedData.mbcs.fromUnicodeBytes;\r
2913                 }\r
2914 \r
2915                 // asciiRoundtrips = sharedData.mbcs.asciiRoundtrips;\r
2916 \r
2917                 /* get the converter state from UConverter */\r
2918                 c = fromUChar32;\r
2919 \r
2920                 if (outputType == MBCS_OUTPUT_2_SISO) {\r
2921                     prevLength = fromUnicodeStatus;\r
2922                     if (prevLength == 0) {\r
2923                         /* set the real value */\r
2924                         prevLength = 1;\r
2925                     }\r
2926                 } else {\r
2927                     /* prevent fromUnicodeStatus from being set to something non-0 */\r
2928                     prevLength = 0;\r
2929                 }\r
2930 \r
2931                 /* sourceIndex=-1 if the current character began in the previous buffer */\r
2932                 prevSourceIndex = -1;\r
2933                 sourceIndex = c == 0 ? 0 : -1;\r
2934                 nextSourceIndex = 0;\r
2935 \r
2936                 /* Get the SI/SO character for the converter */\r
2937                 si_value_length = getSISOBytes(SISO_Option.SI, options, si_value);\r
2938                 so_value_length = getSISOBytes(SISO_Option.SO, options, so_value);\r
2939 \r
2940                 /* conversion loop */\r
2941                 /*\r
2942                  * This is another piece of ugly code: A goto into the loop if the converter state contains a first\r
2943                  * surrogate from the previous function call. It saves me to check in each loop iteration a check of\r
2944                  * if(c==0) and duplicating the trail-surrogate-handling code in the else branch of that check. I could\r
2945                  * not find any other way to get around this other than using a function call for the conversion and\r
2946                  * callback, which would be even more inefficient.\r
2947                  * \r
2948                  * Markus Scherer 2000-jul-19\r
2949                  */\r
2950                 boolean doloop = true;\r
2951                 boolean doread = true;\r
2952                 if (c != 0 && target.hasRemaining()) {\r
2953                     if (UTF16.isLeadSurrogate((char) c) && (uniMask & UConverterConstants.HAS_SURROGATES) == 0) {\r
2954                         // c is a lead surrogate, read another input\r
2955                         SideEffects x = new SideEffects(c, sourceArrayIndex, sourceIndex, nextSourceIndex,\r
2956                                 prevSourceIndex, prevLength);\r
2957                         doloop = getTrail(source, target, uniMask, x, flush, cr);\r
2958                         doread = x.doread;\r
2959                         c = x.c;\r
2960                         sourceArrayIndex = x.sourceArrayIndex;\r
2961                         sourceIndex = x.sourceIndex;\r
2962                         nextSourceIndex = x.nextSourceIndex;\r
2963                         prevSourceIndex = x.prevSourceIndex;\r
2964                         prevLength = x.prevLength;\r
2965                     } else {\r
2966                         // c is not a lead surrogate, do not read another input\r
2967                         doread = false;\r
2968                     }\r
2969                 }\r
2970 \r
2971                 if (doloop) {\r
2972                     while (!doread || sourceArrayIndex < source.limit()) {\r
2973                         /*\r
2974                          * This following test is to see if available input would overflow the output. It does not catch\r
2975                          * output of more than one byte that overflows as a result of a multi-byte character or callback\r
2976                          * output from the last source character. Therefore, those situations also test for overflows\r
2977                          * and will then break the loop, too.\r
2978                          */\r
2979                         if (target.hasRemaining()) {\r
2980                             /*\r
2981                              * Get a correct Unicode code point: a single UChar for a BMP code point or a matched\r
2982                              * surrogate pair for a "supplementary code point".\r
2983                              */\r
2984 \r
2985                             if (doread) {\r
2986                                 // doread might be false only on the first looping\r
2987 \r
2988                                 c = source.get(sourceArrayIndex++);\r
2989                                 ++nextSourceIndex;\r
2990 \r
2991                                 /*\r
2992                                  * This also tests if the codepage maps single surrogates. If it does, then surrogates\r
2993                                  * are not paired but mapped separately. Note that in this case unmatched surrogates are\r
2994                                  * not detected.\r
2995                                  */\r
2996                                 if (UTF16.isSurrogate((char) c)\r
2997                                         && (uniMask & UConverterConstants.HAS_SURROGATES) == 0) {\r
2998                                     if (UTF16.isLeadSurrogate((char) c)) {\r
2999                                         // getTrail:\r
3000                                         SideEffects x = new SideEffects(c, sourceArrayIndex, sourceIndex,\r
3001                                                 nextSourceIndex, prevSourceIndex, prevLength);\r
3002                                         doloop = getTrail(source, target, uniMask, x, flush, cr);\r
3003                                         c = x.c;\r
3004                                         sourceArrayIndex = x.sourceArrayIndex;\r
3005                                         sourceIndex = x.sourceIndex;\r
3006                                         nextSourceIndex = x.nextSourceIndex;\r
3007                                         prevSourceIndex = x.prevSourceIndex;\r
3008 \r
3009                                         if (x.doread) {\r
3010                                             if (doloop)\r
3011                                                 continue;\r
3012                                             else\r
3013                                                 break;\r
3014                                         }\r
3015                                     } else {\r
3016                                         /* this is an unmatched trail code unit (2nd surrogate) */\r
3017                                         /* callback(illegal) */\r
3018                                         cr[0] = CoderResult.malformedForLength(1);\r
3019                                         break;\r
3020                                     }\r
3021                                 }\r
3022                             } else {\r
3023                                 doread = true;\r
3024                             }\r
3025                             /* convert the Unicode code point in c into codepage bytes */\r
3026 \r
3027                             /*\r
3028                              * The basic lookup is a triple-stage compact array (trie) lookup. For details see the\r
3029                              * beginning of this file.\r
3030                              * \r
3031                              * Single-byte codepages are handled with a different data structure by _MBCSSingle...\r
3032                              * functions.\r
3033                              * \r
3034                              * The result consists of a 32-bit value from stage 2 and a pointer to as many bytes as are\r
3035                              * stored per character. The pointer points to the character's bytes in stage 3. Bits 15..0\r
3036                              * of the stage 2 entry contain the stage 3 index for that pointer, while bits 31..16 are\r
3037                              * flags for which of the 16 characters in the block are roundtrip-assigned.\r
3038                              * \r
3039                              * For 2-byte and 4-byte codepages, the bytes are stored as uint16_t respectively as\r
3040                              * uint32_t, in the platform encoding. For 3-byte codepages, the bytes are always stored in\r
3041                              * big-endian order.\r
3042                              * \r
3043                              * For EUC encodings that use only either 0x8e or 0x8f as the first byte of their longest\r
3044                              * byte sequences, the first two bytes in this third stage indicate with their 7th bits\r
3045                              * whether these bytes are to be written directly or actually need to be preceeded by one of\r
3046                              * the two Single-Shift codes. With this, the third stage stores one byte fewer per\r
3047                              * character than the actual maximum length of EUC byte sequences.\r
3048                              * \r
3049                              * Other than that, leading zero bytes are removed and the other bytes output. A single zero\r
3050                              * byte may be output if the "assigned" bit in stage 2 was on. The data structure does not\r
3051                              * support zero byte output as a fallback, and also does not allow output of leading zeros.\r
3052                              */\r
3053                             stage2Entry = MBCS_STAGE_2_FROM_U(table, c);\r
3054 \r
3055                             /* get the bytes and the length for the output */\r
3056                             switch (outputType) {\r
3057                             /* This is handled above with the method cnvMBCSDoubleFromUnicodeWithOffsets() */\r
3058                             /* case MBCS_OUTPUT_2:\r
3059                                 value = MBCS_VALUE_2_FROM_STAGE_2(bytes, stage2Entry, c);\r
3060                                 if ((value & UConverterConstants.UNSIGNED_INT_MASK) <= 0xff) {\r
3061                                     length = 1;\r
3062                                 } else {\r
3063                                     length = 2;\r
3064                                 }\r
3065                                 break; */\r
3066                             case MBCS_OUTPUT_2_SISO:\r
3067                                 /* 1/2-byte stateful with Shift-In/Shift-Out */\r
3068                                 /*\r
3069                                  * Save the old state in the converter object right here, then change the local\r
3070                                  * prevLength state variable if necessary. Then, if this character turns out to be\r
3071                                  * unassigned or a fallback that is not taken, the callback code must not save the new\r
3072                                  * state in the converter because the new state is for a character that is not output.\r
3073                                  * However, the callback must still restore the state from the converter in case the\r
3074                                  * callback function changed it for its output.\r
3075                                  */\r
3076                                 fromUnicodeStatus = prevLength; /* save the old state */\r
3077                                 value = MBCS_VALUE_2_FROM_STAGE_2(bytes, stage2Entry, c);\r
3078                                 if ((value & UConverterConstants.UNSIGNED_INT_MASK) <= 0xff) {\r
3079                                     if (value == 0 && MBCS_FROM_U_IS_ROUNDTRIP(stage2Entry, c) == false) {\r
3080                                         /* no mapping, leave value==0 */\r
3081                                         length = 0;\r
3082                                     } else if (prevLength <= 1) {\r
3083                                         length = 1;\r
3084                                     } else {\r
3085                                         /* change from double-byte mode to single-byte */\r
3086                                         if (si_value_length == 1) {\r
3087                                             value|=si_value[0]<<8;\r
3088                                             length = 2;\r
3089                                         } else if (si_value_length == 2) {\r
3090                                             value|=si_value[1]<<8;\r
3091                                             value|=si_value[0]<<16;\r
3092                                             length = 3;\r
3093                                         }\r
3094                                         prevLength = 1;\r
3095                                     }\r
3096                                 } else {\r
3097                                     if (prevLength == 2) {\r
3098                                         length = 2;\r
3099                                     } else {\r
3100                                         /* change from single-byte mode to double-byte */\r
3101                                         if (so_value_length == 1) {\r
3102                                             value|=so_value[0]<<16;\r
3103                                             length = 3;\r
3104                                         } else if (so_value_length == 2) {\r
3105                                             value|=so_value[1]<<16;\r
3106                                             value|=so_value[0]<<24;\r
3107                                             length = 4;\r
3108                                         }\r
3109                                         prevLength = 2;\r
3110                                     }\r
3111                                 }\r
3112                                 break;\r
3113                             case MBCS_OUTPUT_DBCS_ONLY:\r
3114                                 /* table with single-byte results, but only DBCS mappings used */\r
3115                                 value = MBCS_VALUE_2_FROM_STAGE_2(bytes, stage2Entry, c);\r
3116                                 if ((value & UConverterConstants.UNSIGNED_INT_MASK) <= 0xff) {\r
3117                                     /* no mapping or SBCS result, not taken for DBCS-only */\r
3118                                     value = stage2Entry = 0; /* stage2Entry=0 to reset roundtrip flags */\r
3119                                     length = 0;\r
3120                                 } else {\r
3121                                     length = 2;\r
3122                                 }\r
3123                                 break;\r
3124                             case MBCS_OUTPUT_3:\r
3125                                 pArray = bytes;\r
3126                                 pArrayIndex = MBCS_POINTER_3_FROM_STAGE_2(bytes, stage2Entry, c);\r
3127                                 value = ((pArray[pArrayIndex] & UConverterConstants.UNSIGNED_BYTE_MASK) << 16)\r
3128                                         | ((pArray[pArrayIndex + 1] & UConverterConstants.UNSIGNED_BYTE_MASK) << 8)\r
3129                                         | (pArray[pArrayIndex + 2] & UConverterConstants.UNSIGNED_BYTE_MASK);\r
3130                                 if ((value & UConverterConstants.UNSIGNED_INT_MASK) <= 0xff) {\r
3131                                     length = 1;\r
3132                                 } else if ((value & UConverterConstants.UNSIGNED_INT_MASK) <= 0xffff) {\r
3133                                     length = 2;\r
3134                                 } else {\r
3135                                     length = 3;\r
3136                                 }\r
3137                                 break;\r
3138                             case MBCS_OUTPUT_4:\r
3139                                 value = MBCS_VALUE_4_FROM_STAGE_2(bytes, stage2Entry, c);\r
3140                                 if ((value & UConverterConstants.UNSIGNED_INT_MASK) <= 0xff) {\r
3141                                     length = 1;\r
3142                                 } else if ((value & UConverterConstants.UNSIGNED_INT_MASK) <= 0xffff) {\r
3143                                     length = 2;\r
3144                                 } else if ((value & UConverterConstants.UNSIGNED_INT_MASK) <= 0xffffff) {\r
3145                                     length = 3;\r
3146                                 } else {\r
3147                                     length = 4;\r
3148                                 }\r
3149                                 break;\r
3150                             case MBCS_OUTPUT_3_EUC:\r
3151                                 value = MBCS_VALUE_2_FROM_STAGE_2(bytes, stage2Entry, c);\r
3152                                 /* EUC 16-bit fixed-length representation */\r
3153                                 if ((value & UConverterConstants.UNSIGNED_INT_MASK) <= 0xff) {\r
3154                                     length = 1;\r
3155                                 } else if ((value & 0x8000) == 0) {\r
3156                                     value |= 0x8e8000;\r
3157                                     length = 3;\r
3158                                 } else if ((value & 0x80) == 0) {\r
3159                                     value |= 0x8f0080;\r
3160                                     length = 3;\r
3161                                 } else {\r
3162                                     length = 2;\r
3163                                 }\r
3164                                 break;\r
3165                             case MBCS_OUTPUT_4_EUC:\r
3166                                 pArray = bytes;\r
3167                                 pArrayIndex = MBCS_POINTER_3_FROM_STAGE_2(bytes, stage2Entry, c);\r
3168                                 value = ((pArray[pArrayIndex] & UConverterConstants.UNSIGNED_BYTE_MASK) << 16)\r
3169                                         | ((pArray[pArrayIndex + 1] & UConverterConstants.UNSIGNED_BYTE_MASK) << 8)\r
3170                                         | (pArray[pArrayIndex + 2] & UConverterConstants.UNSIGNED_BYTE_MASK);\r
3171                                 /* EUC 16-bit fixed-length representation applied to the first two bytes */\r
3172                                 if ((value & UConverterConstants.UNSIGNED_INT_MASK) <= 0xff) {\r
3173                                     length = 1;\r
3174                                 } else if ((value & UConverterConstants.UNSIGNED_INT_MASK) <= 0xffff) {\r
3175                                     length = 2;\r
3176                                 } else if ((value & 0x800000) == 0) {\r
3177                                     value |= 0x8e800000;\r
3178                                     length = 4;\r
3179                                 } else if ((value & 0x8000) == 0) {\r
3180                                     value |= 0x8f008000;\r
3181                                     length = 4;\r
3182                                 } else {\r
3183                                     length = 3;\r
3184                                 }\r
3185                                 break;\r
3186                             default:\r
3187                                 /* must not occur */\r
3188                                 /*\r
3189                                  * To avoid compiler warnings that value & length may be used without having been\r
3190                                  * initialized, we set them here. In reality, this is unreachable code. Not having a\r
3191                                  * default branch also causes warnings with some compilers.\r
3192                                  */\r
3193                                 value = stage2Entry = 0; /* stage2Entry=0 to reset roundtrip flags */\r
3194                                 length = 0;\r
3195                                 break;\r
3196                             }\r
3197                             \r
3198                             /* is this code point assigned, or do we use fallbacks? */\r
3199                             if (gotoUnassigned || (!(MBCS_FROM_U_IS_ROUNDTRIP(stage2Entry, c) || (isFromUUseFallback(c) && value != 0)))) {\r
3200                                 gotoUnassigned = false;\r
3201                                 /*\r
3202                                  * We allow a 0 byte output if the "assigned" bit is set for this entry. There is no way\r
3203                                  * with this data structure for fallback output to be a zero byte.\r
3204                                  */\r
3205 \r
3206                                 // unassigned:\r
3207                                 SideEffects x = new SideEffects(c, sourceArrayIndex, sourceIndex, nextSourceIndex,\r
3208                                         prevSourceIndex, prevLength);\r
3209                                 doloop = unassigned(source, target, offsets, x, flush, cr);\r
3210                                 c = x.c;\r
3211                                 sourceArrayIndex = x.sourceArrayIndex;\r
3212                                 sourceIndex = x.sourceIndex;\r
3213                                 nextSourceIndex = x.nextSourceIndex;\r
3214                                 prevSourceIndex = x.prevSourceIndex;\r
3215                                 prevLength = x.prevLength;\r
3216                                 if (doloop)\r
3217                                     continue;\r
3218                                 else\r
3219                                     break;\r
3220                             }\r
3221 \r
3222                             /* write the output character bytes from value and length */\r
3223                             /* from the first if in the loop we know that targetCapacity>0 */\r
3224                             if (length <= target.remaining()) {\r
3225                                 switch (length) {\r
3226                                 /* each branch falls through to the next one */\r
3227                                 case 4:\r
3228                                     target.put((byte) (value >>> 24));\r
3229                                     if (offsets != null) {\r
3230                                         offsets.put(sourceIndex);\r
3231                                     }\r
3232                                 case 3:\r
3233                                     target.put((byte) (value >>> 16));\r
3234                                     if (offsets != null) {\r
3235                                         offsets.put(sourceIndex);\r
3236                                     }\r
3237                                 case 2:\r
3238                                     target.put((byte) (value >>> 8));\r
3239                                     if (offsets != null) {\r
3240                                         offsets.put(sourceIndex);\r
3241                                     }\r
3242                                 case 1:\r
3243                                     target.put((byte) value);\r
3244                                     if (offsets != null) {\r
3245                                         offsets.put(sourceIndex);\r
3246                                     }\r
3247                                 default:\r
3248                                     /* will never occur */\r
3249                                     break;\r
3250                                 }\r
3251                             } else {\r
3252                                 int errorBufferArrayIndex;\r
3253 \r
3254                                 /*\r
3255                                  * We actually do this backwards here: In order to save an intermediate variable, we\r
3256                                  * output first to the overflow buffer what does not fit into the regular target.\r
3257                                  */\r
3258                                 /* we know that 1<=targetCapacity<length<=4 */\r
3259                                 length -= target.remaining();\r
3260 \r
3261                                 errorBufferArrayIndex = 0;\r
3262                                 switch (length) {\r
3263                                 /* each branch falls through to the next one */\r
3264                                 case 3:\r
3265                                     errorBuffer[errorBufferArrayIndex++] = (byte) (value >>> 16);\r
3266                                 case 2:\r
3267                                     errorBuffer[errorBufferArrayIndex++] = (byte) (value >>> 8);\r
3268                                 case 1:\r
3269                                     errorBuffer[errorBufferArrayIndex] = (byte) value;\r
3270                                 default:\r
3271                                     /* will never occur */\r
3272                                     break;\r
3273                                 }\r
3274                                 errorBufferLength = (byte) length;\r
3275 \r
3276                                 /* now output what fits into the regular target */\r
3277                                 value >>>= 8 * length; /* length was reduced by targetCapacity */\r
3278                                 switch (target.remaining()) {\r
3279                                 /* each branch falls through to the next one */\r
3280                                 case 3:\r
3281                                     target.put((byte) (value >>> 16));\r
3282                                     if (offsets != null) {\r
3283                                         offsets.put(sourceIndex);\r
3284                                     }\r
3285                                 case 2:\r
3286                                     target.put((byte) (value >>> 8));\r
3287                                     if (offsets != null) {\r
3288                                         offsets.put(sourceIndex);\r
3289                                     }\r
3290                                 case 1:\r
3291                                     target.put((byte) value);\r
3292                                     if (offsets != null) {\r
3293                                         offsets.put(sourceIndex);\r
3294                                     }\r
3295                                 default:\r
3296                                     /* will never occur */\r
3297                                     break;\r
3298                                 }\r
3299 \r
3300                                 /* target overflow */\r
3301                                 cr[0] = CoderResult.OVERFLOW;\r
3302                                 c = 0;\r
3303                                 break;\r
3304                             }\r
3305 \r
3306                             /* normal end of conversion: prepare for a new character */\r
3307                             c = 0;\r
3308                             if (offsets != null) {\r
3309                                 prevSourceIndex = sourceIndex;\r
3310                                 sourceIndex = nextSourceIndex;\r
3311                             }\r
3312                             continue;\r
3313                         } else {\r
3314                             /* target is full */\r
3315                             cr[0] = CoderResult.OVERFLOW;\r
3316                             break;\r
3317                         }\r
3318                     }\r
3319                 }\r
3320 \r
3321                 /*\r
3322                  * the end of the input stream and detection of truncated input are handled by the framework, but for\r
3323                  * EBCDIC_STATEFUL conversion we need to emit an SI at the very end\r
3324                  * \r
3325                  * conditions: successful EBCDIC_STATEFUL in DBCS mode end of input and no truncated input\r
3326                  */\r
3327                 if (outputType == MBCS_OUTPUT_2_SISO && prevLength == 2 && flush && sourceArrayIndex >= source.limit()\r
3328                         && c == 0) {\r
3329 \r
3330                     /* EBCDIC_STATEFUL ending with DBCS: emit an SI to return the output stream to SBCS */\r
3331                     if (target.hasRemaining()) {\r
3332                         target.put(si_value[0]);\r
3333                         if (si_value_length == 2) {\r
3334                             if (target.remaining() > 0) {\r
3335                                 target.put(si_value[1]);\r
3336                             } else {\r
3337                                 errorBuffer[0] = si_value[1];\r
3338                                 errorBufferLength = 1;\r
3339                                 cr[0] = CoderResult.OVERFLOW;\r
3340                             }\r
3341                         }\r
3342                         if (offsets != null) {\r
3343                             /* set the last source character's index (sourceIndex points at sourceLimit now) */\r
3344                             offsets.put(prevSourceIndex);\r
3345                         }\r
3346                     } else {\r
3347                         /* target is full */\r
3348                         errorBuffer[0] = si_value[0];\r
3349                         if (si_value_length == 2) {\r
3350                             errorBuffer[1] = si_value[1];\r
3351                         }\r
3352                         errorBufferLength = si_value_length;\r
3353                         cr[0] = CoderResult.OVERFLOW;\r
3354                     }\r
3355                     prevLength = 1; /* we switched into SBCS */\r
3356                 }\r
3357 \r
3358                 /* set the converter state back into UConverter */\r
3359                 fromUChar32 = c;\r
3360                 fromUnicodeStatus = prevLength;\r
3361 \r
3362                 source.position(sourceArrayIndex);\r
3363             } catch (BufferOverflowException ex) {\r
3364                 cr[0] = CoderResult.OVERFLOW;\r
3365             }\r
3366 \r
3367             return cr[0];\r
3368         }\r
3369 \r
3370         /*\r
3371          * This is another simple conversion function for internal use by other conversion implementations. It does not\r
3372          * use the converter state nor call callbacks. It does not handle the EBCDIC swaplfnl option (set in\r
3373          * UConverter). It handles conversion extensions but not GB 18030.\r
3374          * \r
3375          * It converts one single Unicode code point into codepage bytes, encoded as one 32-bit value in pValue[0].\r
3376          * Return value: 1..4 = the number of bytes in pValue[0]; 0 = unassigned (pValue[0] undefined);\r
3377          * -1 = illegal (currently not used, pValue[0] undefined).\r
3378          * \r
3379          * pValue[0] will contain the resulting bytes with the last byte in bits 7..0, the second to last byte in bits\r
3380          * 15..8, etc. Currently, the function assumes but does not check that 0<=c<=0x10ffff.\r
3381          */\r
3382         int fromUChar32(int c, int[] pValue, boolean isUseFallback) {\r
3383             // #if 0\r
3384             // /* #if 0 because this is not currently used in ICU - reduce code, increase code coverage */\r
3385             // const uint8_t *p;\r
3386             // #endif\r
3387 \r
3388             char[] table;\r
3389             int stage2Entry;\r
3390             int value;\r
3391             int length;\r
3392             int p;\r
3393 \r
3394             /* BMP-only codepages are stored without stage 1 entries for supplementary code points */\r
3395             if (c <= 0xffff || ((sharedData.mbcs.unicodeMask & UConverterConstants.HAS_SUPPLEMENTARY) != 0)) {\r
3396                 table = sharedData.mbcs.fromUnicodeTable;\r
3397 \r
3398                 /* convert the Unicode code point in c into codepage bytes (same as in _MBCSFromUnicodeWithOffsets) */\r
3399                 if (sharedData.mbcs.outputType == MBCS_OUTPUT_1) {\r
3400                     value = MBCS_SINGLE_RESULT_FROM_U(table, sharedData.mbcs.fromUnicodeBytes, c);\r
3401                     /* is this code point assigned, or do we use fallbacks? */\r
3402                     if (isUseFallback ? value >= 0x800 : value >= 0xc00) {\r
3403                         pValue[0] = value & 0xff;\r
3404                         return 1;\r
3405                     }\r
3406                 } else /* outputType!=MBCS_OUTPUT_1 */{\r
3407                     stage2Entry = MBCS_STAGE_2_FROM_U(table, c);\r
3408 \r
3409                     /* get the bytes and the length for the output */\r
3410                     switch (sharedData.mbcs.outputType) {\r
3411                     case MBCS_OUTPUT_2:\r
3412                         value = MBCS_VALUE_2_FROM_STAGE_2(sharedData.mbcs.fromUnicodeBytes, stage2Entry, c);\r
3413                         if (value <= 0xff) {\r
3414                             length = 1;\r
3415                         } else {\r
3416                             length = 2;\r
3417                         }\r
3418                         break;\r
3419                     // #if 0\r
3420                     // /* #if 0 because this is not currently used in ICU - reduce code, increase code coverage */\r
3421                     // case MBCS_OUTPUT_DBCS_ONLY:\r
3422                     // /* table with single-byte results, but only DBCS mappings used */\r
3423                     // value=MBCS_VALUE_2_FROM_STAGE_2(sharedData->mbcs.fromUnicodeBytes, stage2Entry, c);\r
3424                     // if(value<=0xff) {\r
3425                     // /* no mapping or SBCS result, not taken for DBCS-only */\r
3426                     // value=stage2Entry=0; /* stage2Entry=0 to reset roundtrip flags */\r
3427                     // length=0;\r
3428                     // } else {\r
3429                     // length=2;\r
3430                     // }\r
3431                     // break;\r
3432                     case MBCS_OUTPUT_3:\r
3433                         byte[] bytes = sharedData.mbcs.fromUnicodeBytes;\r
3434                         p = CharsetMBCS.MBCS_POINTER_3_FROM_STAGE_2(bytes, stage2Entry, c);\r
3435                         value = ((bytes[p] & UConverterConstants.UNSIGNED_BYTE_MASK)<<16) |\r
3436                             ((bytes[p+1] & UConverterConstants.UNSIGNED_BYTE_MASK)<<8) |\r
3437                             (bytes[p+2] & UConverterConstants.UNSIGNED_BYTE_MASK);\r
3438                         if (value <= 0xff) {\r
3439                             length = 1;\r
3440                         } else if (value <= 0xffff) {\r
3441                             length = 2;\r
3442                         } else {\r
3443                             length = 3;\r
3444                         }\r
3445                         break;\r
3446                     // case MBCS_OUTPUT_4:\r
3447                     // value=MBCS_VALUE_4_FROM_STAGE_2(sharedData->mbcs.fromUnicodeBytes, stage2Entry, c);\r
3448                     // if(value<=0xff) {\r
3449                     // length=1;\r
3450                     // } else if(value<=0xffff) {\r
3451                     // length=2;\r
3452                     // } else if(value<=0xffffff) {\r
3453                     // length=3;\r
3454                     // } else {\r
3455                     // length=4;\r
3456                     // }\r
3457                     // break;\r
3458                     // case MBCS_OUTPUT_3_EUC:\r
3459                     // value=MBCS_VALUE_2_FROM_STAGE_2(sharedData->mbcs.fromUnicodeBytes, stage2Entry, c);\r
3460                     // /* EUC 16-bit fixed-length representation */\r
3461                     // if(value<=0xff) {\r
3462                     // length=1;\r
3463                     // } else if((value&0x8000)==0) {\r
3464                     // value|=0x8e8000;\r
3465                     // length=3;\r
3466                     // } else if((value&0x80)==0) {\r
3467                     // value|=0x8f0080;\r
3468                     // length=3;\r
3469                     // } else {\r
3470                     // length=2;\r
3471                     // }\r
3472                     // break;\r
3473                     // case MBCS_OUTPUT_4_EUC:\r
3474                     // p=MBCS_POINTER_3_FROM_STAGE_2(sharedData->mbcs.fromUnicodeBytes, stage2Entry, c);\r
3475                     // value=((uint32_t)*p<<16)|((uint32_t)p[1]<<8)|p[2];\r
3476                     // /* EUC 16-bit fixed-length representation applied to the first two bytes */\r
3477                     // if(value<=0xff) {\r
3478                     // length=1;\r
3479                     // } else if(value<=0xffff) {\r
3480                     // length=2;\r
3481                     // } else if((value&0x800000)==0) {\r
3482                     // value|=0x8e800000;\r
3483                     // length=4;\r
3484                     // } else if((value&0x8000)==0) {\r
3485                     // value|=0x8f008000;\r
3486                     // length=4;\r
3487                     // } else {\r
3488                     // length=3;\r
3489                     // }\r
3490                     // break;\r
3491                     // #endif\r
3492                     default:\r
3493                         /* must not occur */\r
3494                         return -1;\r
3495                     }\r
3496 \r
3497                     /* is this code point assigned, or do we use fallbacks? */\r
3498                     if (MBCS_FROM_U_IS_ROUNDTRIP(stage2Entry, c)\r
3499                             || (CharsetEncoderICU.isFromUUseFallback(isUseFallback, c) && value != 0)) {\r
3500                         /*\r
3501                          * We allow a 0 byte output if the "assigned" bit is set for this entry. There is no way with\r
3502                          * this data structure for fallback output to be a zero byte.\r
3503                          */\r
3504                         /* assigned */\r
3505                         pValue[0] = value;\r
3506                         return length;\r
3507                     }\r
3508                 }\r
3509             }\r
3510 \r
3511             if (sharedData.mbcs.extIndexes != null) {\r
3512                 length = simpleMatchFromU(c, pValue, isUseFallback);\r
3513                 return length >= 0 ? length : -length; /* return abs(length); */\r
3514             }\r
3515 \r
3516             /* unassigned */\r
3517             return 0;\r
3518         }\r
3519 \r
3520         /*\r
3521          * Continue a partial match with new input. Requires preFromUFirstCP>=0. Never called for simple,\r
3522          * single-character conversion.\r
3523          */\r
3524         private CoderResult continueMatchFromU(CharBuffer source, ByteBuffer target, IntBuffer offsets, boolean flush,\r
3525                 int srcIndex) {\r
3526             CoderResult cr = CoderResult.UNDERFLOW;\r
3527             int[] value = new int[1];\r
3528             int match;\r
3529 \r
3530             match = matchFromU(preFromUFirstCP, preFromUArray, preFromUBegin, preFromULength, source, value, useFallback, flush);\r
3531             if (match >= 2) {\r
3532                 match -= 2; /* remove 2 for the initial code point */\r
3533 \r
3534                 if (match >= preFromULength) {\r
3535                     /* advance src pointer for the consumed input */\r
3536                     source.position(source.position() + match - preFromULength);\r
3537                     preFromULength = 0;\r
3538                 } else {\r
3539                     /* the match did not use all of preFromU[] - keep the rest for replay */\r
3540                     int length = preFromULength - match;\r
3541                     System.arraycopy(preFromUArray, preFromUBegin + match, preFromUArray, preFromUBegin, length);\r
3542                     preFromULength = (byte) -length;\r
3543                 }\r
3544 \r
3545                 /* finish the partial match */\r
3546                 preFromUFirstCP = UConverterConstants.U_SENTINEL;\r
3547 \r
3548                 /* write result */\r
3549                 writeFromU(value[0], target, offsets, srcIndex);\r
3550             } else if (match < 0) {\r
3551                 /* save state for partial match */\r
3552                 int sArrayIndex;\r
3553                 int j;\r
3554 \r
3555                 /* just _append_ the newly consumed input to preFromU[] */\r
3556                 sArrayIndex = source.position();\r
3557                 match = -match - 2; /* remove 2 for the initial code point */\r
3558                 for (j = preFromULength; j < match; ++j) {\r
3559                     preFromUArray[j] = source.get(sArrayIndex++);\r
3560                 }\r
3561                 source.position(sArrayIndex); /* same as *src=srcLimit; because we reached the end of input */\r
3562                 preFromULength = (byte) match;\r
3563             } else { /* match==0 or 1 */\r
3564                 /*\r
3565                  * no match\r
3566                  * \r
3567                  * We need to split the previous input into two parts:\r
3568                  * \r
3569                  * 1. The first code point is unmappable - that's how we got into trying the extension data in the first\r
3570                  * place. We need to move it from the preFromU buffer to the error buffer, set an error code, and\r
3571                  * prepare the rest of the previous input for 2.\r
3572                  * \r
3573                  * 2. The rest of the previous input must be converted once we come back from the callback for the first\r
3574                  * code point. At that time, we have to try again from scratch to convert these input characters. The\r
3575                  * replay will be handled by the ucnv.c conversion code.\r
3576                  */\r
3577 \r
3578                 if (match == 1) {\r
3579                     /* matched, no mapping but request for <subchar1> */\r
3580                     useSubChar1 = true;\r
3581                 }\r
3582 \r
3583                 /* move the first code point to the error field */\r
3584                 fromUChar32 = preFromUFirstCP;\r
3585                 preFromUFirstCP = UConverterConstants.U_SENTINEL;\r
3586 \r
3587                 /* mark preFromU for replay */\r
3588                 preFromULength = (byte) -preFromULength;\r
3589 \r
3590                 /* set the error code for unassigned */\r
3591                 // TODO: figure out what the unmappable length really should be\r
3592                 cr = CoderResult.unmappableForLength(1);\r
3593             }\r
3594             return cr;\r
3595         }\r
3596 \r
3597         /**\r
3598          * @param cx\r
3599          *            pointer to extension data; if NULL, returns 0\r
3600          * @param firstCP\r
3601          *            the first code point before all the other UChars\r
3602          * @param pre\r
3603          *            UChars that must match; !initialMatch: partial match with them\r
3604          * @param preLength\r
3605          *            length of pre, >=0\r
3606          * @param src\r
3607          *            UChars that can be used to complete a match\r
3608          * @param srcLength\r
3609          *            length of src, >=0\r
3610          * @param pMatchValue\r
3611          *            [out] output result value for the match from the data structure\r
3612          * @param useFallback\r
3613          *            "use fallback" flag, usually from cnv->useFallback\r
3614          * @param flush\r
3615          *            TRUE if the end of the input stream is reached\r
3616          * @return >1: matched, return value=total match length (number of input units matched); 1: matched, no mapping\r
3617          *         but request for <subchar1> (only for the first code point); 0: no match; <0: partial match, return\r
3618          *         value=negative total match length. Partial matches are never returned for flush==TRUE and are never\r
3619          *         returned as being longer than UCNV_EXT_MAX_UCHARS. The matchLength is 2 if only firstCP matched, and\r
3620          *         >2 if firstCP and further code units matched.\r
3621          */\r
3622         // static int32_t ucnv_extMatchFromU(const int32_t *cx, UChar32 firstCP, const UChar *pre, int32_t preLength,\r
3623         // const UChar *src, int32_t srcLength, uint32_t *pMatchValue, UBool useFallback, UBool flush)\r
3624         private int matchFromU(int firstCP, char[] preArray, int preArrayBegin, int preLength, CharBuffer source,\r
3625                 int[] pMatchValue, boolean isUseFallback, boolean flush) {\r
3626             ByteBuffer cx = sharedData.mbcs.extIndexes;\r
3627 \r
3628             CharBuffer stage12, stage3;\r
3629             IntBuffer stage3b;\r
3630 \r
3631             CharBuffer fromUTableUChars, fromUSectionUChars;\r
3632             IntBuffer fromUTableValues, fromUSectionValues;\r
3633 \r
3634             int value, matchValue;\r
3635             int i, j, index, length, matchLength;\r
3636             char c;\r
3637 \r
3638             if (cx == null) {\r
3639                 return 0; /* no extension data, no match */\r
3640             }\r
3641 \r
3642             /* trie lookup of firstCP */\r
3643             index = firstCP >>> 10; /* stage 1 index */\r
3644             if (index >= cx.asIntBuffer().get(EXT_FROM_U_STAGE_1_LENGTH)) {\r
3645                 return 0; /* the first code point is outside the trie */\r
3646             }\r
3647 \r
3648             stage12 = (CharBuffer) ARRAY(cx, EXT_FROM_U_STAGE_12_INDEX, char.class);\r
3649             stage3 = (CharBuffer) ARRAY(cx, EXT_FROM_U_STAGE_3_INDEX, char.class);\r
3650             index = FROM_U(stage12, stage3, index, firstCP);\r
3651 \r
3652             stage3b = (IntBuffer) ARRAY(cx, EXT_FROM_U_STAGE_3B_INDEX, int.class);\r
3653             value = stage3b.get(stage3b.position() + index);\r
3654             if (value == 0) {\r
3655                 return 0;\r
3656             }\r
3657 \r
3658             if (TO_U_IS_PARTIAL(value)) {\r
3659                 /* partial match, enter the loop below */\r
3660                 index = FROM_U_GET_PARTIAL_INDEX(value);\r
3661 \r
3662                 /* initialize */\r
3663                 fromUTableUChars = (CharBuffer) ARRAY(cx, EXT_FROM_U_UCHARS_INDEX, char.class);\r
3664                 fromUTableValues = (IntBuffer) ARRAY(cx, EXT_FROM_U_VALUES_INDEX, int.class);\r
3665 \r
3666                 matchValue = 0;\r
3667                 i = j = matchLength = 0;\r
3668 \r
3669                 /* we must not remember fallback matches when not using fallbacks */\r
3670 \r
3671                 /* match input units until there is a full match or the input is consumed */\r
3672                 for (;;) {\r
3673                     /* go to the next section */\r
3674                     int oldpos = fromUTableUChars.position();\r
3675                     fromUSectionUChars = ((CharBuffer) fromUTableUChars.position(index)).slice();\r
3676                     fromUTableUChars.position(oldpos);\r
3677                     oldpos = fromUTableValues.position();\r
3678                     fromUSectionValues = ((IntBuffer) fromUTableValues.position(index)).slice();\r
3679                     fromUTableValues.position(oldpos);\r
3680 \r
3681                     /* read first pair of the section */\r
3682                     length = fromUSectionUChars.get();\r
3683                     value = fromUSectionValues.get();\r
3684                     if (value != 0 && (FROM_U_IS_ROUNDTRIP(value) || isFromUUseFallback(isUseFallback, firstCP))) {\r
3685                         /* remember longest match so far */\r
3686                         matchValue = value;\r
3687                         matchLength = 2 + i + j;\r
3688                     }\r
3689 \r
3690                     /* match pre[] then src[] */\r
3691                     if (i < preLength) {\r
3692                         c = preArray[preArrayBegin + i++];\r
3693                     } else if (source != null && j < source.remaining()) {\r
3694                         c = source.get(source.position() + j++);\r
3695                     } else {\r
3696                         /* all input consumed, partial match */\r
3697                         if (flush || (length = (i + j)) > MAX_UCHARS) {\r
3698                             /*\r
3699                              * end of the entire input stream, stop with the longest match so far or: partial match must\r
3700                              * not be longer than UCNV_EXT_MAX_UCHARS because it must fit into state buffers\r
3701                              */\r
3702                             break;\r
3703                         } else {\r
3704                             /* continue with more input next time */\r
3705                             return -(2 + length);\r
3706                         }\r
3707                     }\r
3708 \r
3709                     /* search for the current UChar */\r
3710                     index = findFromU(fromUSectionUChars, length, c);\r
3711                     if (index < 0) {\r
3712                         /* no match here, stop with the longest match so far */\r
3713                         break;\r
3714                     } else {\r
3715                         value = fromUSectionValues.get(fromUSectionValues.position() + index);\r
3716                         if (FROM_U_IS_PARTIAL(value)) {\r
3717                             /* partial match, continue */\r
3718                             index = FROM_U_GET_PARTIAL_INDEX(value);\r
3719                         } else {\r
3720                             if (FROM_U_IS_ROUNDTRIP(value) || isFromUUseFallback(isUseFallback, firstCP)) {\r
3721                                 /* full match, stop with result */\r
3722                                 matchValue = value;\r
3723                                 matchLength = 2 + i + j;\r
3724                             } else {\r
3725                                 /* full match on fallback not taken, stop with the longest match so far */\r
3726                             }\r
3727                             break;\r
3728                         }\r
3729                     }\r
3730                 }\r
3731 \r
3732                 if (matchLength == 0) {\r
3733                     /* no match at all */\r
3734                     return 0;\r
3735                 }\r
3736             } else /* result from firstCP trie lookup */{\r
3737                 if (FROM_U_IS_ROUNDTRIP(value) || isFromUUseFallback(isUseFallback, firstCP)) {\r
3738                     /* full match, stop with result */\r
3739                     matchValue = value;\r
3740                     matchLength = 2;\r
3741                 } else {\r
3742                     /* fallback not taken */\r
3743                     return 0;\r
3744                 }\r
3745             }\r
3746 \r
3747             if ((matchValue & FROM_U_RESERVED_MASK) != 0) {\r
3748                 /* do not interpret values with reserved bits used, for forward compatibility */\r
3749                 return 0;\r
3750             }\r
3751 \r
3752             /* return result */\r
3753             if (matchValue == FROM_U_SUBCHAR1) {\r
3754                 return 1; /* assert matchLength==2 */\r
3755             }\r
3756 \r
3757             pMatchValue[0] = FROM_U_MASK_ROUNDTRIP(matchValue);\r
3758             return matchLength;\r
3759         }\r
3760 \r
3761         private int simpleMatchFromU(int cp, int[] pValue, boolean isUseFallback) {\r
3762             int[] value = new int[1];\r
3763             int match; // signed\r
3764 \r
3765             /* try to match */\r
3766             match = matchFromU(cp, null, 0, 0, null, value, isUseFallback, true);\r
3767             if (match >= 2) {\r
3768                 /* write result for simple, single-character conversion */\r
3769                 int length;\r
3770                 boolean isRoundtrip;\r
3771 \r
3772                 isRoundtrip = FROM_U_IS_ROUNDTRIP(value[0]);\r
3773                 length = FROM_U_GET_LENGTH(value[0]);\r
3774                 value[0] = FROM_U_GET_DATA(value[0]);\r
3775 \r
3776                 if (length <= EXT_FROM_U_MAX_DIRECT_LENGTH) {\r
3777                     pValue[0] = value[0];\r
3778                     return isRoundtrip ? length : -length;\r
3779                     // #if 0 /* not currently used */\r
3780                     // } else if(length==4) {\r
3781                     // /* de-serialize a 4-byte result */\r
3782                     // const uint8_t *result=UCNV_EXT_ARRAY(cx, UCNV_EXT_FROM_U_BYTES_INDEX, uint8_t)+value;\r
3783                     // *pValue=\r
3784                     // ((uint32_t)result[0]<<24)|\r
3785                     // ((uint32_t)result[1]<<16)|\r
3786                     // ((uint32_t)result[2]<<8)|\r
3787                     // result[3];\r
3788                     // return isRoundtrip ? 4 : -4;\r
3789                     // #endif\r
3790                 }\r
3791             }\r
3792 \r
3793             /*\r
3794              * return no match because - match>1 && resultLength>4: result too long for simple conversion - match==1: no\r
3795              * match found, <subchar1> preferred - match==0: no match found in the first place - match<0: partial\r
3796              * match, not supported for simple conversion (and flush==TRUE)\r
3797              */\r
3798             return 0;\r
3799         }\r
3800 \r
3801         @SuppressWarnings("fallthrough")\r
3802         private CoderResult writeFromU(int value, ByteBuffer target, IntBuffer offsets, int srcIndex) {\r
3803             ByteBuffer cx = sharedData.mbcs.extIndexes;\r
3804 \r
3805             byte bufferArray[] = new byte[1 + MAX_BYTES];\r
3806             int bufferArrayIndex = 0;\r
3807             byte[] resultArray;\r
3808             int resultArrayIndex;\r
3809             int length, prevLength;\r
3810 \r
3811             length = FROM_U_GET_LENGTH(value);\r
3812             value = FROM_U_GET_DATA(value);\r
3813 \r
3814             /* output the result */\r
3815             if (length <= FROM_U_MAX_DIRECT_LENGTH) {\r
3816                 /*\r
3817                  * Generate a byte array and then write it below. This is not the fastest possible way, but it should be\r
3818                  * ok for extension mappings, and it is much simpler. Offset and overflow handling are only done once\r
3819                  * this way.\r
3820                  */\r
3821                 int p = bufferArrayIndex + 1; /* reserve buffer[0] for shiftByte below */\r
3822                 switch (length) {\r
3823                 case 3:\r
3824                     bufferArray[p++] = (byte) (value >>> 16);\r
3825                 case 2:\r
3826                     bufferArray[p++] = (byte) (value >>> 8);\r
3827                 case 1:\r
3828                     bufferArray[p++] = (byte) value;\r
3829                 default:\r
3830                     break; /* will never occur */\r
3831                 }\r
3832                 resultArray = bufferArray;\r
3833                 resultArrayIndex = bufferArrayIndex + 1;\r
3834             } else {\r
3835                 byte[] slice = new byte[length];\r
3836 \r
3837                 ByteBuffer bb = ((ByteBuffer) ARRAY(cx, EXT_FROM_U_BYTES_INDEX, byte.class));\r
3838                 bb.position(value);\r
3839                 bb.get(slice, 0, slice.length);\r
3840 \r
3841                 resultArray = slice;\r
3842                 resultArrayIndex = 0;\r
3843             }\r
3844 \r
3845             /* with correct data we have length>0 */\r
3846 \r
3847             if ((prevLength = fromUnicodeStatus) != 0) {\r
3848                 /* handle SI/SO stateful output */\r
3849                 byte shiftByte;\r
3850 \r
3851                 if (prevLength > 1 && length == 1) {\r
3852                     /* change from double-byte mode to single-byte */\r
3853                     shiftByte = (byte) UConverterConstants.SI;\r
3854                     fromUnicodeStatus = 1;\r
3855                 } else if (prevLength == 1 && length > 1) {\r
3856                     /* change from single-byte mode to double-byte */\r
3857                     shiftByte = (byte) UConverterConstants.SO;\r
3858                     fromUnicodeStatus = 2;\r
3859                 } else {\r
3860                     shiftByte = 0;\r
3861                 }\r
3862 \r
3863                 if (shiftByte != 0) {\r
3864                     /* prepend the shift byte to the result bytes */\r
3865                     bufferArray[0] = shiftByte;\r
3866                     if (resultArray != bufferArray || resultArrayIndex != bufferArrayIndex + 1) {\r
3867                         System.arraycopy(resultArray, resultArrayIndex, bufferArray, bufferArrayIndex + 1, length);\r
3868                     }\r
3869                     resultArray = bufferArray;\r
3870                     resultArrayIndex = bufferArrayIndex;\r
3871                     ++length;\r
3872                 }\r
3873             }\r
3874 \r
3875             return fromUWriteBytes(this, resultArray, resultArrayIndex, length, target, offsets, srcIndex);\r
3876         }\r
3877 \r
3878         /*\r
3879          * @return if(U_FAILURE) return the code point for cnv->fromUChar32 else return 0 after output has been written\r
3880          * to the target\r
3881          */\r
3882         private int fromU(int cp_, CharBuffer source, ByteBuffer target, IntBuffer offsets, int sourceIndex,\r
3883                 int length, boolean flush, CoderResult[] cr) {\r
3884             // ByteBuffer cx;\r
3885             long cp = cp_ & UConverterConstants.UNSIGNED_INT_MASK;\r
3886 \r
3887             useSubChar1 = false;\r
3888 \r
3889             if (sharedData.mbcs.extIndexes != null\r
3890                     && initialMatchFromU((int) cp, source, target, offsets, sourceIndex, flush, cr)) {\r
3891                 return 0; /* an extension mapping handled the input */\r
3892             }\r
3893 \r
3894             /* GB 18030 */\r
3895             if ((options & MBCS_OPTION_GB18030) != 0) {\r
3896                 long[] range;\r
3897                 int i;\r
3898 \r
3899                 for (i = 0; i < gb18030Ranges.length; ++i) {\r
3900                     range = gb18030Ranges[i];\r
3901                     if (range[0] <= cp && cp <= range[1]) {\r
3902                         /* found the Unicode code point, output the four-byte sequence for it */\r
3903                         long linear;\r
3904                         byte bytes[] = new byte[4];\r
3905 \r
3906                         /* get the linear value of the first GB 18030 code in this range */\r
3907                         linear = range[2] - LINEAR_18030_BASE;\r
3908 \r
3909                         /* add the offset from the beginning of the range */\r
3910                         linear += (cp - range[0]);\r
3911 \r
3912                         bytes[3] = (byte) (0x30 + linear % 10);\r
3913                         linear /= 10;\r
3914                         bytes[2] = (byte) (0x81 + linear % 126);\r
3915                         linear /= 126;\r
3916                         bytes[1] = (byte) (0x30 + linear % 10);\r
3917                         linear /= 10;\r
3918                         bytes[0] = (byte) (0x81 + linear);\r
3919 \r
3920                         /* output this sequence */\r
3921                         cr[0] = fromUWriteBytes(this, bytes, 0, 4, target, offsets, sourceIndex);\r
3922                         return 0;\r
3923                     }\r
3924                 }\r
3925             }\r
3926 \r
3927             /* no mapping */\r
3928             cr[0] = CoderResult.unmappableForLength(length);\r
3929             return (int) cp;\r
3930         }\r
3931 \r
3932         /*\r
3933          * target<targetLimit; set error code for overflow\r
3934          */\r
3935         private boolean initialMatchFromU(int cp, CharBuffer source, ByteBuffer target, IntBuffer offsets,\r
3936                 int srcIndex, boolean flush, CoderResult[] cr) {\r
3937             int[] value = new int[1];\r
3938             int match;\r
3939 \r
3940             /* try to match */\r
3941             match = matchFromU(cp, null, 0, 0, source, value, useFallback, flush);\r
3942 \r
3943             /* reject a match if the result is a single byte for DBCS-only */\r
3944             if (match >= 2\r
3945                     && !(FROM_U_GET_LENGTH(value[0]) == 1 && sharedData.mbcs.outputType == MBCS_OUTPUT_DBCS_ONLY)) {\r
3946                 /* advance src pointer for the consumed input */\r
3947                 source.position(source.position() + match - 2); /* remove 2 for the initial code point */\r
3948 \r
3949                 /* write result to target */\r
3950                 cr[0] = writeFromU(value[0], target, offsets, srcIndex);\r
3951                 return true;\r
3952             } else if (match < 0) {\r
3953                 /* save state for partial match */\r
3954                 int sArrayIndex;\r
3955                 int j;\r
3956 \r
3957                 /* copy the first code point */\r
3958                 preFromUFirstCP = cp;\r
3959 \r
3960                 /* now copy the newly consumed input */\r
3961                 sArrayIndex = source.position();\r
3962                 match = -match - 2; /* remove 2 for the initial code point */\r
3963                 for (j = 0; j < match; ++j) {\r
3964                     preFromUArray[j] = source.get(sArrayIndex++);\r
3965                 }\r
3966                 source.position(sArrayIndex); /* same as *src=srcLimit; because we reached the end of input */\r
3967                 preFromULength = (byte) match;\r
3968                 return true;\r
3969             } else if (match == 1) {\r
3970                 /* matched, no mapping but request for <subchar1> */\r
3971                 useSubChar1 = true;\r
3972                 return false;\r
3973             } else /* match==0 no match */{\r
3974                 return false;\r
3975             }\r
3976         }\r
3977         \r
3978         CoderResult cnvMBCSFromUnicodeWithOffsets(CharBuffer source, ByteBuffer target, IntBuffer offsets, boolean flush) {\r
3979             // Just call encodeLoop to remove duplicate code.\r
3980             return encodeLoop(source, target, offsets, flush);\r
3981         }\r
3982 \r
3983         /*\r
3984          * This version of ucnv_MBCSFromUnicode() is optimized for single-byte codepages that map only to and from the\r
3985          * BMP. In addition to single-byte/state optimizations, the offset calculations become much easier.\r
3986          */\r
3987         private CoderResult cnvMBCSSingleFromBMPWithOffsets(CharBuffer source, ByteBuffer target, IntBuffer offsets,\r
3988                 boolean flush) {\r
3989 \r
3990             CoderResult[] cr = { CoderResult.UNDERFLOW };\r
3991 \r
3992             int sourceArrayIndex, lastSource;\r
3993             int targetCapacity, length;\r
3994             char[] table;\r
3995             byte[] results;\r
3996 \r
3997             int c, sourceIndex;\r
3998             char value, minValue;\r
3999 \r
4000             /* set up the local pointers */\r
4001             sourceArrayIndex = source.position();\r
4002             targetCapacity = target.remaining();\r
4003             table = sharedData.mbcs.fromUnicodeTable;\r
4004 \r
4005             if ((options & UConverterConstants.OPTION_SWAP_LFNL) != 0) {\r
4006                 results = sharedData.mbcs.swapLFNLFromUnicodeBytes; // agljport:comment should swapLFNLFromUnicodeBytes\r
4007                 // be a ByteBuffer so results can be a 16-bit view\r
4008                 // of it?\r
4009             } else {\r
4010                 results = sharedData.mbcs.fromUnicodeBytes; // agljport:comment should swapLFNLFromUnicodeBytes be a\r
4011                 // ByteBuffer so results can be a 16-bit view of it?\r
4012             }\r
4013 \r
4014             if (useFallback) {\r
4015                 /* use all roundtrip and fallback results */\r
4016                 minValue = 0x800;\r
4017             } else {\r
4018                 /* use only roundtrips and fallbacks from private-use characters */\r
4019                 minValue = 0xc00;\r
4020             }\r
4021 \r
4022             /* get the converter state from UConverter */\r
4023             c = fromUChar32;\r
4024 \r
4025             /* sourceIndex=-1 if the current character began in the previous buffer */\r
4026             sourceIndex = c == 0 ? 0 : -1;\r
4027             lastSource = sourceArrayIndex;\r
4028 \r
4029             /*\r
4030              * since the conversion here is 1:1 UChar:uint8_t, we need only one counter for the minimum of the\r
4031              * sourceLength and targetCapacity\r
4032              */\r
4033             length = source.limit() - sourceArrayIndex;\r
4034             if (length < targetCapacity) {\r
4035                 targetCapacity = length;\r
4036             }\r
4037 \r
4038             boolean doloop = true;\r
4039             if (c != 0 && targetCapacity > 0) {\r
4040                 SideEffectsSingleBMP x = new SideEffectsSingleBMP(c, sourceArrayIndex);\r
4041                 doloop = getTrailSingleBMP(source, x, cr);\r
4042                 c = x.c;\r
4043                 sourceArrayIndex = x.sourceArrayIndex;\r
4044             }\r
4045 \r
4046             if (doloop) {\r
4047                 while (targetCapacity > 0) {\r
4048                     /*\r
4049                      * Get a correct Unicode code point: a single UChar for a BMP code point or a matched surrogate pair\r
4050                      * for a "supplementary code point".\r
4051                      */\r
4052                     c = source.get(sourceArrayIndex++);\r
4053                     /*\r
4054                      * Do not immediately check for single surrogates: Assume that they are unassigned and check for\r
4055                      * them in that case. This speeds up the conversion of assigned characters.\r
4056                      */\r
4057                     /* convert the Unicode code point in c into codepage bytes */\r
4058                     value = MBCS_SINGLE_RESULT_FROM_U(table, results, c);\r
4059 \r
4060                     /* is this code point assigned, or do we use fallbacks? */\r
4061                     if (value >= minValue) {\r
4062                         /* assigned, write the output character bytes from value and length */\r
4063                         /* length==1 */\r
4064                         /* this is easy because we know that there is enough space */\r
4065                         target.put((byte) value);\r
4066                         --targetCapacity;\r
4067 \r
4068                         /* normal end of conversion: prepare for a new character */\r
4069                         c = 0;\r
4070                         continue;\r
4071                     } else if (!UTF16.isSurrogate((char) c)) {\r
4072                         /* normal, unassigned BMP character */\r
4073                     } else if (UTF16.isLeadSurrogate((char) c)) {\r
4074                         // getTrail:\r
4075                         SideEffectsSingleBMP x = new SideEffectsSingleBMP(c, sourceArrayIndex);\r
4076                         doloop = getTrailSingleBMP(source, x, cr);\r
4077                         c = x.c;\r
4078                         sourceArrayIndex = x.sourceArrayIndex;\r
4079                         if (!doloop)\r
4080                             break;\r
4081                     } else {\r
4082                         /* this is an unmatched trail code unit (2nd surrogate) */\r
4083                         /* callback(illegal) */\r
4084                         cr[0] = CoderResult.malformedForLength(1);\r
4085                         break;\r
4086                     }\r
4087 \r
4088                     /* c does not have a mapping */\r
4089 \r
4090                     /* get the number of code units for c to correctly advance sourceIndex */\r
4091                     length = UTF16.getCharCount(c);\r
4092 \r
4093                     /* set offsets since the start or the last extension */\r
4094                     if (offsets != null) {\r
4095                         int count = sourceArrayIndex - lastSource;\r
4096 \r
4097                         /* do not set the offset for this character */\r
4098                         count -= length;\r
4099 \r
4100                         while (count > 0) {\r
4101                             offsets.put(sourceIndex++);\r
4102                             --count;\r
4103                         }\r
4104                         /* offsets and sourceIndex are now set for the current character */\r
4105                     }\r
4106 \r
4107                     /* try an extension mapping */\r
4108                     lastSource = sourceArrayIndex;\r
4109                     source.position(sourceArrayIndex);\r
4110                     c = fromU(c, source, target, offsets, sourceIndex, length, flush, cr);\r
4111                     sourceArrayIndex = source.position();\r
4112                     sourceIndex += length + (sourceArrayIndex - lastSource);\r
4113                     lastSource = sourceArrayIndex;\r
4114 \r
4115                     if (cr[0].isError()) {\r
4116                         /* not mappable or buffer overflow */\r
4117                         break;\r
4118                     } else {\r
4119                         /* a mapping was written to the target, continue */\r
4120 \r
4121                         /* recalculate the targetCapacity after an extension mapping */\r
4122                         targetCapacity = target.remaining();\r
4123                         length = source.limit() - sourceArrayIndex;\r
4124                         if (length < targetCapacity) {\r
4125                             targetCapacity = length;\r
4126                         }\r
4127                     }\r
4128                 }\r
4129             }\r
4130 \r
4131             if (sourceArrayIndex < source.limit() && !target.hasRemaining()) {\r
4132                 /* target is full */\r
4133                 cr[0] = CoderResult.OVERFLOW;\r
4134             }\r
4135 \r
4136             /* set offsets since the start or the last callback */\r
4137             if (offsets != null) {\r
4138                 int count = sourceArrayIndex - lastSource;\r
4139                 while (count > 0) {\r
4140                     offsets.put(sourceIndex++);\r
4141                     --count;\r
4142                 }\r
4143             }\r
4144 \r
4145             /* set the converter state back into UConverter */\r
4146             fromUChar32 = c;\r
4147 \r
4148             /* write back the updated pointers */\r
4149             source.position(sourceArrayIndex);\r
4150 \r
4151             return cr[0];\r
4152         }\r
4153 \r
4154         /* This version of ucnv_MBCSFromUnicodeWithOffsets() is optimized for single-byte codepages. */\r
4155         private CoderResult cnvMBCSSingleFromUnicodeWithOffsets(CharBuffer source, ByteBuffer target,\r
4156                 IntBuffer offsets, boolean flush) {\r
4157 \r
4158             CoderResult[] cr = { CoderResult.UNDERFLOW };\r
4159 \r
4160             int sourceArrayIndex;\r
4161 \r
4162             char[] table;\r
4163             byte[] results; // agljport:comment results is used to to get 16-bit values out of byte[] array\r
4164 \r
4165             int c;\r
4166             int sourceIndex, nextSourceIndex;\r
4167 \r
4168             char value, minValue;\r
4169 \r
4170             /* set up the local pointers */\r
4171             short uniMask;\r
4172             sourceArrayIndex = source.position();\r
4173 \r
4174             table = sharedData.mbcs.fromUnicodeTable;\r
4175 \r
4176             if ((options & UConverterConstants.OPTION_SWAP_LFNL) != 0) {\r
4177                 results = sharedData.mbcs.swapLFNLFromUnicodeBytes; // agljport:comment should swapLFNLFromUnicodeBytes\r
4178                 // be a ByteBuffer so results can be a 16-bit view\r
4179                 // of it?\r
4180             } else {\r
4181                 results = sharedData.mbcs.fromUnicodeBytes; // agljport:comment should swapLFNLFromUnicodeBytes be a\r
4182                 // ByteBuffer so results can be a 16-bit view of it?\r
4183             }\r
4184 \r
4185             if (useFallback) {\r
4186                 /* use all roundtrip and fallback results */\r
4187                 minValue = 0x800;\r
4188             } else {\r
4189                 /* use only roundtrips and fallbacks from private-use characters */\r
4190                 minValue = 0xc00;\r
4191             }\r
4192             // agljport:comment hasSupplementary only used in getTrail block which now simply repeats the mask operation\r
4193             uniMask = sharedData.mbcs.unicodeMask;\r
4194 \r
4195             /* get the converter state from UConverter */\r
4196             c = fromUChar32;\r
4197 \r
4198             /* sourceIndex=-1 if the current character began in the previous buffer */\r
4199             sourceIndex = c == 0 ? 0 : -1;\r
4200             nextSourceIndex = 0;\r
4201 \r
4202             boolean doloop = true;\r
4203             boolean doread = true;\r
4204             if (c != 0 && target.hasRemaining()) {\r
4205                 if (UTF16.isLeadSurrogate((char) c)) {\r
4206                     SideEffectsDouble x = new SideEffectsDouble(c, sourceArrayIndex, sourceIndex, nextSourceIndex);\r
4207                     doloop = getTrailDouble(source, target, uniMask, x, flush, cr);\r
4208                     doread = x.doread;\r
4209                     c = x.c;\r
4210                     sourceArrayIndex = x.sourceArrayIndex;\r
4211                     sourceIndex = x.sourceIndex;\r
4212                     nextSourceIndex = x.nextSourceIndex;\r
4213                 } else {\r
4214                     doread = false;\r
4215                 }\r
4216             }\r
4217 \r
4218             if (doloop) {\r
4219                 while (!doread || sourceArrayIndex < source.limit()) {\r
4220                     /*\r
4221                      * This following test is to see if available input would overflow the output. It does not catch\r
4222                      * output of more than one byte that overflows as a result of a multi-byte character or callback\r
4223                      * output from the last source character. Therefore, those situations also test for overflows and\r
4224                      * will then break the loop, too.\r
4225                      */\r
4226                     if (target.hasRemaining()) {\r
4227                         /*\r
4228                          * Get a correct Unicode code point: a single UChar for a BMP code point or a matched surrogate\r
4229                          * pair for a "supplementary code point".\r
4230                          */\r
4231 \r
4232                         if (doread) {\r
4233                             c = source.get(sourceArrayIndex++);\r
4234                             ++nextSourceIndex;\r
4235                             if (UTF16.isSurrogate((char) c)) {\r
4236                                 if (UTF16.isLeadSurrogate((char) c)) {\r
4237                                     // getTrail:\r
4238                                     SideEffectsDouble x = new SideEffectsDouble(c, sourceArrayIndex, sourceIndex,\r
4239                                             nextSourceIndex);\r
4240                                     doloop = getTrailDouble(source, target, uniMask, x, flush, cr);\r
4241                                     c = x.c;\r
4242                                     sourceArrayIndex = x.sourceArrayIndex;\r
4243                                     sourceIndex = x.sourceIndex;\r
4244                                     nextSourceIndex = x.nextSourceIndex;\r
4245                                     if (x.doread) {\r
4246                                         if (doloop)\r
4247                                             continue;\r
4248                                         else\r
4249                                             break;\r
4250                                     }\r
4251                                 } else {\r
4252                                     /* this is an unmatched trail code unit (2nd surrogate) */\r
4253                                     /* callback(illegal) */\r
4254                                     cr[0] = CoderResult.malformedForLength(1);\r
4255                                     break;\r
4256                                 }\r
4257                             }\r
4258                         } else {\r
4259                             doread = true;\r
4260                         }\r
4261 \r
4262                         /* convert the Unicode code point in c into codepage bytes */\r
4263                         value = MBCS_SINGLE_RESULT_FROM_U(table, results, c);\r
4264 \r
4265                         /* is this code point assigned, or do we use fallbacks? */\r
4266                         if (value >= minValue) {\r
4267                             /* assigned, write the output character bytes from value and length */\r
4268                             /* length==1 */\r
4269                             /* this is easy because we know that there is enough space */\r
4270                             target.put((byte) value);\r
4271                             if (offsets != null) {\r
4272                                 offsets.put(sourceIndex);\r
4273                             }\r
4274 \r
4275                             /* normal end of conversion: prepare for a new character */\r
4276                             c = 0;\r
4277                             sourceIndex = nextSourceIndex;\r
4278                         } else { /* unassigned */\r
4279                             /* try an extension mapping */\r
4280                             SideEffectsDouble x = new SideEffectsDouble(c, sourceArrayIndex, sourceIndex,\r
4281                                     nextSourceIndex);\r
4282                             doloop = unassignedDouble(source, target, x, flush, cr);\r
4283                             c = x.c;\r
4284                             sourceArrayIndex = x.sourceArrayIndex;\r
4285                             sourceIndex = x.sourceIndex;\r
4286                             nextSourceIndex = x.nextSourceIndex;\r
4287                             if (!doloop)\r
4288                                 break;\r
4289                         }\r
4290                     } else {\r
4291                         /* target is full */\r
4292                         cr[0] = CoderResult.OVERFLOW;\r
4293                         break;\r
4294                     }\r
4295                 }\r
4296             }\r
4297 \r
4298             /* set the converter state back into UConverter */\r
4299             fromUChar32 = c;\r
4300 \r
4301             /* write back the updated pointers */\r
4302             source.position(sourceArrayIndex);\r
4303 \r
4304             return cr[0];\r
4305         }\r
4306 \r
4307         /* This version of ucnv_MBCSFromUnicodeWithOffsets() is optimized for double-byte codepages. */\r
4308         private CoderResult cnvMBCSDoubleFromUnicodeWithOffsets(CharBuffer source, ByteBuffer target,\r
4309                 IntBuffer offsets, boolean flush) {\r
4310             CoderResult[] cr = { CoderResult.UNDERFLOW };\r
4311 \r
4312             int sourceArrayIndex;\r
4313 \r
4314             char[] table;\r
4315             byte[] bytes;\r
4316 \r
4317             int c, sourceIndex, nextSourceIndex;\r
4318 \r
4319             int stage2Entry;\r
4320             int value;\r
4321             int length;\r
4322             short uniMask;\r
4323 \r
4324             /* use optimized function if possible */\r
4325             uniMask = sharedData.mbcs.unicodeMask;\r
4326 \r
4327             /* set up the local pointers */\r
4328             sourceArrayIndex = source.position();\r
4329 \r
4330             table = sharedData.mbcs.fromUnicodeTable;\r
4331 \r
4332             if ((options & UConverterConstants.OPTION_SWAP_LFNL) != 0) {\r
4333                 bytes = sharedData.mbcs.swapLFNLFromUnicodeBytes;\r
4334             } else {\r
4335                 bytes = sharedData.mbcs.fromUnicodeBytes;\r
4336             }\r
4337 \r
4338             /* get the converter state from UConverter */\r
4339             c = fromUChar32;\r
4340 \r
4341             /* sourceIndex=-1 if the current character began in the previous buffer */\r
4342             sourceIndex = c == 0 ? 0 : -1;\r
4343             nextSourceIndex = 0;\r
4344 \r
4345             /* conversion loop */\r
4346             boolean doloop = true;\r
4347             boolean doread = true;\r
4348             if (c != 0 && target.hasRemaining()) {\r
4349                 if (UTF16.isLeadSurrogate((char) c)) {\r
4350                     SideEffectsDouble x = new SideEffectsDouble(c, sourceArrayIndex, sourceIndex, nextSourceIndex);\r
4351                     doloop = getTrailDouble(source, target, uniMask, x, flush, cr);\r
4352                     doread = x.doread;\r
4353                     c = x.c;\r
4354                     sourceArrayIndex = x.sourceArrayIndex;\r
4355                     sourceIndex = x.sourceIndex;\r
4356                     nextSourceIndex = x.nextSourceIndex;\r
4357                 } else {\r
4358                     doread = false;\r
4359                 }\r
4360             }\r
4361 \r
4362             if (doloop) {\r
4363                 while (!doread || sourceArrayIndex < source.limit()) {\r
4364                     /*\r
4365                      * This following test is to see if available input would overflow the output. It does not catch\r
4366                      * output of more than one byte that overflows as a result of a multi-byte character or callback\r
4367                      * output from the last source character. Therefore, those situations also test for overflows and\r
4368                      * will then break the loop, too.\r
4369                      */\r
4370                     if (target.hasRemaining()) {\r
4371                         if (doread) {\r
4372                             /*\r
4373                              * Get a correct Unicode code point: a single UChar for a BMP code point or a matched\r
4374                              * surrogate pair for a "supplementary code point".\r
4375                              */\r
4376                             c = source.get(sourceArrayIndex++);\r
4377                             ++nextSourceIndex;\r
4378                             /*\r
4379                              * This also tests if the codepage maps single surrogates. If it does, then surrogates are\r
4380                              * not paired but mapped separately. Note that in this case unmatched surrogates are not\r
4381                              * detected.\r
4382                              */\r
4383                             if (UTF16.isSurrogate((char) c) && (uniMask & UConverterConstants.HAS_SURROGATES) == 0) {\r
4384                                 if (UTF16.isLeadSurrogate((char) c)) {\r
4385                                     // getTrail:\r
4386                                     SideEffectsDouble x = new SideEffectsDouble(c, sourceArrayIndex, sourceIndex,\r
4387                                             nextSourceIndex);\r
4388                                     doloop = getTrailDouble(source, target, uniMask, x, flush, cr);\r
4389                                     c = x.c;\r
4390                                     sourceArrayIndex = x.sourceArrayIndex;\r
4391                                     sourceIndex = x.sourceIndex;\r
4392                                     nextSourceIndex = x.nextSourceIndex;\r
4393 \r
4394                                     if (x.doread) {\r
4395                                         if (doloop)\r
4396                                             continue;\r
4397                                         else\r
4398                                             break;\r
4399                                     }\r
4400                                 } else {\r
4401                                     /* this is an unmatched trail code unit (2nd surrogate) */\r
4402                                     /* callback(illegal) */\r
4403                                     cr[0] = CoderResult.malformedForLength(1);\r
4404                                     break;\r
4405                                 }\r
4406                             }\r
4407                         } else {\r
4408                             doread = true;\r
4409                         }\r
4410 \r
4411                         /* convert the Unicode code point in c into codepage bytes */\r
4412                         stage2Entry = MBCS_STAGE_2_FROM_U(table, c);\r
4413 \r
4414                         /* get the bytes and the length for the output */\r
4415                         /* MBCS_OUTPUT_2 */\r
4416                         value = MBCS_VALUE_2_FROM_STAGE_2(bytes, stage2Entry, c);\r
4417                         if ((value & UConverterConstants.UNSIGNED_INT_MASK) <= 0xff) {\r
4418                             length = 1;\r
4419                         } else {\r
4420                             length = 2;\r
4421                         }\r
4422 \r
4423                         /* is this code point assigned, or do we use fallbacks? */\r
4424                         if (!(MBCS_FROM_U_IS_ROUNDTRIP(stage2Entry, c) || (isFromUUseFallback(c) && value != 0))) {\r
4425                             /*\r
4426                              * We allow a 0 byte output if the "assigned" bit is set for this entry. There is no way\r
4427                              * with this data structure for fallback output to be a zero byte.\r
4428                              */\r
4429 \r
4430                             // unassigned:\r
4431                             SideEffectsDouble x = new SideEffectsDouble(c, sourceArrayIndex, sourceIndex,\r
4432                                     nextSourceIndex);\r
4433 \r
4434                             doloop = unassignedDouble(source, target, x, flush, cr);\r
4435                             c = x.c;\r
4436                             sourceArrayIndex = x.sourceArrayIndex;\r
4437                             sourceIndex = x.sourceIndex;\r
4438                             nextSourceIndex = x.nextSourceIndex;\r
4439                             if (doloop)\r
4440                                 continue;\r
4441                             else\r
4442                                 break;\r
4443                         }\r
4444 \r
4445                         /* write the output character bytes from value and length */\r
4446                         /* from the first if in the loop we know that targetCapacity>0 */\r
4447                         if (length == 1) {\r
4448                             /* this is easy because we know that there is enough space */\r
4449                             target.put((byte) value);\r
4450                             if (offsets != null) {\r
4451                                 offsets.put(sourceIndex);\r
4452                             }\r
4453                         } else /* length==2 */{\r
4454                             target.put((byte) (value >>> 8));\r
4455                             if (2 <= target.remaining()) {\r
4456                                 target.put((byte) value);\r
4457                                 if (offsets != null) {\r
4458                                     offsets.put(sourceIndex);\r
4459                                     offsets.put(sourceIndex);\r
4460                                 }\r
4461                             } else {\r
4462                                 if (offsets != null) {\r
4463                                     offsets.put(sourceIndex);\r
4464                                 }\r
4465                                 errorBuffer[0] = (byte) value;\r
4466                                 errorBufferLength = 1;\r
4467 \r
4468                                 /* target overflow */\r
4469                                 cr[0] = CoderResult.OVERFLOW;\r
4470                                 c = 0;\r
4471                                 break;\r
4472                             }\r
4473                         }\r
4474 \r
4475                         /* normal end of conversion: prepare for a new character */\r
4476                         c = 0;\r
4477                         sourceIndex = nextSourceIndex;\r
4478                         continue;\r
4479                     } else {\r
4480                         /* target is full */\r
4481                         cr[0] = CoderResult.OVERFLOW;\r
4482                         break;\r
4483                     }\r
4484                 }\r
4485             }\r
4486 \r
4487             /* set the converter state back into UConverter */\r
4488             fromUChar32 = c;\r
4489 \r
4490             /* write back the updated pointers */\r
4491             source.position(sourceArrayIndex);\r
4492 \r
4493             return cr[0];\r
4494         }\r
4495 \r
4496         private final class SideEffectsSingleBMP {\r
4497             int c, sourceArrayIndex;\r
4498 \r
4499             SideEffectsSingleBMP(int c_, int sourceArrayIndex_) {\r
4500                 c = c_;\r
4501                 sourceArrayIndex = sourceArrayIndex_;\r
4502             }\r
4503         }\r
4504 \r
4505         // function made out of block labeled getTrail in ucnv_MBCSSingleFromUnicodeWithOffsets\r
4506         // assumes input c is lead surrogate\r
4507         private final boolean getTrailSingleBMP(CharBuffer source, SideEffectsSingleBMP x, CoderResult[] cr) {\r
4508             if (x.sourceArrayIndex < source.limit()) {\r
4509                 /* test the following code unit */\r
4510                 char trail = source.get(x.sourceArrayIndex);\r
4511                 if (UTF16.isTrailSurrogate(trail)) {\r
4512                     ++x.sourceArrayIndex;\r
4513                     x.c = UCharacter.getCodePoint((char) x.c, trail);\r
4514                     /* this codepage does not map supplementary code points */\r
4515                     /* callback(unassigned) */\r
4516                     cr[0] = CoderResult.unmappableForLength(2);\r
4517                     return false;\r
4518                 } else {\r
4519                     /* this is an unmatched lead code unit (1st surrogate) */\r
4520                     /* callback(illegal) */\r
4521                     cr[0] = CoderResult.malformedForLength(1);\r
4522                     return false;\r
4523                 }\r
4524             } else {\r
4525                 /* no more input */\r
4526                 return false;\r
4527             }\r
4528             // return true;\r
4529         }\r
4530 \r
4531         private final class SideEffects {\r
4532             int c, sourceArrayIndex, sourceIndex, nextSourceIndex, prevSourceIndex, prevLength;\r
4533             boolean doread = true;\r
4534 \r
4535             SideEffects(int c_, int sourceArrayIndex_, int sourceIndex_, int nextSourceIndex_, int prevSourceIndex_,\r
4536                     int prevLength_) {\r
4537                 c = c_;\r
4538                 sourceArrayIndex = sourceArrayIndex_;\r
4539                 sourceIndex = sourceIndex_;\r
4540                 nextSourceIndex = nextSourceIndex_;\r
4541                 prevSourceIndex = prevSourceIndex_;\r
4542                 prevLength = prevLength_;\r
4543             }\r
4544         }\r
4545 \r
4546         // function made out of block labeled getTrail in ucnv_MBCSFromUnicodeWithOffsets\r
4547         // assumes input c is lead surrogate\r
4548         private final boolean getTrail(CharBuffer source, ByteBuffer target, int uniMask, SideEffects x,\r
4549                 boolean flush, CoderResult[] cr) {\r
4550             if (x.sourceArrayIndex < source.limit()) {\r
4551                 /* test the following code unit */\r
4552                 char trail = source.get(x.sourceArrayIndex);\r
4553                 if (UTF16.isTrailSurrogate(trail)) {\r
4554                     ++x.sourceArrayIndex;\r
4555                     ++x.nextSourceIndex;\r
4556                     /* convert this supplementary code point */\r
4557                     x.c = UCharacter.getCodePoint((char) x.c, trail);\r
4558                     if ((uniMask & UConverterConstants.HAS_SUPPLEMENTARY) == 0) {\r
4559                         /* BMP-only codepages are stored without stage 1 entries for supplementary code points */\r
4560                         fromUnicodeStatus = x.prevLength; /* save the old state */\r
4561                         /* callback(unassigned) */\r
4562                         x.doread = true;\r
4563                         return unassigned(source, target, null, x, flush, cr);\r
4564                     } else {\r
4565                         x.doread = false;\r
4566                         return true;\r
4567                     }\r
4568                 } else {\r
4569                     /* this is an unmatched lead code unit (1st surrogate) */\r
4570                     /* callback(illegal) */\r
4571                     cr[0] = CoderResult.malformedForLength(1);\r
4572                     return false;\r
4573                 }\r
4574             } else {\r
4575                 /* no more input */\r
4576                 return false;\r
4577             }\r
4578         }\r
4579 \r
4580         // function made out of block labeled unassigned in ucnv_MBCSFromUnicodeWithOffsets\r
4581         private final boolean unassigned(CharBuffer source, ByteBuffer target, IntBuffer offsets, SideEffects x,\r
4582                 boolean flush, CoderResult[] cr) {\r
4583             /* try an extension mapping */\r
4584             int sourceBegin = x.sourceArrayIndex;\r
4585             source.position(x.sourceArrayIndex);\r
4586             x.c = fromU(x.c, source, target, null, x.sourceIndex, x.nextSourceIndex, flush, cr);\r
4587             x.sourceArrayIndex = source.position();\r
4588             x.nextSourceIndex += x.sourceArrayIndex - sourceBegin;\r
4589             x.prevLength = fromUnicodeStatus;\r
4590 \r
4591             if (cr[0].isError()) {\r
4592                 /* not mappable or buffer overflow */\r
4593                 return false;\r
4594             } else {\r
4595                 /* a mapping was written to the target, continue */\r
4596 \r
4597                 /* recalculate the targetCapacity after an extension mapping */\r
4598                 // x.targetCapacity=pArgs.targetLimit-x.targetArrayIndex;\r
4599                 /* normal end of conversion: prepare for a new character */\r
4600                 if (offsets != null) {\r
4601                     x.prevSourceIndex = x.sourceIndex;\r
4602                     x.sourceIndex = x.nextSourceIndex;\r
4603                 }\r
4604                 return true;\r
4605             }\r
4606         }\r
4607 \r
4608         private final class SideEffectsDouble {\r
4609             int c, sourceArrayIndex, sourceIndex, nextSourceIndex;\r
4610             boolean doread = true;\r
4611 \r
4612             SideEffectsDouble(int c_, int sourceArrayIndex_, int sourceIndex_, int nextSourceIndex_) {\r
4613                 c = c_;\r
4614                 sourceArrayIndex = sourceArrayIndex_;\r
4615                 sourceIndex = sourceIndex_;\r
4616                 nextSourceIndex = nextSourceIndex_;\r
4617             }\r
4618         }\r
4619 \r
4620         // function made out of block labeled getTrail in ucnv_MBCSDoubleFromUnicodeWithOffsets\r
4621         // assumes input c is lead surrogate\r
4622         private final boolean getTrailDouble(CharBuffer source, ByteBuffer target, int uniMask,\r
4623                 SideEffectsDouble x, boolean flush, CoderResult[] cr) {\r
4624             if (x.sourceArrayIndex < source.limit()) {\r
4625                 /* test the following code unit */\r
4626                 char trail = source.get(x.sourceArrayIndex);\r
4627                 if (UTF16.isTrailSurrogate(trail)) {\r
4628                     ++x.sourceArrayIndex;\r
4629                     ++x.nextSourceIndex;\r
4630                     /* convert this supplementary code point */\r
4631                     x.c = UCharacter.getCodePoint((char) x.c, trail);\r
4632                     if ((uniMask & UConverterConstants.HAS_SUPPLEMENTARY) == 0) {\r
4633                         /* BMP-only codepages are stored without stage 1 entries for supplementary code points */\r
4634                         /* callback(unassigned) */\r
4635                         x.doread = true;\r
4636                         return unassignedDouble(source, target, x, flush, cr);\r
4637                     } else {\r
4638                         x.doread = false;\r
4639                         return true;\r
4640                     }\r
4641                 } else {\r
4642                     /* this is an unmatched lead code unit (1st surrogate) */\r
4643                     /* callback(illegal) */\r
4644                     cr[0] = CoderResult.malformedForLength(1);\r
4645                     return false;\r
4646                 }\r
4647             } else {\r
4648                 /* no more input */\r
4649                 return false;\r
4650             }\r
4651         }\r
4652 \r
4653         // function made out of block labeled unassigned in ucnv_MBCSDoubleFromUnicodeWithOffsets\r
4654         private final boolean unassignedDouble(CharBuffer source, ByteBuffer target, SideEffectsDouble x,\r
4655                 boolean flush, CoderResult[] cr) {\r
4656             /* try an extension mapping */\r
4657             int sourceBegin = x.sourceArrayIndex;\r
4658             source.position(x.sourceArrayIndex);\r
4659             x.c = fromU(x.c, source, target, null, x.sourceIndex, x.nextSourceIndex, flush, cr);\r
4660             x.sourceArrayIndex = source.position();\r
4661             x.nextSourceIndex += x.sourceArrayIndex - sourceBegin;\r
4662 \r
4663             if (cr[0].isError()) {\r
4664                 /* not mappable or buffer overflow */\r
4665                 return false;\r
4666             } else {\r
4667                 /* a mapping was written to the target, continue */\r
4668 \r
4669                 /* recalculate the targetCapacity after an extension mapping */\r
4670                 // x.targetCapacity=pArgs.targetLimit-x.targetArrayIndex;\r
4671                 /* normal end of conversion: prepare for a new character */\r
4672                 x.sourceIndex = x.nextSourceIndex;\r
4673                 return true;\r
4674             }\r
4675         }\r
4676 \r
4677         /**\r
4678          * Overrides super class method\r
4679          * \r
4680          * @param encoder\r
4681          * @param source\r
4682          * @param target\r
4683          * @param offsets\r
4684          * @return\r
4685          */\r
4686         protected CoderResult cbFromUWriteSub(CharsetEncoderICU encoder, CharBuffer source, ByteBuffer target,\r
4687                 IntBuffer offsets) {\r
4688             CharsetMBCS cs = (CharsetMBCS) encoder.charset();\r
4689             byte[] subchar;\r
4690             int length;\r
4691 \r
4692             if (cs.subChar1 != 0\r
4693                     && (cs.sharedData.mbcs.extIndexes != null ? encoder.useSubChar1\r
4694                             : (encoder.invalidUCharBuffer[0] <= 0xff))) {\r
4695                 /*\r
4696                  * select subChar1 if it is set (not 0) and the unmappable Unicode code point is up to U+00ff (IBM MBCS\r
4697                  * behavior)\r
4698                  */\r
4699                 subchar = new byte[] { cs.subChar1 };\r
4700                 length = 1;\r
4701             } else {\r
4702                 /* select subChar in all other cases */\r
4703                 subchar = cs.subChar;\r
4704                 length = cs.subCharLen;\r
4705             }\r
4706 \r
4707             /* reset the selector for the next code point */\r
4708             encoder.useSubChar1 = false;\r
4709 \r
4710             if (cs.sharedData.mbcs.outputType == MBCS_OUTPUT_2_SISO) {\r
4711                 byte[] buffer = new byte[4];\r
4712                 int i = 0;\r
4713 \r
4714                 /* fromUnicodeStatus contains prevLength */\r
4715                 switch (length) {\r
4716                 case 1:\r
4717                     if (encoder.fromUnicodeStatus == 2) {\r
4718                         /* DBCS mode and SBCS sub char: change to SBCS */\r
4719                         encoder.fromUnicodeStatus = 1;\r
4720                         buffer[i++] = UConverterConstants.SI;\r
4721                     }\r
4722                     buffer[i++] = subchar[0];\r
4723                     break;\r
4724                 case 2:\r
4725                     if (encoder.fromUnicodeStatus <= 1) {\r
4726                         /* SBCS mode and DBCS sub char: change to DBCS */\r
4727                         encoder.fromUnicodeStatus = 2;\r
4728                         buffer[i++] = UConverterConstants.SO;\r
4729                     }\r
4730                     buffer[i++] = subchar[0];\r
4731                     buffer[i++] = subchar[1];\r
4732                     break;\r
4733                 default:\r
4734                     throw new IllegalArgumentException();\r
4735                 }\r
4736 \r
4737                 subchar = buffer;\r
4738                 length = i;\r
4739             }\r
4740             return CharsetEncoderICU.fromUWriteBytes(encoder, subchar, 0, length, target, offsets, source.position());\r
4741         }\r
4742 \r
4743         /**\r
4744          * Gets called whenever CharsetEncoder.replaceWith gets called. allowReplacementChanges only allows subChar and\r
4745          * subChar1 to be modified outside construction (since replaceWith is called once during construction).\r
4746          * \r
4747          * @param replacement\r
4748          *            The replacement for subchar.\r
4749          */\r
4750         protected void implReplaceWith(byte[] replacement) {\r
4751             if (allowReplacementChanges) {\r
4752                 CharsetMBCS cs = (CharsetMBCS) this.charset();\r
4753 \r
4754                 System.arraycopy(replacement, 0, cs.subChar, 0, replacement.length);\r
4755                 cs.subCharLen = (byte) replacement.length;\r
4756                 cs.subChar1 = 0;\r
4757             }\r
4758         }\r
4759     }\r
4760 \r
4761     public CharsetDecoder newDecoder() {\r
4762         return new CharsetDecoderMBCS(this);\r
4763     }\r
4764 \r
4765     public CharsetEncoder newEncoder() {\r
4766         return new CharsetEncoderMBCS(this);\r
4767     }\r
4768 \r
4769     @SuppressWarnings("fallthrough")\r
4770     void MBCSGetFilteredUnicodeSetForUnicode(UConverterSharedData data, UnicodeSet setFillIn, int which, int filter){\r
4771         UConverterMBCSTable mbcsTable;\r
4772         char[] table;\r
4773         char st1,maxStage1, st2;\r
4774         int st3;\r
4775         int c ;\r
4776         \r
4777         mbcsTable = data.mbcs;\r
4778         table = mbcsTable.fromUnicodeTable; \r
4779         if((mbcsTable.unicodeMask & UConverterConstants.HAS_SUPPLEMENTARY)!=0){\r
4780             maxStage1 = 0x440;\r
4781         }\r
4782         else{\r
4783             maxStage1 = 0x40;\r
4784         }\r
4785         c=0; /* keep track of current code point while enumerating */\r
4786         \r
4787         if(mbcsTable.outputType==MBCS_OUTPUT_1){\r
4788             char stage2, stage3;\r
4789             char minValue;\r
4790             CharBuffer results;\r
4791             results = ByteBuffer.wrap(mbcsTable.fromUnicodeBytes).asCharBuffer();\r
4792                                    \r
4793             if(which==ROUNDTRIP_SET) {\r
4794                 /* use only roundtrips */\r
4795                 minValue=0xf00;\r
4796             } else {\r
4797                 /* use all roundtrip and fallback results */\r
4798                 minValue=0x800;\r
4799             }\r
4800             for(st1=0;st1<maxStage1;++st1){\r
4801                 st2 = table[st1];\r
4802                 if(st2>maxStage1){\r
4803                     stage2 = st2;\r
4804                     for(st2=0; st2<64; ++st2){\r
4805                         st3 = table[stage2 + st2];\r
4806                         if(st3!=0){\r
4807                             /*read the stage 3 block */\r
4808                             stage3 = (char)st3;\r
4809                             do {\r
4810                                 if(results.get(stage3++)>=minValue){\r
4811                                      setFillIn.add(c);\r
4812                                 }\r
4813                                \r
4814                             }while((++c&0xf) !=0);\r
4815                           } else {\r
4816                             c+= 16; /*empty stage 2 block */\r
4817                         }\r
4818                     }\r
4819                 } else {\r
4820                     c+=1024; /* empty stage 2 block */\r
4821                 }\r
4822             }\r
4823         } else {\r
4824             int stage2,stage3;\r
4825             byte[] bytes;\r
4826             int st3Multiplier;\r
4827             int value;\r
4828             boolean useFallBack;\r
4829             bytes = mbcsTable.fromUnicodeBytes;\r
4830             useFallBack = (which == ROUNDTRIP_AND_FALLBACK_SET);\r
4831             switch(mbcsTable.outputType) {\r
4832             case MBCS_OUTPUT_3:\r
4833             case MBCS_OUTPUT_4_EUC:\r
4834                 st3Multiplier = 3;\r
4835                 break;\r
4836             case MBCS_OUTPUT_4:\r
4837                 st3Multiplier =4;\r
4838                 break;\r
4839             default:\r
4840                 st3Multiplier =2;\r
4841                 break;\r
4842             }\r
4843             //ByteBuffer buffer = (ByteBuffer)charTobyte(table);\r
4844             \r
4845             for(st1=0;st1<maxStage1;++st1){\r
4846                 st2 = table[st1]; \r
4847                 if(st2>(maxStage1>>1)){\r
4848                     stage2 =  st2 ;\r
4849                     for(st2=0;st2<128;++st2){\r
4850                         /*read the stage 3 block */\r
4851                         st3 = table[stage2*2 + st2]<<16;\r
4852                         st3+=table[stage2*2 + ++st2];\r
4853                         if(st3!=0){\r
4854                         //if((st3=table[stage2+st2])!=0){\r
4855                             stage3 = st3Multiplier*16*(st3&UConverterConstants.UNSIGNED_SHORT_MASK);\r
4856                             \r
4857                             /* get the roundtrip flags for the stage 3 block */\r
4858                             st3>>=16;\r
4859                             st3 &= UConverterConstants.UNSIGNED_SHORT_MASK;\r
4860                             switch(filter) {\r
4861                             case UCNV_SET_FILTER_NONE:\r
4862                                 do {\r
4863                                     \r
4864                                    if((st3&1)!=0){\r
4865                                         setFillIn.add(c);\r
4866                                         stage3+=st3Multiplier;\r
4867                                    }else if (useFallBack) {\r
4868                                         \r
4869                                         char b =0;\r
4870                                         switch(st3Multiplier) {\r
4871                                         case 4 :\r
4872                                            \r
4873                                             b|= ByteBuffer.wrap(bytes).getChar(stage3++);\r
4874                                            \r
4875                                         case 3 :\r
4876                                             \r
4877                                             b|= ByteBuffer.wrap(bytes).getChar(stage3++);\r
4878                                            \r
4879                                         case 2 :\r
4880                                            \r
4881                                             b|= ByteBuffer.wrap(bytes).getChar(stage3) | ByteBuffer.wrap(bytes).getChar(stage3+1);\r
4882                                             stage3+=2;\r
4883                                         default:\r
4884                                             break;\r
4885                                         }\r
4886                                         if(b!=0) {\r
4887                                             setFillIn.add(c);\r
4888                                         }\r
4889                                     }\r
4890                                     st3>>=1;\r
4891                                 }while((++c&0xf)!=0);\r
4892                                 break;\r
4893                             case UCNV_SET_FILTER_DBCS_ONLY:\r
4894                                 /* Ignore single bytes results (<0x100). */\r
4895                                 do {\r
4896                                     if(((st3&1) != 0 || useFallBack) && \r
4897                                             (UConverterConstants.UNSIGNED_SHORT_MASK & (ByteBuffer.wrap(bytes).getChar(stage3))) >= 0x100){\r
4898                                         setFillIn.add(c);\r
4899                                     }\r
4900                                     st3>>=1;\r
4901                                     stage3+=2;\r
4902                                 }while((++c&0xf) != 0);\r
4903                                break;\r
4904                             case UCNV_SET_FILTER_2022_CN :\r
4905                                 /* only add code points that map to CNS 11643 planes 1&2 for non-EXT ISO-2022-CN. */\r
4906                                 do {\r
4907                                     if(((st3&1) != 0 || useFallBack) && \r
4908                                             ((value= (UConverterConstants.UNSIGNED_BYTE_MASK & (ByteBuffer.wrap(bytes).get(stage3))))==0x81 || value==0x82) ){\r
4909                                         setFillIn.add(c);\r
4910                                     }\r
4911                                     st3>>=1;\r
4912                                     stage3+=3;\r
4913                                 }while((++c&0xf)!=0);\r
4914                                 break;\r
4915                             case UCNV_SET_FILTER_SJIS:\r
4916                                 /* only add code points that map to Shift-JIS codes corresponding to JIS X 0208. */\r
4917                                 do{\r
4918                                     \r
4919                                     if(((st3&1) != 0 || useFallBack) && (value=(UConverterConstants.UNSIGNED_SHORT_MASK & (ByteBuffer.wrap(bytes).getChar(stage3))))>=0x8140 && value<=0xeffc){\r
4920                                         setFillIn.add(c);\r
4921                                     }\r
4922                                     st3>>=1;\r
4923                                     stage3+=2;\r
4924                                 }while((++c&0xf)!=0);\r
4925                                 break;\r
4926                             case UCNV_SET_FILTER_GR94DBCS:\r
4927                                 /* only add code points that maps to ISO 2022 GR 94 DBCS codes*/\r
4928                                 do {\r
4929                                     if(((st3&1) != 0 || useFallBack) && \r
4930                                             (UConverterConstants.UNSIGNED_SHORT_MASK & ((value=(UConverterConstants.UNSIGNED_SHORT_MASK & (ByteBuffer.wrap(bytes).getChar(stage3))))- 0xa1a1))<=(0xfefe - 0xa1a1) && \r
4931                                             (UConverterConstants.UNSIGNED_BYTE_MASK & (value - 0xa1)) <= (0xfe - 0xa1)){\r
4932                                         setFillIn.add(c);\r
4933                                     }\r
4934                                     st3>>=1;\r
4935                                     stage3+=2;\r
4936                                 }while((++c&0xf)!=0);\r
4937                                 break;\r
4938                             case UCNV_SET_FILTER_HZ:\r
4939                                 /*Only add code points that are suitable for HZ DBCS*/\r
4940                                 do {\r
4941                                     if( ((st3&1) != 0 || useFallBack) && \r
4942                                             (UConverterConstants.UNSIGNED_SHORT_MASK & ((value=(UConverterConstants.UNSIGNED_SHORT_MASK & (ByteBuffer.wrap(bytes).getChar(stage3))))-0xa1a1))<=(0xfdfe - 0xa1a1) &&\r
4943                                             (UConverterConstants.UNSIGNED_BYTE_MASK & (value - 0xa1)) <= (0xfe - 0xa1)){\r
4944                                         setFillIn.add(c);\r
4945                                     }\r
4946                                     st3>>=1;\r
4947                                     stage3+=2;\r
4948                                 }while((++c&0xf) != 0);\r
4949                                 break;\r
4950                             default:\r
4951                                 return;\r
4952                             }\r
4953                         } else {\r
4954                             c+=16; /* empty stage 3 block */\r
4955                         }\r
4956                     }\r
4957                 } else {\r
4958                     c+=1024; /*empty stage2 block */\r
4959                 }\r
4960             }\r
4961         }\r
4962         extGetUnicodeSet(setFillIn, which, filter, data);\r
4963     }\r
4964    \r
4965     static void extGetUnicodeSetString(ByteBuffer cx,UnicodeSet setFillIn, boolean useFallback, \r
4966         int minLength, int c, char s[],int length,int sectionIndex){\r
4967         CharBuffer fromUSectionUChar;\r
4968         IntBuffer fromUSectionValues;\r
4969         fromUSectionUChar = (CharBuffer)ARRAY(cx, EXT_FROM_U_UCHARS_INDEX,char.class );\r
4970         fromUSectionValues = (IntBuffer)ARRAY(cx, EXT_FROM_U_VALUES_INDEX,int.class );\r
4971         int fromUSectionUCharIndex = fromUSectionUChar.position()+sectionIndex;\r
4972         int fromUSectionValuesIndex = fromUSectionValues.position()+sectionIndex;\r
4973         int value, i, count;\r
4974         \r
4975         /* read first pair of the section */\r
4976        count = fromUSectionUChar.get(fromUSectionUCharIndex++);\r
4977        value = fromUSectionValues.get(fromUSectionValuesIndex++);\r
4978        if(value!=0 && (FROM_U_IS_ROUNDTRIP(value) || useFallback) && FROM_U_GET_LENGTH(value)>=minLength) {\r
4979            if(c>=0){\r
4980                setFillIn.add(c);\r
4981            } else {\r
4982                String normalizedString=""; // String for composite characters \r
4983                for(int j=0; j<length;j++){\r
4984                    normalizedString+=s[j];\r
4985                }\r
4986                for(int j=0;j<length;j++){\r
4987                    setFillIn.add(normalizedString);\r
4988                }\r
4989              \r
4990              }\r
4991        }\r
4992        \r
4993        for(i=0; i<count; ++i){\r
4994            s[length] = fromUSectionUChar.get(fromUSectionUCharIndex + i);\r
4995            value = fromUSectionValues.get(fromUSectionValuesIndex + i);\r
4996            \r
4997            if(value==0) {\r
4998                /* no mapping, do nothing */\r
4999            } else if (FROM_U_IS_PARTIAL(value)) {\r
5000                extGetUnicodeSetString( cx, setFillIn, useFallback, minLength, UConverterConstants.U_SENTINEL, s, length+1,\r
5001                        FROM_U_GET_PARTIAL_INDEX(value));\r
5002            } else if ((useFallback ? (value&FROM_U_RESERVED_MASK)==0:((value&(FROM_U_ROUNDTRIP_FLAG|FROM_U_RESERVED_MASK))==FROM_U_ROUNDTRIP_FLAG)) \r
5003                    && FROM_U_GET_LENGTH(value)>=minLength) {\r
5004                String normalizedString=""; // String for composite characters \r
5005                for(int j=0; j<(length+1);j++){\r
5006                    normalizedString+=s[j];\r
5007                }\r
5008              setFillIn.add(normalizedString);\r
5009            }\r
5010        }\r
5011         \r
5012     }\r
5013     \r
5014     \r
5015     static void extGetUnicodeSet(UnicodeSet setFillIn, int which, int filter, UConverterSharedData Data){\r
5016         int st1, stage1Length, st2, st3, minLength;\r
5017         int ps2, ps3;\r
5018         \r
5019         CharBuffer stage12, stage3;\r
5020         int value, length;\r
5021         IntBuffer stage3b;\r
5022         boolean useFallback;\r
5023         char s[] = new char[MAX_UCHARS];\r
5024         int c;\r
5025         ByteBuffer cx = Data.mbcs.extIndexes;\r
5026         if(cx == null){\r
5027             return;\r
5028         }\r
5029         stage12 = (CharBuffer)ARRAY(cx, EXT_FROM_U_STAGE_12_INDEX,char.class );\r
5030         stage3 = (CharBuffer)ARRAY(cx, EXT_FROM_U_STAGE_3_INDEX,char.class );\r
5031         stage3b = (IntBuffer)ARRAY(cx, EXT_FROM_U_STAGE_3B_INDEX,int.class );\r
5032         \r
5033         stage1Length = cx.asIntBuffer().get(EXT_FROM_U_STAGE_1_LENGTH);\r
5034         useFallback = (which==ROUNDTRIP_AND_FALLBACK_SET);\r
5035         \r
5036         c = 0;\r
5037         if(filter == UCNV_SET_FILTER_2022_CN) {\r
5038             minLength = 3;\r
5039         } else if (Data.mbcs.outputType == MBCS_OUTPUT_DBCS_ONLY || filter != UCNV_SET_FILTER_NONE) {\r
5040             /* DBCS-only, ignore single-byte results */\r
5041             minLength = 2;\r
5042         } else {\r
5043             minLength = 1;\r
5044         }\r
5045         \r
5046         for(st1=0; st1< stage1Length; ++st1){\r
5047             st2 = stage12.get(st1);\r
5048             if(st2>stage1Length) {\r
5049                 ps2 = st2;\r
5050                 for(st2=0;st2<64;++st2){\r
5051                     st3=((int) stage12.get(ps2+st2))<<STAGE_2_LEFT_SHIFT;\r
5052                     if(st3!= 0){\r
5053                         ps3 = st3;\r
5054                         do {\r
5055                             value = stage3b.get(UConverterConstants.UNSIGNED_SHORT_MASK&stage3.get(ps3++));\r
5056                             if(value==0){\r
5057                                 /* no mapping do nothing */\r
5058                             }else if (FROM_U_IS_PARTIAL(value)){\r
5059                                 length = 0;\r
5060                                 length=UTF16.append(s, length, c);\r
5061                                 extGetUnicodeSetString(cx,setFillIn,useFallback,minLength,c,s,length,FROM_U_GET_PARTIAL_INDEX(value));\r
5062                             } else if ((useFallback ?  (value&FROM_U_RESERVED_MASK)==0 :((value&(FROM_U_ROUNDTRIP_FLAG|FROM_U_RESERVED_MASK))== FROM_U_ROUNDTRIP_FLAG)) && \r
5063                                     FROM_U_GET_LENGTH(value)>=minLength){\r
5064                                 \r
5065                                 switch(filter) {\r
5066                                 case UCNV_SET_FILTER_2022_CN:\r
5067                                     if(!(FROM_U_GET_LENGTH(value)==3 && FROM_U_GET_DATA(value)<=0x82ffff)){\r
5068                                         continue;\r
5069                                     }\r
5070                                     break;\r
5071                                 case UCNV_SET_FILTER_SJIS:\r
5072                                     if(!(FROM_U_GET_LENGTH(value)==2 && (value=FROM_U_GET_DATA(value))>=0x8140 && value<=0xeffc)){\r
5073                                         continue;\r
5074                                     }\r
5075                                     break;\r
5076                                 case UCNV_SET_FILTER_GR94DBCS:\r
5077                                     if(!(FROM_U_GET_LENGTH(value)==2 && (UConverterConstants.UNSIGNED_SHORT_MASK & ((value=FROM_U_GET_DATA(value)) - 0xa1a1))<=(0xfefe - 0xa1a1) \r
5078                                             && (UConverterConstants.UNSIGNED_BYTE_MASK & (value - 0xa1))<= (0xfe - 0xa1))){\r
5079                                         \r
5080                                         continue;\r
5081                                     }\r
5082                                     break;\r
5083                                 case UCNV_SET_FILTER_HZ:\r
5084                                     if(!(FROM_U_GET_LENGTH(value)==2 && (UConverterConstants.UNSIGNED_SHORT_MASK & ((value=FROM_U_GET_DATA(value)) - 0xa1a1))<=(0xfdfe - 0xa1a1) \r
5085                                             && (UConverterConstants.UNSIGNED_BYTE_MASK & (value - 0xa1))<= (0xfe - 0xa1))){\r
5086                                         continue;\r
5087                                     }\r
5088                                     break;\r
5089                                 default:\r
5090                                     /*\r
5091                                      * UCNV_SET_FILTER_NONE,\r
5092                                      * or UCNV_SET_FILTER_DBCS_ONLY which is handled via minLength\r
5093                                      */\r
5094                                     break;\r
5095                                 }\r
5096                                 setFillIn.add(c);\r
5097                               \r
5098                             }\r
5099                         }while((++c&0xf) != 0);\r
5100                       \r
5101                     } else {\r
5102                         c+=16;   /* emplty stage3 block */\r
5103                     }\r
5104                 }\r
5105             } else {\r
5106                 c+=1024;  /* empty stage 2 block*/\r
5107             }\r
5108         }\r
5109     }\r
5110     \r
5111     void MBCSGetUnicodeSetForUnicode(UConverterSharedData data, UnicodeSet setFillIn, int which){\r
5112         MBCSGetFilteredUnicodeSetForUnicode(data, setFillIn, which, \r
5113                 this.sharedData.mbcs.outputType==MBCS_OUTPUT_DBCS_ONLY ? UCNV_SET_FILTER_DBCS_ONLY : UCNV_SET_FILTER_NONE );\r
5114     }\r
5115     \r
5116     void getUnicodeSetImpl( UnicodeSet setFillIn, int which){\r
5117         if((options & MBCS_OPTION_GB18030)!=0){\r
5118             setFillIn.add(0, 0xd7ff);\r
5119             setFillIn.add(0xe000, 0x10ffff);\r
5120         }\r
5121         else {\r
5122             this.MBCSGetUnicodeSetForUnicode(sharedData, setFillIn, which);\r
5123         }\r
5124     }\r
5125 \r
5126 }\r