]> gitweb.fperrin.net Git - Dictionary.git/blob - jars/icu4j-4_8_1_1/main/classes/charset/src/com/ibm/icu/charset/CharsetISO2022.java
Added flags.
[Dictionary.git] / jars / icu4j-4_8_1_1 / main / classes / charset / src / com / ibm / icu / charset / CharsetISO2022.java
1 /*
2  *******************************************************************************
3  * Copyright (C) 2008-2010, International Business Machines Corporation and         *
4  * others. All Rights Reserved.                                                *
5  *******************************************************************************
6  */
7 package com.ibm.icu.charset;
8
9 import java.nio.ByteBuffer;
10 import java.nio.CharBuffer;
11 import java.nio.IntBuffer;
12 import java.nio.charset.CharsetDecoder;
13 import java.nio.charset.CharsetEncoder;
14 import java.nio.charset.CoderResult;
15 import java.util.Arrays;
16
17 import com.ibm.icu.charset.CharsetMBCS.CharsetDecoderMBCS;
18 import com.ibm.icu.charset.CharsetMBCS.CharsetEncoderMBCS;
19 import com.ibm.icu.lang.UCharacter;
20 import com.ibm.icu.text.UTF16;
21 import com.ibm.icu.text.UnicodeSet;
22
23 class CharsetISO2022 extends CharsetICU {
24     private UConverterDataISO2022 myConverterData; // per-charset ISO-2022 state holder; allocated in the constructor
25     private int variant;           // one of enum {ISO_2022_JP, ISO_2022_KR, or ISO_2022_CN}; assigned by the ISO2022Init* methods
26     
27     private static final byte[] SHIFT_IN_STR    = { 0x0f }; // the SI control byte (0x0F)
28 //    private static final byte[] SHIFT_OUT_STR   = { 0x0e };
29
30     private static final byte CR    = 0x0D;
31     private static final byte LF    = 0x0A;
32 /*
33     private static final byte H_TAB = 0x09;
34     private static final byte SPACE = 0x20;
35 */
36     private static final char HWKANA_START  = 0xff61; // presumably the first half-width Katakana code unit; usage is outside this view
37     private static final char HWKANA_END    = 0xff9f; // presumably the last half-width Katakana code unit; usage is outside this view
38     
39     /*
40      * 94-character sets with native byte values A1..FE are encoded in ISO 2022
41      * as bytes 21..7E. (Subtract 0x80.)
42      * 96-character sets with native byte values A0..FF are encoded in ISO 2022
43      * as bytes 20..7F. (Subtract 0x80.)
44      * Do not encode C1 control codes with native bytes 80..9F
45      * as bytes 00..1F (C0 control codes).
46      */
47 /*
48     private static final char GR94_START    = 0xa1;
49     private static final char GR94_END      = 0xfe;
50 */
51     private static final char GR96_START    = 0xa0;
52     private static final char GR96_END      = 0xff;
53     
54     /* for ISO-2022-JP and -CN implementations */
55     // typedef enum {
56         /* shared values */
57         private static final byte INVALID_STATE = -1; // entry used in the nextStateToUnicode* tables for escape sequences a variant rejects
58         private static final byte ASCII         = 0;
59         
60         private static final byte SS2_STATE = 0x10; // escape sequence selects G2 for a single shift (see changeState_2022)
61         private static final byte SS3_STATE = 0x11; // only present in the CN table; ISO-2022-JP does not use it
62         
63         /* JP */
64         private static final byte ISO8859_1 = 1;
65         private static final byte ISO8859_7 = 2;
66         private static final byte JISX201   = 3;
67         private static final byte JISX208   = 4; // JISX208..KSC5601 is the range IS_JP_DBCS() accepts
68         private static final byte JISX212   = 5;
69         private static final byte GB2312    = 6;
70         private static final byte KSC5601   = 7;
71         private static final byte HWKANA_7BIT  = 8; /* Halfwidth Katakana 7 bit */
72         
73         /* CN */
74         /* the first few enum constants must keep their values because they correspond to myConverterArray[] */
75         private static final byte GB2312_1  = 1; // NOTE: the CN values 1..3 numerically overlap the JP values above; the variant decides which set applies
76         private static final byte ISO_IR_165= 2;
77         private static final byte CNS_11643 = 3;
78         
79         /*
80          * these are used in StateEnum and ISO2022State variables,
81          * but CNS_11643 must be used to index into myConverterArray[]
82          */
83         private static final byte CNS_11643_0 = 0x20; // not referenced by nextStateToUnicodeCN; only _1.._7 appear there
84         private static final byte CNS_11643_1 = 0x21;
85         private static final byte CNS_11643_2 = 0x22;
86         private static final byte CNS_11643_3 = 0x23;
87         private static final byte CNS_11643_4 = 0x24;
88         private static final byte CNS_11643_5 = 0x25;
89         private static final byte CNS_11643_6 = 0x26;
90         private static final byte CNS_11643_7 = 0x27;
91     // } StateEnum;
92     
93
94     public CharsetISO2022(String icuCanonicalName, String javaCanonicalName, String[] aliases) {
95         super(icuCanonicalName, javaCanonicalName, aliases);
96         
97         myConverterData = new UConverterDataISO2022();
98         
99         int versionIndex = icuCanonicalName.indexOf("version=");
100         int version = Integer.decode(icuCanonicalName.substring(versionIndex+8, versionIndex+9)).intValue();
101         
102         myConverterData.version = version;
103         
104         if (icuCanonicalName.indexOf("locale=ja") > 0) {
105             ISO2022InitJP(version);
106         } else if (icuCanonicalName.indexOf("locale=zh") > 0) {
107             ISO2022InitCN(version);
108         } else /* if (icuCanonicalName.indexOf("locale=ko") > 0) */ {
109             ISO2022InitKR(version);
110         }
111         
112         myConverterData.currentEncoder = (CharsetEncoderMBCS)myConverterData.currentConverter.newEncoder();
113         myConverterData.currentDecoder = (CharsetDecoderMBCS)myConverterData.currentConverter.newDecoder();
114     }
115     
116     private void ISO2022InitJP(int version) {
117         variant = ISO_2022_JP;
118         
119         maxBytesPerChar = 6;
120         minBytesPerChar = 1;
121         maxCharsPerByte = 1;
122         // open the required converters and cache them 
123         if((jpCharsetMasks[version]&CSM(ISO8859_7)) != 0) {
124             myConverterData.myConverterArray[ISO8859_7] = ((CharsetMBCS)CharsetICU.forNameICU("ISO8859_7")).sharedData;
125         }
126         // myConverterData.myConverterArray[JISX201] = ((CharsetMBCS)CharsetICU.forNameICU("jisx-201")).sharedData;
127         myConverterData.myConverterArray[JISX208] = ((CharsetMBCS)CharsetICU.forNameICU("Shift-JIS")).sharedData;
128         if ((jpCharsetMasks[version]&CSM(JISX212)) != 0) {
129             myConverterData.myConverterArray[JISX212] = ((CharsetMBCS)CharsetICU.forNameICU("jisx-212")).sharedData;
130         }
131         if ((jpCharsetMasks[version]&CSM(GB2312)) != 0) {
132             myConverterData.myConverterArray[GB2312] = ((CharsetMBCS)CharsetICU.forNameICU("ibm-5478")).sharedData;
133         }
134         if ((jpCharsetMasks[version]&CSM(KSC5601)) != 0) {
135             myConverterData.myConverterArray[KSC5601] = ((CharsetMBCS)CharsetICU.forNameICU("ksc_5601")).sharedData;
136         }
137         
138         // create a generic CharsetMBCS object
139         myConverterData.currentConverter = (CharsetMBCS)CharsetICU.forNameICU("icu-internal-25546");
140     }
141     
142     private void ISO2022InitCN(int version) {
143         variant = ISO_2022_CN;
144         
145         maxBytesPerChar = 8;
146         minBytesPerChar = 1;
147         maxCharsPerByte = 1;
148         // open the required coverters and cache them.
149         myConverterData.myConverterArray[GB2312_1] = ((CharsetMBCS)CharsetICU.forNameICU("ibm-5478")).sharedData;
150         if (version == 1) {
151             myConverterData.myConverterArray[ISO_IR_165] = ((CharsetMBCS)CharsetICU.forNameICU("iso-ir-165")).sharedData;
152         } 
153         myConverterData.myConverterArray[CNS_11643] = ((CharsetMBCS)CharsetICU.forNameICU("cns-11643-1992")).sharedData;
154         
155         // create a generic CharsetMBCS object
156         myConverterData.currentConverter = (CharsetMBCS)CharsetICU.forNameICU("icu-internal-25546");
157     }
158     
159     private void ISO2022InitKR(int version) {
160         variant = ISO_2022_KR;
161         
162         maxBytesPerChar = 3;
163         minBytesPerChar = 1;
164         maxCharsPerByte = 1;
165         
166         if (version == 1) {
167             myConverterData.currentConverter = (CharsetMBCS)CharsetICU.forNameICU("icu-internal-25546");
168             myConverterData.currentConverter.subChar1 = fromUSubstitutionChar[0][0];
169         } else {
170             myConverterData.currentConverter = (CharsetMBCS)CharsetICU.forNameICU("ibm-949");
171         }
172         
173         myConverterData.currentEncoder = (CharsetEncoderMBCS)myConverterData.currentConverter.newEncoder();
174         myConverterData.currentDecoder = (CharsetDecoderMBCS)myConverterData.currentConverter.newDecoder();
175     }
176     
177     /*
178      * ISO 2022 control codes must not be converted from Unicode
179      * because they would mess up the byte stream.
180      * The bit mask 0x0800c000 has bits set at bit positions 0xe, 0xf, 0x1b
181      * corresponding to SO, SI, and ESC.
182      */
183     private static boolean IS_2022_CONTROL(int c) { 
184         return (c<0x20) && (((1<<c) & 0x0800c000) != 0);
185     }
186     
187     /*
188      * Check that the result is a 2-byte value with each byte in the range A1..FE
189      * (strict EUC DBCS) before accepting it and subtracting 0x80 from each byte
190      * to move it to the ISO 2022 range 21..7E.
191      * return 0 if out of range.
192      */
193     private static int _2022FromGR94DBCS(int value) {
194         if ((value <= 0xfefe && value >= 0xa1a1) && 
195                 ((short)(value&UConverterConstants.UNSIGNED_BYTE_MASK) <= 0xfe && ((short)(value&UConverterConstants.UNSIGNED_BYTE_MASK) >= 0xa1))) {
196             return (value - 0x8080); /* shift down to 21..7e byte range */
197         } else {
198             return 0; /* not valid for ISO 2022 */
199         }
200     }
201     
202     /*
203      * Commented out because Ticket 5691: Call sites now check for validity. They can just += 0x8080 after that. 
204      * 
205      * This method does the reverse of _2022FromGR94DBCS(). Given the 2022 code point, it returns the
206      * 2 byte value that is in the range A1..FE for each byte. Otherwise it returns the 2022 code point
207      * unchanged. 
208      * 
209     private static int _2022ToGR94DBCS(int value) {
210         int returnValue = value + 0x8080;
211         
212         if ((returnValue <= 0xfefe && returnValue >= 0xa1a1) && 
213                 ((short)(returnValue&UConverterConstants.UNSIGNED_BYTE_MASK) <= 0xfe && ((short)(returnValue&UConverterConstants.UNSIGNED_BYTE_MASK) >= 0xa1))) {
214             return returnValue;
215         } else {
216             return value;
217         }
218     }*/
219     
220     /* is the StateEnum charset value for a DBCS charset? */
221     private static boolean IS_JP_DBCS(byte cs) {
222         return ((JISX208 <= cs) && (cs <= KSC5601));
223     }
224     
225     private static short CSM(short cs) {
226         return (short)(1<<cs);
227     }
228     
229     /* This gets the valid index of the end of buffer when decoding. */
230     private static int getEndOfBuffer_2022(ByteBuffer source) {
231         int sourceIndex = source.position();
232         byte mySource = 0;
233         mySource = source.get(sourceIndex);
234         
235         while (source.hasRemaining() && mySource != ESC_2022) {
236             mySource = source.get();
237             if (mySource == ESC_2022) {
238                 break;
239             }
240             sourceIndex++;
241         }
242         return sourceIndex;
243     }
244     
245     /*
246      * This is a simple version of _MBCSGetNextUChar() calls the method in CharsetDecoderMBCS and returns
247      * the value given.
248      *
249      * Return value:
250      * U+fffe   unassigned
251      * U+ffff   illegal
252      * otherwise the Unicode code point
253      */
254      private int MBCSSimpleGetNextUChar(UConverterSharedData sharedData,
255                                ByteBuffer   source, 
256                                boolean      useFallback) {
257          int returnValue;
258          UConverterSharedData tempSharedData = myConverterData.currentConverter.sharedData;
259          myConverterData.currentConverter.sharedData = sharedData;
260          returnValue = myConverterData.currentDecoder.simpleGetNextUChar(source, useFallback);
261          myConverterData.currentConverter.sharedData = tempSharedData;
262          
263          return returnValue;
264     }
265
266     /*
267      * @param is the the output byte
268      * @return 1 roundtrip byte  0 no mapping  -1 fallback byte
269      */
270     static int MBCSSingleFromUChar32(UConverterSharedData sharedData, int c, int[] retval, boolean useFallback) {
271         char[] table;
272         int value;
273         /* BMP-only codepages are stored without stage 1 entries for supplementary code points */
274         if (c >= 0x10000 && (sharedData.mbcs.unicodeMask&UConverterConstants.HAS_SUPPLEMENTARY) == 0) {
275             return 0;
276         }
277         /* convert the Unicode code point in c into codepage bytes */
278         table = sharedData.mbcs.fromUnicodeTable;
279         /* get the byte for the output */
280         value = CharsetMBCS.MBCS_SINGLE_RESULT_FROM_U(table, sharedData.mbcs.fromUnicodeBytes, c);
281         /* get the byte for the output */
282         retval[0] = value & 0xff;
283         if (value >= 0xf00) {
284             return 1; /* roundtrip */
285         } else if (useFallback ? value>=0x800 : value>=0xc00) {
286             return -1; /* fallback taken */
287         } else {
288             return 0; /* no mapping */
289         }
290     }
291     
292     /*
293      * Each of these charset masks (with index x) contains a bit for a charset in exact correspondence
294      * to whether that charset is used in the corresponding version x of ISO_2022, locale=ja,version=x
295      * 
296      * Note: The converter uses some leniency:
297      * - The escape sequence ESC ( I for half-width 7-bit Katakana is recognized in
298      *   all versions, not just JIS7 and JIS8.
299      * - ICU does not distinguish between different versions of JIS X 0208.
300      */
301     private static final short jpCharsetMasks[] = { // one entry per version 0..4; each bit comes from CSM(charset)
302         (short)(CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)), // version 0
303         (short)(CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212)), // version 1: adds JISX212
304         (short)(CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212)|CSM(GB2312)|CSM(KSC5601)|CSM(ISO8859_1)|CSM(ISO8859_7)), // version 2: all charsets
305         (short)(CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212)|CSM(GB2312)|CSM(KSC5601)|CSM(ISO8859_1)|CSM(ISO8859_7)), // version 3: same mask as version 2
306         (short)(CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212)|CSM(GB2312)|CSM(KSC5601)|CSM(ISO8859_1)|CSM(ISO8859_7)) // version 4: same mask as version 2
307     };
308
309 /*
310     // typedef enum {
311         private static final byte ASCII1 = 0;
312         private static final byte LATIN1 = 1;
313         private static final byte SBCS   = 2;
314         private static final byte DBCS   = 3;
315         private static final byte MBCS   = 4;
316         private static final byte HWKANA = 5;
317     // } Cnv2002Type;
318 */
319
320     private class ISO2022State {
321         private byte []cs;  /* Charset number for SI (G0)/SO (G1)/SS2 (G2)/SS3 (G3) */
322         private byte g;     /* 0..3 for G0..G3 (SI/SO/SS2/SS3) */
323         private byte prevG; /* g before single shift (SS2 or SS3) */
324         
325         ISO2022State() {
326             cs = new byte[4];
327         }
328         
329         void reset() {
330             Arrays.fill(cs, (byte)0);
331             g = 0;
332             prevG = 0;
333         }
334     }
335     
336 //    private static final byte UCNV_OPTIONS_VERSION_MASK = 0xf;
337     private static final byte UCNV_2022_MAX_CONVERTERS  = 10; // length of UConverterDataISO2022.myConverterArray
338     
339     @SuppressWarnings("unused")
340     private class UConverterDataISO2022 {
341         UConverterSharedData []myConverterArray;
342         CharsetEncoderMBCS currentEncoder;
343         CharsetDecoderMBCS currentDecoder;
344         CharsetMBCS currentConverter;
345         int currentType; // Cnv2022Type;
346         ISO2022State toU2022State;
347         ISO2022State fromU2022State;
348         int key;
349         int version;
350         boolean isEmptySegment;
351         
352         UConverterDataISO2022() {
353             myConverterArray = new UConverterSharedData[UCNV_2022_MAX_CONVERTERS];
354             toU2022State = new ISO2022State();
355             fromU2022State = new ISO2022State();
356             currentType = 0;
357             key = 0;
358             version = 0;
359             isEmptySegment = false;
360         }
361         
362         void reset() {
363             toU2022State.reset();
364             fromU2022State.reset();
365             isEmptySegment = false;
366         }
367     }
368     
369     private static final byte ESC_2022 = 0x1B; /* ESC */
370     
371     // typedef enum { -- classification codes that changeState_2022() receives from getKey_2022() and that fill escSeqStateTable_Value_2022[]
372         private static final byte INVALID_2022              = -1; /* Doesn't correspond to a valid iso 2022 escape sequence */
373         private static final byte VALID_NON_TERMINAL_2022   =  0;  /* so far corresponds to a valid iso 2022 escape sequence */
374         private static final byte VALID_TERMINAL_2022       =  1;  /* corresponds to a valid iso 2022 escape sequence */
375         private static final byte VALID_MAYBE_TERMINAL_2022 =  2;  /* so far matches one iso 2022 escape sequence, but by adding
376                                                                      more characters might match another escape sequence */
377     // } UCNV_TableStates_2022;
378         
379     /*
380      * The way these state transition arrays work is:
381      * ex : ESC$B is the sequence for JISX208
382      *      a) First Iteration: char is ESC
383      *          i) Get the value of ESC from normalize_esq_chars_2022[] with int value of ESC as index
384      *             int x = normalize_esq_chars_2022[27] which is equal to 1
385      *         ii) Search for this value in escSeqStateTable_Key_2022[]
386      *             value of x is stored at escSeqStateTable_Key_2022[0]
387      *        iii) Save this index as offset
388      *         iv) Get state of this sequence from escSeqStateTable_Value_2022[]
389      *             escSeqStateTable_value_2022[offset], which is VALID_NON_TERMINAL_2022
390      *      b) Switch on this state and continue to next char
391      *          i) Get the value of $ from normalize_esq_chars_2022[] with int value of $ as index
392      *             which is normalize_esq_chars_2022[36] == 4
393      *         ii) x is currently 1(from above)
394      *             x<<=5 -- x is now 32
395      *             x+=normalize_esq_chars_2022[36]
396      *             now x is 36
397      *        iii) Search for this value in escSeqStateTable_Key_2022[]
398      *             value of x is stored at escSeqStateTable_Key_2022[2], so offset is 2
399      *         iv) Get state of this sequence from escSeqStateTable_Value_2022[]
400      *             escSeqStateTable_Value_2022[offset], which is VALID_NON_TERMINAL_2022
401      *      c) Switch on this state and continue to next char
402      *          i) Get the value of B from normalize_esq_chars_2022[] with int value of B as index
403      *         ii) x is currently 36 (from above)
404      *             x<<=5 -- x is now 1152
405      *             x+= normalize_esq_chars_2022[66]
406      *             now x is 1161
407      *        iii) Search for this value in escSeqStateTable_Key_2022[]
408      *             value of x is stored at escSeqStateTable_Key_2022[24], so offset is 24
409      *         iv) Get state of this sequence from escSeqStateTable_Value_2022[]
410      *             escSeqStateTable_Value_2022[offset], which is VALID_TERMINAL_2022
411      *          v) Get the charset state from nextStateToUnicodeJP[24], which is JISX208
412      */
413      /* Below are the 3 arrays depicting a state transition table */
414      private static final byte normalize_esq_chars_2022[] = { // maps a character's integer value to the small code folded into the escape-sequence key
415          /* 0       1       2       3       4       5       6       7       8       9 */
416             0,      0,      0,      0,      0,      0,      0,      0,      0,      0,
417             0,      0,      0,      0,      0,      0,      0,      0,      0,      0,
418             0,      0,      0,      0,      0,      0,      0,      1,      0,      0, /* [27] ESC -> 1 */
419             0,      0,      0,      0,      0,      0,      4,      7,     29,      0, /* [36] '$' -> 4, [37] '%' -> 7, [38] '&' -> 29 */
420             2,     24,     26,     27,      0,      3,     23,      6,      0,      0, /* [40] '(' -> 2, [41] ')' -> 24, [42] '*' -> 26, [43] '+' -> 27, [45] '-' -> 3, [46] '.' -> 23, [47] '/' -> 6 */
421             0,      0,      0,      0,      0,      0,      0,      0,      0,      0,
422             0,      0,      0,      0,      5,      8,      9,     10,     11,     12, /* [64] '@' -> 5, [65]..[69] 'A'..'E' -> 8..12 */
423            13,     14,     15,     16,     17,     18,     19,     20,     25,     28, /* [70]..[77] 'F'..'M' -> 13..20, [78] 'N' -> 25, [79] 'O' -> 28 */
424             0,      0,     21,      0,      0,      0,      0,      0,      0,      0, /* [82] 'R' -> 21 */
425            22,      0,      0,      0,      0,      0,      0,      0,      0,      0, /* [90] 'Z' -> 22 */
426             0,      0,      0,      0,      0,      0,      0,      0,      0,      0,
427             0,      0,      0,      0,      0,      0,      0,      0,      0,      0,
428             0,      0,      0,      0,      0,      0,      0,      0,      0,      0,
429             0,      0,      0,      0,      0,      0,      0,      0,      0,      0,
430             0,      0,      0,      0,      0,      0,      0,      0,      0,      0,
431             0,      0,      0,      0,      0,      0,      0,      0,      0,      0,
432             0,      0,      0,      0,      0,      0,      0,      0,      0,      0,
433             0,      0,      0,      0,      0,      0,      0,      0,      0,      0,
434             0,      0,      0,      0,      0,      0,      0,      0,      0,      0,
435             0,      0,      0,      0,      0,      0,      0,      0,      0,      0,
436             0,      0,      0,      0,      0,      0,      0,      0,      0,      0,
437             0,      0,      0,      0,      0,      0,      0,      0,      0,      0,
438             0,      0,      0,      0,      0,      0,      0,      0,      0,      0,
439             0,      0,      0,      0,      0,      0,      0,      0,      0,      0,
440             0,      0,      0,      0,      0,      0,      0,      0,      0,      0,
441             0,      0,      0,      0,      0,      0,      0,      0,      0,      0,
442             0,      0,      0,      0,      0,      0
443      };
444      
445      private static final short MAX_STATES_2022 = 74; // number of entries in each escape-sequence state table below
446      private static final int escSeqStateTable_Key_2022[/* MAX_STATES_2022 */] = { // keys in ascending order; the index of a key is the offset used with the Value and nextStateToUnicode* tables
447          /* 0        1          2         3        4          5         6         7         8         9 */
448             1,      34,        36,       39,      55,        57,       60,       61,     1093,     1096,
449          1097,    1098,      1099,     1100,     1101,     1102,     1103,     1104,     1105,     1106,
450          1109,    1154,      1157,     1160,     1161,     1176,     1178,     1179,     1254,     1257,
451          1768,    1773,      1957,    35105,    36933,    36936,    36937,    36938,    36939,    36940,
452         36942,   36943,     36944,    36945,    36946,    36947,    36948,    37640,    37642,    37644,
453         37646,   37711,     37744,    37745,    37746,    37747,    37748,    40133,    40136,    40138,
454         40139,   40140,     40141,  1123363, 35947624, 35947625, 35947626, 35947627, 35947629, 35947630,
455      35947631, 35947635, 35947636, 35947638
456      };
457      
458      private static final byte escSeqStateTable_Value_2022[/* MAX_STATES_2022 */] = { // entry i classifies the sequence whose key is escSeqStateTable_Key_2022[i]; five entries per row
459          /*         0                           1                           2                           3                       4               */
460          VALID_NON_TERMINAL_2022,   VALID_NON_TERMINAL_2022,    VALID_NON_TERMINAL_2022,    VALID_NON_TERMINAL_2022,    VALID_NON_TERMINAL_2022,    
461              VALID_TERMINAL_2022,       VALID_TERMINAL_2022,    VALID_NON_TERMINAL_2022,        VALID_TERMINAL_2022,        VALID_TERMINAL_2022,
462        VALID_MAYBE_TERMINAL_2022,       VALID_TERMINAL_2022,        VALID_TERMINAL_2022,        VALID_TERMINAL_2022,        VALID_TERMINAL_2022,
463              VALID_TERMINAL_2022,       VALID_TERMINAL_2022,        VALID_TERMINAL_2022,        VALID_TERMINAL_2022,        VALID_TERMINAL_2022,
464              VALID_TERMINAL_2022,   VALID_NON_TERMINAL_2022,        VALID_TERMINAL_2022,        VALID_TERMINAL_2022,        VALID_TERMINAL_2022,
465          VALID_NON_TERMINAL_2022,   VALID_NON_TERMINAL_2022,    VALID_NON_TERMINAL_2022,    VALID_NON_TERMINAL_2022,        VALID_TERMINAL_2022,
466              VALID_TERMINAL_2022,       VALID_TERMINAL_2022,        VALID_TERMINAL_2022,    VALID_NON_TERMINAL_2022,        VALID_TERMINAL_2022,
467              VALID_TERMINAL_2022,       VALID_TERMINAL_2022,        VALID_TERMINAL_2022,        VALID_TERMINAL_2022,        VALID_TERMINAL_2022,
468              VALID_TERMINAL_2022,       VALID_TERMINAL_2022,        VALID_TERMINAL_2022,        VALID_TERMINAL_2022,        VALID_TERMINAL_2022,
469              VALID_TERMINAL_2022,       VALID_TERMINAL_2022,        VALID_TERMINAL_2022,        VALID_TERMINAL_2022,        VALID_TERMINAL_2022,
470              VALID_TERMINAL_2022,       VALID_TERMINAL_2022,        VALID_TERMINAL_2022,        VALID_TERMINAL_2022,        VALID_TERMINAL_2022,
471              VALID_TERMINAL_2022,       VALID_TERMINAL_2022,        VALID_TERMINAL_2022,        VALID_TERMINAL_2022,        VALID_TERMINAL_2022,
472              VALID_TERMINAL_2022,       VALID_TERMINAL_2022,        VALID_TERMINAL_2022,    VALID_NON_TERMINAL_2022,        VALID_TERMINAL_2022,
473              VALID_TERMINAL_2022,       VALID_TERMINAL_2022,        VALID_TERMINAL_2022,        VALID_TERMINAL_2022,        VALID_TERMINAL_2022,
474              VALID_TERMINAL_2022,       VALID_TERMINAL_2022,        VALID_TERMINAL_2022,        VALID_TERMINAL_2022
475      };
476      
477      /* Type def for refactoring changeState_2022 code */
478      // typedef enum { -- values stored in the 'variant' field by the ISO2022Init* methods and switched on in changeState_2022()
479          private static final byte ISO_2022_JP = 1;
480          private static final byte ISO_2022_KR = 2;
481          private static final byte ISO_2022_CN = 3;
482      // } Variant2022;
483          
484     /* const UConverterSharedData _ISO2022Data; */
485     //private UConverterSharedData _ISO2022JPData;
486     //private UConverterSharedData _ISO2022KRData;
487     //private UConverterSharedData _ISO2022CNData;
488     
489     /******************** to unicode ********************/
490     /****************************************************
491      * Recognized escape sequences are
492      * <ESC>(B  ASCII
493      * <ESC>.A  ISO-8859-1
494      * <ESC>.F  ISO-8859-7
495      * <ESC>(J  JISX-201
496      * <ESC>(I  JISX-201 (half-width Katakana; mapped to HWKANA_7BIT in the table below)
497      * <ESC>$B  JISX-208
498      * <ESC>$@  JISX-208
499      * <ESC>$(D JISX-212
500      * <ESC>$A  GB2312
501      * <ESC>$(C KSC5601
502      */
503     private final static byte nextStateToUnicodeJP[/* MAX_STATES_2022 */] = { // indexed by the offset of the matched key in escSeqStateTable_Key_2022; INVALID_STATE = not accepted by ISO-2022-JP
504         /*     0               1               2               3               4               5               6               7               8               9    */
505         INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,      SS2_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,
506                 ASCII,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,        JISX201,    HWKANA_7BIT,        JISX201,  INVALID_STATE,
507         INVALID_STATE,  INVALID_STATE,        JISX208,         GB2312,        JISX208,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,
508             ISO8859_1,      ISO8859_7,        JISX208,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,        KSC5601,        JISX212,  INVALID_STATE,
509         INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,
510         INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,
511         INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,
512         INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE
513     };
514     
515     private final static byte nextStateToUnicodeCN[/* MAX_STATES_2022 */] = { // CN counterpart of nextStateToUnicodeJP: same indexing, holds SS2/SS3, GB2312_1, ISO_IR_165 and CNS_11643_1..7
516         /*     0               1               2               3               4               5               6               7               8               9    */
517         INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,      SS2_STATE,      SS3_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,
518         INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,
519         INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,
520         INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,
521         INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,       GB2312_1,  INVALID_STATE,     ISO_IR_165,
522           CNS_11643_1,    CNS_11643_2,    CNS_11643_3,    CNS_11643_4,    CNS_11643_5,    CNS_11643_6,    CNS_11643_7,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,
523         INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,
524         INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE
525     };
526     
527     /* runs through a state machine to determine the escape sequence - codepage correspondence */
528     @SuppressWarnings("fallthrough")
529     private CoderResult changeState_2022(CharsetDecoderICU decoder, ByteBuffer source, int var) {
530         CoderResult err = CoderResult.UNDERFLOW;
531         boolean DONE = false;
532         byte value;
533         int key[] = {myConverterData.key};
534         int offset[] = {0};
535         int initialToULength = decoder.toULength;
536         byte c;
537         int malformLength = 0;
538         
539         value = VALID_NON_TERMINAL_2022;
540         while (source.hasRemaining()) {
541             c = source.get();
542             malformLength++;
543             decoder.toUBytesArray[decoder.toULength++] = c;
544             value = getKey_2022(c, key, offset);
545             
546             switch(value) {
547             
548             case VALID_NON_TERMINAL_2022:
549                 /* continue with the loop */
550                 break;
551                 
552             case VALID_TERMINAL_2022:
553                 key[0] = 0;
554                 DONE = true;
555                 break;
556                 
557             case INVALID_2022:
558                 DONE = true;
559                 break;
560                 
561             case VALID_MAYBE_TERMINAL_2022:
562                 /* not ISO_2022 itself, finish here */
563                 value = VALID_TERMINAL_2022;
564                 key[0] = 0;
565                 DONE = true;
566                 break;
567             }
568             if (DONE) {
569                 break;
570             }
571         }
572 // DONE:
573         myConverterData.key = key[0];
574         
575         if (value == VALID_NON_TERMINAL_2022) {
576             /* indicate that the escape sequence is incomplete: key !=0 */
577             return err;
578         } else if (value == INVALID_2022) {
579             err = CoderResult.malformedForLength(malformLength);
580         } else /* value == VALID_TERMINAL_2022 */ {
581             switch (var) {
582             case ISO_2022_JP: {
583                 byte tempState = nextStateToUnicodeJP[offset[0]];
584                 switch (tempState) {
585                 case INVALID_STATE:
586                     err = CoderResult.malformedForLength(malformLength);
587                     break;
588                 case SS2_STATE:
589                     if (myConverterData.toU2022State.cs[2] != 0) {
590                         if (myConverterData.toU2022State.g < 2) {
591                             myConverterData.toU2022State.prevG = myConverterData.toU2022State.g;
592                         }
593                         myConverterData.toU2022State.g = 2;
594                     } else { 
595                         /* illegal to have SS2 before a matching designator */
596                         err = CoderResult.malformedForLength(malformLength);
597                     }
598                     break;
599                 /* case SS3_STATE: not used in ISO-2022-JP-x */
600                 case ISO8859_1:
601                 case ISO8859_7:
602                     if ((jpCharsetMasks[myConverterData.version] & CSM(tempState)) == 0) {
603                         err = CoderResult.unmappableForLength(malformLength);
604                     } else {
605                         /* G2 charset for SS2 */
606                         myConverterData.toU2022State.cs[2] = tempState;
607                     }
608                     break;
609                 default:
610                     if ((jpCharsetMasks[myConverterData.version] & CSM(tempState)) == 0) {
611                         err = CoderResult.unmappableForLength(source.position() - 1);
612                     } else {
613                         /* G0 charset */
614                         myConverterData.toU2022State.cs[0] = tempState;
615                     }
616                     break;
617                 } // end of switch
618                 break;
619             }
620             case ISO_2022_CN: {
621                 byte tempState = nextStateToUnicodeCN[offset[0]];
622                 switch (tempState) {
623                 case INVALID_STATE:
624                     err = CoderResult.unmappableForLength(malformLength);
625                     break;
626                 case SS2_STATE:
627                     if (myConverterData.toU2022State.cs[2] != 0) {
628                         if (myConverterData.toU2022State.g < 2) {
629                             myConverterData.toU2022State.prevG = myConverterData.toU2022State.g;
630                         }
631                         myConverterData.toU2022State.g = 2;
632                     } else {
633                         /* illegal to have SS2 before a matching designator */
634                         err = CoderResult.malformedForLength(malformLength);
635                     }
636                     break;
637                 case SS3_STATE:
638                     if (myConverterData.toU2022State.cs[3] != 0) {
639                         if (myConverterData.toU2022State.g < 2) {
640                             myConverterData.toU2022State.prevG = myConverterData.toU2022State.g;
641                         }
642                         myConverterData.toU2022State.g = 3;
643                     } else {
644                         /* illegal to have SS3 before a matching designator */
645                         err = CoderResult.malformedForLength(malformLength);
646                     }
647                     break;
648                 case ISO_IR_165:
649                     if (myConverterData.version == 0) {
650                         err = CoderResult.unmappableForLength(malformLength);
651                         break;
652                     }
653                     /* fall through */
654                 case GB2312_1:
655                     /* fall through */
656                 case CNS_11643_1:
657                     myConverterData.toU2022State.cs[1] = tempState;
658                     break;
659                 case CNS_11643_2:
660                     myConverterData.toU2022State.cs[2] = tempState;
661                     break;
662                 default:
663                     /* other CNS 11643 planes */
664                     if (myConverterData.version == 0) {
665                         err = CoderResult.unmappableForLength(source.position() - 1);
666                     } else {
667                         myConverterData.toU2022State.cs[3] = tempState;
668                     }
669                     break;
670                 } //end of switch
671             }
672             break;
673             case ISO_2022_KR:
674                 if (offset[0] == 0x30) {
675                     /* nothing to be done, just accept this one escape sequence */
676                 } else {
677                     err = CoderResult.unmappableForLength(malformLength);
678                 }
679                 break;
680             default:
681                 err = CoderResult.malformedForLength(malformLength);
682                 break;
683             } // end of switch
684         }
685         if (!err.isError()) {
686             decoder.toULength = 0;
687         } else if (err.isMalformed()) {
688             if (decoder.toULength > 1) {
689                 /*
690                  * Ticket 5691: consistent illegal sequences:
691                  * - We include at least the first byte (ESC) in the illegal sequence.
692                  * - If any of the non-initial bytes could be the start of a character,
693                  *   we stop the illegal sequece before the first one of those.
694                  *   In escape sequences, all following bytes are "printable", that is,
695                  *   unless they are completely illegal (>7f in SBCS, outside 21..7e in DBCS),
696                  *   they are valid single/lead bytes.
697                  *   For simplicity, we always only report the initial ESC byte as the
698                  *   illegal sequence and back out all other bytes we looked at.
699                  */
700                 /* Back out some bytes. */
701                 int backOutDistance = decoder.toULength - 1;
702                 int bytesFromThisBuffer = decoder.toULength - initialToULength;
703                 if (backOutDistance <= bytesFromThisBuffer) {
704                     /* same as initialToULength<=1 */
705                     source.position(source.position() - backOutDistance);
706                 } else {
707                     /* Back out bytes from the previous buffer: Need to replay them. */
708                     decoder.preToULength = (byte)(bytesFromThisBuffer - backOutDistance);
709                     /* same as -(initalToULength-1) */
710                     /* preToULength is negative! */
711                     for (int i = 0; i < -(decoder.preToULength); i++) {
712                         decoder.preToUArray[i] = decoder.toUBytesArray[i+1];
713                     }
714                     source.position(source.position() - bytesFromThisBuffer);
715                 }
716                 decoder.toULength = 1;
717             }
718         }
719         
720         return err;
721     }
722     
723     private static byte getKey_2022(byte c, int[]key, int[]offset) {
724         int togo;
725         int low = 0;
726         int hi = MAX_STATES_2022;
727         int oldmid = 0;
728         
729         togo = normalize_esq_chars_2022[(short)c&UConverterConstants.UNSIGNED_BYTE_MASK];
730         
731         if (togo == 0) {
732             /* not a valid character anywhere in an escape sequence */
733             key[0] = 0;
734             offset[0] = 0;
735             return INVALID_2022;
736         }
737         togo = (key[0] << 5) + togo;
738         
739         while (hi != low) { /* binary search */
740             int mid = (hi+low) >> 1; /* Finds median */
741         
742             if (mid == oldmid) {
743                 break;
744             }
745             
746             if (escSeqStateTable_Key_2022[mid] > togo) {
747                 hi = mid;
748             } else if (escSeqStateTable_Key_2022[mid] < togo) {
749                 low = mid;
750             } else /* we found it */ {
751                 key[0] = togo;
752                 offset[0] = mid;
753                 return escSeqStateTable_Value_2022[mid];
754             }
755             oldmid = mid;
756         }
757         return INVALID_2022;
758     }
759     
760     /*
761      * To Unicode Callback helper function
762      */
763     private static CoderResult toUnicodeCallback(CharsetDecoderICU cnv, int sourceChar, int targetUniChar) {
764         CoderResult err = CoderResult.UNDERFLOW;
765         if (sourceChar > 0xff) {
766             cnv.toUBytesArray[0] = (byte)(sourceChar>>8);
767             cnv.toUBytesArray[1] = (byte)sourceChar;
768             cnv.toULength = 2;
769         } else {
770             cnv.toUBytesArray[0] = (byte)sourceChar;
771             cnv.toULength = 1;
772         }
773         
774         if (targetUniChar == (UConverterConstants.missingCharMarker-1/* 0xfffe */)) {
775             err = CoderResult.unmappableForLength(1);
776         } else {
777             err = CoderResult.malformedForLength(1);
778         }
779         
780         return err;
781     }
782     
783     /****************************ISO-2022-JP************************************/
784     private class CharsetDecoderISO2022JP extends CharsetDecoderICU {
785         public CharsetDecoderISO2022JP(CharsetICU cs) {
786             super(cs);
787         }
788         
789         protected void implReset() {
790             super.implReset();
791             myConverterData.reset();
792         }
793         /* 
794          * Map 00..7F to Unicode according to JIS X 0201. 
795          * */
796         private int jisx201ToU(int value) {
797             if (value < 0x5c) {
798                 return value;
799             } else if (value == 0x5c) {
800                 return 0xa5;
801             } else if (value == 0x7e) {
802                 return 0x203e;
803             } else { /* value <= 0x7f */
804                 return value;
805             }
806         }
807         /*
808          * Convert a pair of JIS X 208 21..7E bytes to Shift-JIS.
809          * If either byte is outside 21..7E make sure that the result is not valid
810          * for Shift-JIS so that the converter catches it.
811          * Some invalid byte values already turn into equally invalid Shift-JIS
812          * byte values and need not be tested explicitly.
813          */
814         private void _2022ToSJIS(char c1, char c2, byte []bytes) {
815             if ((c1&1) > 0) {
816                 ++c1;
817                 if (c2 <= 0x5f) {
818                     c2 += 0x1f;
819                 } else if (c2 <= 0x7e) {
820                     c2 += 0x20;
821                 } else {
822                     c2 = 0; /* invalid */
823                 }
824             } else {
825                 if ((c2 >= 0x21) && (c2 <= 0x7e)) {
826                     c2 += 0x7e;
827                 } else {
828                     c2 = 0; /* invalid */
829                 }
830             }
831             
832             c1 >>=1;
833             if (c1 <= 0x2f) {
834                 c1 += 0x70;
835             } else if (c1 <= 0x3f) {
836                 c1 += 0xb0;
837             } else {
838                 c1 = 0; /* invalid */
839             }
840             bytes[0] = (byte)(UConverterConstants.UNSIGNED_BYTE_MASK & c1);
841             bytes[1] = (byte)(UConverterConstants.UNSIGNED_BYTE_MASK & c2);
842         }
843
844         @SuppressWarnings("fallthrough")
845         protected CoderResult decodeLoop(ByteBuffer source, CharBuffer target, IntBuffer offsets, boolean flush) {
846             boolean gotoGetTrail = false;
847             boolean gotoEscape = false;
848             CoderResult err = CoderResult.UNDERFLOW;
849             byte []tempBuf = new byte[2];
850             int targetUniChar = 0x0000;
851             int mySourceChar = 0x0000;
852             int mySourceCharTemp = 0x0000; // use for getTrail label call.
853             byte cs; /* StateEnum */
854             byte csTemp= 0; // use for getTrail label call.
855             
856             if (myConverterData.key != 0) {
857                 /* continue with a partial escape sequence */
858                 // goto escape;
859                 gotoEscape = true;
860             } else if (toULength == 1 && source.hasRemaining() && target.hasRemaining()) {
861                 /* continue with a partial double-byte character */
862                 mySourceChar = (toUBytesArray[0] & UConverterConstants.UNSIGNED_BYTE_MASK);
863                 toULength = 0;
864                 cs = myConverterData.toU2022State.cs[myConverterData.toU2022State.g];
865                 // goto getTrailByte;
866                 mySourceCharTemp = 0x99;
867                 gotoGetTrail = true;
868             }
869             
870             while (source.hasRemaining() || gotoEscape || gotoGetTrail) {
871                 // This code is here for the goto escape label call above.
872                 if (gotoEscape) {
873                     mySourceCharTemp = ESC_2022;
874                 }
875                 
876                 targetUniChar = UConverterConstants.missingCharMarker;
877                 
878                 if (gotoEscape || gotoGetTrail || target.hasRemaining()) {
879                     if (!gotoEscape && !gotoGetTrail) {
880                         mySourceChar = source.get() & UConverterConstants.UNSIGNED_BYTE_MASK;
881                         mySourceCharTemp = mySourceChar;
882                     }
883                     
884                     switch (mySourceCharTemp) {
885                     case UConverterConstants.SI:
886                         if (myConverterData.version == 3) {
887                             myConverterData.toU2022State.g = 0;
888                             continue;
889                         } else {
890                             /* only JIS7 uses SI/SO, not ISO-2022-JP-x */
891                             myConverterData.isEmptySegment = false;
892                             break;
893                         }
894                         
895                     case UConverterConstants.SO:
896                         if (myConverterData.version == 3) {
897                             /* JIS7: switch to G1 half-width Katakana */
898                             myConverterData.toU2022State.cs[1] = HWKANA_7BIT;
899                             myConverterData.toU2022State.g = 1;
900                             continue; 
901                         } else {
902                             /* only JIS7 uses SI/SO, not ISO-2022-JP-x */
903                             myConverterData.isEmptySegment = false; /* reset this, we have a different error */
904                             break;
905                         }
906                         
907                     case ESC_2022:
908                         if (!gotoEscape) {
909                             source.position(source.position() - 1);
910                         } else {
911                             gotoEscape = false;
912                         }
913 // escape:
914                         {
915                             int mySourceBefore = source.position();
916                             int toULengthBefore = this.toULength;
917                             
918                             err = changeState_2022(this, source, variant);
919
920                             /* If in ISO-2022-JP only and we successully completed an escape sequence, but previous segment was empty, create an error */
921                             if(myConverterData.version == 0 && myConverterData.key == 0 && !err.isError() && myConverterData.isEmptySegment) {
922                                 err = CoderResult.malformedForLength(source.position() - mySourceBefore);
923                                 this.toULength = toULengthBefore + (source.position() - mySourceBefore);
924                             }
925                         }
926
927                         /* invalid or illegal escape sequence */
928                         if(err.isError()){
929                             myConverterData.isEmptySegment = false; /* Reset to avoid future spurious errors */
930                             return err;
931                         }
932                         /* If we successfully completed an escape sequence, we begin a new segment, empty so far */
933                         if(myConverterData.key == 0) {
934                             myConverterData.isEmptySegment = true;
935                         }
936
937                         continue;
938                     /* ISO-2022-JP does not use single-byte (C1) SS2 and SS3 */
939                     case CR:
940                         /* falls through */
941                     case LF:
942                         /* automatically reset to single-byte mode */
943                         if (myConverterData.toU2022State.cs[0] != ASCII && myConverterData.toU2022State.cs[0] != JISX201) {
944                             myConverterData.toU2022State.cs[0] = ASCII;
945                         }
946                         myConverterData.toU2022State.cs[2] = 0;
947                         myConverterData.toU2022State.g = 0;
948                         /* falls through */
949                     default :
950                         /* convert one or two bytes */
951                         myConverterData.isEmptySegment = false;
952                         cs = myConverterData.toU2022State.cs[myConverterData.toU2022State.g];
953                         csTemp = cs;
954                         if (gotoGetTrail) {
955                             csTemp = (byte)0x99;
956                         }
957                         if (!gotoGetTrail && ((mySourceChar >= 0xa1) && (mySourceChar <= 0xdf) && myConverterData.version == 4 && !IS_JP_DBCS(cs))) {
958                             /* 8-bit halfwidth katakana in any single-byte mode for JIS8 */
959                             targetUniChar = mySourceChar + (HWKANA_START - 0xa1);
960                             
961                             /* return from a single-shift state to the previous one */
962                             if (myConverterData.toU2022State.g >= 2) {
963                                 myConverterData.toU2022State.g = myConverterData.toU2022State.prevG;
964                             }
965                         } else {
966                             switch(csTemp) {
967                             case ASCII:
968                                 if (mySourceChar <= 0x7f) {
969                                     targetUniChar = mySourceChar;
970                                 }
971                                 break;
972                             case ISO8859_1:
973                                 if (mySourceChar <= 0x7f) {
974                                     targetUniChar = mySourceChar + 0x80;
975                                 }
976                                 /* return from a single-shift state to the prevous one */
977                                 myConverterData.toU2022State.g = myConverterData.toU2022State.prevG;
978                                 break;
979                             case ISO8859_7:
980                                 if (mySourceChar <= 0x7f) {
981                                     /* convert mySourceChar+0x80 to use a normal 8-bit table */
982                                     targetUniChar = CharsetMBCS.MBCS_SINGLE_SIMPLE_GET_NEXT_BMP(myConverterData.myConverterArray[cs].mbcs,
983                                             mySourceChar+0x80);
984                                 }
985                                 /* return from a single-shift state to the previous one */
986                                 myConverterData.toU2022State.g = myConverterData.toU2022State.prevG;
987                                 break;
988                             case JISX201:
989                                 if (mySourceChar <= 0x7f) {
990                                     targetUniChar = jisx201ToU(mySourceChar);
991                                 }
992                                 break;
993                             case HWKANA_7BIT:
994                                 if ((mySourceChar >= 0x21) && (mySourceChar <= 0x5f)) {
995                                     /* 7-bit halfwidth Katakana */
996                                     targetUniChar = mySourceChar + (HWKANA_START - 0x21);
997                                     break;
998                                 }
999                             default :
1000                                 /* G0 DBCS */
1001                                 if (gotoGetTrail || source.hasRemaining()) {
1002 // getTrailByte:
1003                                     int tmpSourceChar;
1004                                     gotoGetTrail = false;
1005                                     short trailByte;
1006                                     boolean leadIsOk, trailIsOk;
1007                                     
1008                                     trailByte = (short)(source.get(source.position()) & UConverterConstants.UNSIGNED_BYTE_MASK);
1009                                     /*
1010                                      * Ticket 5691: consistent illegal sequences:
1011                                      * - We include at least the first byte in the illegal sequence.
1012                                      * - If any of the non-initial bytes could be the start of a character,
1013                                      *   we stop the illegal sequence before the first one of those.
1014                                      * 
1015                                      * In ISO-2022 DBCS, if the second byte is in the 21..7e range or is
1016                                      * an ESC/SO/SI, we report only the first byte as the illegal sequence.
1017                                      * Otherwise we convert or report the pair of bytes.
1018                                      */
1019                                     leadIsOk = (short)(UConverterConstants.UNSIGNED_BYTE_MASK & (mySourceChar - 0x21)) <= (0x7e - 0x21);
1020                                     trailIsOk = (short)(UConverterConstants.UNSIGNED_BYTE_MASK & (trailByte - 0x21)) <= (0x7e - 0x21);
1021                                     if (leadIsOk && trailIsOk) {
1022                                         source.get();
1023                                         tmpSourceChar = (mySourceChar << 8) | trailByte;
1024                                         if (cs == JISX208) {
1025                                             _2022ToSJIS((char)mySourceChar, (char)trailByte, tempBuf);
1026                                             mySourceChar = tmpSourceChar;
1027                                         } else {
1028                                             /* Copy before we modify tmpSourceChar so toUnicodeCallback() sees the correct bytes. */
1029                                             mySourceChar = tmpSourceChar;
1030                                             if (cs == KSC5601) {
1031                                                 tmpSourceChar += 0x8080; /* = _2022ToGR94DBCS(tmpSourceChar) */
1032                                             }
1033                                             tempBuf[0] = (byte)(UConverterConstants.UNSIGNED_BYTE_MASK & (tmpSourceChar >> 8));
1034                                             tempBuf[1] = (byte)(UConverterConstants.UNSIGNED_BYTE_MASK & tmpSourceChar);
1035                                         }
1036                                         targetUniChar = MBCSSimpleGetNextUChar(myConverterData.myConverterArray[cs], ByteBuffer.wrap(tempBuf), false);
1037                                     } else if (!(trailIsOk || IS_2022_CONTROL(trailByte))) {
1038                                         /* report a pair of illegal bytes if the second byte is not a DBCS starter */
1039                                         source.get();
1040                                         /* add another bit so that the code below writes 2 bytes in case of error */
1041                                         mySourceChar = 0x10000 | (mySourceChar << 8) | trailByte;
1042                                     }
1043                                 } else {
1044                                     toUBytesArray[0] = (byte)mySourceChar;
1045                                     toULength = 1;
1046                                     // goto endloop
1047                                     return err;
1048                                 }
1049                             } /* end of inner switch */
1050                         }
1051                         break;
1052                     } /* end of outer switch */
1053                     
1054                     if (targetUniChar < (UConverterConstants.missingCharMarker-1/*0xfffe*/)) {
1055                         if (offsets != null) {
1056                             offsets.put(target.remaining(), source.remaining() - (mySourceChar <= 0xff ? 1 : 2));
1057                         }
1058                         target.put((char)targetUniChar);
1059                     } else if (targetUniChar > UConverterConstants.missingCharMarker) {
1060                         /* disassemble the surrogate pair and write to output */
1061                         targetUniChar -= 0x0010000;
1062                         target.put((char)(0xd800 + (char)(targetUniChar>>10)));
1063                         target.position(target.position()-1);
1064                         if (offsets != null) {
1065                             offsets.put(target.remaining(), source.remaining() - (mySourceChar <= 0xff ? 1 : 2));
1066                         }
1067                         target.get();
1068                         if (target.hasRemaining()) {
1069                             target.put((char)(0xdc00+(char)(targetUniChar&0x3ff)));
1070                             target.position(target.position()-1);
1071                             if (offsets != null) {
1072                                 offsets.put(target.remaining(), source.remaining() - (mySourceChar <= 0xff ? 1 : 2));
1073                             }
1074                             target.get();
1075                         } else {
1076                             charErrorBufferArray[charErrorBufferLength++] = 
1077                                 (char)(0xdc00+(char)(targetUniChar&0x3ff));
1078                         }
1079                     } else {
1080                         /* Call the callback function */
1081                         err = toUnicodeCallback(this, mySourceChar, targetUniChar);
1082                         break;
1083                     }
1084                 } else { /* goes with "if (target.hasRemaining())" way up near the top of the function */
1085                     err = CoderResult.OVERFLOW;
1086                     break;
1087                 }
1088             }
1089 //endloop:
1090             return err;
1091         }
1092     } // end of class CharsetDecoderISO2022JP
1093     
1094     /****************************ISO-2022-CN************************************/
1095     private class CharsetDecoderISO2022CN extends CharsetDecoderICU {
1096         public CharsetDecoderISO2022CN(CharsetICU cs) {
1097             super(cs);
1098         }
1099         
1100         protected void implReset() {
1101             super.implReset();
1102             myConverterData.reset();
1103         }
1104
1105         @SuppressWarnings("fallthrough")
1106         protected CoderResult decodeLoop(ByteBuffer source, CharBuffer target, IntBuffer offsets, boolean flush) {
1107             CoderResult err = CoderResult.UNDERFLOW;
1108             byte[] tempBuf = new byte[3];
1109             int targetUniChar = 0x0000;
1110             int mySourceChar = 0x0000;
1111             int mySourceCharTemp = 0x0000;
1112             boolean gotoEscape = false;
1113             boolean gotoGetTrailByte = false;
1114             
1115             if (myConverterData.key != 0) {
1116                 /* continue with a partial escape sequence */
1117                 // goto escape;
1118                 gotoEscape = true;
1119             } else if (toULength == 1 && source.hasRemaining() && target.hasRemaining()) {
1120                 /* continue with a partial double-byte character */
1121                 mySourceChar = (toUBytesArray[0] & UConverterConstants.UNSIGNED_BYTE_MASK);
1122                 toULength = 0;
1123                 targetUniChar = UConverterConstants.missingCharMarker;
1124                 // goto getTrailByte
1125                 gotoGetTrailByte = true;
1126             }
1127             
1128             while (source.hasRemaining() || gotoGetTrailByte || gotoEscape) {
1129                 targetUniChar = UConverterConstants.missingCharMarker;
1130                 
1131                 if (target.hasRemaining() || gotoEscape) {
1132                     if (gotoEscape) {
1133                         mySourceChar = ESC_2022; // goto escape label
1134                         mySourceCharTemp = mySourceChar;
1135                     } else if (gotoGetTrailByte) {
1136                         mySourceCharTemp = 0xff; // goto getTrailByte; set mySourceCharTemp to go to default
1137                     } else {
1138                         mySourceChar = UConverterConstants.UNSIGNED_BYTE_MASK & source.get();
1139                         mySourceCharTemp = mySourceChar;
1140                     }
1141                     
1142                     switch (mySourceCharTemp) {
1143                     case UConverterConstants.SI:
1144                         myConverterData.toU2022State.g = 0;
1145                         if (myConverterData.isEmptySegment) {
1146                             myConverterData.isEmptySegment = false; /* we are handling it, reset to avoid future spurious errors */
1147                             err = CoderResult.malformedForLength(1);
1148                             this.toUBytesArray[0] = (byte)mySourceChar;
1149                             this.toULength = 1;
1150                             return err;
1151                         }
1152                         continue;
1153                         
1154                     case UConverterConstants.SO:
1155                         if (myConverterData.toU2022State.cs[1] != 0) {
1156                             myConverterData.toU2022State.g = 1;
1157                             myConverterData.isEmptySegment = true;  /* Begin a new segment, empty so far */
1158                             continue;
1159                         } else {
1160                             /* illegal to have SO before a matching designator */
1161                             myConverterData.isEmptySegment = false; /* Handling a different error, reset this to avoid future spurious errs */
1162                             break;
1163                         }
1164                         
1165                     case ESC_2022:
1166                         if (!gotoEscape) {
1167                             source.position(source.position()-1);
1168                         }
1169 // escape label
1170                         gotoEscape = false;
1171                         {
1172                             int mySourceBefore = source.position();
1173                             int toULengthBefore = this.toULength;
1174
1175                             err = changeState_2022(this, source, ISO_2022_CN);
1176
1177                             /* After SO there must be at least one character before a designator (designator error handled separately) */
1178                             if(myConverterData.key == 0 && !err.isError() && myConverterData.isEmptySegment) {
1179                                 err = CoderResult.malformedForLength(source.position() - mySourceBefore);
1180                                 this.toULength = toULengthBefore + (source.position() - mySourceBefore);
1181                             }
1182                         }
1183
1184                         /* invalid or illegal escape sequence */
1185                         if(err.isError()){
1186                             myConverterData.isEmptySegment = false; /* Reset to avoid future spurious errors */
1187                             return err;
1188                         }
1189                         continue;
1190                         
1191                     /*ISO-2022-CN does not use single-byte (C1) SS2 and SS3 */
1192                     case CR:
1193                         /* falls through */
1194                     case LF:
1195                         myConverterData.toU2022State.reset();
1196                         /* falls through */
1197                     default:
1198                         /* converter one or two bytes */
1199                         myConverterData.isEmptySegment = false;
1200                         if (myConverterData.toU2022State.g != 0 || gotoGetTrailByte) {
1201                             if (source.hasRemaining() || gotoGetTrailByte) {
1202                                 UConverterSharedData cnv;
1203                                 byte tempState;
1204                                 int tempBufLen;
1205                                 boolean leadIsOk, trailIsOk;
1206                                 short trailByte;
1207 // getTrailByte: label
1208                                 gotoGetTrailByte = false; // reset gotoGetTrailByte
1209                                 
1210                                 trailByte = (short)(source.get(source.position()) & UConverterConstants.UNSIGNED_BYTE_MASK);
1211                                 /*
1212                                  * Ticket 5691: consistent illegal sequences:
1213                                  * - We include at least the first byte in the illegal sequence.
1214                                  * - If any of the non-initial bytes could be the start of a character,
1215                                  *   we stop the illegal sequence before the first one of those.
1216                                  * 
1217                                  * In ISO-2022 DBCS, if the second byte is in the range 21..7e range or is
1218                                  * an ESC/SO/SI, we report only the first byte as the illegal sequence.
1219                                  * Otherwise we convert or report the pair of bytes.
1220                                  */
1221                                 leadIsOk = (short)(UConverterConstants.UNSIGNED_BYTE_MASK & (mySourceChar - 0x21)) <= (0x7e - 0x21);
1222                                 trailIsOk = (short)(UConverterConstants.UNSIGNED_BYTE_MASK & (trailByte - 0x21)) <= (0x7e - 0x21);
1223                                 if (leadIsOk && trailIsOk) {
1224                                     source.get();
1225                                     tempState = myConverterData.toU2022State.cs[myConverterData.toU2022State.g];
1226                                     if (tempState > CNS_11643_0) {
1227                                         cnv = myConverterData.myConverterArray[CNS_11643];
1228                                         tempBuf[0] = (byte)(0x80 + (tempState - CNS_11643_0));
1229                                         tempBuf[1] = (byte)mySourceChar;
1230                                         tempBuf[2] = (byte)trailByte;
1231                                         tempBufLen = 3;
1232                                     } else {
1233                                         cnv = myConverterData.myConverterArray[tempState];
1234                                         tempBuf[0] = (byte)mySourceChar;
1235                                         tempBuf[1] = (byte)trailByte;
1236                                         tempBufLen = 2;
1237                                     }
1238                                     ByteBuffer tempBuffer = ByteBuffer.wrap(tempBuf);
1239                                     tempBuffer.limit(tempBufLen);
1240                                     targetUniChar = MBCSSimpleGetNextUChar(cnv, tempBuffer, false);
1241                                     mySourceChar = (mySourceChar << 8) | trailByte;
1242                                     
1243                                 } else if (!(trailIsOk || IS_2022_CONTROL(trailByte))) {
1244                                     /* report a pair of illegal bytes if the second byte is not a DBCS starter */
1245                                     source.get();
1246                                     /* add another bit so that the code below writes 2 bytes in case of error */
1247                                     mySourceChar = 0x10000 | (mySourceChar << 8) | trailByte;
1248                                 }
1249                                 if (myConverterData.toU2022State.g >= 2) {
1250                                     /* return from a single-shift state to the previous one */
1251                                     myConverterData.toU2022State.g = myConverterData.toU2022State.prevG;
1252                                 }
1253                             } else {
1254                                 toUBytesArray[0] = (byte)mySourceChar;
1255                                 toULength = 1;
1256                                 // goto endloop;
1257                                 return err;
1258                             }
1259                         } else {
1260                             if (mySourceChar <= 0x7f) {
1261                                 targetUniChar = (char)mySourceChar;
1262                             }
1263                         }
1264                         break;
1265                     }
1266                     if ((UConverterConstants.UNSIGNED_INT_MASK&targetUniChar) < (UConverterConstants.UNSIGNED_INT_MASK&(UConverterConstants.missingCharMarker-1))) {
1267                         if (offsets != null) {
1268                             offsets.array()[target.position()] = source.remaining() - (mySourceChar <= 0xff ? 1 : 2);
1269                         }
1270                         target.put((char)targetUniChar);
1271                     } else if ((UConverterConstants.UNSIGNED_INT_MASK&targetUniChar) > (UConverterConstants.UNSIGNED_INT_MASK&(UConverterConstants.missingCharMarker))) {
1272                         /* disassemble the surrogate pair and write to output */
1273                         targetUniChar -= 0x0010000;
1274                         target.put((char)(0xd800+(char)(targetUniChar>>10)));
1275                         if (offsets != null) {
1276                             offsets.array()[target.position()-1] = source.position() - (mySourceChar <= 0xff ? 1 : 2);
1277                         }
1278                         if (target.hasRemaining()) {
1279                             target.put((char)(0xdc00+(char)(targetUniChar&0x3ff)));
1280                             if (offsets != null) {
1281                                 offsets.array()[target.position()-1] = source.position() - (mySourceChar <= 0xff ? 1 : 2);
1282                             }
1283                         } else {
1284                             charErrorBufferArray[charErrorBufferLength++] = (char)(0xdc00+(char)(targetUniChar&0x3ff));
1285                         }
1286                     } else {
1287                         /* Call the callback function */ 
1288                         err = toUnicodeCallback(this, mySourceChar, targetUniChar);
1289                         break;
1290                     }
1291                     
1292                 } else {
1293                     err = CoderResult.OVERFLOW;
1294                     break;
1295                 }
1296             }
1297             
1298             return err;
1299         }
1300         
1301     }
1302     /************************ ISO-2022-KR ********************/
1303     private class CharsetDecoderISO2022KR extends CharsetDecoderICU {
/**
 * Creates an ISO-2022-KR decoder for the given charset.
 *
 * @param cs the charset this decoder belongs to; passed straight to the superclass
 */
public CharsetDecoderISO2022KR(CharsetICU cs) {
    super(cs);
}
1307         
1308         protected void implReset() {
1309             super.implReset();
1310             setInitialStateToUnicodeKR();
1311             myConverterData.reset();
1312         }
1313         
1314         protected CoderResult decodeLoop(ByteBuffer source, CharBuffer target, IntBuffer offsets, boolean flush) {
1315             CoderResult err = CoderResult.UNDERFLOW;
1316             int mySourceChar = 0x0000;
1317             int targetUniChar = 0x0000;
1318             byte[] tempBuf = new byte[2];
1319             boolean usingFallback;
1320             boolean gotoGetTrailByte = false;
1321             boolean gotoEscape = false;
1322             
1323             if (myConverterData.version == 1) {
1324                 return decodeLoopIBM(myConverterData.currentDecoder, source, target, offsets, flush);
1325             }
1326             
1327             /* initialize state */
1328             usingFallback = isFallbackUsed();
1329             
1330             if (myConverterData.key != 0) {
1331                 /* continue with a partial escape sequence */
1332                 gotoEscape = true;
1333             } else if (toULength == 1 && source.hasRemaining() && target.hasRemaining()) {
1334                 /* continue with a partial double-byte character */
1335                 mySourceChar = (toUBytesArray[0] & UConverterConstants.UNSIGNED_BYTE_MASK);
1336                 toULength = 0;
1337                 gotoGetTrailByte = true;
1338             }
1339             
1340             while (source.hasRemaining() || gotoGetTrailByte || gotoEscape) {
1341                 if (target.hasRemaining() || gotoGetTrailByte || gotoEscape) {
1342                     if (!gotoGetTrailByte && !gotoEscape) {
1343                         mySourceChar = (char)(source.get() & UConverterConstants.UNSIGNED_BYTE_MASK);
1344                     }
1345                     
1346                     if (!gotoGetTrailByte && !gotoEscape && mySourceChar == UConverterConstants.SI) {
1347                         myConverterData.toU2022State.g = 0;
1348                         if (myConverterData.isEmptySegment) {
1349                             myConverterData.isEmptySegment = false; /* we are handling it, reset to avoid future spurious errors */
1350                             err = CoderResult.malformedForLength(1);
1351                             this.toUBytesArray[0] = (byte)mySourceChar;
1352                             this.toULength = 1;
1353                             return err;
1354                         }
1355                         /* consume the source */
1356                         continue;
1357                     } else if (!gotoGetTrailByte && !gotoEscape && mySourceChar == UConverterConstants.SO) {
1358                         myConverterData.toU2022State.g = 1;
1359                         myConverterData.isEmptySegment = true;
1360                         /* consume the source */
1361                         continue;
1362                     } else if (!gotoGetTrailByte && (gotoEscape || mySourceChar == ESC_2022)) {
1363                         if (!gotoEscape) {
1364                             source.position(source.position()-1);
1365                         }
1366 // escape label
1367                         gotoEscape = false; // reset gotoEscape flag
1368                         myConverterData.isEmptySegment = false; /* Any invalid ESC sequences will be detected separately, so just reset this */ 
1369                         err = changeState_2022(this, source, ISO_2022_KR);
1370                         if (err.isError()) {
1371                             return err;
1372                         }
1373                         continue;
1374                     }
1375                     myConverterData.isEmptySegment = false; /* Any invalid char errors will be detected separately, so just reset this */
1376                     if (myConverterData.toU2022State.g == 1 || gotoGetTrailByte) {
1377                         if (source.hasRemaining() || gotoGetTrailByte) {
1378                             boolean leadIsOk, trailIsOk;
1379                             short trailByte;
1380 // getTrailByte label
1381                             gotoGetTrailByte = false; // reset gotoGetTrailByte flag
1382                             
1383                             trailByte = (short)(source.get(source.position()) & UConverterConstants.UNSIGNED_BYTE_MASK);
1384                             targetUniChar = UConverterConstants.missingCharMarker;
1385                             /*
1386                              * Ticket 5691: consistent illegal sequences:
1387                              * - We include at least the first byte in the illegal sequence.
1388                              * - If any of the non-initial bytes could be the start of a character,
1389                              *   we stop the illegal sequence before the first one of those.
1390                              * 
1391                              * In ISO-2022 DBCS, if the second byte is in the 21..7e range or is
1392                              * an ESC/SO/SI, we report only the first byte as the illegal sequence.
1393                              * Otherwise we convert or report the pair of bytes.
1394                              */
1395                             leadIsOk = (short)(UConverterConstants.UNSIGNED_BYTE_MASK & (mySourceChar - 0x21)) <= (0x7e - 0x21);
1396                             trailIsOk = (short)(UConverterConstants.UNSIGNED_BYTE_MASK & (trailByte - 0x21)) <= (0x7e - 0x21);
1397                             if (leadIsOk && trailIsOk) {
1398                                 source.get();
1399                                 tempBuf[0] = (byte)(mySourceChar + 0x80);
1400                                 tempBuf[1] = (byte)(trailByte + 0x80);
1401                                 targetUniChar = MBCSSimpleGetNextUChar(myConverterData.currentConverter.sharedData, ByteBuffer.wrap(tempBuf), usingFallback);
1402                                 mySourceChar = (char)((mySourceChar << 8) | trailByte);
1403                             } else if (!(trailIsOk || IS_2022_CONTROL(trailByte))) {
1404                                 /* report a pair of illegal bytes if the second byte is not a DBCS starter */
1405                                 source.get();
1406                                 /* add another bit so that the code below writes 2 bytes in case of error */
1407                                 mySourceChar = (char)(0x10000 | (mySourceChar << 8) | trailByte);
1408                             }
1409                         } else {
1410                             toUBytesArray[0] = (byte)mySourceChar;
1411                             toULength = 1;
1412                             break;
1413                         }
1414                     } else if (mySourceChar <= 0x7f) {
1415                         int savedSourceLimit = source.limit();
1416                         int savedSourcePosition = source.position();
1417                         source.limit(source.position());
1418                         source.position(source.position()-1); 
1419                         targetUniChar = MBCSSimpleGetNextUChar(myConverterData.currentConverter.sharedData, source, usingFallback);
1420                         source.limit(savedSourceLimit);
1421                         source.position(savedSourcePosition);
1422                     } else {
1423                         targetUniChar = 0xffff;
1424                     }
1425                     if (targetUniChar < 0xfffe) {
1426                         target.put((char)targetUniChar);
1427                         if (offsets != null) {
1428                             offsets.array()[target.position()] = source.position() - (mySourceChar <= 0xff ? 1 : 2);
1429                         }
1430                     } else {
1431                         /* Call the callback function */
1432                         err = toUnicodeCallback(this, mySourceChar, targetUniChar);
1433                         break;
1434                     }
1435                 } else {
1436                     err = CoderResult.OVERFLOW;
1437                     break;
1438                 }
1439             }
1440             
1441             return err;
1442         }
1443         
1444         protected CoderResult decodeLoopIBM(CharsetDecoderMBCS cnv, ByteBuffer source, CharBuffer target, IntBuffer offsets, boolean flush) {
1445             CoderResult err = CoderResult.UNDERFLOW;
1446             int sourceStart;
1447             int sourceLimit;
1448             int argSource;
1449             int argTarget;
1450             boolean gotoEscape = false;
1451             int oldSourceLimit;
1452             
1453             /* remember the original start of the input for offsets */
1454             sourceStart = argSource = source.position();
1455             
1456             if (myConverterData.key != 0) {
1457                 /* continue with a partial escape sequence */
1458                 gotoEscape = true;
1459             }
1460             
1461             while (gotoEscape || (!err.isError() && source.hasRemaining())) {
1462                 if (!gotoEscape) {
1463                     /* Find the end of the buffer e.g : Next Escape Seq | end of Buffer */
1464                     int oldSourcePos = source.position();
1465                     sourceLimit = getEndOfBuffer_2022(source);
1466                     source.position(oldSourcePos);
1467                     if (source.position() != sourceLimit) {
1468                         /*
1469                          * get the current partial byte sequence
1470                          * 
1471                          * it needs to be moved between the public and the subconverter
1472                          * so that the conversion frameword, which only sees the public
1473                          * converter, can handle truncated and illegal input etc.
1474                          */
1475                         if (toULength > 0) {
1476                             cnv.toUBytesArray = toUBytesArray.clone();
1477                         }
1478                         cnv.toULength = toULength;
1479                         
1480                         /*
1481                          * Convert up to the end of the input, or to before the next escape character.
1482                          * Does not handle conversion extensions because the preToU[] state etc.
1483                          * is not copied.
1484                          */
1485                         argTarget = target.position();
1486                         oldSourceLimit = source.limit(); // save the old source limit change to new one
1487                         source.limit(sourceLimit);
1488                         err = myConverterData.currentDecoder.cnvMBCSToUnicodeWithOffsets(source, target, offsets, flush);
1489                         source.limit(oldSourceLimit); // restore source limit;
1490                         if (offsets != null && sourceStart != argSource) {
1491                             /* update offsets to base them on the actual start of the input */
1492                             int delta = argSource - sourceStart;
1493                             while (argTarget < target.position()) {
1494                                 int currentOffset = offsets.get();
1495                                 offsets.position(offsets.position()-1);
1496                                 if (currentOffset >= 0) {
1497                                     offsets.put(currentOffset + delta);
1498                                     offsets.position(offsets.position()-1);
1499                                 }
1500                                 offsets.get();
1501                                 target.get();
1502                             }
1503                         }
1504                         argSource = source.position();
1505                         
1506                         /* copy input/error/overflow buffers */
1507                         if (cnv.toULength > 0) {
1508                             toUBytesArray = cnv.toUBytesArray.clone();
1509                         }
1510                         toULength = cnv.toULength;
1511                         
1512                         if (err.isOverflow()) {
1513                             if (cnv.charErrorBufferLength > 0) {
1514                                 charErrorBufferArray = cnv.charErrorBufferArray.clone();
1515                             }
1516                             charErrorBufferLength = cnv.charErrorBufferLength;
1517                             cnv.charErrorBufferLength = 0;
1518                         }
1519                     }
1520                     
1521                     if (err.isError() || err.isOverflow() || (source.position() == source.limit())) {
1522                         return err;
1523                     }
1524                 }
1525 // escape label
1526                 gotoEscape = false;
1527                 err = changeState_2022(this, source, ISO_2022_KR);
1528             }
1529             return err;
1530         }
1531     }
1532     
1533     /******************** from unicode **********************/
1534     /* preference order of JP charsets */
1535     private final static byte []jpCharsetPref = {
1536         ASCII,
1537         JISX201,
1538         ISO8859_1,
1539         ISO8859_7,
1540         JISX208,
1541         JISX212,
1542         GB2312,
1543         KSC5601,
1544         HWKANA_7BIT
1545     };
1546     /*
1547      * The escape sequences must be in order of the enum constants like JISX201 = 3,
1548      * not in order of jpCharsetPref[]!
1549      */
1550     private final static byte [][]escSeqChars = {
1551             { 0x1B, 0x28, 0x42},        /* <ESC>(B  ASCII       */
1552             { 0x1B, 0x2E, 0x41},        /* <ESC>.A  ISO-8859-1  */
1553             { 0x1B, 0x2E, 0x46},        /* <ESC>.F  ISO-8859-7  */
1554             { 0x1B, 0x28, 0x4A},        /* <ESC>(J  JISX-201    */
1555             { 0x1B, 0x24, 0x42},        /* <ESC>$B  JISX-208    */
1556             { 0x1B, 0x24, 0x28, 0x44},  /* <ESC>$(D JISX-212    */
1557             { 0x1B, 0x24, 0x41},        /* <ESC>$A  GB2312      */
1558             { 0x1B, 0x24, 0x28, 0x43},  /* <ESC>$(C KSC5601     */
1559             { 0x1B, 0x28, 0x49}         /* <ESC>(I  HWKANA_7BIT */
1560     };
1561     /*
1562      * JIS X 0208 has fallbacks from Unicode half-width Katakana to full-width (DBCS)
1563      * Katakana.
1564      * Now that we use a Shift-JIS table for JIS X 0208 we need to hardcode these fallbacks
1565      * because Shift-JIS roundtrips half-width Katakana to single bytes.
1566      * These were the only fallbacks in ICU's jisx-208.ucm file.
1567      */
1568     private final static char []hwkana_fb = {
1569         0x2123,  /* U+FF61 */
1570         0x2156,
1571         0x2157,
1572         0x2122,
1573         0x2126,
1574         0x2572,
1575         0x2521,
1576         0x2523,
1577         0x2525,
1578         0x2527,
1579         0x2529,
1580         0x2563,
1581         0x2565,
1582         0x2567,
1583         0x2543,
1584         0x213C,  /* U+FF70 */
1585         0x2522,
1586         0x2524,
1587         0x2526,
1588         0x2528,
1589         0x252A,
1590         0x252B,
1591         0x252D,
1592         0x252F,
1593         0x2531,
1594         0x2533,
1595         0x2535,
1596         0x2537,
1597         0x2539,
1598         0x253B,
1599         0x253D,
1600         0x253F,  /* U+FF80 */
1601         0x2541,
1602         0x2544,
1603         0x2546,
1604         0x2548,
1605         0x254A,
1606         0x254B,
1607         0x254C,
1608         0x254D,
1609         0x254E,
1610         0x254F,
1611         0x2552,
1612         0x2555,
1613         0x2558,
1614         0x255B,
1615         0x255E,
1616         0x255F,  /* U+FF90 */
1617         0x2560,
1618         0x2561,
1619         0x2562,
1620         0x2564,
1621         0x2566,
1622         0x2568,
1623         0x2569,
1624         0x256A,
1625         0x256B,
1626         0x256C,
1627         0x256D,
1628         0x256F,
1629         0x2573,
1630         0x212B,
1631         0x212C   /* U+FF9F */
1632     };
1633     
1634     protected byte [][]fromUSubstitutionChar = new byte[][]{ { (byte)0x1A }, { (byte)0x2F, (byte)0x7E} };
1635     /****************************ISO-2022-JP************************************/
1636     private class CharsetEncoderISO2022JP extends CharsetEncoderICU {
/**
 * Creates an ISO-2022-JP encoder for the given charset, using the first
 * entry of {@code fromUSubstitutionChar} as the replacement bytes.
 *
 * @param cs the charset this encoder belongs to
 */
public CharsetEncoderISO2022JP(CharsetICU cs) {
    super(cs, fromUSubstitutionChar[0]);
}
1640         
1641         protected void implReset() {
1642             super.implReset();
1643             myConverterData.reset();
1644         }
1645         /* Map Unicode to 00..7F according to JIS X 0201. Return U+FFFE if unmappable. */
1646         private int jisx201FromU(int value) {
1647             if (value <= 0x7f) {
1648                 if (value != 0x5c && value != 0x7e) {
1649                     return value;
1650                 }
1651             } else if (value == 0xa5) {
1652                 return 0x5c;
1653             } else if (value == 0x203e) {
1654                 return 0x7e;
1655             }
1656             return (int)(UConverterConstants.UNSIGNED_INT_MASK & 0xfffe);
1657         }
1658         
1659         /*
1660          * Take a valid Shift-JIS byte pair, check that it is in the range corresponding
1661          * to JIS X 0208, and convert it to a pair of 21..7E bytes.
1662          * Return 0 if the byte pair is out of range.
1663          */
1664         private int _2022FromSJIS(int value) {
1665             short trail;
1666             
1667             if (value > 0xEFFC) {
1668                 return 0; /* beyond JIS X 0208 */
1669             }
1670             
1671             trail = (short)(value & UConverterConstants.UNSIGNED_BYTE_MASK);
1672             
1673             value &= 0xff00; /* lead byte */
1674             if (value <= 0x9f00) {
1675                 value -= 0x7000;
1676             } else { /* 0xe000 <= value <= 0xef00 */
1677                 value -= 0xb000;
1678             }
1679             
1680             value <<= 1;
1681             
1682             if (trail <= 0x9e) {
1683                 value -= 0x100;
1684                 if (trail <= 0x7e) {
1685                     value |= ((trail - 0x1f) & UConverterConstants.UNSIGNED_BYTE_MASK);
1686                 } else {
1687                     value |= ((trail - 0x20) & UConverterConstants.UNSIGNED_BYTE_MASK);
1688                 }
1689             } else { /* trail <= 0xfc */
1690                 value |= ((trail - 0x7e) & UConverterConstants.UNSIGNED_BYTE_MASK);
1691             }
1692             
1693             return value;
1694         }
1695         /* This overrides the cbFromUWriteSub method in CharsetEncoderICU */
1696         CoderResult cbFromUWriteSub (CharsetEncoderICU encoder, 
1697                 CharBuffer source, ByteBuffer target, IntBuffer offsets){
1698                 CoderResult err = CoderResult.UNDERFLOW;
1699                 byte[] buffer = new byte[8];
1700                 int i = 0;
1701                 byte[] subchar;
1702                 subchar = encoder.replacement();
1703                 
1704                 byte cs;
1705                 if (myConverterData.fromU2022State.g == 1) {
1706                     /* JIS7: switch from G1 to G0 */
1707                     myConverterData.fromU2022State.g = 0;
1708                     buffer[i++] = UConverterConstants.SI;
1709                 }
1710                 cs = myConverterData.fromU2022State.cs[0];
1711                 
1712                 if (cs != ASCII && cs != JISX201) {
1713                     /* not in ASCII or JIS X 0201: switch to ASCII */
1714                     myConverterData.fromU2022State.cs[0] = ASCII;
1715                     buffer[i++] = 0x1B;
1716                     buffer[i++] = 0x28;
1717                     buffer[i++] = 0x42;
1718                 }
1719                 
1720                 buffer[i++] = subchar[0];
1721                 
1722                 err = CharsetEncoderICU.fromUWriteBytes(this, buffer, 0, i, target, offsets, source.position() - 1);
1723
1724                 return err;
1725             }
1726         
1727         protected CoderResult encodeLoop(CharBuffer source, ByteBuffer target, IntBuffer offsets, boolean flush) {
1728             CoderResult err = CoderResult.UNDERFLOW;
1729             int sourceChar;
1730             byte cs, g;
1731             int choiceCount;
1732             int len, outLen;
1733             byte[] choices = new byte[10];
1734             int targetValue = 0;
1735             boolean usingFallback;
1736             byte[] buffer = new byte[8];
1737             boolean getTrail = false; // use for getTrail label
1738             int oldSourcePos; // for proper error handling
1739             
1740             choiceCount = 0;
1741             
1742             /* check if the last codepoint of previous buffer was a lead surrogate */
1743             if ((sourceChar = fromUChar32) != 0 && target.hasRemaining()) {
1744                 getTrail = true;
1745             }
1746             
1747             while (getTrail || source.hasRemaining()) {
1748                 if (getTrail || target.hasRemaining()) {
1749                     oldSourcePos = source.position();
1750                     if (!getTrail) { /* skip if going to getTrail label */
1751                         sourceChar = source.get();
1752                     }
1753                     /* check if the char is a First surrogate */
1754                     if (getTrail || UTF16.isSurrogate((char)sourceChar)) {
1755                         if (getTrail || UTF16.isLeadSurrogate((char)sourceChar)) {
1756 // getTrail:                 
1757                             if (getTrail) {
1758                                 getTrail = false;
1759                             }
1760                             /* look ahead to find the trail surrogate */
1761                             if (source.hasRemaining()) {
1762                                 /* test the following code unit */
1763                                 char trail = source.get();
1764                                 /* go back to the previous position */
1765                                 source.position(source.position()-1);
1766                                 if (UTF16.isTrailSurrogate(trail)) {
1767                                     source.get();
1768                                     sourceChar = UCharacter.getCodePoint((char)sourceChar, trail);
1769                                     fromUChar32 = 0x00;
1770                                     /* convert this supplementary code point */
1771                                     /* exit this condition tree */
1772                                 } else {
1773                                     /* this is an unmatched lead code unit (1st surrogate) */
1774                                     /* callback(illegal) */
1775                                     err = CoderResult.malformedForLength(1);
1776                                     fromUChar32 = sourceChar;
1777                                     break;
1778                                 }
1779                             } else {
1780                                 /* no more input */
1781                                 fromUChar32 = sourceChar;
1782                                 break;
1783                             }
1784                         } else {
1785                             /* this is an unmatched trail code unit (2nd surrogate) */
1786                             /* callback(illegal) */
1787                             err = CoderResult.malformedForLength(1);
1788                             fromUChar32 = sourceChar;
1789                             break;
1790                         }
1791                     }
1792                     
1793                     /* do not convert SO/SI/ESC */
1794                     if (IS_2022_CONTROL(sourceChar)) {
1795                         /* callback(illegal) */
1796                         err = CoderResult.malformedForLength(1);
1797                         fromUChar32 = sourceChar;
1798                         break;
1799                     }
1800                     
1801                     /* do the conversion */
1802                     
1803                     if (choiceCount == 0) {
1804                         char csm;
1805                         /*
1806                          * The csm variable keeps track of which charsets are allowed
1807                          * and not used yet while building the choices[].
1808                          */
1809                         csm = (char)jpCharsetMasks[myConverterData.version];
1810                         choiceCount = 0;
1811                         
1812                         /* JIS7/8: try single-byte half-width Katakana before JISX208 */
1813                         if (myConverterData.version == 3 || myConverterData.version == 4) {
1814                             choices[choiceCount++] = HWKANA_7BIT;
1815                         }
1816                         /* Do not try single-bit half-width Katakana for other versions. */
1817                         csm &= ~CSM(HWKANA_7BIT);
1818                         
1819                         /* try the current G0 charset */
1820                         choices[choiceCount++] = cs = myConverterData.fromU2022State.cs[0];
1821                         csm &= ~CSM(cs);
1822                         
1823                         /* try the current G2 charset */
1824                         if ((cs = myConverterData.fromU2022State.cs[2]) != 0) {
1825                             choices[choiceCount++] = cs;
1826                             csm &= ~CSM(cs);
1827                         }
1828                         
1829                         /* try all the other charsets */
1830                         for (int i = 0; i < jpCharsetPref.length; i++) {
1831                             cs = jpCharsetPref[i];
1832                             if ((CSM(cs) & csm) != 0) {
1833                                 choices[choiceCount++] = cs;
1834                                 csm &= ~CSM(cs);
1835                             }
1836                         }
1837                     }
1838                     
1839                     cs = g = 0;
1840                     /* 
1841                      * len==0:  no mapping found yet
1842                      * len<0:   found a fallback result:  continue looking for a roundtrip but no further fallbacks
1843                      * len>0:   found a roundtrip result, done
1844                      */
1845                     len = 0;
1846                     /*
1847                      * We will turn off usingFallBack after finding a fallback,
1848                      * but we still get fallbacks from PUA code points as usual.
1849                      * Therefore, we will also need to check that we don't overwrite
1850                      * an early fallback with a later one.
1851                      */
1852                     usingFallback = useFallback;
1853                     
1854                     for (int i = 0; i < choiceCount && len <= 0; i++) {
1855                         int[] value = new int[1];
1856                         int len2;
1857                         byte cs0 = choices[i];
1858                         switch (cs0) {
1859                         case ASCII:
1860                             if (sourceChar <= 0x7f) {
1861                                 targetValue = sourceChar;
1862                                 len = 1;
1863                                 cs = cs0;
1864                                 g = 0;
1865                             }
1866                             break;
1867                         case ISO8859_1:
1868                             if (GR96_START <= sourceChar && sourceChar <= GR96_END) {
1869                                 targetValue = sourceChar - 0x80;
1870                                 len = 1;
1871                                 cs = cs0;
1872                                 g = 2;
1873                             }
1874                             break;
1875                         case HWKANA_7BIT:
1876                             if (sourceChar <= HWKANA_END && sourceChar >= HWKANA_START) {
1877                                 if (myConverterData.version == 3) {
1878                                     /* JIS7: use G1 (SO) */
1879                                     /* Shift U+FF61..U+FF9F to bytes 21..5F. */
1880                                     targetValue = (int)(UConverterConstants.UNSIGNED_INT_MASK & (sourceChar - (HWKANA_START - 0x21)));
1881                                     len = 1;
1882                                     myConverterData.fromU2022State.cs[1] = cs = cs0; /* do not output an escape sequence */
1883                                     g = 1;
1884                                 } else if (myConverterData.version == 4) {
1885                                     /* JIS8: use 8-bit bytes with any single-byte charset, see escape sequence output below */
1886                                     /* Shift U+FF61..U+FF9F to bytes A1..DF. */
1887                                     targetValue = (int)(UConverterConstants.UNSIGNED_INT_MASK & (sourceChar - (HWKANA_START - 0xa1)));
1888                                     len = 1;
1889                                     
1890                                     cs = myConverterData.fromU2022State.cs[0];
1891                                     if (IS_JP_DBCS(cs)) {
1892                                         /* switch from a DBCS charset to JISX201 */
1893                                         cs = JISX201;
1894                                     }
1895                                     /* else stay in the current G0 charset */
1896                                     g = 0;
1897                                 }
1898                                 /* else do not use HWKANA_7BIT with other versions */
1899                             }
1900                             break;
1901                         case JISX201:
1902                             /* G0 SBCS */
1903                             value[0] = jisx201FromU(sourceChar);
1904                             if (value[0] <= 0x7f) {
1905                                 targetValue = value[0];
1906                                 len = 1;
1907                                 cs = cs0;
1908                                 g = 0;
1909                                 usingFallback = false;
1910                             }
1911                             break;
1912                         case JISX208:
1913                             /* G0 DBCS from JIS table */
1914                             myConverterData.currentConverter.sharedData = myConverterData.myConverterArray[cs0];
1915                             myConverterData.currentConverter.sharedData.mbcs.outputType = CharsetMBCS.MBCS_OUTPUT_2;
1916                             len2 = myConverterData.currentEncoder.fromUChar32(sourceChar, value, usingFallback);
1917                             //len2 = MBCSFromUChar32_ISO2022(myConverterData.myConverterArray[cs0], sourceChar, value, usingFallback, CharsetMBCS.MBCS_OUTPUT_2);
1918                             if (len2 == 2 || (len2 == -2 && len == 0)) { /* only accept DBCS: abs(len) == 2 */
1919                                 value[0] = _2022FromSJIS(value[0]);
1920                                 if (value[0] != 0) {
1921                                     targetValue = value[0];
1922                                     len = len2;
1923                                     cs = cs0;
1924                                     g = 0;
1925                                     usingFallback = false;
1926                                 }
1927                             } else if (len == 0 && usingFallback  && sourceChar <= HWKANA_END && sourceChar >= HWKANA_START) {
1928                                 targetValue = hwkana_fb[sourceChar - HWKANA_START];
1929                                 len = -2;
1930                                 cs = cs0;
1931                                 g = 0;
1932                                 usingFallback = false;
1933                             }
1934                             break;
1935                         case ISO8859_7:
1936                             /* G0 SBCS forced to 7-bit output */
1937                             len2 = MBCSSingleFromUChar32(myConverterData.myConverterArray[cs0], sourceChar, value, usingFallback);
1938                             if (len2 != 0 && !(len2 < 0 && len != 0) && GR96_START <= value[0] && value[0] <= GR96_END) {
1939                                 targetValue = value[0] - 0x80;
1940                                 len = len2;
1941                                 cs = cs0;
1942                                 g = 2;
1943                                 usingFallback = false;
1944                             }
1945                             break;
1946                         default :
1947                             /* G0 DBCS */
1948                             myConverterData.currentConverter.sharedData = myConverterData.myConverterArray[cs0];
1949                             myConverterData.currentConverter.sharedData.mbcs.outputType = CharsetMBCS.MBCS_OUTPUT_2;
1950                             len2 = myConverterData.currentEncoder.fromUChar32(sourceChar, value, usingFallback);
1951                             //len2 = MBCSFromUChar32_ISO2022(myConverterData.myConverterArray[cs0], sourceChar, value, usingFallback, CharsetMBCS.MBCS_OUTPUT_2);
1952                             if (len2 == 2 || (len2 == -2 && len == 0)) { /* only accept DBCS: abs(len)==2 */
1953                                 if (cs0 == KSC5601) {
1954                                     /*
1955                                      * Check for valid bytes for the encoding scheme.
1956                                      * This is necessary because the sub-converter (windows-949)
1957                                      * has a broader encoding scheme than is valid for 2022.
1958                                      */
1959                                     value[0] = _2022FromGR94DBCS(value[0]);
1960                                     if (value[0] == 0) {
1961                                         break;
1962                                     }
1963                                 }
1964                                 targetValue = value[0];
1965                                 len = len2;
1966                                 cs = cs0;
1967                                 g = 0;
1968                                 usingFallback = false;
1969                             }
1970                             break;
1971                         }
1972                     }
1973                     
1974                     if (len != 0) {
1975                         if (len < 0) {
1976                             len = -len; /* fallback */
1977                         }
1978                         outLen = 0;
1979                         
1980                         /* write SI if necessary (only for JIS7 */
1981                         if (myConverterData.fromU2022State.g == 1 && g == 0) {
1982                             buffer[outLen++] = UConverterConstants.SI;
1983                             myConverterData.fromU2022State.g = 0;
1984                         }
1985                         
1986                         /* write the designation sequence if necessary */
1987                         if (cs != myConverterData.fromU2022State.cs[g]) {
1988                             for (int i = 0; i < escSeqChars[cs].length; i++) {
1989                                 buffer[outLen++] = escSeqChars[cs][i];
1990                             }
1991                             myConverterData.fromU2022State.cs[g] = cs;
1992                             
1993                             /* invalidate the choices[] */
1994                             choiceCount = 0;
1995                         }
1996                         
1997                         /* write the shift sequence if necessary */
1998                         if (g != myConverterData.fromU2022State.g) {
1999                             switch (g) {
2000                             /* case 0 handled before writing escapes */
2001                             case 1:
2002                                 buffer[outLen++] = UConverterConstants.SO;
2003                                 myConverterData.fromU2022State.g = 1;
2004                                 break;
2005                             default : /* case 2 */
2006                                 buffer[outLen++] = 0x1b;
2007                                 buffer[outLen++] = 0x4e;
2008                                 break;
2009                             /* case 3: no SS3 in ISO-2022-JP-x */
2010                             }
2011                         }
2012                         
2013                         /* write the output bytes */
2014                         if (len == 1) {
2015                             buffer[outLen++] = (byte)targetValue;
2016                         } else { /* len == 2 */
2017                             buffer[outLen++] = (byte)(targetValue >> 8);
2018                             buffer[outLen++] = (byte)targetValue;
2019                         }
2020                     }else {
2021                         /*
2022                          * if we cannot find the character after checking all codepages
2023                          * then this is an error.
2024                          */
2025                         err = CoderResult.unmappableForLength(source.position()-oldSourcePos);
2026                         fromUChar32 = sourceChar;
2027                         break;
2028                     }
2029                     
2030                     if (sourceChar == CR || sourceChar == LF) {
2031                         /* reset the G2 state at the end of a line (conversion got use into ASCII or JISX201 already) */
2032                         myConverterData.fromU2022State.cs[2] = 0;
2033                         choiceCount = 0;
2034                     }
2035                     
2036                     /* output outLen>0 bytes in buffer[] */
2037                     if (outLen == 1) {
2038                         target.put(buffer[0]);
2039                         if (offsets != null) {
2040                             offsets.put(source.remaining() - 1); /* -1 known to be ASCII */
2041                         }
2042                     } else if (outLen == 2 && (target.position() + 2) <= target.limit()) {
2043                         target.put(buffer[0]);
2044                         target.put(buffer[1]);
2045                         if (offsets != null) {
2046                             int sourceIndex = source.position() - 1;
2047                             offsets.put(sourceIndex);
2048                             offsets.put(sourceIndex);
2049                         }
2050                     } else {
2051                         err = CharsetEncoderICU.fromUWriteBytes(this, buffer, 0, outLen, target, offsets, source.position()-1);
2052                     }
2053                 } else {
2054                     err = CoderResult.OVERFLOW;
2055                     break;
2056                 }
2057             }
2058             
2059             /*
2060              * the end of the input stream and detection of truncated input
2061              * are handled by the framework, but for ISO-2022-JP conversion
2062              * we need to be in ASCII mode at the very end
2063              * 
2064              * conditions:
2065              *  successful
2066              *  in SO mode or not in ASCII mode
2067              *  end of input and no truncated input
2068              */
2069             if (!err.isError() &&
2070                     (myConverterData.fromU2022State.g != 0 || myConverterData.fromU2022State.cs[0] != ASCII) &&
2071                     flush && !source.hasRemaining() && fromUChar32 == 0) {
2072                 int sourceIndex;
2073                 
2074                 outLen = 0;
2075                 
2076                 if (myConverterData.fromU2022State.g != 0) {
2077                     buffer[outLen++] = UConverterConstants.SI;
2078                     myConverterData.fromU2022State.g = 0;
2079                 }
2080                 
2081                 if (myConverterData.fromU2022State.cs[0] != ASCII) {
2082                     for (int i = 0; i < escSeqChars[ASCII].length; i++) {
2083                         buffer[outLen++] = escSeqChars[ASCII][i];
2084                     }
2085                     myConverterData.fromU2022State.cs[0] = ASCII;
2086                 }
2087                 
2088                 /* get the source index of the last input character */
2089                 sourceIndex = source.position();
2090                 if (sourceIndex > 0) {
2091                     --sourceIndex;
2092                     if (UTF16.isTrailSurrogate(source.get(sourceIndex)) &&
2093                             (sourceIndex == 0 || UTF16.isLeadSurrogate(source.get(sourceIndex-1)))) {
2094                         --sourceIndex;
2095                     }
2096                 } else {
2097                     sourceIndex = -1;
2098                 }
2099                 
2100                 err = CharsetEncoderICU.fromUWriteBytes(this, buffer, 0, outLen, target, offsets, sourceIndex);
2101             }
2102             return err;
2103         }
2104     }
2105     /****************************ISO-2022-CN************************************/
2106     /*
2107      * Rules for ISO-2022-CN Encoding:
2108      * i)   The designator sequence must appear once on a line before any instance
2109      *      of the character set it designates.
2110      * ii)  If two lines contain characters from the same character set, both lines
2111      *      must include the designator sequence.
2112      * iii) Once the designator sequence is known, a shifting sequence has to be found
2113      *      to invoke the shifting
2114      * iv)  All lines start in ASCII and end in ASCII.
2115      * v)   Four shifting sequences are employed for this purpose:
2116      *      Sequence    ASCII Eq    Charsets
2117      *      ---------   ---------   --------
2118      *      SI          <SI>        US-ASCII
2119      *      SO          <SO>        CNS-11643-1992 Plane 1, GB2312, ISO-IR-165
2120      *      SS2         <ESC>N      CNS-11643-1992 Plane 2
2121      *      SS3         <ESC>O      CNS-11643-1992 Planes 3-7
2122      * vi)  
2123      *      SOdesignator    : ESC "$" ")" finalchar_for_SO
2124      *      SS2designator   : ESC "$" "*" finalchar_for_SS2
2125      *      SS3designator   : ESC "$" "+" finalchar_for_SS3
2126      *      
2127      *      ESC $ ) A       Indicates the bytes following SO are Chinese
2128      *       characters as defined in GB 2312-80, until
2129      *       another SOdesignation appears
2130      *      
2131      *      ESC $ ) E       Indicates the bytes following SO are as defined
2132      *       in ISO-IR-165 (for details, see section 2.1),
2133      *       until another SOdesignation appears
2134      *       
2135      *      ESC $ ) G       Indicates the bytes following SO are as defined
2136      *       in CNS 11643-plane-1, until another SOdesignation appears
2137      *       
2138      *      ESC $ * H       Indicates the two bytes immediately following
2139      *       SS2 is a Chinese character as defined in CNS
2140      *       11643-plane-2, until another SS2designation
2141      *       appears
2142      *       (Meaning <ESC>N must precede every 2-byte sequence.)
2143      *      
2144      *      ESC $ + I       Indicates the immediate two bytes following SS3
2145      *       is a Chinese character as defined in CNS
2146      *       11643-plane-3, until another SS3designation
2147      *       appears
2148      *       (Meaning <ESC>O must precede every 2-byte sequence.)
2149      *      
2150      *      ESC $ + J       Indicates the immediate two bytes following SS3
2151      *       is a Chinese character as defined in CNS
2152      *       11643-plane-4, until another SS3designation
2153      *       appears
2154      *       (In English: <ESC>O must precede every 2-byte sequence.)
2155      *      
2156      *      ESC $ + K       Indicates the immediate two bytes following SS3
2157      *       is a Chinese character as defined in CNS
2158      *       11643-plane-5, until another SS3designation
2159      *       appears
2160      *       
2161      *      ESC $ + L       Indicates the immediate two bytes following SS3
2162      *       is a Chinese character as defined in CNS
2163      *       11643-plane-6, until another SS3designation
2164      *       appears
2165      *       
2166      *      ESC $ + M       Indicates the immediate two bytes following SS3
2167      *       is a Chinese character as defined in CNS
2168      *       11643-plane-7, until another SS3designation
2169      *       appears
2170      *       
2171      *      As in ISO-2022-CN, each line starts in ASCII, and ends in ASCII, and
2172      *      has its own designation information before any Chinese characters
2173      *      appear.
2174      */
2175     
2176     /* The following are defined this way to make strings truely readonly */
2177     private final static byte[] GB_2312_80_STR = { 0x1B, 0x24, 0x29, 0x41 };
2178     private final static byte[] ISO_IR_165_STR = { 0x1B, 0x24, 0x29, 0x45 };
2179     private final static byte[] CNS_11643_1992_Plane_1_STR = { 0x1B, 0x24, 0x29, 0x47 };
2180     private final static byte[] CNS_11643_1992_Plane_2_STR = { 0x1B, 0x24, 0x2A, 0x48 };
2181     private final static byte[] CNS_11643_1992_Plane_3_STR = { 0x1B, 0x24, 0x2B, 0x49 };
2182     private final static byte[] CNS_11643_1992_Plane_4_STR = { 0x1B, 0x24, 0x2B, 0x4A };
2183     private final static byte[] CNS_11643_1992_Plane_5_STR = { 0x1B, 0x24, 0x2B, 0x4B };
2184     private final static byte[] CNS_11643_1992_Plane_6_STR = { 0x1B, 0x24, 0x2B, 0x4C };
2185     private final static byte[] CNS_11643_1992_Plane_7_STR = { 0x1B, 0x24, 0x2B, 0x4D };
2186     
2187     /************************ ISO2022-CN Data *****************************/
2188     private final static byte[][] escSeqCharsCN = {
2189         SHIFT_IN_STR,
2190         GB_2312_80_STR,
2191         ISO_IR_165_STR,
2192         CNS_11643_1992_Plane_1_STR,
2193         CNS_11643_1992_Plane_2_STR,
2194         CNS_11643_1992_Plane_3_STR,
2195         CNS_11643_1992_Plane_4_STR,
2196         CNS_11643_1992_Plane_5_STR,
2197         CNS_11643_1992_Plane_6_STR,
2198         CNS_11643_1992_Plane_7_STR,
2199     };
2200     
2201     private class CharsetEncoderISO2022CN extends CharsetEncoderICU {
2202         public CharsetEncoderISO2022CN(CharsetICU cs) {
2203             super(cs, fromUSubstitutionChar[0]);
2204         }
2205         
2206         protected void implReset() {
2207             super.implReset();
2208             myConverterData.reset();
2209         }
2210         
2211         /* This overrides the cbFromUWriteSub method in CharsetEncoderICU */
2212         CoderResult cbFromUWriteSub (CharsetEncoderICU encoder, 
2213             CharBuffer source, ByteBuffer target, IntBuffer offsets){
2214             CoderResult err = CoderResult.UNDERFLOW;
2215             byte[] buffer = new byte[8];
2216             int i = 0;
2217             byte[] subchar;
2218             subchar = encoder.replacement();
2219             
2220             if (myConverterData.fromU2022State.g != 0) {
2221                 /* not in ASCII mode: switch to ASCII */
2222                 myConverterData.fromU2022State.g = 0;
2223                 buffer[i++] = UConverterConstants.SI;
2224             }
2225             buffer[i++] = subchar[0];
2226             
2227             err = CharsetEncoderICU.fromUWriteBytes(this, buffer, 0, i, target, offsets, source.position() - 1);
2228
2229             return err;
2230         }
2231         
2232         protected CoderResult encodeLoop(CharBuffer source, ByteBuffer target, IntBuffer offsets, boolean flush) {
2233             CoderResult err = CoderResult.UNDERFLOW;
2234             int sourceChar;
2235             byte[] buffer = new byte[8];
2236             int len;
2237             byte[] choices = new byte[3];
2238             int choiceCount;
2239             int targetValue = 0;
2240             boolean usingFallback;
2241             boolean gotoGetTrail = false;
2242             int oldSourcePos; // For proper error handling
2243             
2244             choiceCount = 0;
2245             
2246             /* check if the last codepoint of previous buffer was a lead surrogate */
2247             if ((sourceChar = fromUChar32) != 0 && target.hasRemaining()) {
2248                 // goto getTrail label
2249                 gotoGetTrail = true; 
2250             }
2251             
2252             while (source.hasRemaining() || gotoGetTrail) {
2253                 if (target.hasRemaining() || gotoGetTrail) {
2254                     oldSourcePos = source.position();
2255                     if (!gotoGetTrail) {
2256                         sourceChar = source.get();
2257                     }
2258                     /* check if the char is a First surrogate */
2259                     if (UTF16.isSurrogate((char)sourceChar) || gotoGetTrail) {
2260                         if (UTF16.isLeadSurrogate((char)sourceChar) || gotoGetTrail) {
2261 // getTrail label
2262                             /* reset gotoGetTrail flag*/
2263                              gotoGetTrail = false;
2264                             
2265                             /* look ahead to find the trail surrogate */
2266                             if (source.hasRemaining()) {
2267                                 /* test the following code unit */
2268                                 char trail = source.get();
2269                                 source.position(source.position()-1);
2270                                 if (UTF16.isTrailSurrogate(trail)) {
2271                                     source.get();
2272                                     sourceChar = UCharacter.getCodePoint((char)sourceChar, trail);
2273                                     fromUChar32 = 0x00;
2274                                     /* convert this supplementary code point */
2275                                     /* exit this condition tree */
2276                                 } else {
2277                                     /* this is an unmatched lead code unit (1st surrogate) */
2278                                     /* callback(illegal) */
2279                                     err = CoderResult.malformedForLength(1);
2280                                     fromUChar32 = sourceChar;
2281                                     break;
2282                                 }
2283                             } else {
2284                                 /* no more input */
2285                                 fromUChar32 = sourceChar;
2286                                 break;
2287                             }
2288                         } else {
2289                             /* this is an unmatched trail code unit (2nd surrogate) */
2290                             /* callback(illegal) */
2291                             err = CoderResult.malformedForLength(1);
2292                             fromUChar32 = sourceChar;
2293                             break;
2294                         }
2295                     }
2296                     
2297                     /* do the conversion */
2298                     if (sourceChar <= 0x007f) {
2299                         /* do not converter SO/SI/ESC */
2300                         if (IS_2022_CONTROL(sourceChar)) {
2301                             /* callback(illegal) */
2302                             err = CoderResult.malformedForLength(1);
2303                             fromUChar32 = sourceChar;
2304                             break;
2305                         }
2306                         
2307                         /* US-ASCII */
2308                         if (myConverterData.fromU2022State.g == 0) {
2309                             buffer[0] = (byte)sourceChar;
2310                             len = 1;
2311                         } else {
2312                             buffer[0] = UConverterConstants.SI;
2313                             buffer[1] = (byte)sourceChar;
2314                             len = 2;
2315                             myConverterData.fromU2022State.g = 0;
2316                             choiceCount = 0;
2317                         }
2318                         
2319                         if (sourceChar == CR || sourceChar == LF) {
2320                             /* reset the state at the end of a line */
2321                             myConverterData.fromU2022State.reset();
2322                             choiceCount = 0;
2323                         }
2324                     } else {
2325                         /* convert U+0080..U+10ffff */
2326                         int i;
2327                         byte cs, g;
2328                         
2329                         if (choiceCount == 0) {
2330                             /* try the current SO/G1 converter first */
2331                             choices[0] = myConverterData.fromU2022State.cs[1];
2332                             
2333                             /* default to GB2312_1 if none is designated yet */
2334                             if (choices[0] == 0) {
2335                                 choices[0] = GB2312_1;
2336                             }
2337                             if (myConverterData.version == 0) {
2338                                 /* ISO-2022-CN */
2339                                 /* try other SO/G1 converter; a CNS_11643_1 lookup may result in any plane */
2340                                 if (choices[0] == GB2312_1) {
2341                                     choices[1] = CNS_11643_1;
2342                                 } else {
2343                                     choices[1] = GB2312_1;
2344                                 }
2345                                 
2346                                 choiceCount = 2;
2347                             } else if (myConverterData.version == 1) {
2348                                 /* ISO-2022-CN-EXT */
2349                                 
2350                                 /* try one of the other converters */
2351                                 switch (choices[0]) {
2352                                 case GB2312_1:
2353                                     choices[1] = CNS_11643_1;
2354                                     choices[2] = ISO_IR_165;
2355                                     break;
2356                                 case ISO_IR_165:
2357                                     choices[1] = GB2312_1;
2358                                     choices[2] = CNS_11643_1;
2359                                     break;
2360                                 default :
2361                                     choices[1] = GB2312_1;
2362                                     choices[2] = ISO_IR_165;
2363                                     break;
2364                                 }
2365                                 
2366                                 choiceCount = 3;
2367                             } else {
2368                                 /* ISO-2022-CN-CNS */
2369                                 choices[0] = CNS_11643_1;
2370                                 choices[1] = GB2312_1;
2371                                 
2372                                 choiceCount = 2;
2373                             }
2374                         }
2375                         
2376                         cs = g = 0;
2377                         /*
2378                          * len==0:  no mapping found yet
2379                          * len<0:   found a fallback result: continue looking for a roundtrip but no further fallbacks
2380                          * len>0:   found a roundtrip result, done
2381                          */
2382                         len = 0;
2383                         /*
2384                          * We will turn off usingFallback after finding a fallback,
2385                          * but we still get fallbacks from PUA code points as usual.
2386                          * Therefore, we will also need to check that we don't overwrite
2387                          * an early fallback with a later one.
2388                          */
2389                         usingFallback = useFallback;
2390                         
2391                         for (i = 0; i < choiceCount && len <= 0; ++i) {
2392                             byte cs0 = choices[i];
2393                             if (cs0 > 0) {
2394                                 int[] value = new int[1];
2395                                 int len2;
2396                                 if (cs0 > CNS_11643_0) {
2397                                     myConverterData.currentConverter.sharedData = myConverterData.myConverterArray[CNS_11643];
2398                                     myConverterData.currentConverter.sharedData.mbcs.outputType = CharsetMBCS.MBCS_OUTPUT_3;
2399                                     len2 = myConverterData.currentEncoder.fromUChar32(sourceChar, value, usingFallback);
2400                                     //len2 = MBCSFromUChar32_ISO2022(myConverterData.myConverterArray[CNS_11643],
2401                                     //        sourceChar, value, usingFallback, CharsetMBCS.MBCS_OUTPUT_3);
2402                                     if (len2 == 3 || (len2 == -3 && len == 0)) {
2403                                         targetValue = value[0];
2404                                         cs = (byte)(CNS_11643_0 + (value[0] >> 16) - 0x80);
2405                                         if (len2 >= 0) {
2406                                             len = 2;
2407                                         } else {
2408                                             len = -2;
2409                                             usingFallback = false;
2410                                         }
2411                                         if (cs == CNS_11643_1) {
2412                                             g = 1;
2413                                         } else if (cs == CNS_11643_2) {
2414                                             g = 2;
2415                                         } else if (myConverterData.version == 1) { /* plane 3..7 */
2416                                             g = 3;
2417                                         } else {
2418                                             /* ISO-2022-CN (without -EXT) does not support plane 3..7 */
2419                                             len = 0;
2420                                         }
2421                                     }
2422                                 } else {
2423                                     /* GB2312_1 or ISO-IR-165 */
2424                                     myConverterData.currentConverter.sharedData = myConverterData.myConverterArray[cs0];
2425                                     myConverterData.currentConverter.sharedData.mbcs.outputType = CharsetMBCS.MBCS_OUTPUT_2;
2426                                     len2 = myConverterData.currentEncoder.fromUChar32(sourceChar, value, usingFallback);
2427                                     //len2 = MBCSFromUChar32_ISO2022(myConverterData.myConverterArray[cs0],
2428                                     //        sourceChar, value, usingFallback, CharsetMBCS.MBCS_OUTPUT_2);
2429                                     if (len2 == 2 || (len2 == -2 && len == 0)) {
2430                                         targetValue = value[0];
2431                                         len = len2;
2432                                         cs = cs0;
2433                                         g = 1;
2434                                         usingFallback = false;
2435                                     }
2436                                 }
2437                             }
2438                         }
2439                         
2440                         if (len != 0) {
2441                             len = 0; /* count output bytes; it must have ben abs(len) == 2 */
2442                             
2443                             /* write the designation sequence if necessary */
2444                             if (cs != myConverterData.fromU2022State.cs[g]) {
2445                                 if (cs < CNS_11643) {
2446                                     for (int n = 0; n < escSeqCharsCN[cs].length; n++) {
2447                                         buffer[n] = escSeqCharsCN[cs][n];
2448                                     }
2449                                 } else {
2450                                     for (int n = 0; n < escSeqCharsCN[CNS_11643 + (cs - CNS_11643_1)].length; n++) {
2451                                         buffer[n] = escSeqCharsCN[CNS_11643 + (cs - CNS_11643_1)][n];
2452                                     }
2453                                 }
2454                                 len = 4;
2455                                 myConverterData.fromU2022State.cs[g] = cs;
2456                                 if (g == 1) {
2457                                     /* changing the SO/G1 charset invalidates the choices[] */
2458                                     choiceCount = 0;
2459                                 }
2460                             }
2461                             
2462                             /* write the shift sequence if necessary */
2463                             if (g != myConverterData.fromU2022State.g) {
2464                                 switch (g) {
2465                                 case 1:
2466                                     buffer[len++] = UConverterConstants.SO;
2467                                     
2468                                     /* set the new state only if it is the locking shift SO/G1, not for SS2 or SS3 */
2469                                     myConverterData.fromU2022State.g = 1;
2470                                     break;
2471                                 case 2:
2472                                     buffer[len++] = 0x1b;
2473                                     buffer[len++] = 0x4e;
2474                                     break;
2475                                 default: /* case 3 */
2476                                     buffer[len++] = 0x1b;
2477                                     buffer[len++] = 0x4f;
2478                                     break;
2479                                 }
2480                             }
2481                             
2482                             /* write the two output bytes */
2483                             buffer[len++] = (byte)(targetValue >> 8);
2484                             buffer[len++] = (byte)targetValue;
2485                         } else {
2486                             /* if we cannot find the character after checking all codepages
2487                              * then this is an error
2488                              */
2489                             err = CoderResult.unmappableForLength(source.position()-oldSourcePos);
2490                             fromUChar32 = sourceChar;
2491                             break;
2492                         }
2493                     }
2494                     /* output len>0 bytes in buffer[] */
2495                     if (len == 1) {
2496                         target.put(buffer[0]);
2497                         if (offsets != null) {
2498                             offsets.put(source.position()-1);
2499                         }
2500                     } else if (len == 2 && (target.remaining() >= 2)) {
2501                         target.put(buffer[0]);
2502                         target.put(buffer[1]);
2503                         if (offsets != null) {
2504                             int sourceIndex = source.position();
2505                             offsets.put(sourceIndex);
2506                             offsets.put(sourceIndex);
2507                         }
2508                     } else {
2509                         err = CharsetEncoderICU.fromUWriteBytes(this, buffer, 0, len, target, offsets, source.position()-1);
2510                         if (err.isError()) {
2511                             break;
2512                         }
2513                     }
2514                 } else {
2515                     err = CoderResult.OVERFLOW;
2516                     break;
2517                 }
2518             } /* end while (source.hasRemaining() */
2519             
2520             /*
2521              * the end of the input stream and detection of truncated input
2522              * are handled by the framework, but for ISO-2022-CN conversion
2523              * we need to be in ASCII mode at the very end
2524              * 
2525              * conditions:
2526              *   successful
2527              *   not in ASCII mode
2528              *   end of input and no truncated input
2529              */
2530             if (!err.isError() && myConverterData.fromU2022State.g != 0 && flush && !source.hasRemaining() && fromUChar32 == 0) {
2531                 int sourceIndex;
2532                 
2533                 /* we are switching to ASCII */
2534                 myConverterData.fromU2022State.g = 0;
2535                 
2536                 /* get the source index of the last input character */
2537                 sourceIndex = source.position();
2538                 if (sourceIndex > 0) {
2539                     --sourceIndex;
2540                     if (UTF16.isTrailSurrogate(source.get(sourceIndex)) && 
2541                             (sourceIndex == 0 || UTF16.isLeadSurrogate(source.get(sourceIndex-1)))) {
2542                         --sourceIndex;
2543                     }
2544                 } else {
2545                     sourceIndex = -1;
2546                 }
2547                 
2548                 err = CharsetEncoderICU.fromUWriteBytes(this, SHIFT_IN_STR, 0, 1, target, offsets, sourceIndex);
2549             }
2550             
2551             return err;
2552         }
2553     }
2554     /******************************** ISO-2022-KR *****************************/
2555     /*
2556      *   Rules for ISO-2022-KR encoding
2557      *   i) The KSC5601 designator sequence should appear only once in a file,
2558      *      at the beginning of a line before any KSC5601 characters. This usually
2559      *      means that it appears by itself on the first line of the file
2560      *  ii) There are only 2 shifting sequences SO to shift into double byte mode
2561      *      and SI to shift into single byte mode
2562      */
2563     private class CharsetEncoderISO2022KR extends CharsetEncoderICU {
2564         public CharsetEncoderISO2022KR(CharsetICU cs) {
2565             super(cs, fromUSubstitutionChar[myConverterData.version]);
2566         }
2567         
2568         protected void implReset() {
2569             super.implReset();
2570             myConverterData.reset();
2571             setInitialStateFromUnicodeKR(this);
2572         }
2573         
2574         /* This overrides the cbFromUWriteSub method in CharsetEncoderICU */
2575         CoderResult cbFromUWriteSub (CharsetEncoderICU encoder, 
2576             CharBuffer source, ByteBuffer target, IntBuffer offsets){
2577             CoderResult err = CoderResult.UNDERFLOW;
2578             byte[] buffer = new byte[8];
2579             int length, i = 0;
2580             byte[] subchar;
2581             
2582             subchar = encoder.replacement();
2583             length = subchar.length;
2584             
2585             if (myConverterData.version == 0) {
2586                 if (length == 1) {
2587                     if (encoder.fromUnicodeStatus != 0) {
2588                         /* in DBCS mode: switch to SBCS */
2589                         encoder.fromUnicodeStatus = 0;
2590                         buffer[i++] = UConverterConstants.SI;
2591                     }
2592                     buffer[i++] = subchar[0];
2593                 } else { /* length == 2 */
2594                     if (encoder.fromUnicodeStatus == 0) {
2595                         /* in SBCS mode: switch to DBCS */
2596                         encoder.fromUnicodeStatus = 1;
2597                         buffer[i++] = UConverterConstants.SO;
2598                     }
2599                     buffer[i++] = subchar[0];
2600                     buffer[i++] = subchar[1];
2601                 }
2602                 err = CharsetEncoderICU.fromUWriteBytes(this, buffer, 0, i, target, offsets, source.position() - 1);
2603             } else { 
2604                 /* save the subvonverter's substitution string */
2605                 byte[] currentSubChars = myConverterData.currentEncoder.replacement();
2606                 
2607                 /* set our substitution string into the subconverter */
2608                 myConverterData.currentEncoder.replaceWith(subchar);
2609                 myConverterData.currentConverter.subChar1 = fromUSubstitutionChar[0][0];
2610                 /* let the subconverter write the subchar, set/retrieve fromUChar32 state */
2611                 myConverterData.currentEncoder.fromUChar32 = encoder.fromUChar32;
2612                 err = myConverterData.currentEncoder.cbFromUWriteSub(myConverterData.currentEncoder, source, target, offsets);
2613                 encoder.fromUChar32 = myConverterData.currentEncoder.fromUChar32;
2614                 
2615                 /* restore the subconverter's substitution string */
2616                 myConverterData.currentEncoder.replaceWith(currentSubChars);
2617                 
2618                 if (err.isOverflow()) {
2619                     if (myConverterData.currentEncoder.errorBufferLength > 0) {
2620                         encoder.errorBuffer = myConverterData.currentEncoder.errorBuffer.clone();
2621                     }
2622                     encoder.errorBufferLength = myConverterData.currentEncoder.errorBufferLength;
2623                     myConverterData.currentEncoder.errorBufferLength = 0;
2624                 }
2625             }
2626             
2627             return err;
2628         }
2629         
2630         private CoderResult encodeLoopIBM(CharBuffer source, ByteBuffer target, IntBuffer offsets, boolean flush) {
2631             CoderResult err = CoderResult.UNDERFLOW;
2632
2633             myConverterData.currentEncoder.fromUChar32 = fromUChar32;
2634             err = myConverterData.currentEncoder.cnvMBCSFromUnicodeWithOffsets(source, target, offsets, flush);
2635             fromUChar32 = myConverterData.currentEncoder.fromUChar32;
2636             
2637             if (err.isOverflow()) {
2638                 if (myConverterData.currentEncoder.errorBufferLength > 0) {
2639                     errorBuffer = myConverterData.currentEncoder.errorBuffer.clone();
2640                 }
2641                 errorBufferLength = myConverterData.currentEncoder.errorBufferLength;
2642                 myConverterData.currentEncoder.errorBufferLength = 0;
2643             }
2644             
2645             return err;
2646         }
2647         
2648         protected CoderResult encodeLoop(CharBuffer source, ByteBuffer target, IntBuffer offsets, boolean flush) {
2649             CoderResult err = CoderResult.UNDERFLOW;
2650             int[] targetByteUnit = { 0x0000 };
2651             int sourceChar = 0x0000;
2652             boolean isTargetByteDBCS;
2653             boolean oldIsTargetByteDBCS;
2654             boolean usingFallback;
2655             int length = 0;
2656             boolean gotoGetTrail = false; // for goto getTrail label call
2657             
2658             /*
2659              * if the version is 1 then the user is requesting
2660              * conversion with ibm-25546 pass the argument to
2661              * MBCS converter and return
2662              */
2663             if (myConverterData.version == 1) {
2664                 return encodeLoopIBM(source, target, offsets, flush);
2665             }
2666             
2667             usingFallback = useFallback;
2668             isTargetByteDBCS = fromUnicodeStatus == 0 ? false : true;
2669             if ((sourceChar = fromUChar32) != 0 && target.hasRemaining()) {
2670                 gotoGetTrail = true;
2671             }
2672             
2673             while (source.hasRemaining() || gotoGetTrail) {
2674                 targetByteUnit[0] = UConverterConstants.missingCharMarker;
2675                 
2676                 if (target.hasRemaining() || gotoGetTrail) {
2677                     if (!gotoGetTrail) {
2678                         sourceChar = source.get();
2679                     
2680                         /* do not convert SO/SI/ESC */
2681                         if (IS_2022_CONTROL(sourceChar)) {
2682                             /* callback(illegal) */
2683                             err = CoderResult.malformedForLength(1);
2684                             fromUChar32 = sourceChar;
2685                             break;
2686                         }
2687                         myConverterData.currentConverter.sharedData.mbcs.outputType = CharsetMBCS.MBCS_OUTPUT_2;
2688                         length = myConverterData.currentEncoder.fromUChar32(sourceChar, targetByteUnit, usingFallback);
2689                         //length = MBCSFromUChar32_ISO2022(myConverterData.currentConverter.sharedData, sourceChar, targetByteUnit, usingFallback, CharsetMBCS.MBCS_OUTPUT_2); 
2690                         if (length < 0) {
2691                             length = -length; /* fallback */
2692                         }
2693                         /* only DBCS or SBCS characters are expected */
2694                         /* DB characters with high bit set to 1 are expected */
2695                         if (length > 2 || length == 0 ||
2696                                 (length == 1 && targetByteUnit[0] > 0x7f) ||
2697                                 (length ==2 &&
2698                                         ((char)(targetByteUnit[0] - 0xa1a1) > (0xfefe - 0xa1a1) ||
2699                                         ((targetByteUnit[0] - 0xa1) & UConverterConstants.UNSIGNED_BYTE_MASK) > (0xfe - 0xa1)))) {
2700                             targetByteUnit[0] = UConverterConstants.missingCharMarker;
2701                         }
2702                     }
2703                     if (!gotoGetTrail && targetByteUnit[0] != UConverterConstants.missingCharMarker) {
2704                         oldIsTargetByteDBCS = isTargetByteDBCS;
2705                         isTargetByteDBCS = (targetByteUnit[0] > 0x00FF);
2706                         /* append the shift sequence */
2707                         if (oldIsTargetByteDBCS != isTargetByteDBCS) {
2708                             if (isTargetByteDBCS) {
2709                                 target.put((byte)UConverterConstants.SO);
2710                             } else {
2711                                 target.put((byte)UConverterConstants.SI);
2712                             }
2713                             if (offsets != null) {
2714                                 offsets.put(source.position()-1);
2715                             }
2716                         }
2717                         /* write the targetUniChar to target */
2718                         if (targetByteUnit[0] <= 0x00FF) {
2719                             if (target.hasRemaining()) {
2720                                 target.put((byte)targetByteUnit[0]);
2721                                 if (offsets != null) {
2722                                     offsets.put(source.position()-1);
2723                                 }
2724                             } else {
2725                                 errorBuffer[errorBufferLength++] = (byte)targetByteUnit[0];
2726                                 err = CoderResult.OVERFLOW;
2727                             }
2728                         } else {
2729                             if (target.hasRemaining()) {
2730                                 target.put((byte)(UConverterConstants.UNSIGNED_BYTE_MASK & ((targetByteUnit[0]>>8) - 0x80)));
2731                                 if (offsets != null) {
2732                                     offsets.put(source.position()-1);
2733                                 }
2734                                 if (target.hasRemaining()) {
2735                                     target.put((byte)(UConverterConstants.UNSIGNED_BYTE_MASK & (targetByteUnit[0]- 0x80)));
2736                                     if (offsets != null) {
2737                                         offsets.put(source.position()-1);
2738                                     }
2739                                 } else {
2740                                     errorBuffer[errorBufferLength++] = (byte)(UConverterConstants.UNSIGNED_BYTE_MASK & (targetByteUnit[0] - 0x80));
2741                                     err = CoderResult.OVERFLOW;
2742                                 }
2743                                 
2744                             } else {
2745                                 errorBuffer[errorBufferLength++] = (byte)(UConverterConstants.UNSIGNED_BYTE_MASK & ((targetByteUnit[0]>>8) - 0x80));
2746                                 errorBuffer[errorBufferLength++] = (byte)(UConverterConstants.UNSIGNED_BYTE_MASK & (targetByteUnit[0]- 0x80));
2747                                 err = CoderResult.OVERFLOW;
2748                             }
2749                         }
2750                     } else {
2751                         /* oops.. the code point is unassigned
2752                          * set the error and reason
2753                          */
2754                         
2755                         /* check if the char is a First surrogate */
2756                         if (gotoGetTrail || UTF16.isSurrogate((char)sourceChar)) {
2757                             if (gotoGetTrail || UTF16.isLeadSurrogate((char)sourceChar)) {
2758 // getTrail label
2759                                 // reset gotoGetTrail flag
2760                                 gotoGetTrail = false;
2761                                 
2762                                 /* look ahead to find the trail surrogate */
2763                                 if (source.hasRemaining()) {
2764                                     /* test the following code unit */
2765                                     char trail = source.get();
2766                                     source.position(source.position()-1);
2767                                     if (UTF16.isTrailSurrogate(trail)) {
2768                                         source.get();
2769                                          sourceChar = UCharacter.getCodePoint((char)sourceChar, trail);
2770                                          err = CoderResult.unmappableForLength(2);
2771                                          /* convert this surrogate code point */
2772                                          /* exit this condition tree */
2773                                     } else {
2774                                         /* this is an unmatched lead code unit (1st surrogate) */
2775                                         /* callback(illegal) */
2776                                         err = CoderResult.malformedForLength(1);
2777                                     }
2778                                 } else {
2779                                     /* no more input */
2780                                     err = CoderResult.UNDERFLOW;
2781                                 }
2782                             } else {
2783                                 /* this is an unmatched trail code unit (2nd surrogate ) */
2784                                 /* callback(illegal) */
2785                                 err = CoderResult.malformedForLength(1);
2786                             }
2787                         } else {
2788                             /* callback(unassigned) for a BMP code point */
2789                             err = CoderResult.unmappableForLength(1);
2790                         }
2791                         
2792                         fromUChar32 = sourceChar;
2793                         break;
2794                     }
2795                 } else {
2796                     err = CoderResult.OVERFLOW;
2797                     break;
2798                 }
2799             }
2800             /*
2801              * the end of the input stream and detection of truncated input
2802              * are handled by the framework, but for ISO-2022-KR conversion
2803              * we need to be inASCII mode at the very end
2804              * 
2805              * conditions:
2806              *  successful
2807              *  not in ASCII mode
2808              *  end of  input and no truncated input
2809              */
2810             if (!err.isError() && isTargetByteDBCS && flush && !source.hasRemaining() && fromUChar32 == 0) {
2811                 int sourceIndex;
2812                 
2813                 /* we are switching to ASCII */
2814                 isTargetByteDBCS = false;
2815                 
2816                 /* get the source index of the last input character */
2817                 sourceIndex = source.position();
2818                 if (sourceIndex > 0) {
2819                     --sourceIndex;
2820                     if (UTF16.isTrailSurrogate(source.get(sourceIndex)) && UTF16.isLeadSurrogate(source.get(sourceIndex-1))) {
2821                         --sourceIndex;
2822                     }
2823                 } else {
2824                     sourceIndex = -1;
2825                 }
2826                 
2827                 CharsetEncoderICU.fromUWriteBytes(this, SHIFT_IN_STR, 0, 1, target, offsets, sourceIndex);
2828             }
2829             /*save the state and return */
2830             fromUnicodeStatus = isTargetByteDBCS ? 1 : 0;
2831             
2832             return err;
2833         }
2834     }
2835     
2836     public CharsetDecoder newDecoder() {
2837         switch (variant) {
2838         case ISO_2022_JP:
2839             return new CharsetDecoderISO2022JP(this);
2840         
2841         case ISO_2022_CN:
2842             return new CharsetDecoderISO2022CN(this);
2843             
2844         case ISO_2022_KR:
2845             setInitialStateToUnicodeKR();
2846             return new CharsetDecoderISO2022KR(this);
2847             
2848         default: /* should not happen */
2849             return null;
2850         }
2851     }
2852     
2853     public CharsetEncoder newEncoder() {
2854         CharsetEncoderICU cnv;
2855         
2856         switch (variant) {
2857         case ISO_2022_JP:
2858             return new CharsetEncoderISO2022JP(this);
2859             
2860         case ISO_2022_CN:
2861             return new CharsetEncoderISO2022CN(this);
2862             
2863         case ISO_2022_KR:
2864             cnv = new CharsetEncoderISO2022KR(this);
2865             setInitialStateFromUnicodeKR(cnv);
2866             return cnv;
2867             
2868         default: /* should not happen */
2869             return null;
2870         }
2871     }
2872     
2873     private void setInitialStateToUnicodeKR() {
2874         if (myConverterData.version == 1) {
2875             myConverterData.currentDecoder.toUnicodeStatus = 0;     /* offset */
2876             myConverterData.currentDecoder.mode = 0;                /* state */
2877             myConverterData.currentDecoder.toULength = 0;           /* byteIndex */
2878         }
2879     }
2880     private void setInitialStateFromUnicodeKR(CharsetEncoderICU cnv) {
2881         /* ISO-2022-KR the designator sequence appears only once
2882          * in a file so we append it only once
2883          */
2884         if (cnv.errorBufferLength == 0) {
2885             cnv.errorBufferLength = 4;
2886             cnv.errorBuffer[0] = 0x1b;
2887             cnv.errorBuffer[1] = 0x24;
2888             cnv.errorBuffer[2] = 0x29;
2889             cnv.errorBuffer[3] = 0x43;
2890         }
2891         if (myConverterData.version == 1) {
2892             ((CharsetMBCS)myConverterData.currentEncoder.charset()).subChar1 = 0x1A;
2893             myConverterData.currentEncoder.fromUChar32 = 0;
2894             myConverterData.currentEncoder.fromUnicodeStatus = 1; /* prevLength */
2895         }
2896     }
2897     
2898     void getUnicodeSetImpl(UnicodeSet setFillIn, int which) {
2899         int i;
2900         /*open a set and initialize it with code points that are algorithmically round-tripped */
2901         
2902         switch(variant){
2903         case ISO_2022_JP:
2904            /*include JIS X 0201 which is hardcoded */
2905             setFillIn.add(0xa5);
2906             setFillIn.add(0x203e);
2907             if((jpCharsetMasks[myConverterData.version]&CSM(ISO8859_1))!=0){
2908                 /*include Latin-1 some variants of JP */
2909                 setFillIn.add(0, 0xff);
2910             
2911             }
2912             else {
2913                 /* include ASCII for JP */
2914                 setFillIn.add(0, 0x7f);
2915              }
2916             if(myConverterData.version==3 || myConverterData.version==4 ||which == ROUNDTRIP_AND_FALLBACK_SET){
2917             /*
2918              * Do not test(jpCharsetMasks[myConverterData.version]&CSM(HWKANA_7BIT))!=0 because the bit
2919              * is on for all JP versions although version 3 & 4 (JIS7 and JIS8) use half-width Katakana.
2920              * This is because all ISO_2022_JP variant are lenient in that they accept (in toUnicode) half-width
2921              * Katakana via ESC.
2922              * However, we only emit (fromUnicode) half-width Katakana according to the
2923              * definition of each variant.
2924              *
2925              * When including fallbacks,
2926              * we need to include half-width Katakana Unicode code points for all JP variants because
2927              * JIS X 0208 has hardcoded fallbacks for them (which map to full-width Katakana).
2928              */
2929             /* include half-width Katakana for JP */
2930                 setFillIn.add(HWKANA_START, HWKANA_END);
2931              }
2932             break;
2933         case ISO_2022_CN:
2934             /* Include ASCII for CN */
2935             setFillIn.add(0, 0x7f);
2936             break;
2937         case ISO_2022_KR:
2938             /* there is only one converter for KR */
2939           myConverterData.currentConverter.getUnicodeSetImpl(setFillIn, which);
2940           break;
2941         default:
2942             break;
2943         }
2944         
2945         //TODO Replaced by ucnv_MBCSGetFilteredUnicodeSetForUnicode() until
2946         for(i=0; i<UCNV_2022_MAX_CONVERTERS;i++){
2947             int filter;
2948             if(myConverterData.myConverterArray[i]!=null){
2949                 if(variant==ISO_2022_CN && myConverterData.version==0 && i==CNS_11643){
2950                     /*
2951                      * 
2952                      * version -specific for CN:
2953                      * CN version 0 does not map CNS planes 3..7 although
2954                      * they are all available in the CNS conversion table;
2955                      * CN version 1 (-EXT) does map them all.
2956                      * The two versions create different Unicode sets.
2957                      */
2958                     filter=CharsetMBCS.UCNV_SET_FILTER_2022_CN;
2959                 } else if(variant==ISO_2022_JP && i == JISX208){
2960                     /* 
2961                      * Only add code points that map to Shift-JIS codes
2962                      * corrosponding to JIS X 208
2963                      */
2964                     filter=CharsetMBCS.UCNV_SET_FILTER_SJIS;
2965                 } else if(i==KSC5601){
2966                     /*
2967                      * Some of the KSC 5601 tables (Convrtrs.txt has this aliases on multiple tables)
2968                      * are broader than GR94.
2969                      */
2970                     filter=CharsetMBCS.UCNV_SET_FILTER_GR94DBCS;
2971                 } else {
2972                     filter=CharsetMBCS.UCNV_SET_FILTER_NONE;
2973                 }
2974                 
2975                 myConverterData.currentConverter.MBCSGetFilteredUnicodeSetForUnicode(myConverterData.myConverterArray[i],setFillIn, which, filter);
2976            }
2977         }
2978         /*
2979          * ISO Converter must not convert SO/SI/ESC despite what sub-converters do by themselves
2980          * Remove these characters from the set.
2981          */
2982         setFillIn.remove(0x0e);
2983         setFillIn.remove(0x0f);
2984         setFillIn.remove(0x1b);
2985         
2986         /* ISO 2022 converter do not convert C! controls either */
2987         setFillIn.remove(0x80, 0x9f);
2988     }
2989 }
2990
2991
2992
2993
2994
2995
2996
2997
2998