]> gitweb.fperrin.net Git - Dictionary.git/blob - jars/icu4j-4_2_1-src/src/com/ibm/icu/charset/CharsetISO2022.java
icu4jsrc
[Dictionary.git] / jars / icu4j-4_2_1-src / src / com / ibm / icu / charset / CharsetISO2022.java
1 /*\r
2  *******************************************************************************\r
3  * Copyright (C) 2008-2009, International Business Machines Corporation and         *\r
4  * others. All Rights Reserved.                                                *\r
5  *******************************************************************************\r
6  */\r
7 package com.ibm.icu.charset;\r
8 \r
9 import java.nio.ByteBuffer;\r
10 import java.nio.CharBuffer;\r
11 import java.nio.IntBuffer;\r
12 import java.nio.charset.CharsetDecoder;\r
13 import java.nio.charset.CharsetEncoder;\r
14 import java.nio.charset.CoderResult;\r
15 import java.util.Arrays;\r
16 \r
17 import com.ibm.icu.charset.CharsetMBCS.CharsetDecoderMBCS;\r
18 import com.ibm.icu.charset.CharsetMBCS.CharsetEncoderMBCS;\r
19 import com.ibm.icu.lang.UCharacter;\r
20 import com.ibm.icu.text.UTF16;\r
21 import com.ibm.icu.text.UnicodeSet;\r
22 \r
23 class CharsetISO2022 extends CharsetICU {\r
24     private UConverterDataISO2022 myConverterData;\r
25     private int variant;           // one of enum {ISO_2022_JP, ISO_2022_KR, or ISO_2022_CN}\r
26     \r
27     private static final byte[] SHIFT_IN_STR    = { 0x0f };\r
28 //    private static final byte[] SHIFT_OUT_STR   = { 0x0e };\r
29 \r
30     private static final byte CR    = 0x0D;\r
31     private static final byte LF    = 0x0A;\r
32 /*\r
33     private static final byte H_TAB = 0x09;\r
34     private static final byte SPACE = 0x20;\r
35 */\r
36     private static final char HWKANA_START  = 0xff61;\r
37     private static final char HWKANA_END    = 0xff9f;\r
38     \r
39     /*\r
40      * 94-character sets with native byte values A1..FE are encoded in ISO 2022\r
41      * as bytes 21..7E. (Subtract 0x80.)\r
     * 96-character sets with native byte values A0..FF are encoded in ISO 2022
43      * as bytes 20..7F. (Subtract 0x80.)\r
44      * Do not encode C1 control codes with native bytes 80..9F\r
45      * as bytes 00..1F (C0 control codes).\r
46      */\r
47 /*\r
48     private static final char GR94_START    = 0xa1;\r
49     private static final char GR94_END      = 0xfe;\r
50 */\r
51     private static final char GR96_START    = 0xa0;\r
52     private static final char GR96_END      = 0xff;\r
53     \r
54     /* for ISO-2022-JP and -CN implementations */\r
55     // typedef enum {\r
56         /* shared values */\r
57         private static final byte INVALID_STATE = -1;\r
58         private static final byte ASCII         = 0;\r
59         \r
60         private static final byte SS2_STATE = 0x10;\r
61         private static final byte SS3_STATE = 0x11;\r
62         \r
63         /* JP */\r
64         private static final byte ISO8859_1 = 1;\r
65         private static final byte ISO8859_7 = 2;\r
66         private static final byte JISX201   = 3;\r
67         private static final byte JISX208   = 4;\r
68         private static final byte JISX212   = 5;\r
69         private static final byte GB2312    = 6;\r
70         private static final byte KSC5601   = 7;\r
71         private static final byte HWKANA_7BIT  = 8; /* Halfwidth Katakana 7 bit */\r
72         \r
73         /* CN */\r
        /* the first few enum constants must keep their values because they correspond to indexes into myConverterArray[] */
75         private static final byte GB2312_1  = 1;\r
76         private static final byte ISO_IR_165= 2;\r
77         private static final byte CNS_11643 = 3;\r
78         \r
79         /*\r
80          * these are used in StateEnum and ISO2022State variables,\r
81          * but CNS_11643 must be used to index into myConverterArray[]\r
82          */\r
83         private static final byte CNS_11643_0 = 0x20;\r
84         private static final byte CNS_11643_1 = 0x21;\r
85         private static final byte CNS_11643_2 = 0x22;\r
86         private static final byte CNS_11643_3 = 0x23;\r
87         private static final byte CNS_11643_4 = 0x24;\r
88         private static final byte CNS_11643_5 = 0x25;\r
89         private static final byte CNS_11643_6 = 0x26;\r
90         private static final byte CNS_11643_7 = 0x27;\r
91     // } StateEnum;\r
92     \r
93 \r
94     public CharsetISO2022(String icuCanonicalName, String javaCanonicalName, String[] aliases) {\r
95         super(icuCanonicalName, javaCanonicalName, aliases);\r
96         \r
97         myConverterData = new UConverterDataISO2022();\r
98         \r
99         int versionIndex = icuCanonicalName.indexOf("version=");\r
100         int version = Integer.decode(icuCanonicalName.substring(versionIndex+8, versionIndex+9)).intValue();\r
101         \r
102         myConverterData.version = version;\r
103         \r
104         if (icuCanonicalName.indexOf("locale=ja") > 0) {\r
105             ISO2022InitJP(version);\r
106         } else if (icuCanonicalName.indexOf("locale=zh") > 0) {\r
107             ISO2022InitCN(version);\r
108         } else /* if (icuCanonicalName.indexOf("locale=ko") > 0) */ {\r
109             ISO2022InitKR(version);\r
110         }\r
111         \r
112         myConverterData.currentEncoder = (CharsetEncoderMBCS)myConverterData.currentConverter.newEncoder();\r
113         myConverterData.currentDecoder = (CharsetDecoderMBCS)myConverterData.currentConverter.newDecoder();\r
114     }\r
115     \r
116     private void ISO2022InitJP(int version) {\r
117         variant = ISO_2022_JP;\r
118         \r
119         maxBytesPerChar = 6;\r
120         minBytesPerChar = 1;\r
121         maxCharsPerByte = 1;\r
122         // open the required converters and cache them \r
123         if((jpCharsetMasks[version]&CSM(ISO8859_7)) != 0) {\r
124             myConverterData.myConverterArray[ISO8859_7] = ((CharsetMBCS)CharsetICU.forNameICU("ISO8859_7")).sharedData;\r
125         }\r
126         // myConverterData.myConverterArray[JISX201] = ((CharsetMBCS)CharsetICU.forNameICU("jisx-201")).sharedData;\r
127         myConverterData.myConverterArray[JISX208] = ((CharsetMBCS)CharsetICU.forNameICU("Shift-JIS")).sharedData;\r
128         if ((jpCharsetMasks[version]&CSM(JISX212)) != 0) {\r
129             myConverterData.myConverterArray[JISX212] = ((CharsetMBCS)CharsetICU.forNameICU("jisx-212")).sharedData;\r
130         }\r
131         if ((jpCharsetMasks[version]&CSM(GB2312)) != 0) {\r
132             myConverterData.myConverterArray[GB2312] = ((CharsetMBCS)CharsetICU.forNameICU("ibm-5478")).sharedData;\r
133         }\r
134         if ((jpCharsetMasks[version]&CSM(KSC5601)) != 0) {\r
135             myConverterData.myConverterArray[KSC5601] = ((CharsetMBCS)CharsetICU.forNameICU("ksc_5601")).sharedData;\r
136         }\r
137         \r
138         // create a generic CharsetMBCS object\r
139         myConverterData.currentConverter = (CharsetMBCS)CharsetICU.forNameICU("icu-internal-25546");\r
140     }\r
141     \r
142     private void ISO2022InitCN(int version) {\r
143         variant = ISO_2022_CN;\r
144         \r
145         maxBytesPerChar = 8;\r
146         minBytesPerChar = 1;\r
147         maxCharsPerByte = 1;\r
148         // open the required coverters and cache them.\r
149         myConverterData.myConverterArray[GB2312_1] = ((CharsetMBCS)CharsetICU.forNameICU("ibm-5478")).sharedData;\r
150         if (version == 1) {\r
151             myConverterData.myConverterArray[ISO_IR_165] = ((CharsetMBCS)CharsetICU.forNameICU("iso-ir-165")).sharedData;\r
152         } \r
153         myConverterData.myConverterArray[CNS_11643] = ((CharsetMBCS)CharsetICU.forNameICU("cns-11643-1992")).sharedData;\r
154         \r
155         // create a generic CharsetMBCS object\r
156         myConverterData.currentConverter = (CharsetMBCS)CharsetICU.forNameICU("icu-internal-25546");\r
157     }\r
158     \r
159     private void ISO2022InitKR(int version) {\r
160         variant = ISO_2022_KR;\r
161         \r
162         maxBytesPerChar = 3;\r
163         minBytesPerChar = 1;\r
164         maxCharsPerByte = 1;\r
165         \r
166         if (version == 1) {\r
167             myConverterData.currentConverter = (CharsetMBCS)CharsetICU.forNameICU("icu-internal-25546");\r
168             myConverterData.currentConverter.subChar1 = fromUSubstitutionChar[0][0];\r
169         } else {\r
170             myConverterData.currentConverter = (CharsetMBCS)CharsetICU.forNameICU("ibm-949");\r
171         }\r
172         \r
173         myConverterData.currentEncoder = (CharsetEncoderMBCS)myConverterData.currentConverter.newEncoder();\r
174         myConverterData.currentDecoder = (CharsetDecoderMBCS)myConverterData.currentConverter.newDecoder();\r
175     }\r
176     \r
177     /*\r
178      * ISO 2022 control codes must not be converted from Unicode\r
179      * because they would mess up the byte stream.\r
180      * The bit mask 0x0800c000 has bits set at bit positions 0xe, 0xf, 0x1b\r
181      * corresponding to SO, SI, and ESC.\r
182      */\r
183     private static boolean IS_2022_CONTROL(int c) { \r
184         return (((c)<0x20) && ((((int)1<<c) & 0x0800c000) != 0));\r
185     }\r
186     \r
187     /*\r
188      * Check that the result is a 2-byte value with each byte in the range A1..FE\r
189      * (strict EUC DBCS) before accepting it and subtracting 0x80 from each byte\r
190      * to move it to the ISO 2022 range 21..7E.\r
191      * return 0 if out of range.\r
192      */\r
193     private static int _2022FromGR94DBCS(int value) {\r
194         if ((value <= 0xfefe && value >= 0xa1a1) && \r
195                 ((short)(value&UConverterConstants.UNSIGNED_BYTE_MASK) <= 0xfe && ((short)(value&UConverterConstants.UNSIGNED_BYTE_MASK) >= 0xa1))) {\r
196             return (value - 0x8080); /* shift down to 21..7e byte range */\r
197         } else {\r
198             return 0; /* not valid for ISO 2022 */\r
199         }\r
200     }\r
201     \r
202     /*\r
203      * Commented out because Ticket 5691: Call sites now check for validity. They can just += 0x8080 after that. \r
204      * \r
205      * This method does the reverse of _2022FromGR94DBCS(). Given the 2022 code point, it returns the\r
206      * 2 byte value that is in the range A1..FE for each byte. Otherwise it returns the 2022 code point\r
207      * unchanged. \r
208      * \r
209     private static int _2022ToGR94DBCS(int value) {\r
210         int returnValue = value + 0x8080;\r
211         \r
212         if ((returnValue <= 0xfefe && returnValue >= 0xa1a1) && \r
213                 ((short)(returnValue&UConverterConstants.UNSIGNED_BYTE_MASK) <= 0xfe && ((short)(returnValue&UConverterConstants.UNSIGNED_BYTE_MASK) >= 0xa1))) {\r
214             return returnValue;\r
215         } else {\r
216             return value;\r
217         }\r
218     }*/\r
219     \r
220     /* is the StateEnum charset value for a DBCS charset? */\r
221     private static boolean IS_JP_DBCS(byte cs) {\r
222         return ((JISX208 <= cs) && (cs <= KSC5601));\r
223     }\r
224     \r
225     private static short CSM(short cs) {\r
226         return (short)(1<<cs);\r
227     }\r
228     \r
229     /* This gets the valid index of the end of buffer when decoding. */\r
230     private static int getEndOfBuffer_2022(ByteBuffer source) {\r
231         int sourceIndex = source.position();\r
232         byte mySource = 0;\r
233         mySource = source.get(sourceIndex);\r
234         \r
235         while (source.hasRemaining() && mySource != ESC_2022) {\r
236             mySource = source.get();\r
237             if (mySource == ESC_2022) {\r
238                 break;\r
239             }\r
240             sourceIndex++;\r
241         }\r
242         return sourceIndex;\r
243     }\r
244     \r
245     /*\r
246      * This is a simple version of _MBCSGetNextUChar() calls the method in CharsetDecoderMBCS and returns\r
247      * the value given.\r
248      *\r
249      * Return value:\r
250      * U+fffe   unassigned\r
251      * U+ffff   illegal\r
252      * otherwise the Unicode code point\r
253      */\r
254      private int MBCSSimpleGetNextUChar(UConverterSharedData sharedData,\r
255                                ByteBuffer   source, \r
256                                boolean      useFallback) {\r
257          int returnValue;\r
258          UConverterSharedData tempSharedData = myConverterData.currentConverter.sharedData;\r
259          myConverterData.currentConverter.sharedData = sharedData;\r
260          returnValue = ((CharsetDecoderMBCS)myConverterData.currentDecoder).simpleGetNextUChar(source, useFallback);\r
261          myConverterData.currentConverter.sharedData = tempSharedData;\r
262          \r
263          return returnValue;\r
264     }\r
265 \r
266     /*\r
267      * @param is the the output byte\r
268      * @return 1 roundtrip byte  0 no mapping  -1 fallback byte\r
269      */\r
270     static int MBCSSingleFromUChar32(UConverterSharedData sharedData, int c, int[] retval, boolean useFallback) {\r
271         char[] table;\r
272         int value;\r
273         /* BMP-only codepages are stored without stage 1 entries for supplementary code points */\r
274         if (c >= 0x10000 && (sharedData.mbcs.unicodeMask&UConverterConstants.HAS_SUPPLEMENTARY) == 0) {\r
275             return 0;\r
276         }\r
277         /* convert the Unicode code point in c into codepage bytes */\r
278         table = sharedData.mbcs.fromUnicodeTable;\r
279         /* get the byte for the output */\r
280         value = CharsetMBCS.MBCS_SINGLE_RESULT_FROM_U(table, sharedData.mbcs.fromUnicodeBytes, c);\r
281         /* get the byte for the output */\r
282         retval[0] = value & 0xff;\r
283         if (value >= 0xf00) {\r
284             return 1; /* roundtrip */\r
285         } else if (useFallback ? value>=0x800 : value>=0xc00) {\r
286             return -1; /* fallback taken */\r
287         } else {\r
288             return 0; /* no mapping */\r
289         }\r
290     }\r
291     \r
    /*
     * Each of these charset masks (with index x) contains a bit for a charset in exact correspondence
     * to whether that charset is used in the corresponding version x of ISO_2022, locale=ja,version=x
     * 
     * Each bit is produced by CSM() from the StateEnum value of the charset.
     * 
     * Note: The converter uses some leniency:
     * - The escape sequence ESC ( I for half-width 7-bit Katakana is recognized in
     *   all versions, not just JIS7 and JIS8.
     * - ICU does not distinguish between different versions of JIS X 0208.
     */
    private static final short jpCharsetMasks[] = {
        /* version 0 */
        (short)(CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)),
        /* version 1: adds JISX212 */
        (short)(CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212)),
        /* versions 2, 3 and 4: add GB2312, KSC5601, ISO8859_1 and ISO8859_7 */
        (short)(CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212)|CSM(GB2312)|CSM(KSC5601)|CSM(ISO8859_1)|CSM(ISO8859_7)),
        (short)(CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212)|CSM(GB2312)|CSM(KSC5601)|CSM(ISO8859_1)|CSM(ISO8859_7)),
        (short)(CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212)|CSM(GB2312)|CSM(KSC5601)|CSM(ISO8859_1)|CSM(ISO8859_7))
    };
308 \r
309 /*\r
310     // typedef enum {\r
311         private static final byte ASCII1 = 0;\r
312         private static final byte LATIN1 = 1;\r
313         private static final byte SBCS   = 2;\r
314         private static final byte DBCS   = 3;\r
315         private static final byte MBCS   = 4;\r
316         private static final byte HWKANA = 5;\r
317     // } Cnv2002Type;\r
318 */\r
319 \r
320     private class ISO2022State {\r
321         private byte []cs;  /* Charset number for SI (G0)/SO (G1)/SS2 (G2)/SS3 (G3) */\r
322         private byte g;     /* 0..3 for G0..G3 (SI/SO/SS2/SS3) */\r
323         private byte prevG; /* g before single shift (SS2 or SS3) */\r
324         \r
325         ISO2022State() {\r
326             cs = new byte[4];\r
327         }\r
328         \r
329         void reset() {\r
330             Arrays.fill(cs, (byte)0);\r
331             g = 0;\r
332             prevG = 0;\r
333         }\r
334     }\r
335     \r
336 //    private static final byte UCNV_OPTIONS_VERSION_MASK = 0xf;\r
337     private static final byte UCNV_2022_MAX_CONVERTERS  = 10;\r
338     \r
339     private class UConverterDataISO2022 {\r
340         UConverterSharedData []myConverterArray;\r
341         CharsetEncoderMBCS currentEncoder;\r
342         CharsetDecoderMBCS currentDecoder;\r
343         CharsetMBCS currentConverter;\r
344         int currentType; // Cnv2022Type;\r
345         ISO2022State toU2022State;\r
346         ISO2022State fromU2022State;\r
347         int key;\r
348         int version;\r
349         boolean isEmptySegment;\r
350         \r
351         UConverterDataISO2022() {\r
352             myConverterArray = new UConverterSharedData[UCNV_2022_MAX_CONVERTERS];\r
353             toU2022State = new ISO2022State();\r
354             fromU2022State = new ISO2022State();\r
355             currentType = 0;\r
356             key = 0;\r
357             version = 0;\r
358             isEmptySegment = false;\r
359         }\r
360         \r
361         void reset() {\r
362             toU2022State.reset();\r
363             fromU2022State.reset();\r
364             isEmptySegment = false;\r
365         }\r
366     }\r
367     \r
368     private static final byte ESC_2022 = 0x1B; /* ESC */\r
369     \r
370     // typedef enum {\r
371         private static final byte INVALID_2022              = -1; /* Doesn't correspond to a valid iso 2022 escape sequence */\r
372         private static final byte VALID_NON_TERMINAL_2022   =  0;  /* so far corresponds to a valid iso 2022 escape sequence */\r
373         private static final byte VALID_TERMINAL_2022       =  1;  /* corresponds to a valid iso 2022 escape sequence */\r
374         private static final byte VALID_MAYBE_TERMINAL_2022 =  2;  /* so far matches one iso 2022 escape sequence, but by adding\r
375                                                                      more characters might match another escape sequence */\r
376     // } UCNV_TableStates_2022;\r
377         \r
378     /*\r
379      * The way these state transition arrays work is:\r
380      * ex : ESC$B is the sequence for JISX208\r
381      *      a) First Iteration: char is ESC\r
382      *          i) Get the value of ESC from normalize_esq_chars_2022[] with int value of ESC as index\r
383      *             int x = normalize_esq_chars_2022[27] which is equal to 1\r
384      *         ii) Search for this value in escSeqStateTable_Key_2022[]\r
385      *             value of x is stored at escSeqStateTable_Key_2022[0]\r
386      *        iii) Save this index as offset\r
387      *         iv) Get state of this sequence from escSeqStateTable_Value_2022[]\r
388      *             escSeqStateTable_value_2022[offset], which is VALID_NON_TERMINAL_2022\r
389      *      b) Switch on this state and continue to next char\r
390      *          i) Get the value of $ from normalize_esq_chars_2022[] with int value of $ as index\r
391      *             which is normalize_esq_chars_2022[36] == 4\r
392      *         ii) x is currently 1(from above)\r
393      *             x<<=5 -- x is now 32\r
394      *             x+=normalize_esq_chars_2022[36]\r
395      *             now x is 36\r
396      *        iii) Search for this value in escSeqStateTable_Key_2022[]\r
397      *             value of x is stored at escSeqStateTable_Key_2022[2], so offset is 2\r
398      *         iv) Get state of this sequence from escSeqStateTable_Value_2022[]\r
399      *             escSeqStateTable_Value_2022[offset], which is VALID_NON_TERMINAL_2022\r
400      *      c) Switch on this state and continue to next char\r
401      *          i) Get the value of B from normalize_esq_chars_2022[] with int value of B as index\r
402      *         ii) x is currently 36 (from above)\r
403      *             x<<=5 -- x is now 1152\r
404      *             x+= normalize_esq_chars_2022[66]\r
405      *             now x is 1161\r
     *        iii) Search for this value in escSeqStateTable_Key_2022[]
     *             value of x is stored at escSeqStateTable_Key_2022[24], so offset is 24
     *         iv) Get state of this sequence from escSeqStateTable_Value_2022[]
     *             escSeqStateTable_Value_2022[offset], which is VALID_TERMINAL_2022
     *          v) Get the charset state from nextStateToUnicodeJP[24] which is JISX208
411      */\r
412      /* Below are the 3 arrays depicting a state transition table */\r
413      private static final byte normalize_esq_chars_2022[] = {\r
414          /* 0       1       2       3       4       5       6       7       8       9 */\r
415             0,      0,      0,      0,      0,      0,      0,      0,      0,      0,\r
416             0,      0,      0,      0,      0,      0,      0,      0,      0,      0,\r
417             0,      0,      0,      0,      0,      0,      0,      1,      0,      0,\r
418             0,      0,      0,      0,      0,      0,      4,      7,     29,      0,\r
419             2,     24,     26,     27,      0,      3,     23,      6,      0,      0,\r
420             0,      0,      0,      0,      0,      0,      0,      0,      0,      0,\r
421             0,      0,      0,      0,      5,      8,      9,     10,     11,     12,\r
422            13,     14,     15,     16,     17,     18,     19,     20,     25,     28,\r
423             0,      0,     21,      0,      0,      0,      0,      0,      0,      0,\r
424            22,      0,      0,      0,      0,      0,      0,      0,      0,      0,\r
425             0,      0,      0,      0,      0,      0,      0,      0,      0,      0,\r
426             0,      0,      0,      0,      0,      0,      0,      0,      0,      0,\r
427             0,      0,      0,      0,      0,      0,      0,      0,      0,      0,\r
428             0,      0,      0,      0,      0,      0,      0,      0,      0,      0,\r
429             0,      0,      0,      0,      0,      0,      0,      0,      0,      0,\r
430             0,      0,      0,      0,      0,      0,      0,      0,      0,      0,\r
431             0,      0,      0,      0,      0,      0,      0,      0,      0,      0,\r
432             0,      0,      0,      0,      0,      0,      0,      0,      0,      0,\r
433             0,      0,      0,      0,      0,      0,      0,      0,      0,      0,\r
434             0,      0,      0,      0,      0,      0,      0,      0,      0,      0,\r
435             0,      0,      0,      0,      0,      0,      0,      0,      0,      0,\r
436             0,      0,      0,      0,      0,      0,      0,      0,      0,      0,\r
437             0,      0,      0,      0,      0,      0,      0,      0,      0,      0,\r
438             0,      0,      0,      0,      0,      0,      0,      0,      0,      0,\r
439             0,      0,      0,      0,      0,      0,      0,      0,      0,      0,\r
440             0,      0,      0,      0,      0,      0,      0,      0,      0,      0,\r
441             0,      0,      0,      0,      0,      0\r
442      };\r
443      \r
444      private static final short MAX_STATES_2022 = 74;\r
445      private static final int escSeqStateTable_Key_2022[/* MAX_STATES_2022 */] = {\r
446          /* 0        1          2         3        4          5         6         7         8         9 */\r
447             1,      34,        36,       39,      55,        57,       60,       61,     1093,     1096,\r
448          1097,    1098,      1099,     1100,     1101,     1102,     1103,     1104,     1105,     1106,\r
449          1109,    1154,      1157,     1160,     1161,     1176,     1178,     1179,     1254,     1257,\r
450          1768,    1773,      1957,    35105,    36933,    36936,    36937,    36938,    36939,    36940,\r
451         36942,   36943,     36944,    36945,    36946,    36947,    36948,    37640,    37642,    37644,\r
452         37646,   37711,     37744,    37745,    37746,    37747,    37748,    40133,    40136,    40138,\r
453         40139,   40140,     40141,  1123363, 35947624, 35947625, 35947626, 35947627, 35947629, 35947630,\r
454      35947631, 35947635, 35947636, 35947638\r
455      };\r
456      \r
457      private static final byte escSeqStateTable_Value_2022[/* MAX_STATES_2022 */] = {\r
458          /*         0                           1                           2                           3                       4               */\r
459          VALID_NON_TERMINAL_2022,   VALID_NON_TERMINAL_2022,    VALID_NON_TERMINAL_2022,    VALID_NON_TERMINAL_2022,    VALID_NON_TERMINAL_2022,    \r
460              VALID_TERMINAL_2022,       VALID_TERMINAL_2022,    VALID_NON_TERMINAL_2022,        VALID_TERMINAL_2022,        VALID_TERMINAL_2022,\r
461        VALID_MAYBE_TERMINAL_2022,       VALID_TERMINAL_2022,        VALID_TERMINAL_2022,        VALID_TERMINAL_2022,        VALID_TERMINAL_2022,\r
462              VALID_TERMINAL_2022,       VALID_TERMINAL_2022,        VALID_TERMINAL_2022,        VALID_TERMINAL_2022,        VALID_TERMINAL_2022,\r
463              VALID_TERMINAL_2022,   VALID_NON_TERMINAL_2022,        VALID_TERMINAL_2022,        VALID_TERMINAL_2022,        VALID_TERMINAL_2022,\r
464          VALID_NON_TERMINAL_2022,   VALID_NON_TERMINAL_2022,    VALID_NON_TERMINAL_2022,    VALID_NON_TERMINAL_2022,        VALID_TERMINAL_2022,\r
465              VALID_TERMINAL_2022,       VALID_TERMINAL_2022,        VALID_TERMINAL_2022,    VALID_NON_TERMINAL_2022,        VALID_TERMINAL_2022,\r
466              VALID_TERMINAL_2022,       VALID_TERMINAL_2022,        VALID_TERMINAL_2022,        VALID_TERMINAL_2022,        VALID_TERMINAL_2022,\r
467              VALID_TERMINAL_2022,       VALID_TERMINAL_2022,        VALID_TERMINAL_2022,        VALID_TERMINAL_2022,        VALID_TERMINAL_2022,\r
468              VALID_TERMINAL_2022,       VALID_TERMINAL_2022,        VALID_TERMINAL_2022,        VALID_TERMINAL_2022,        VALID_TERMINAL_2022,\r
469              VALID_TERMINAL_2022,       VALID_TERMINAL_2022,        VALID_TERMINAL_2022,        VALID_TERMINAL_2022,        VALID_TERMINAL_2022,\r
470              VALID_TERMINAL_2022,       VALID_TERMINAL_2022,        VALID_TERMINAL_2022,        VALID_TERMINAL_2022,        VALID_TERMINAL_2022,\r
471              VALID_TERMINAL_2022,       VALID_TERMINAL_2022,        VALID_TERMINAL_2022,    VALID_NON_TERMINAL_2022,        VALID_TERMINAL_2022,\r
472              VALID_TERMINAL_2022,       VALID_TERMINAL_2022,        VALID_TERMINAL_2022,        VALID_TERMINAL_2022,        VALID_TERMINAL_2022,\r
473              VALID_TERMINAL_2022,       VALID_TERMINAL_2022,        VALID_TERMINAL_2022,        VALID_TERMINAL_2022\r
474      };\r
475      \r
476      /* Type def for refactoring changeState_2022 code */\r
477      // typedef enum {\r
478          private static final byte ISO_2022_JP = 1;\r
479          private static final byte ISO_2022_KR = 2;\r
480          private static final byte ISO_2022_CN = 3;\r
481      // } Variant2022;\r
482          \r
483     /* const UConverterSharedData _ISO2022Data; */\r
484     //private UConverterSharedData _ISO2022JPData;\r
485     //private UConverterSharedData _ISO2022KRData;\r
486     //private UConverterSharedData _ISO2022CNData;\r
487     \r
488     /******************** to unicode ********************/\r
489     /****************************************************\r
     * Recognized escape sequences are
491      * <ESC>(B  ASCII\r
492      * <ESC>.A  ISO-8859-1\r
493      * <ESC>.F  ISO-8859-7\r
494      * <ESC>(J  JISX-201\r
495      * <ESC>(I  JISX-201\r
496      * <ESC>$B  JISX-208\r
497      * <ESC>$@  JISX-208\r
498      * <ESC>$(D JISX-212\r
499      * <ESC>$A  GB2312\r
500      * <ESC>$(C KSC5601\r
501      */\r
502     private final static byte nextStateToUnicodeJP[/* MAX_STATES_2022 */] = {\r
503         /*     0               1               2               3               4               5               6               7               8               9    */\r
504         INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,      SS2_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,\r
505                 ASCII,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,        JISX201,    HWKANA_7BIT,        JISX201,  INVALID_STATE,\r
506         INVALID_STATE,  INVALID_STATE,        JISX208,         GB2312,        JISX208,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,\r
507             ISO8859_1,      ISO8859_7,        JISX208,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,        KSC5601,        JISX212,  INVALID_STATE,\r
508         INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,\r
509         INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,\r
510         INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,\r
511         INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE\r
512     };\r
513     \r
514     private final static byte nextStateToUnicodeCN[/* MAX_STATES_2022 */] = {\r
515         /*     0               1               2               3               4               5               6               7               8               9    */\r
516         INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,      SS2_STATE,      SS3_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,\r
517         INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,\r
518         INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,\r
519         INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,\r
520         INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,       GB2312_1,  INVALID_STATE,     ISO_IR_165,\r
521           CNS_11643_1,    CNS_11643_2,    CNS_11643_3,    CNS_11643_4,    CNS_11643_5,    CNS_11643_6,    CNS_11643_7,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,\r
522         INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,\r
523         INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE\r
524     };\r
525     \r
526     /* runs through a state machine to determine the escape sequence - codepage correspondence */\r
527     private CoderResult changeState_2022(CharsetDecoderICU decoder, ByteBuffer source, int var) {\r
528         CoderResult err = CoderResult.UNDERFLOW;\r
529         boolean DONE = false;\r
530         byte value;\r
531         int key[] = {myConverterData.key};\r
532         int offset[] = {0};\r
533         int initialToULength = decoder.toULength;\r
534         byte c;\r
535         int malformLength = 0;\r
536         \r
537         value = VALID_NON_TERMINAL_2022;\r
538         while (source.hasRemaining()) {\r
539             c = source.get();\r
540             malformLength++;\r
541             decoder.toUBytesArray[decoder.toULength++] = c;\r
542             value = getKey_2022(c, key, offset);\r
543             \r
544             switch(value) {\r
545             \r
546             case VALID_NON_TERMINAL_2022:\r
547                 /* continue with the loop */\r
548                 break;\r
549                 \r
550             case VALID_TERMINAL_2022:\r
551                 key[0] = 0;\r
552                 DONE = true;\r
553                 break;\r
554                 \r
555             case INVALID_2022:\r
556                 DONE = true;\r
557                 break;\r
558                 \r
559             case VALID_MAYBE_TERMINAL_2022:\r
560                 /* not ISO_2022 itself, finish here */\r
561                 value = VALID_TERMINAL_2022;\r
562                 key[0] = 0;\r
563                 DONE = true;\r
564                 break;\r
565             }\r
566             if (DONE) {\r
567                 break;\r
568             }\r
569         }\r
570 // DONE:\r
571         myConverterData.key = key[0];\r
572         \r
573         if (value == VALID_NON_TERMINAL_2022) {\r
574             /* indicate that the escape sequence is incomplete: key !=0 */\r
575             return err;\r
576         } else if (value == INVALID_2022) {\r
577             err = CoderResult.malformedForLength(malformLength);\r
578         } else /* value == VALID_TERMINAL_2022 */ {\r
579             switch (var) {\r
580             case ISO_2022_JP: {\r
581                 byte tempState = nextStateToUnicodeJP[offset[0]];\r
582                 switch (tempState) {\r
583                 case INVALID_STATE:\r
584                     err = CoderResult.malformedForLength(malformLength);\r
585                     break;\r
586                 case SS2_STATE:\r
587                     if (myConverterData.toU2022State.cs[2] != 0) {\r
588                         if (myConverterData.toU2022State.g < 2) {\r
589                             myConverterData.toU2022State.prevG = myConverterData.toU2022State.g;\r
590                         }\r
591                         myConverterData.toU2022State.g = 2;\r
592                     } else { \r
593                         /* illegal to have SS2 before a matching designator */\r
594                         err = CoderResult.malformedForLength(malformLength);\r
595                     }\r
596                     break;\r
597                 /* case SS3_STATE: not used in ISO-2022-JP-x */\r
598                 case ISO8859_1:\r
599                 case ISO8859_7:\r
600                     if ((jpCharsetMasks[myConverterData.version] & CSM(tempState)) == 0) {\r
601                         err = CoderResult.unmappableForLength(malformLength);\r
602                     } else {\r
603                         /* G2 charset for SS2 */\r
604                         myConverterData.toU2022State.cs[2] = tempState;\r
605                     }\r
606                     break;\r
607                 default:\r
608                     if ((jpCharsetMasks[myConverterData.version] & CSM(tempState)) == 0) {\r
609                         err = CoderResult.unmappableForLength(source.position() - 1);\r
610                     } else {\r
611                         /* G0 charset */\r
612                         myConverterData.toU2022State.cs[0] = tempState;\r
613                     }\r
614                     break;\r
615                 } // end of switch\r
616                 break;\r
617             }\r
618             case ISO_2022_CN: {\r
619                 byte tempState = nextStateToUnicodeCN[offset[0]];\r
620                 switch (tempState) {\r
621                 case INVALID_STATE:\r
622                     err = CoderResult.unmappableForLength(malformLength);\r
623                     break;\r
624                 case SS2_STATE:\r
625                     if (myConverterData.toU2022State.cs[2] != 0) {\r
626                         if (myConverterData.toU2022State.g < 2) {\r
627                             myConverterData.toU2022State.prevG = myConverterData.toU2022State.g;\r
628                         }\r
629                         myConverterData.toU2022State.g = 2;\r
630                     } else {\r
631                         /* illegal to have SS2 before a matching designator */\r
632                         err = CoderResult.malformedForLength(malformLength);\r
633                     }\r
634                     break;\r
635                 case SS3_STATE:\r
636                     if (myConverterData.toU2022State.cs[3] != 0) {\r
637                         if (myConverterData.toU2022State.g < 2) {\r
638                             myConverterData.toU2022State.prevG = myConverterData.toU2022State.g;\r
639                         }\r
640                         myConverterData.toU2022State.g = 3;\r
641                     } else {\r
642                         /* illegal to have SS3 before a matching designator */\r
643                         err = CoderResult.malformedForLength(malformLength);\r
644                     }\r
645                     break;\r
646                 case ISO_IR_165:\r
647                     if (myConverterData.version == 0) {\r
648                         err = CoderResult.unmappableForLength(malformLength);\r
649                         break;\r
650                     }\r
651                     /* fall through */\r
652                 case GB2312_1:\r
653                     /* fall through */\r
654                 case CNS_11643_1:\r
655                     myConverterData.toU2022State.cs[1] = tempState;\r
656                     break;\r
657                 case CNS_11643_2:\r
658                     myConverterData.toU2022State.cs[2] = tempState;\r
659                     break;\r
660                 default:\r
661                     /* other CNS 11643 planes */\r
662                     if (myConverterData.version == 0) {\r
663                         err = CoderResult.unmappableForLength(source.position() - 1);\r
664                     } else {\r
665                         myConverterData.toU2022State.cs[3] = tempState;\r
666                     }\r
667                     break;\r
668                 } //end of switch\r
669             }\r
670             break;\r
671             case ISO_2022_KR:\r
672                 if (offset[0] == 0x30) {\r
673                     /* nothing to be done, just accept this one escape sequence */\r
674                 } else {\r
675                     err = CoderResult.unmappableForLength(malformLength);\r
676                 }\r
677                 break;\r
678             default:\r
679                 err = CoderResult.malformedForLength(malformLength);\r
680                 break;\r
681             } // end of switch\r
682         }\r
683         if (!err.isError()) {\r
684             decoder.toULength = 0;\r
685         } else if (err.isMalformed()) {\r
686             if (decoder.toULength > 1) {\r
687                 /*\r
688                  * Ticket 5691: consistent illegal sequences:\r
689                  * - We include at least the first byte (ESC) in the illegal sequence.\r
690                  * - If any of the non-initial bytes could be the start of a character,\r
691                  *   we stop the illegal sequece before the first one of those.\r
692                  *   In escape sequences, all following bytes are "printable", that is,\r
693                  *   unless they are completely illegal (>7f in SBCS, outside 21..7e in DBCS),\r
694                  *   they are valid single/lead bytes.\r
695                  *   For simplicity, we always only report the initial ESC byte as the\r
696                  *   illegal sequence and back out all other bytes we looked at.\r
697                  */\r
698                 /* Back out some bytes. */\r
699                 int backOutDistance = decoder.toULength - 1;\r
700                 int bytesFromThisBuffer = decoder.toULength - initialToULength;\r
701                 if (backOutDistance <= bytesFromThisBuffer) {\r
702                     /* same as initialToULength<=1 */\r
703                     source.position(source.position() - backOutDistance);\r
704                 } else {\r
705                     /* Back out bytes from the previous buffer: Need to replay them. */\r
706                     decoder.preToULength = (byte)(bytesFromThisBuffer - backOutDistance);\r
707                     /* same as -(initalToULength-1) */\r
708                     /* preToULength is negative! */\r
709                     for (int i = 0; i < -(decoder.preToULength); i++) {\r
710                         decoder.preToUArray[i] = decoder.toUBytesArray[i+1];\r
711                     }\r
712                     source.position(source.position() - bytesFromThisBuffer);\r
713                 }\r
714                 decoder.toULength = 1;\r
715             }\r
716         }\r
717         \r
718         return err;\r
719     }\r
720     \r
721     private static byte getKey_2022(byte c, int[]key, int[]offset) { /* Advances the escape-sequence state machine by one byte c. key[0] holds the key accumulated so far; on a match key[0] is updated, offset[0] receives the matching table index and the table's state value is returned. Returns INVALID_2022 when c cannot occur in an escape sequence (key[0] and offset[0] are then cleared) or when the combined key is not in the table. */\r
722         int togo;\r
723         int low = 0;\r
724         int hi = MAX_STATES_2022;\r
725         int oldmid = 0;\r
726         \r
727         togo = normalize_esq_chars_2022[(short)c&UConverterConstants.UNSIGNED_BYTE_MASK]; /* index the table with the unsigned value of c */\r
728         \r
729         if (togo == 0) {\r
730             /* not a valid character anywhere in an escape sequence */\r
731             key[0] = 0;\r
732             offset[0] = 0;\r
733             return INVALID_2022;\r
734         }\r
735         togo = (key[0] << 5) + togo; /* append the normalized byte to the key built from the previous bytes */\r
736         \r
737         while (hi != low) { /* binary search */\r
738             int mid = (hi+low) >> 1; /* Finds median */\r
739         \r
740             if (mid == oldmid) { /* the interval can no longer shrink: togo is not in the table */\r
741                 break;\r
742             }\r
743             \r
744             if (escSeqStateTable_Key_2022[mid] > togo) {\r
745                 hi = mid;\r
746             } else if (escSeqStateTable_Key_2022[mid] < togo) {\r
747                 low = mid;\r
748             } else /* we found it */ {\r
749                 key[0] = togo;\r
750                 offset[0] = mid;\r
751                 return escSeqStateTable_Value_2022[mid];\r
752             }\r
753             oldmid = mid;\r
754         }\r
755         return INVALID_2022;\r
756     }\r
757     \r
758     /*\r
759      * To Unicode Callback helper function: stores the offending source byte(s) of sourceChar in cnv.toUBytesArray (two bytes when sourceChar > 0xff, otherwise one), sets cnv.toULength accordingly, and returns an unmappable result when targetUniChar is missingCharMarker-1, otherwise a malformed result.\r
760      */\r
761     private static CoderResult toUnicodeCallback(CharsetDecoderICU cnv, int sourceChar, int targetUniChar) {\r
762         CoderResult err = CoderResult.UNDERFLOW;\r
763         if (sourceChar > 0xff) {\r
764             cnv.toUBytesArray[0] = (byte)(sourceChar>>8);\r
765             cnv.toUBytesArray[1] = (byte)sourceChar;\r
766             cnv.toULength = 2;\r
767         } else {\r
768             cnv.toUBytesArray[0] = (byte)sourceChar;\r
769             cnv.toULength = 1;\r
770         }\r
771         \r
772         if (targetUniChar == (UConverterConstants.missingCharMarker-1/* 0xfffe */)) {\r
773             err = CoderResult.unmappableForLength(1); /* NOTE(review): the reported length is 1 even when two bytes were stored above - confirm this is intended */\r
774         } else {\r
775             err = CoderResult.malformedForLength(1);\r
776         }\r
777         \r
778         return err;\r
779     }\r
780     \r
781     /****************************ISO-2022-JP************************************/\r
782     private class CharsetDecoderISO2022JP extends CharsetDecoderICU { /* Decoder for the ISO-2022-JP variant; the designation and shift state is kept in the enclosing charset's myConverterData. */\r
783         public CharsetDecoderISO2022JP(CharsetICU cs) { /* passes the owning charset on to the CharsetDecoderICU constructor */\r
784             super(cs);\r
785         }\r
786         \r
787         protected void implReset() { /* resets the base decoder state and then the enclosing charset's myConverterData */\r
788             super.implReset();\r
789             myConverterData.reset();\r
790         }\r
791         /* \r
792          * Map 00..7F to Unicode according to JIS X 0201: 0x5c becomes U+00A5, 0x7e becomes U+203E, every other value is returned unchanged. \r
793          * */\r
794         private int jisx201ToU(int value) {\r
795             if (value < 0x5c) {\r
796                 return value;\r
797             } else if (value == 0x5c) {\r
798                 return 0xa5;\r
799             } else if (value == 0x7e) {\r
800                 return 0x203e;\r
801             } else { /* value <= 0x7f */\r
802                 return value;\r
803             }\r
804         }\r
805         /*\r
806          * Convert a pair of JIS X 208 21..7E bytes to Shift-JIS.\r
807          * If either byte is outside 21..7E make sure that the result is not valid\r
808          * for Shift-JIS so that the converter catches it.\r
809          * Some invalid byte values already turn into equally invalid Shift-JIS\r
810          * byte values and need not be tested explicitly. The result is stored in bytes[0] (from c1) and bytes[1] (from c2).\r
811          */\r
812         private void _2022ToSJIS(char c1, char c2, byte []bytes) {\r
813             if ((c1&1) > 0) { /* odd lead byte */\r
814                 ++c1;\r
815                 if (c2 <= 0x5f) {\r
816                     c2 += 0x1f;\r
817                 } else if (c2 <= 0x7e) {\r
818                     c2 += 0x20;\r
819                 } else {\r
820                     c2 = 0; /* invalid */\r
821                 }\r
822             } else {\r
823                 if ((c2 >= 0x21) && (c2 <= 0x7e)) {\r
824                     c2 += 0x7e;\r
825                 } else {\r
826                     c2 = 0; /* invalid */\r
827                 }\r
828             }\r
829             \r
830             c1 >>=1;\r
831             if (c1 <= 0x2f) {\r
832                 c1 += 0x70;\r
833             } else if (c1 <= 0x3f) {\r
834                 c1 += 0xb0;\r
835             } else {\r
836                 c1 = 0; /* invalid */\r
837             }\r
838             bytes[0] = (byte)(UConverterConstants.UNSIGNED_BYTE_MASK & c1);\r
839             bytes[1] = (byte)(UConverterConstants.UNSIGNED_BYTE_MASK & c2);\r
840         }\r
841         protected CoderResult decodeLoop(ByteBuffer source, CharBuffer target, IntBuffer offsets, boolean flush) { /* Decodes bytes from source into chars in target, recording offsets when offsets is non-null; returns UNDERFLOW, OVERFLOW or a malformed/unmappable result. The flush argument is not consulted here. The gotoEscape and gotoGetTrail flags stand in for the goto labels named in the "// goto" comments below. */\r
842             boolean gotoGetTrail = false;\r
843             boolean gotoEscape = false;\r
844             CoderResult err = CoderResult.UNDERFLOW;\r
845             byte []tempBuf = new byte[2];\r
846             int targetUniChar = 0x0000;\r
847             int mySourceChar = 0x0000;\r
848             int mySourceCharTemp = 0x0000; // use for getTrail label call.\r
849             byte cs; /* StateEnum */\r
850             byte csTemp= 0; // use for getTrail label call.\r
851             \r
852             if (myConverterData.key != 0) {\r
853                 /* continue with a partial escape sequence */\r
854                 // goto escape;\r
855                 gotoEscape = true;\r
856             } else if (toULength == 1 && source.hasRemaining() && target.hasRemaining()) {\r
857                 /* continue with a partial double-byte character */\r
858                 mySourceChar = (toUBytesArray[0] & UConverterConstants.UNSIGNED_BYTE_MASK);\r
859                 toULength = 0;\r
860                 cs = myConverterData.toU2022State.cs[myConverterData.toU2022State.g];\r
861                 // goto getTrailByte;\r
862                 mySourceCharTemp = 0x99; /* NOTE(review): 0x99 presumably matches no case label, so the outer switch takes its default branch - confirm */\r
863                 gotoGetTrail = true;\r
864             }\r
865             \r
866             while (source.hasRemaining() || gotoEscape || gotoGetTrail) {\r
867                 // This code is here for the goto escape label call above.\r
868                 if (gotoEscape) {\r
869                     mySourceCharTemp = ESC_2022;\r
870                 }\r
871                 \r
872                 targetUniChar = UConverterConstants.missingCharMarker;\r
873                 \r
874                 if (gotoEscape || gotoGetTrail || target.hasRemaining()) {\r
875                     if (!gotoEscape && !gotoGetTrail) {\r
876                         mySourceChar = source.get() & UConverterConstants.UNSIGNED_BYTE_MASK;\r
877                         mySourceCharTemp = mySourceChar;\r
878                     }\r
879                     \r
880                     switch (mySourceCharTemp) {\r
881                     case UConverterConstants.SI:\r
882                         if (myConverterData.version == 3) {\r
883                             myConverterData.toU2022State.g = 0;\r
884                             continue;\r
885                         } else {\r
886                             /* only JIS7 uses SI/SO, not ISO-2022-JP-x */\r
887                             myConverterData.isEmptySegment = false;\r
888                             break;\r
889                         }\r
890                         \r
891                     case UConverterConstants.SO:\r
892                         if (myConverterData.version == 3) {\r
893                             /* JIS7: switch to G1 half-width Katakana */\r
894                             myConverterData.toU2022State.cs[1] = HWKANA_7BIT;\r
895                             myConverterData.toU2022State.g = 1;\r
896                             continue; \r
897                         } else {\r
898                             /* only JIS7 uses SI/SO, not ISO-2022-JP-x */\r
899                             myConverterData.isEmptySegment = false; /* reset this, we have a different error */\r
900                             break;\r
901                         }\r
902                         \r
903                     case ESC_2022:\r
904                         if (!gotoEscape) { /* ESC was just read from source: step back so that changeState_2022 reads it again */\r
905                             source.position(source.position() - 1);\r
906                         } else {\r
907                             gotoEscape = false;\r
908                         }\r
909 // escape:\r
910                         {\r
911                             int mySourceBefore = source.position();\r
912                             int toULengthBefore = this.toULength;\r
913                             \r
914                             err = changeState_2022(this, source, variant);\r
915 \r
916                             /* If in ISO-2022-JP only and we successfully completed an escape sequence, but previous segment was empty, create an error */\r
917                             if(myConverterData.version == 0 && myConverterData.key == 0 && !err.isError() && myConverterData.isEmptySegment) {\r
918                                 err = CoderResult.malformedForLength(source.position() - mySourceBefore);\r
919                                 this.toULength = toULengthBefore + (source.position() - mySourceBefore);\r
920                             }\r
921                         }\r
922 \r
923                         /* invalid or illegal escape sequence */\r
924                         if(err.isError()){\r
925                             myConverterData.isEmptySegment = false; /* Reset to avoid future spurious errors */\r
926                             return err;\r
927                         }\r
928                         /* If we successfully completed an escape sequence, we begin a new segment, empty so far */\r
929                         if(myConverterData.key == 0) {\r
930                             myConverterData.isEmptySegment = true;\r
931                         }\r
932 \r
933                         continue;\r
934                     /* ISO-2022-JP does not use single-byte (C1) SS2 and SS3 */\r
935                     case CR:\r
936                         /* falls through */\r
937                     case LF:\r
938                         /* automatically reset to single-byte mode */\r
939                         if (myConverterData.toU2022State.cs[0] != ASCII && myConverterData.toU2022State.cs[0] != JISX201) {\r
940                             myConverterData.toU2022State.cs[0] = ASCII;\r
941                         }\r
942                         myConverterData.toU2022State.cs[2] = 0;\r
943                         myConverterData.toU2022State.g = 0;\r
944                         /* falls through */\r
945                     default :\r
946                         /* convert one or two bytes */\r
947                         myConverterData.isEmptySegment = false;\r
948                         cs = myConverterData.toU2022State.cs[myConverterData.toU2022State.g];\r
949                         csTemp = cs;\r
950                         if (gotoGetTrail) {\r
951                             csTemp = (byte)0x99; /* NOTE(review): presumably not a valid charset state, so the inner switch takes its default (DBCS) branch - confirm */\r
952                         }\r
953                         if (!gotoGetTrail && ((mySourceChar >= 0xa1) && (mySourceChar <= 0xdf) && myConverterData.version == 4 && !IS_JP_DBCS(cs))) {\r
954                             /* 8-bit halfwidth katakana in any single-byte mode for JIS8 */\r
955                             targetUniChar = mySourceChar + (HWKANA_START - 0xa1);\r
956                             \r
957                             /* return from a single-shift state to the previous one */\r
958                             if (myConverterData.toU2022State.g >= 2) {\r
959                                 myConverterData.toU2022State.g = myConverterData.toU2022State.prevG;\r
960                             }\r
961                         } else {\r
962                             switch(csTemp) {\r
963                             case ASCII:\r
964                                 if (mySourceChar <= 0x7f) {\r
965                                     targetUniChar = mySourceChar;\r
966                                 }\r
967                                 break;\r
968                             case ISO8859_1:\r
969                                 if (mySourceChar <= 0x7f) {\r
970                                     targetUniChar = mySourceChar + 0x80;\r
971                                 }\r
972                                 /* return from a single-shift state to the previous one */\r
973                                 myConverterData.toU2022State.g = myConverterData.toU2022State.prevG;\r
974                                 break;\r
975                             case ISO8859_7:\r
976                                 if (mySourceChar <= 0x7f) {\r
977                                     /* convert mySourceChar+0x80 to use a normal 8-bit table */\r
978                                     targetUniChar = CharsetMBCS.MBCS_SINGLE_SIMPLE_GET_NEXT_BMP(myConverterData.myConverterArray[cs].mbcs,\r
979                                             mySourceChar+0x80);\r
980                                 }\r
981                                 /* return from a single-shift state to the previous one */\r
982                                 myConverterData.toU2022State.g = myConverterData.toU2022State.prevG;\r
983                                 break;\r
984                             case JISX201:\r
985                                 if (mySourceChar <= 0x7f) {\r
986                                     targetUniChar = jisx201ToU(mySourceChar);\r
987                                 }\r
988                                 break;\r
989                             case HWKANA_7BIT:\r
990                                 if ((mySourceChar >= 0x21) && (mySourceChar <= 0x5f)) {\r
991                                     /* 7-bit halfwidth Katakana */\r
992                                     targetUniChar = mySourceChar + (HWKANA_START - 0x21);\r
993                                     break;\r
994                                 } /* NOTE(review): no break here, so bytes outside 21..5f fall through to the DBCS branch below - confirm this is intended */\r
995                             default :\r
996                                 /* G0 DBCS */\r
997                                 if (gotoGetTrail || source.hasRemaining()) {\r
998 // getTrailByte:\r
999                                     int tmpSourceChar;\r
1000                                     gotoGetTrail = false;\r
1001                                     short trailByte;\r
1002                                     boolean leadIsOk, trailIsOk;\r
1003                                     \r
1004                                     trailByte = (short)(source.get(source.position()) & UConverterConstants.UNSIGNED_BYTE_MASK); /* absolute get: peeks at the next byte without consuming it */\r
1005                                     /*\r
1006                                      * Ticket 5691: consistent illegal sequences:\r
1007                                      * - We include at least the first byte in the illegal sequence.\r
1008                                      * - If any of the non-initial bytes could be the start of a character,\r
1009                                      *   we stop the illegal sequence before the first one of those.\r
1010                                      * \r
1011                                      * In ISO-2022 DBCS, if the second byte is in the 21..7e range or is\r
1012                                      * an ESC/SO/SI, we report only the first byte as the illegal sequence.\r
1013                                      * Otherwise we convert or report the pair of bytes.\r
1014                                      */\r
1015                                     leadIsOk = (short)(UConverterConstants.UNSIGNED_BYTE_MASK & (mySourceChar - 0x21)) <= (0x7e - 0x21);\r
1016                                     trailIsOk = (short)(UConverterConstants.UNSIGNED_BYTE_MASK & (trailByte - 0x21)) <= (0x7e - 0x21);\r
1017                                     if (leadIsOk && trailIsOk) {\r
1018                                         source.get(); /* consume the trail byte that was peeked above */\r
1019                                         tmpSourceChar = (mySourceChar << 8) | trailByte;\r
1020                                         if (cs == JISX208) {\r
1021                                             _2022ToSJIS((char)mySourceChar, (char)trailByte, tempBuf);\r
1022                                             mySourceChar = tmpSourceChar;\r
1023                                         } else {\r
1024                                             /* Copy before we modify tmpSourceChar so toUnicodeCallback() sees the correct bytes. */\r
1025                                             mySourceChar = tmpSourceChar;\r
1026                                             if (cs == KSC5601) {\r
1027                                                 tmpSourceChar += 0x8080; /* = _2022ToGR94DBCS(tmpSourceChar) */\r
1028                                             }\r
1029                                             tempBuf[0] = (byte)(UConverterConstants.UNSIGNED_BYTE_MASK & (tmpSourceChar >> 8));\r
1030                                             tempBuf[1] = (byte)(UConverterConstants.UNSIGNED_BYTE_MASK & tmpSourceChar);\r
1031                                         }\r
1032                                         targetUniChar = MBCSSimpleGetNextUChar(myConverterData.myConverterArray[cs], ByteBuffer.wrap(tempBuf), false);\r
1033                                     } else if (!(trailIsOk || IS_2022_CONTROL(trailByte))) {\r
1034                                         /* report a pair of illegal bytes if the second byte is not a DBCS starter */\r
1035                                         source.get();\r
1036                                         /* add another bit so that the code below writes 2 bytes in case of error */\r
1037                                         mySourceChar = 0x10000 | (mySourceChar << 8) | trailByte;\r
1038                                     }\r
1039                                 } else {\r
1040                                     toUBytesArray[0] = (byte)mySourceChar; /* no trail byte available yet: save the lead byte for the next call */\r
1041                                     toULength = 1;\r
1042                                     // goto endloop\r
1043                                     return err;\r
1044                                 }\r
1045                             } /* end of inner switch */\r
1046                         }\r
1047                         break;\r
1048                     } /* end of outer switch */\r
1049                     \r
1050                     if (targetUniChar < (UConverterConstants.missingCharMarker-1/*0xfffe*/)) { /* a single UTF-16 code unit */\r
1051                         if (offsets != null) {\r
1052                             offsets.put(target.remaining(), source.remaining() - (mySourceChar <= 0xff ? 1 : 2));\r
1053                         }\r
1054                         target.put((char)targetUniChar);\r
1055                     } else if (targetUniChar > UConverterConstants.missingCharMarker) {\r
1056                         /* disassemble the surrogate pair and write to output */\r
1057                         targetUniChar -= 0x0010000;\r
1058                         target.put((char)(0xd800 + (char)(targetUniChar>>10)));\r
1059                         target.position(target.position()-1);\r
1060                         if (offsets != null) {\r
1061                             offsets.put(target.remaining(), source.remaining() - (mySourceChar <= 0xff ? 1 : 2));\r
1062                         }\r
1063                         target.get();\r
1064                         if (target.hasRemaining()) {\r
1065                             target.put((char)(0xdc00+(char)(targetUniChar&0x3ff)));\r
1066                             target.position(target.position()-1);\r
1067                             if (offsets != null) {\r
1068                                 offsets.put(target.remaining(), source.remaining() - (mySourceChar <= 0xff ? 1 : 2));\r
1069                             }\r
1070                             target.get();\r
1071                         } else { /* no room left in target: keep the trail surrogate in charErrorBufferArray */\r
1072                             charErrorBufferArray[charErrorBufferLength++] = \r
1073                                 (char)(0xdc00+(char)(targetUniChar&0x3ff));\r
1074                         }\r
1075                     } else {\r
1076                         /* Call the callback function */\r
1077                         err = toUnicodeCallback(this, mySourceChar, targetUniChar);\r
1078                         break;\r
1079                     }\r
1080                 } else { /* goes with "if (target.hasRemaining())" way up near the top of the function */\r
1081                     err = CoderResult.OVERFLOW;\r
1082                     break;\r
1083                 }\r
1084             }\r
1085 //endloop:\r
1086             return err;\r
1087         }\r
1088     } // end of class CharsetDecoderISO2022JP\r
1089     \r
1090     /****************************ISO-2022-CN************************************/\r
1091     private class CharsetDecoderISO2022CN extends CharsetDecoderICU {\r
1092         public CharsetDecoderISO2022CN(CharsetICU cs) { /* passes the owning charset on to the CharsetDecoderICU constructor */\r
1093             super(cs);\r
1094         }\r
1095         \r
1096         protected void implReset() { /* resets the base decoder state and then the enclosing charset's myConverterData */\r
1097             super.implReset();\r
1098             myConverterData.reset();\r
1099         }\r
1100         \r
1101         protected CoderResult decodeLoop(ByteBuffer source, CharBuffer target, IntBuffer offsets, boolean flush) {\r
1102             CoderResult err = CoderResult.UNDERFLOW;\r
1103             byte[] tempBuf = new byte[3];\r
1104             int targetUniChar = 0x0000;\r
1105             int mySourceChar = 0x0000;\r
1106             int mySourceCharTemp = 0x0000;\r
1107             boolean gotoEscape = false;\r
1108             boolean gotoGetTrailByte = false;\r
1109             \r
1110             if (myConverterData.key != 0) {\r
1111                 /* continue with a partial escape sequence */\r
1112                 // goto escape;\r
1113                 gotoEscape = true;\r
1114             } else if (toULength == 1 && source.hasRemaining() && target.hasRemaining()) {\r
1115                 /* continue with a partial double-byte character */\r
1116                 mySourceChar = (toUBytesArray[0] & UConverterConstants.UNSIGNED_BYTE_MASK);\r
1117                 toULength = 0;\r
1118                 targetUniChar = UConverterConstants.missingCharMarker;\r
1119                 // goto getTrailByte\r
1120                 gotoGetTrailByte = true;\r
1121             }\r
1122             \r
1123             while (source.hasRemaining() || gotoGetTrailByte || gotoEscape) {\r
1124                 targetUniChar = UConverterConstants.missingCharMarker;\r
1125                 \r
1126                 if (target.hasRemaining() || gotoEscape) {\r
1127                     if (gotoEscape) {\r
1128                         mySourceChar = ESC_2022; // goto escape label\r
1129                         mySourceCharTemp = mySourceChar;\r
1130                     } else if (gotoGetTrailByte) {\r
1131                         mySourceCharTemp = 0xff; // goto getTrailByte; set mySourceCharTemp to go to default\r
1132                     } else {\r
1133                         mySourceChar = UConverterConstants.UNSIGNED_BYTE_MASK & source.get();\r
1134                         mySourceCharTemp = mySourceChar;\r
1135                     }\r
1136                     \r
1137                     switch (mySourceCharTemp) {\r
1138                     case UConverterConstants.SI:\r
1139                         myConverterData.toU2022State.g = 0;\r
1140                         if (myConverterData.isEmptySegment) {\r
1141                             myConverterData.isEmptySegment = false; /* we are handling it, reset to avoid future spurious errors */\r
1142                             err = CoderResult.malformedForLength(1);\r
1143                             this.toUBytesArray[0] = (byte)mySourceChar;\r
1144                             this.toULength = 1;\r
1145                             return err;\r
1146                         }\r
1147                         continue;\r
1148                         \r
1149                     case UConverterConstants.SO:\r
1150                         if (myConverterData.toU2022State.cs[1] != 0) {\r
1151                             myConverterData.toU2022State.g = 1;\r
1152                             myConverterData.isEmptySegment = true;  /* Begin a new segment, empty so far */\r
1153                             continue;\r
1154                         } else {\r
1155                             /* illegal to have SO before a matching designator */\r
1156                             myConverterData.isEmptySegment = false; /* Handling a different error, reset this to avoid future spurious errs */\r
1157                             break;\r
1158                         }\r
1159                         \r
1160                     case ESC_2022:\r
1161                         if (!gotoEscape) {\r
1162                             source.position(source.position()-1);\r
1163                         }\r
1164 // escape label\r
1165                         gotoEscape = false;\r
1166                         {\r
1167                             int mySourceBefore = source.position();\r
1168                             int toULengthBefore = this.toULength;\r
1169 \r
1170                             err = changeState_2022(this, source, ISO_2022_CN);\r
1171 \r
1172                             /* After SO there must be at least one character before a designator (designator error handled separately) */\r
1173                             if(myConverterData.key == 0 && !err.isError() && myConverterData.isEmptySegment) {\r
1174                                 err = CoderResult.malformedForLength(source.position() - mySourceBefore);\r
1175                                 this.toULength = toULengthBefore + (source.position() - mySourceBefore);\r
1176                             }\r
1177                         }\r
1178 \r
1179                         /* invalid or illegal escape sequence */\r
1180                         if(err.isError()){\r
1181                             myConverterData.isEmptySegment = false; /* Reset to avoid future spurious errors */\r
1182                             return err;\r
1183                         }\r
1184                         continue;\r
1185                         \r
1186                     /*ISO-2022-CN does not use single-byte (C1) SS2 and SS3 */\r
1187                     case CR:\r
1188                         /* falls through */\r
1189                     case LF:\r
1190                         myConverterData.toU2022State.reset();\r
1191                         /* falls through */\r
1192                     default:\r
1193                         /* converter one or two bytes */\r
1194                         myConverterData.isEmptySegment = false;\r
1195                         if (myConverterData.toU2022State.g != 0 || gotoGetTrailByte) {\r
1196                             if (source.hasRemaining() || gotoGetTrailByte) {\r
1197                                 UConverterSharedData cnv;\r
1198                                 byte tempState;\r
1199                                 int tempBufLen;\r
1200                                 boolean leadIsOk, trailIsOk;\r
1201                                 short trailByte;\r
1202 // getTrailByte: label\r
1203                                 gotoGetTrailByte = false; // reset gotoGetTrailByte\r
1204                                 \r
1205                                 trailByte = (short)(source.get(source.position()) & UConverterConstants.UNSIGNED_BYTE_MASK);\r
1206                                 /*\r
1207                                  * Ticket 5691: consistent illegal sequences:\r
1208                                  * - We include at least the first byte in the illegal sequence.\r
1209                                  * - If any of the non-initial bytes could be the start of a character,\r
1210                                  *   we stop the illegal sequence before the first one of those.\r
1211                                  * \r
1212                                  * In ISO-2022 DBCS, if the second byte is in the range 21..7e range or is\r
1213                                  * an ESC/SO/SI, we report only the first byte as the illegal sequence.\r
1214                                  * Otherwise we convert or report the pair of bytes.\r
1215                                  */\r
1216                                 leadIsOk = (short)(UConverterConstants.UNSIGNED_BYTE_MASK & (mySourceChar - 0x21)) <= (0x7e - 0x21);\r
1217                                 trailIsOk = (short)(UConverterConstants.UNSIGNED_BYTE_MASK & (trailByte - 0x21)) <= (0x7e - 0x21);\r
1218                                 if (leadIsOk && trailIsOk) {\r
1219                                     source.get();\r
1220                                     tempState = myConverterData.toU2022State.cs[myConverterData.toU2022State.g];\r
1221                                     if (tempState > CNS_11643_0) {\r
1222                                         cnv = myConverterData.myConverterArray[CNS_11643];\r
1223                                         tempBuf[0] = (byte)(0x80 + (tempState - CNS_11643_0));\r
1224                                         tempBuf[1] = (byte)mySourceChar;\r
1225                                         tempBuf[2] = (byte)trailByte;\r
1226                                         tempBufLen = 3;\r
1227                                     } else {\r
1228                                         cnv = myConverterData.myConverterArray[tempState];\r
1229                                         tempBuf[0] = (byte)mySourceChar;\r
1230                                         tempBuf[1] = (byte)trailByte;\r
1231                                         tempBufLen = 2;\r
1232                                     }\r
1233                                     ByteBuffer tempBuffer = ByteBuffer.wrap(tempBuf);\r
1234                                     tempBuffer.limit(tempBufLen);\r
1235                                     targetUniChar = MBCSSimpleGetNextUChar(cnv, tempBuffer, false);\r
1236                                     mySourceChar = (mySourceChar << 8) | trailByte;\r
1237                                     \r
1238                                 } else if (!(trailIsOk || IS_2022_CONTROL(trailByte))) {\r
1239                                     /* report a pair of illegal bytes if the second byte is not a DBCS starter */\r
1240                                     source.get();\r
1241                                     /* add another bit so that the code below writes 2 bytes in case of error */\r
1242                                     mySourceChar = 0x10000 | (mySourceChar << 8) | trailByte;\r
1243                                 }\r
1244                                 if (myConverterData.toU2022State.g >= 2) {\r
1245                                     /* return from a single-shift state to the previous one */\r
1246                                     myConverterData.toU2022State.g = myConverterData.toU2022State.prevG;\r
1247                                 }\r
1248                             } else {\r
1249                                 toUBytesArray[0] = (byte)mySourceChar;\r
1250                                 toULength = 1;\r
1251                                 // goto endloop;\r
1252                                 return err;\r
1253                             }\r
1254                         } else {\r
1255                             if (mySourceChar <= 0x7f) {\r
1256                                 targetUniChar = (char)mySourceChar;\r
1257                             }\r
1258                         }\r
1259                         break;\r
1260                     }\r
1261                     if ((UConverterConstants.UNSIGNED_INT_MASK&targetUniChar) < (UConverterConstants.UNSIGNED_INT_MASK&(UConverterConstants.missingCharMarker-1))) {\r
1262                         if (offsets != null) {\r
1263                             offsets.array()[target.position()] = source.remaining() - (mySourceChar <= 0xff ? 1 : 2);\r
1264                         }\r
1265                         target.put((char)targetUniChar);\r
1266                     } else if ((UConverterConstants.UNSIGNED_INT_MASK&targetUniChar) > (UConverterConstants.UNSIGNED_INT_MASK&(UConverterConstants.missingCharMarker))) {\r
1267                         /* disassemble the surrogate pair and write to output */\r
1268                         targetUniChar -= 0x0010000;\r
1269                         target.put((char)(0xd800+(char)(targetUniChar>>10)));\r
1270                         if (offsets != null) {\r
1271                             offsets.array()[target.position()-1] = (int)(source.position() - (mySourceChar <= 0xff ? 1 : 2));\r
1272                         }\r
1273                         if (target.hasRemaining()) {\r
1274                             target.put((char)(0xdc00+(char)(targetUniChar&0x3ff)));\r
1275                             if (offsets != null) {\r
1276                                 offsets.array()[target.position()-1] = (int)(source.position() - (mySourceChar <= 0xff ? 1 : 2));\r
1277                             }\r
1278                         } else {\r
1279                             charErrorBufferArray[charErrorBufferLength++] = (char)(0xdc00+(char)(targetUniChar&0x3ff));\r
1280                         }\r
1281                     } else {\r
1282                         /* Call the callback function */ \r
1283                         err = toUnicodeCallback(this, mySourceChar, targetUniChar);\r
1284                         break;\r
1285                     }\r
1286                     \r
1287                 } else {\r
1288                     err = CoderResult.OVERFLOW;\r
1289                     break;\r
1290                 }\r
1291             }\r
1292             \r
1293             return err;\r
1294         }\r
1295         \r
1296     }\r
1297     /************************ ISO-2022-KR ********************/\r
1298     private class CharsetDecoderISO2022KR extends CharsetDecoderICU {\r
1299         public CharsetDecoderISO2022KR(CharsetICU cs) {\r
1300             super(cs);\r
1301         }\r
1302         \r
1303         protected void implReset() {\r
1304             super.implReset();\r
1305             setInitialStateToUnicodeKR();\r
1306             myConverterData.reset();\r
1307         }\r
1308         \r
1309         protected CoderResult decodeLoop(ByteBuffer source, CharBuffer target, IntBuffer offsets, boolean flush) {\r
1310             CoderResult err = CoderResult.UNDERFLOW;\r
1311             int mySourceChar = 0x0000;\r
1312             int targetUniChar = 0x0000;\r
1313             byte[] tempBuf = new byte[2];\r
1314             boolean usingFallback;\r
1315             boolean gotoGetTrailByte = false;\r
1316             boolean gotoEscape = false;\r
1317             \r
1318             if (myConverterData.version == 1) {\r
1319                 return decodeLoopIBM(myConverterData.currentDecoder, source, target, offsets, flush);\r
1320             }\r
1321             \r
1322             /* initialize state */\r
1323             usingFallback = isFallbackUsed();\r
1324             \r
1325             if (myConverterData.key != 0) {\r
1326                 /* continue with a partial escape sequence */\r
1327                 gotoEscape = true;\r
1328             } else if (toULength == 1 && source.hasRemaining() && target.hasRemaining()) {\r
1329                 /* continue with a partial double-byte character */\r
1330                 mySourceChar = (toUBytesArray[0] & UConverterConstants.UNSIGNED_BYTE_MASK);\r
1331                 toULength = 0;\r
1332                 gotoGetTrailByte = true;\r
1333             }\r
1334             \r
1335             while (source.hasRemaining() || gotoGetTrailByte || gotoEscape) {\r
1336                 if (target.hasRemaining() || gotoGetTrailByte || gotoEscape) {\r
1337                     if (!gotoGetTrailByte && !gotoEscape) {\r
1338                         mySourceChar = (char)(source.get() & UConverterConstants.UNSIGNED_BYTE_MASK);\r
1339                     }\r
1340                     \r
1341                     if (!gotoGetTrailByte && !gotoEscape && mySourceChar == UConverterConstants.SI) {\r
1342                         myConverterData.toU2022State.g = 0;\r
1343                         if (myConverterData.isEmptySegment) {\r
1344                             myConverterData.isEmptySegment = false; /* we are handling it, reset to avoid future spurious errors */\r
1345                             err = CoderResult.malformedForLength(1);\r
1346                             this.toUBytesArray[0] = (byte)mySourceChar;\r
1347                             this.toULength = 1;\r
1348                             return err;\r
1349                         }\r
1350                         /* consume the source */\r
1351                         continue;\r
1352                     } else if (!gotoGetTrailByte && !gotoEscape && mySourceChar == UConverterConstants.SO) {\r
1353                         myConverterData.toU2022State.g = 1;\r
1354                         myConverterData.isEmptySegment = true;\r
1355                         /* consume the source */\r
1356                         continue;\r
1357                     } else if (!gotoGetTrailByte && (gotoEscape || mySourceChar == ESC_2022)) {\r
1358                         if (!gotoEscape) {\r
1359                             source.position(source.position()-1);\r
1360                         }\r
1361 // escape label\r
1362                         gotoEscape = false; // reset gotoEscape flag\r
1363                         myConverterData.isEmptySegment = false; /* Any invalid ESC sequences will be detected separately, so just reset this */ \r
1364                         err = changeState_2022(this, source, ISO_2022_KR);\r
1365                         if (err.isError()) {\r
1366                             return err;\r
1367                         }\r
1368                         continue;\r
1369                     }\r
1370                     myConverterData.isEmptySegment = false; /* Any invalid char errors will be detected separately, so just reset this */\r
1371                     if (myConverterData.toU2022State.g == 1 || gotoGetTrailByte) {\r
1372                         if (source.hasRemaining() || gotoGetTrailByte) {\r
1373                             boolean leadIsOk, trailIsOk;\r
1374                             short trailByte;\r
1375 // getTrailByte label\r
1376                             gotoGetTrailByte = false; // reset gotoGetTrailByte flag\r
1377                             \r
1378                             trailByte = (short)(source.get(source.position()) & UConverterConstants.UNSIGNED_BYTE_MASK);\r
1379                             targetUniChar = UConverterConstants.missingCharMarker;\r
1380                             /*\r
1381                              * Ticket 5691: consistent illegal sequences:\r
1382                              * - We include at least the first byte in the illegal sequence.\r
1383                              * - If any of the non-initial bytes could be the start of a character,\r
1384                              *   we stop the illegal sequence before the first one of those.\r
1385                              * \r
1386                              * In ISO-2022 DBCS, if the second byte is in the 21..7e range or is\r
1387                              * an ESC/SO/SI, we report only the first byte as the illegal sequence.\r
1388                              * Otherwise we convert or report the pair of bytes.\r
1389                              */\r
1390                             leadIsOk = (short)(UConverterConstants.UNSIGNED_BYTE_MASK & (mySourceChar - 0x21)) <= (0x7e - 0x21);\r
1391                             trailIsOk = (short)(UConverterConstants.UNSIGNED_BYTE_MASK & (trailByte - 0x21)) <= (0x7e - 0x21);\r
1392                             if (leadIsOk && trailIsOk) {\r
1393                                 source.get();\r
1394                                 tempBuf[0] = (byte)(mySourceChar + 0x80);\r
1395                                 tempBuf[1] = (byte)(trailByte + 0x80);\r
1396                                 targetUniChar = MBCSSimpleGetNextUChar(myConverterData.currentConverter.sharedData, ByteBuffer.wrap(tempBuf), usingFallback);\r
1397                                 mySourceChar = (char)((mySourceChar << 8) | trailByte);\r
1398                             } else if (!(trailIsOk || IS_2022_CONTROL(trailByte))) {\r
1399                                 /* report a pair of illegal bytes if the second byte is not a DBCS starter */\r
1400                                 source.get();\r
1401                                 /* add another bit so that the code below writes 2 bytes in case of error */\r
1402                                 mySourceChar = (char)(0x10000 | (mySourceChar << 8) | trailByte);\r
1403                             }\r
1404                         } else {\r
1405                             toUBytesArray[0] = (byte)mySourceChar;\r
1406                             toULength = 1;\r
1407                             break;\r
1408                         }\r
1409                     } else if (mySourceChar <= 0x7f) {\r
1410                         int savedSourceLimit = source.limit();\r
1411                         int savedSourcePosition = source.position();\r
1412                         source.limit(source.position());\r
1413                         source.position(source.position()-1); \r
1414                         targetUniChar = MBCSSimpleGetNextUChar(myConverterData.currentConverter.sharedData, source, usingFallback);\r
1415                         source.limit(savedSourceLimit);\r
1416                         source.position(savedSourcePosition);\r
1417                     } else {\r
1418                         targetUniChar = 0xffff;\r
1419                     }\r
1420                     if (targetUniChar < 0xfffe) {\r
1421                         target.put((char)targetUniChar);\r
1422                         if (offsets != null) {\r
1423                             offsets.array()[target.position()] = source.position() - (mySourceChar <= 0xff ? 1 : 2);\r
1424                         }\r
1425                     } else {\r
1426                         /* Call the callback function */\r
1427                         err = toUnicodeCallback(this, mySourceChar, targetUniChar);\r
1428                         break;\r
1429                     }\r
1430                 } else {\r
1431                     err = CoderResult.OVERFLOW;\r
1432                     break;\r
1433                 }\r
1434             }\r
1435             \r
1436             return err;\r
1437         }\r
1438         \r
1439         protected CoderResult decodeLoopIBM(CharsetDecoderMBCS cnv, ByteBuffer source, CharBuffer target, IntBuffer offsets, boolean flush) {\r
1440             CoderResult err = CoderResult.UNDERFLOW;\r
1441             int sourceStart;\r
1442             int sourceLimit;\r
1443             int argSource;\r
1444             int argTarget;\r
1445             boolean gotoEscape = false;\r
1446             int oldSourceLimit;\r
1447             \r
1448             /* remember the original start of the input for offsets */\r
1449             sourceStart = argSource = source.position();\r
1450             \r
1451             if (myConverterData.key != 0) {\r
1452                 /* continue with a partial escape sequence */\r
1453                 gotoEscape = true;\r
1454             }\r
1455             \r
1456             while (gotoEscape || (!err.isError() && source.hasRemaining())) {\r
1457                 if (!gotoEscape) {\r
1458                     /* Find the end of the buffer e.g : Next Escape Seq | end of Buffer */\r
1459                     int oldSourcePos = source.position();\r
1460                     sourceLimit = getEndOfBuffer_2022(source);\r
1461                     source.position(oldSourcePos);\r
1462                     if (source.position() != sourceLimit) {\r
1463                         /*\r
1464                          * get the current partial byte sequence\r
1465                          * \r
1466                          * it needs to be moved between the public and the subconverter\r
1467                          * so that the conversion frameword, which only sees the public\r
1468                          * converter, can handle truncated and illegal input etc.\r
1469                          */\r
1470                         if (toULength > 0) {\r
1471                             cnv.toUBytesArray = (byte[])(toUBytesArray.clone());\r
1472                         }\r
1473                         cnv.toULength = toULength;\r
1474                         \r
1475                         /*\r
1476                          * Convert up to the end of the input, or to before the next escape character.\r
1477                          * Does not handle conversion extensions because the preToU[] state etc.\r
1478                          * is not copied.\r
1479                          */\r
1480                         argTarget = target.position();\r
1481                         oldSourceLimit = source.limit(); // save the old source limit change to new one\r
1482                         source.limit(sourceLimit);\r
1483                         err = myConverterData.currentDecoder.cnvMBCSToUnicodeWithOffsets(source, target, offsets, flush);\r
1484                         source.limit(oldSourceLimit); // restore source limit;\r
1485                         if (offsets != null && sourceStart != argSource) {\r
1486                             /* update offsets to base them on the actual start of the input */\r
1487                             int delta = argSource - sourceStart;\r
1488                             while (argTarget < target.position()) {\r
1489                                 int currentOffset = offsets.get();\r
1490                                 offsets.position(offsets.position()-1);\r
1491                                 if (currentOffset >= 0) {\r
1492                                     offsets.put(currentOffset + delta);\r
1493                                     offsets.position(offsets.position()-1);\r
1494                                 }\r
1495                                 offsets.get();\r
1496                                 target.get();\r
1497                             }\r
1498                         }\r
1499                         argSource = source.position();\r
1500                         \r
1501                         /* copy input/error/overflow buffers */\r
1502                         if (cnv.toULength > 0) {\r
1503                             toUBytesArray = (byte[])(cnv.toUBytesArray.clone());\r
1504                         }\r
1505                         toULength = cnv.toULength;\r
1506                         \r
1507                         if (err.isOverflow()) {\r
1508                             if (cnv.charErrorBufferLength > 0) {\r
1509                                 charErrorBufferArray = (char[])(cnv.charErrorBufferArray.clone());\r
1510                             }\r
1511                             charErrorBufferLength = cnv.charErrorBufferLength;\r
1512                             cnv.charErrorBufferLength = 0;\r
1513                         }\r
1514                     }\r
1515                     \r
1516                     if (err.isError() || err.isOverflow() || (source.position() == source.limit())) {\r
1517                         return err;\r
1518                     }\r
1519                 }\r
1520 // escape label\r
1521                 gotoEscape = false;\r
1522                 err = changeState_2022(this, source, ISO_2022_KR);\r
1523             }\r
1524             return err;\r
1525         }\r
1526     }\r
1527     \r
1528     /******************** from unicode **********************/\r
1529     /* preference order of JP charsets */\r
1530     private final static byte []jpCharsetPref = {\r
1531         ASCII,\r
1532         JISX201,\r
1533         ISO8859_1,\r
1534         ISO8859_7,\r
1535         JISX208,\r
1536         JISX212,\r
1537         GB2312,\r
1538         KSC5601,\r
1539         HWKANA_7BIT\r
1540     };\r
1541     /*\r
1542      * The escape sequences must be in order of the enum constants like JISX201 = 3,\r
1543      * not in order of jpCharsetPref[]!\r
1544      */\r
1545     private final static byte [][]escSeqChars = {\r
1546             { 0x1B, 0x28, 0x42},        /* <ESC>(B  ASCII       */\r
1547             { 0x1B, 0x2E, 0x41},        /* <ESC>.A  ISO-8859-1  */\r
1548             { 0x1B, 0x2E, 0x46},        /* <ESC>.F  ISO-8859-7  */\r
1549             { 0x1B, 0x28, 0x4A},        /* <ESC>(J  JISX-201    */\r
1550             { 0x1B, 0x24, 0x42},        /* <ESC>$B  JISX-208    */\r
1551             { 0x1B, 0x24, 0x28, 0x44},  /* <ESC>$(D JISX-212    */\r
1552             { 0x1B, 0x24, 0x41},        /* <ESC>$A  GB2312      */\r
1553             { 0x1B, 0x24, 0x28, 0x43},  /* <ESC>$(C KSC5601     */\r
1554             { 0x1B, 0x28, 0x49}         /* <ESC>(I  HWKANA_7BIT */\r
1555     };\r
1556     /*\r
1557      * JIS X 0208 has fallbacks from Unicode half-width Katakana to full-width (DBCS)\r
1558      * Katakana.\r
1559      * Now that we use a Shift-JIS table for JIS X 0208 we need to hardcode these fallbacks\r
1560      * because Shift-JIS roundtrips half-width Katakana to single bytes.\r
1561      * These were the only fallbacks in ICU's jisx-208.ucm file.\r
1562      */\r
1563     private final static char []hwkana_fb = {\r
1564         0x2123,  /* U+FF61 */\r
1565         0x2156,\r
1566         0x2157,\r
1567         0x2122,\r
1568         0x2126,\r
1569         0x2572,\r
1570         0x2521,\r
1571         0x2523,\r
1572         0x2525,\r
1573         0x2527,\r
1574         0x2529,\r
1575         0x2563,\r
1576         0x2565,\r
1577         0x2567,\r
1578         0x2543,\r
1579         0x213C,  /* U+FF70 */\r
1580         0x2522,\r
1581         0x2524,\r
1582         0x2526,\r
1583         0x2528,\r
1584         0x252A,\r
1585         0x252B,\r
1586         0x252D,\r
1587         0x252F,\r
1588         0x2531,\r
1589         0x2533,\r
1590         0x2535,\r
1591         0x2537,\r
1592         0x2539,\r
1593         0x253B,\r
1594         0x253D,\r
1595         0x253F,  /* U+FF80 */\r
1596         0x2541,\r
1597         0x2544,\r
1598         0x2546,\r
1599         0x2548,\r
1600         0x254A,\r
1601         0x254B,\r
1602         0x254C,\r
1603         0x254D,\r
1604         0x254E,\r
1605         0x254F,\r
1606         0x2552,\r
1607         0x2555,\r
1608         0x2558,\r
1609         0x255B,\r
1610         0x255E,\r
1611         0x255F,  /* U+FF90 */\r
1612         0x2560,\r
1613         0x2561,\r
1614         0x2562,\r
1615         0x2564,\r
1616         0x2566,\r
1617         0x2568,\r
1618         0x2569,\r
1619         0x256A,\r
1620         0x256B,\r
1621         0x256C,\r
1622         0x256D,\r
1623         0x256F,\r
1624         0x2573,\r
1625         0x212B,\r
1626         0x212C   /* U+FF9F */\r
1627     };\r
1628     \r
1629     protected byte [][]fromUSubstitutionChar = new byte[][]{ { (byte)0x1A }, { (byte)0x2F, (byte)0x7E} };\r
1630     /****************************ISO-2022-JP************************************/\r
1631     private class CharsetEncoderISO2022JP extends CharsetEncoderICU {\r
1632         public CharsetEncoderISO2022JP(CharsetICU cs) {\r
1633             super(cs, fromUSubstitutionChar[0]);\r
1634         }\r
1635         \r
1636         protected void implReset() {\r
1637             super.implReset();\r
1638             myConverterData.reset();\r
1639         }\r
1640         /* Map Unicode to 00..7F according to JIS X 0201. Return U+FFFE if unmappable. */\r
1641         private int jisx201FromU(int value) {\r
1642             if (value <= 0x7f) {\r
1643                 if (value != 0x5c && value != 0x7e) {\r
1644                     return value;\r
1645                 }\r
1646             } else if (value == 0xa5) {\r
1647                 return 0x5c;\r
1648             } else if (value == 0x203e) {\r
1649                 return 0x7e;\r
1650             }\r
1651             return (int)(UConverterConstants.UNSIGNED_INT_MASK & 0xfffe);\r
1652         }\r
1653         \r
1654         /*\r
1655          * Take a valid Shift-JIS byte pair, check that it is in the range corresponding\r
1656          * to JIS X 0208, and convert it to a pair of 21..7E bytes.\r
1657          * Return 0 if the byte pair is out of range.\r
1658          */\r
1659         private int _2022FromSJIS(int value) {\r
1660             short trail;\r
1661             \r
1662             if (value > 0xEFFC) {\r
1663                 return 0; /* beyond JIS X 0208 */\r
1664             }\r
1665             \r
1666             trail = (short)(value & UConverterConstants.UNSIGNED_BYTE_MASK);\r
1667             \r
1668             value &= 0xff00; /* lead byte */\r
1669             if (value <= 0x9f00) {\r
1670                 value -= 0x7000;\r
1671             } else { /* 0xe000 <= value <= 0xef00 */\r
1672                 value -= 0xb000;\r
1673             }\r
1674             \r
1675             value <<= 1;\r
1676             \r
1677             if (trail <= 0x9e) {\r
1678                 value -= 0x100;\r
1679                 if (trail <= 0x7e) {\r
1680                     value |= ((trail - 0x1f) & UConverterConstants.UNSIGNED_BYTE_MASK);\r
1681                 } else {\r
1682                     value |= ((trail - 0x20) & UConverterConstants.UNSIGNED_BYTE_MASK);\r
1683                 }\r
1684             } else { /* trail <= 0xfc */\r
1685                 value |= ((trail - 0x7e) & UConverterConstants.UNSIGNED_BYTE_MASK);\r
1686             }\r
1687             \r
1688             return value;\r
1689         }\r
1690         /* This overrides the cbFromUWriteSub method in CharsetEncoderICU */\r
1691         CoderResult cbFromUWriteSub (CharsetEncoderICU encoder, \r
1692                 CharBuffer source, ByteBuffer target, IntBuffer offsets){\r
1693                 CoderResult err = CoderResult.UNDERFLOW;\r
1694                 byte[] buffer = new byte[8];\r
1695                 int i = 0;\r
1696                 byte[] subchar;\r
1697                 subchar = encoder.replacement();\r
1698                 \r
1699                 byte cs;\r
1700                 if (myConverterData.fromU2022State.g == 1) {\r
1701                     /* JIS7: switch from G1 to G0 */\r
1702                     myConverterData.fromU2022State.g = 0;\r
1703                     buffer[i++] = UConverterConstants.SI;\r
1704                 }\r
1705                 cs = myConverterData.fromU2022State.cs[0];\r
1706                 \r
1707                 if (cs != ASCII && cs != JISX201) {\r
1708                     /* not in ASCII or JIS X 0201: switch to ASCII */\r
1709                     myConverterData.fromU2022State.cs[0] = ASCII;\r
1710                     buffer[i++] = 0x1B;\r
1711                     buffer[i++] = 0x28;\r
1712                     buffer[i++] = 0x42;\r
1713                 }\r
1714                 \r
1715                 buffer[i++] = subchar[0];\r
1716                 \r
1717                 err = CharsetEncoderICU.fromUWriteBytes(this, buffer, 0, i, target, offsets, source.position() - 1);\r
1718 \r
1719                 return err;\r
1720             }\r
1721         \r
1722         protected CoderResult encodeLoop(CharBuffer source, ByteBuffer target, IntBuffer offsets, boolean flush) {\r
1723             CoderResult err = CoderResult.UNDERFLOW;\r
1724             int sourceChar;\r
1725             byte cs, g;\r
1726             int choiceCount;\r
1727             int len, outLen;\r
1728             byte[] choices = new byte[10];\r
1729             int targetValue = 0;\r
1730             boolean usingFallback;\r
1731             byte[] buffer = new byte[8];\r
1732             boolean getTrail = false; // use for getTrail label\r
1733             int oldSourcePos; // for proper error handling\r
1734             \r
1735             choiceCount = 0;\r
1736             \r
1737             /* check if the last codepoint of previous buffer was a lead surrogate */\r
1738             if ((sourceChar = fromUChar32) != 0 && target.hasRemaining()) {\r
1739                 getTrail = true;\r
1740             }\r
1741             \r
1742             while (getTrail || source.hasRemaining()) {\r
1743                 if (getTrail || target.hasRemaining()) {\r
1744                     oldSourcePos = source.position();\r
1745                     if (!getTrail) { /* skip if going to getTrail label */\r
1746                         sourceChar = source.get();\r
1747                     }\r
1748                     /* check if the char is a First surrogate */\r
1749                     if (getTrail || UTF16.isSurrogate((char)sourceChar)) {\r
1750                         if (getTrail || UTF16.isLeadSurrogate((char)sourceChar)) {\r
1751 // getTrail:                 \r
1752                             if (getTrail) {\r
1753                                 getTrail = false;\r
1754                             }\r
1755                             /* look ahead to find the trail surrogate */\r
1756                             if (source.hasRemaining()) {\r
1757                                 /* test the following code unit */\r
1758                                 char trail = source.get();\r
1759                                 /* go back to the previous position */\r
1760                                 source.position(source.position()-1);\r
1761                                 if (UTF16.isTrailSurrogate(trail)) {\r
1762                                     source.get();\r
1763                                     sourceChar = UCharacter.getCodePoint((char)sourceChar, trail);\r
1764                                     fromUChar32 = 0x00;\r
1765                                     /* convert this supplementary code point */\r
1766                                     /* exit this condition tree */\r
1767                                 } else {\r
1768                                     /* this is an unmatched lead code unit (1st surrogate) */\r
1769                                     /* callback(illegal) */\r
1770                                     err = CoderResult.malformedForLength(1);\r
1771                                     fromUChar32 = sourceChar;\r
1772                                     break;\r
1773                                 }\r
1774                             } else {\r
1775                                 /* no more input */\r
1776                                 fromUChar32 = sourceChar;\r
1777                                 break;\r
1778                             }\r
1779                         } else {\r
1780                             /* this is an unmatched trail code unit (2nd surrogate) */\r
1781                             /* callback(illegal) */\r
1782                             err = CoderResult.malformedForLength(1);\r
1783                             fromUChar32 = sourceChar;\r
1784                             break;\r
1785                         }\r
1786                     }\r
1787                     \r
1788                     /* do not convert SO/SI/ESC */\r
1789                     if (IS_2022_CONTROL(sourceChar)) {\r
1790                         /* callback(illegal) */\r
1791                         err = CoderResult.malformedForLength(1);\r
1792                         fromUChar32 = sourceChar;\r
1793                         break;\r
1794                     }\r
1795                     \r
1796                     /* do the conversion */\r
1797                     \r
1798                     if (choiceCount == 0) {\r
1799                         char csm;\r
1800                         /*\r
1801                          * The csm variable keeps track of which charsets are allowed\r
1802                          * and not used yet while building the choices[].\r
1803                          */\r
1804                         csm = (char)jpCharsetMasks[myConverterData.version];\r
1805                         choiceCount = 0;\r
1806                         \r
1807                         /* JIS7/8: try single-byte half-width Katakana before JISX208 */\r
1808                         if (myConverterData.version == 3 || myConverterData.version == 4) {\r
1809                             choices[choiceCount++] = HWKANA_7BIT;\r
1810                         }\r
1811                         /* Do not try single-bit half-width Katakana for other versions. */\r
1812                         csm &= ~CSM(HWKANA_7BIT);\r
1813                         \r
1814                         /* try the current G0 charset */\r
1815                         choices[choiceCount++] = cs = myConverterData.fromU2022State.cs[0];\r
1816                         csm &= ~CSM(cs);\r
1817                         \r
1818                         /* try the current G2 charset */\r
1819                         if ((cs = myConverterData.fromU2022State.cs[2]) != 0) {\r
1820                             choices[choiceCount++] = cs;\r
1821                             csm &= ~CSM(cs);\r
1822                         }\r
1823                         \r
1824                         /* try all the other charsets */\r
1825                         for (int i = 0; i < jpCharsetPref.length; i++) {\r
1826                             cs = jpCharsetPref[i];\r
1827                             if ((CSM(cs) & csm) != 0) {\r
1828                                 choices[choiceCount++] = cs;\r
1829                                 csm &= ~CSM(cs);\r
1830                             }\r
1831                         }\r
1832                     }\r
1833                     \r
1834                     cs = g = 0;\r
1835                     /* \r
1836                      * len==0:  no mapping found yet\r
1837                      * len<0:   found a fallback result:  continue looking for a roundtrip but no further fallbacks\r
1838                      * len>0:   found a roundtrip result, done\r
1839                      */\r
1840                     len = 0;\r
1841                     /*\r
1842                      * We will turn off usingFallBack after finding a fallback,\r
1843                      * but we still get fallbacks from PUA code points as usual.\r
1844                      * Therefore, we will also need to check that we don't overwrite\r
1845                      * an early fallback with a later one.\r
1846                      */\r
1847                     usingFallback = useFallback;\r
1848                     \r
1849                     for (int i = 0; i < choiceCount && len <= 0; i++) {\r
1850                         int[] value = new int[1];\r
1851                         int len2;\r
1852                         byte cs0 = choices[i];\r
1853                         switch (cs0) {\r
1854                         case ASCII:\r
1855                             if (sourceChar <= 0x7f) {\r
1856                                 targetValue = sourceChar;\r
1857                                 len = 1;\r
1858                                 cs = cs0;\r
1859                                 g = 0;\r
1860                             }\r
1861                             break;\r
1862                         case ISO8859_1:\r
1863                             if (GR96_START <= sourceChar && sourceChar <= GR96_END) {\r
1864                                 targetValue = sourceChar - 0x80;\r
1865                                 len = 1;\r
1866                                 cs = cs0;\r
1867                                 g = 2;\r
1868                             }\r
1869                             break;\r
1870                         case HWKANA_7BIT:\r
1871                             if (sourceChar <= HWKANA_END && sourceChar >= HWKANA_START) {\r
1872                                 if (myConverterData.version == 3) {\r
1873                                     /* JIS7: use G1 (SO) */\r
1874                                     /* Shift U+FF61..U+FF9F to bytes 21..5F. */\r
1875                                     targetValue = (int)(UConverterConstants.UNSIGNED_INT_MASK & (sourceChar - (HWKANA_START - 0x21)));\r
1876                                     len = 1;\r
1877                                     myConverterData.fromU2022State.cs[1] = cs = cs0; /* do not output an escape sequence */\r
1878                                     g = 1;\r
1879                                 } else if (myConverterData.version == 4) {\r
1880                                     /* JIS8: use 8-bit bytes with any single-byte charset, see escape sequence output below */\r
1881                                     /* Shift U+FF61..U+FF9F to bytes A1..DF. */\r
1882                                     targetValue = (int)(UConverterConstants.UNSIGNED_INT_MASK & (sourceChar - (HWKANA_START - 0xa1)));\r
1883                                     len = 1;\r
1884                                     \r
1885                                     cs = myConverterData.fromU2022State.cs[0];\r
1886                                     if (IS_JP_DBCS(cs)) {\r
1887                                         /* switch from a DBCS charset to JISX201 */\r
1888                                         cs = JISX201;\r
1889                                     }\r
1890                                     /* else stay in the current G0 charset */\r
1891                                     g = 0;\r
1892                                 }\r
1893                                 /* else do not use HWKANA_7BIT with other versions */\r
1894                             }\r
1895                             break;\r
1896                         case JISX201:\r
1897                             /* G0 SBCS */\r
1898                             value[0] = jisx201FromU(sourceChar);\r
1899                             if (value[0] <= 0x7f) {\r
1900                                 targetValue = value[0];\r
1901                                 len = 1;\r
1902                                 cs = cs0;\r
1903                                 g = 0;\r
1904                                 usingFallback = false;\r
1905                             }\r
1906                             break;\r
1907                         case JISX208:\r
1908                             /* G0 DBCS from JIS table */\r
1909                             myConverterData.currentConverter.sharedData = myConverterData.myConverterArray[cs0];\r
1910                             myConverterData.currentConverter.sharedData.mbcs.outputType = CharsetMBCS.MBCS_OUTPUT_2;\r
1911                             len2 = myConverterData.currentEncoder.fromUChar32(sourceChar, value, usingFallback);\r
1912                             //len2 = MBCSFromUChar32_ISO2022(myConverterData.myConverterArray[cs0], sourceChar, value, usingFallback, CharsetMBCS.MBCS_OUTPUT_2);\r
1913                             if (len2 == 2 || (len2 == -2 && len == 0)) { /* only accept DBCS: abs(len) == 2 */\r
1914                                 value[0] = _2022FromSJIS(value[0]);\r
1915                                 if (value[0] != 0) {\r
1916                                     targetValue = value[0];\r
1917                                     len = len2;\r
1918                                     cs = cs0;\r
1919                                     g = 0;\r
1920                                     usingFallback = false;\r
1921                                 }\r
1922                             } else if (len == 0 && usingFallback  && sourceChar <= HWKANA_END && sourceChar >= HWKANA_START) {\r
1923                                 targetValue = hwkana_fb[sourceChar - HWKANA_START];\r
1924                                 len = -2;\r
1925                                 cs = cs0;\r
1926                                 g = 0;\r
1927                                 usingFallback = false;\r
1928                             }\r
1929                             break;\r
1930                         case ISO8859_7:\r
1931                             /* G0 SBCS forced to 7-bit output */\r
1932                             len2 = MBCSSingleFromUChar32(myConverterData.myConverterArray[cs0], sourceChar, value, usingFallback);\r
1933                             if (len2 != 0 && !(len2 < 0 && len != 0) && GR96_START <= value[0] && value[0] <= GR96_END) {\r
1934                                 targetValue = value[0] - 0x80;\r
1935                                 len = len2;\r
1936                                 cs = cs0;\r
1937                                 g = 2;\r
1938                                 usingFallback = false;\r
1939                             }\r
1940                             break;\r
1941                         default :\r
1942                             /* G0 DBCS */\r
1943                             myConverterData.currentConverter.sharedData = myConverterData.myConverterArray[cs0];\r
1944                             myConverterData.currentConverter.sharedData.mbcs.outputType = CharsetMBCS.MBCS_OUTPUT_2;\r
1945                             len2 = myConverterData.currentEncoder.fromUChar32(sourceChar, value, usingFallback);\r
1946                             //len2 = MBCSFromUChar32_ISO2022(myConverterData.myConverterArray[cs0], sourceChar, value, usingFallback, CharsetMBCS.MBCS_OUTPUT_2);\r
1947                             if (len2 == 2 || (len2 == -2 && len == 0)) { /* only accept DBCS: abs(len)==2 */\r
1948                                 if (cs0 == KSC5601) {\r
1949                                     /*\r
1950                                      * Check for valid bytes for the encoding scheme.\r
1951                                      * This is necessary because the sub-converter (windows-949)\r
1952                                      * has a broader encoding scheme than is valid for 2022.\r
1953                                      */\r
1954                                     value[0] = _2022FromGR94DBCS(value[0]);\r
1955                                     if (value[0] == 0) {\r
1956                                         break;\r
1957                                     }\r
1958                                 }\r
1959                                 targetValue = value[0];\r
1960                                 len = len2;\r
1961                                 cs = cs0;\r
1962                                 g = 0;\r
1963                                 usingFallback = false;\r
1964                             }\r
1965                             break;\r
1966                         }\r
1967                     }\r
1968                     \r
1969                     if (len != 0) {\r
1970                         if (len < 0) {\r
1971                             len = -len; /* fallback */\r
1972                         }\r
1973                         outLen = 0;\r
1974                         \r
1975                         /* write SI if necessary (only for JIS7 */\r
1976                         if (myConverterData.fromU2022State.g == 1 && g == 0) {\r
1977                             buffer[outLen++] = UConverterConstants.SI;\r
1978                             myConverterData.fromU2022State.g = 0;\r
1979                         }\r
1980                         \r
1981                         /* write the designation sequence if necessary */\r
1982                         if (cs != myConverterData.fromU2022State.cs[g]) {\r
1983                             for (int i = 0; i < escSeqChars[cs].length; i++) {\r
1984                                 buffer[outLen++] = escSeqChars[cs][i];\r
1985                             }\r
1986                             myConverterData.fromU2022State.cs[g] = cs;\r
1987                             \r
1988                             /* invalidate the choices[] */\r
1989                             choiceCount = 0;\r
1990                         }\r
1991                         \r
1992                         /* write the shift sequence if necessary */\r
1993                         if (g != myConverterData.fromU2022State.g) {\r
1994                             switch (g) {\r
1995                             /* case 0 handled before writing escapes */\r
1996                             case 1:\r
1997                                 buffer[outLen++] = UConverterConstants.SO;\r
1998                                 myConverterData.fromU2022State.g = 1;\r
1999                                 break;\r
2000                             default : /* case 2 */\r
2001                                 buffer[outLen++] = 0x1b;\r
2002                                 buffer[outLen++] = 0x4e;\r
2003                                 break;\r
2004                             /* case 3: no SS3 in ISO-2022-JP-x */\r
2005                             }\r
2006                         }\r
2007                         \r
2008                         /* write the output bytes */\r
2009                         if (len == 1) {\r
2010                             buffer[outLen++] = (byte)targetValue;\r
2011                         } else { /* len == 2 */\r
2012                             buffer[outLen++] = (byte)(targetValue >> 8);\r
2013                             buffer[outLen++] = (byte)targetValue;\r
2014                         }\r
2015                     }else {\r
2016                         /*\r
2017                          * if we cannot find the character after checking all codepages\r
2018                          * then this is an error.\r
2019                          */\r
2020                         err = CoderResult.unmappableForLength(source.position()-oldSourcePos);\r
2021                         fromUChar32 = sourceChar;\r
2022                         break;\r
2023                     }\r
2024                     \r
2025                     if (sourceChar == CR || sourceChar == LF) {\r
2026                         /* reset the G2 state at the end of a line (conversion got use into ASCII or JISX201 already) */\r
2027                         myConverterData.fromU2022State.cs[2] = 0;\r
2028                         choiceCount = 0;\r
2029                     }\r
2030                     \r
2031                     /* output outLen>0 bytes in buffer[] */\r
2032                     if (outLen == 1) {\r
2033                         target.put(buffer[0]);\r
2034                         if (offsets != null) {\r
2035                             offsets.put(source.remaining() - 1); /* -1 known to be ASCII */\r
2036                         }\r
2037                     } else if (outLen == 2 && (target.position() + 2) <= target.limit()) {\r
2038                         target.put(buffer[0]);\r
2039                         target.put(buffer[1]);\r
2040                         if (offsets != null) {\r
2041                             int sourceIndex = source.position() - 1;\r
2042                             offsets.put(sourceIndex);\r
2043                             offsets.put(sourceIndex);\r
2044                         }\r
2045                     } else {\r
2046                         err = CharsetEncoderICU.fromUWriteBytes(this, buffer, 0, outLen, target, offsets, source.position()-1);\r
2047                     }\r
2048                 } else {\r
2049                     err = CoderResult.OVERFLOW;\r
2050                     break;\r
2051                 }\r
2052             }\r
2053             \r
2054             /*\r
2055              * the end of the input stream and detection of truncated input\r
2056              * are handled by the framework, but for ISO-2022-JP conversion\r
2057              * we need to be in ASCII mode at the very end\r
2058              * \r
2059              * conditions:\r
2060              *  successful\r
2061              *  in SO mode or not in ASCII mode\r
2062              *  end of input and no truncated input\r
2063              */\r
2064             if (!err.isError() &&\r
2065                     (myConverterData.fromU2022State.g != 0 || myConverterData.fromU2022State.cs[0] != ASCII) &&\r
2066                     flush && !source.hasRemaining() && fromUChar32 == 0) {\r
2067                 int sourceIndex;\r
2068                 \r
2069                 outLen = 0;\r
2070                 \r
2071                 if (myConverterData.fromU2022State.g != 0) {\r
2072                     buffer[outLen++] = UConverterConstants.SI;\r
2073                     myConverterData.fromU2022State.g = 0;\r
2074                 }\r
2075                 \r
2076                 if (myConverterData.fromU2022State.cs[0] != ASCII) {\r
2077                     for (int i = 0; i < escSeqChars[ASCII].length; i++) {\r
2078                         buffer[outLen++] = escSeqChars[ASCII][i];\r
2079                     }\r
2080                     myConverterData.fromU2022State.cs[0] = ASCII;\r
2081                 }\r
2082                 \r
2083                 /* get the source index of the last input character */\r
2084                 sourceIndex = source.position();\r
2085                 if (sourceIndex > 0) {\r
2086                     --sourceIndex;\r
2087                     if (UTF16.isTrailSurrogate(source.get(sourceIndex)) &&\r
2088                             (sourceIndex == 0 || UTF16.isLeadSurrogate(source.get(sourceIndex-1)))) {\r
2089                         --sourceIndex;\r
2090                     }\r
2091                 } else {\r
2092                     sourceIndex = -1;\r
2093                 }\r
2094                 \r
2095                 err = CharsetEncoderICU.fromUWriteBytes(this, buffer, 0, outLen, target, offsets, sourceIndex);\r
2096             }\r
2097             return err;\r
2098         }\r
2099     }\r
2100     /****************************ISO-2022-CN************************************/\r
2101     /*\r
2102      * Rules for ISO-2022-CN Encoding:\r
2103      * i)   The designator sequence must appear once on a line before any instance\r
2104      *      of the character set it designates.\r
2105      * ii)  If two lines contain characters from the same character set, both lines\r
2106      *      must include the designator sequence.\r
2107      * iii) Once the designator sequence is known, a shifting sequence has to be found\r
2108      *      to invoke the shifting\r
2109      * iv)  All lines start in ASCII and end in ASCII.\r
2110      * v)   Four shifting sequences are employed for this purpose:\r
2111      *      Sequence    ASCII Eq    Charsets\r
2112      *      ---------   ---------   --------\r
2113      *      SI          <SI>        US-ASCII\r
2114      *      SO          <SO>        CNS-11643-1992 Plane 1, GB2312, ISO-IR-165\r
2115      *      SS2         <ESC>N      CNS-11643-1992 Plane 2\r
2116      *      SS3         <ESC>O      CNS-11643-1992 Planes 3-7\r
2117      * vi)  \r
2118      *      SOdesignator    : ESC "$" ")" finalchar_for_SO\r
2119      *      SS2designator   : ESC "$" "*" finalchar_for_SS2\r
2120      *      SS3designator   : ESC "$" "+" finalchar_for_SS3\r
2121      *      \r
2122      *      ESC $ ) A       Indicates the bytes following SO are Chinese\r
2123      *       characters as defined in GB 2312-80, until\r
2124      *       another SOdesignation appears\r
2125      *      \r
2126      *      ESC $ ) E       Indicates the bytes following SO are as defined\r
2127      *       in ISO-IR-165 (for details, see section 2.1),\r
2128      *       until another SOdesignation appears\r
2129      *       \r
2130      *      ESC $ ) G       Indicates the bytes following SO are as defined\r
2131      *       in CNS 11643-plane-1, until another SOdesignation appears\r
2132      *       \r
2133      *      ESC $ * H       Indicates the two bytes immediately following\r
2134      *       SS2 is a Chinese character as defined in CNS\r
2135      *       11643-plane-2, until another SS2designation\r
2136      *       appears\r
2137      *       (Meaning <ESC>N must precede every 2-byte sequence.)\r
2138      *      \r
2139      *      ESC $ + I       Indicates the immediate two bytes following SS3\r
2140      *       is a Chinese character as defined in CNS\r
2141      *       11643-plane-3, until another SS3designation\r
2142      *       appears\r
2143      *       (Meaning <ESC>O must precede every 2-byte sequence.)\r
2144      *      \r
2145      *      ESC $ + J       Indicates the immediate two bytes following SS3\r
2146      *       is a Chinese character as defined in CNS\r
2147      *       11643-plane-4, until another SS3designation\r
2148      *       appears\r
2149      *       (In English: <ESC>O must precede every 2-byte sequence.)\r
2150      *      \r
2151      *      ESC $ + K       Indicates the immediate two bytes following SS3\r
2152      *       is a Chinese character as defined in CNS\r
2153      *       11643-plane-5, until another SS3designation\r
2154      *       appears\r
2155      *       \r
2156      *      ESC $ + L       Indicates the immediate two bytes following SS3\r
2157      *       is a Chinese character as defined in CNS\r
2158      *       11643-plane-6, until another SS3designation\r
2159      *       appears\r
2160      *       \r
2161      *      ESC $ + M       Indicates the immediate two bytes following SS3\r
2162      *       is a Chinese character as defined in CNS\r
2163      *       11643-plane-7, until another SS3designation\r
2164      *       appears\r
2165      *       \r
2166      *      As in ISO-2022-CN, each line starts in ASCII, and ends in ASCII, and\r
2167      *      has its own designation information before any Chinese characters\r
2168      *      appear\r
2169      */\r
2170     \r
2171     /* The following are defined this way to make strings truely readonly */\r
2172     private final static byte[] GB_2312_80_STR = { 0x1B, 0x24, 0x29, 0x41 };\r
2173     private final static byte[] ISO_IR_165_STR = { 0x1B, 0x24, 0x29, 0x45 };\r
2174     private final static byte[] CNS_11643_1992_Plane_1_STR = { 0x1B, 0x24, 0x29, 0x47 };\r
2175     private final static byte[] CNS_11643_1992_Plane_2_STR = { 0x1B, 0x24, 0x2A, 0x48 };\r
2176     private final static byte[] CNS_11643_1992_Plane_3_STR = { 0x1B, 0x24, 0x2B, 0x49 };\r
2177     private final static byte[] CNS_11643_1992_Plane_4_STR = { 0x1B, 0x24, 0x2B, 0x4A };\r
2178     private final static byte[] CNS_11643_1992_Plane_5_STR = { 0x1B, 0x24, 0x2B, 0x4B };\r
2179     private final static byte[] CNS_11643_1992_Plane_6_STR = { 0x1B, 0x24, 0x2B, 0x4C };\r
2180     private final static byte[] CNS_11643_1992_Plane_7_STR = { 0x1B, 0x24, 0x2B, 0x4D };\r
2181     \r
2182     /************************ ISO2022-CN Data *****************************/\r
2183     private final static byte[][] escSeqCharsCN = {\r
2184         SHIFT_IN_STR,\r
2185         GB_2312_80_STR,\r
2186         ISO_IR_165_STR,\r
2187         CNS_11643_1992_Plane_1_STR,\r
2188         CNS_11643_1992_Plane_2_STR,\r
2189         CNS_11643_1992_Plane_3_STR,\r
2190         CNS_11643_1992_Plane_4_STR,\r
2191         CNS_11643_1992_Plane_5_STR,\r
2192         CNS_11643_1992_Plane_6_STR,\r
2193         CNS_11643_1992_Plane_7_STR,\r
2194     };\r
2195     \r
2196     private class CharsetEncoderISO2022CN extends CharsetEncoderICU {\r
2197         public CharsetEncoderISO2022CN(CharsetICU cs) { /* creates the ISO-2022-CN encoder for charset cs */\r
2198             super(cs, fromUSubstitutionChar[0]); /* second argument is presumably the default replacement bytes - confirm against CharsetEncoderICU */\r
2199         }\r
2200         \r
2201         protected void implReset() { /* resets this encoder: superclass state first, then the converter data */\r
2202             super.implReset();\r
2203             myConverterData.reset(); /* myConverterData is a field of the enclosing CharsetISO2022, not of this encoder */\r
2204         }\r
2205         \r
2206         /* This overrides the cbFromUWriteSub method in CharsetEncoderICU */\r
2207         CoderResult cbFromUWriteSub (CharsetEncoderICU encoder, \r
2208             CharBuffer source, ByteBuffer target, IntBuffer offsets){\r
2209             CoderResult err = CoderResult.UNDERFLOW;\r
2210             byte[] buffer = new byte[8];\r
2211             int i = 0;\r
2212             byte[] subchar;\r
2213             subchar = encoder.replacement();\r
2214             \r
2215             if (myConverterData.fromU2022State.g != 0) {\r
2216                 /* not in ASCII mode: switch to ASCII */\r
2217                 myConverterData.fromU2022State.g = 0;\r
2218                 buffer[i++] = UConverterConstants.SI;\r
2219             }\r
2220             buffer[i++] = subchar[0];\r
2221             \r
2222             err = CharsetEncoderICU.fromUWriteBytes(this, buffer, 0, i, target, offsets, source.position() - 1);\r
2223 \r
2224             return err;\r
2225         }\r
2226         \r
2227         protected CoderResult encodeLoop(CharBuffer source, ByteBuffer target, IntBuffer offsets, boolean flush) {\r
2228             CoderResult err = CoderResult.UNDERFLOW;\r
2229             int sourceChar;\r
2230             byte[] buffer = new byte[8];\r
2231             int len;\r
2232             byte[] choices = new byte[3];\r
2233             int choiceCount;\r
2234             int targetValue = 0;\r
2235             boolean usingFallback;\r
2236             boolean gotoGetTrail = false;\r
2237             int oldSourcePos; // For proper error handling\r
2238             \r
2239             choiceCount = 0;\r
2240             \r
2241             /* check if the last codepoint of previous buffer was a lead surrogate */\r
2242             if ((sourceChar = fromUChar32) != 0 && target.hasRemaining()) {\r
2243                 // goto getTrail label\r
2244                 gotoGetTrail = true; \r
2245             }\r
2246             \r
2247             while (source.hasRemaining() || gotoGetTrail) {\r
2248                 if (target.hasRemaining() || gotoGetTrail) {\r
2249                     oldSourcePos = source.position();\r
2250                     if (!gotoGetTrail) {\r
2251                         sourceChar = source.get();\r
2252                     }\r
2253                     /* check if the char is a First surrogate */\r
2254                     if (UTF16.isSurrogate((char)sourceChar) || gotoGetTrail) {\r
2255                         if (UTF16.isLeadSurrogate((char)sourceChar) || gotoGetTrail) {\r
2256 // getTrail label\r
2257                             /* reset gotoGetTrail flag*/\r
2258                              gotoGetTrail = false;\r
2259                             \r
2260                             /* look ahead to find the trail surrogate */\r
2261                             if (source.hasRemaining()) {\r
2262                                 /* test the following code unit */\r
2263                                 char trail = source.get();\r
2264                                 source.position(source.position()-1);\r
2265                                 if (UTF16.isTrailSurrogate(trail)) {\r
2266                                     source.get();\r
2267                                     sourceChar = UCharacter.getCodePoint((char)sourceChar, trail);\r
2268                                     fromUChar32 = 0x00;\r
2269                                     /* convert this supplementary code point */\r
2270                                     /* exit this condition tree */\r
2271                                 } else {\r
2272                                     /* this is an unmatched lead code unit (1st surrogate) */\r
2273                                     /* callback(illegal) */\r
2274                                     err = CoderResult.malformedForLength(1);\r
2275                                     fromUChar32 = sourceChar;\r
2276                                     break;\r
2277                                 }\r
2278                             } else {\r
2279                                 /* no more input */\r
2280                                 fromUChar32 = sourceChar;\r
2281                                 break;\r
2282                             }\r
2283                         } else {\r
2284                             /* this is an unmatched trail code unit (2nd surrogate) */\r
2285                             /* callback(illegal) */\r
2286                             err = CoderResult.malformedForLength(1);\r
2287                             fromUChar32 = sourceChar;\r
2288                             break;\r
2289                         }\r
2290                     }\r
2291                     \r
2292                     /* do the conversion */\r
2293                     if (sourceChar <= 0x007f) {\r
2294                         /* do not convert SO/SI/ESC */\r
2295                         if (IS_2022_CONTROL(sourceChar)) {\r
2296                             /* callback(illegal) */\r
2297                             err = CoderResult.malformedForLength(1);\r
2298                             fromUChar32 = sourceChar;\r
2299                             break;\r
2300                         }\r
2301                         \r
2302                         /* US-ASCII */\r
2303                         if (myConverterData.fromU2022State.g == 0) {\r
2304                             buffer[0] = (byte)sourceChar;\r
2305                             len = 1;\r
2306                         } else {\r
2307                             buffer[0] = UConverterConstants.SI;\r
2308                             buffer[1] = (byte)sourceChar;\r
2309                             len = 2;\r
2310                             myConverterData.fromU2022State.g = 0;\r
2311                             choiceCount = 0;\r
2312                         }\r
2313                         \r
2314                         if (sourceChar == CR || sourceChar == LF) {\r
2315                             /* reset the state at the end of a line */\r
2316                             myConverterData.fromU2022State.reset();\r
2317                             choiceCount = 0;\r
2318                         }\r
2319                     } else {\r
2320                         /* convert U+0080..U+10ffff */\r
2321                         int i;\r
2322                         byte cs, g;\r
2323                         \r
2324                         if (choiceCount == 0) {\r
2325                             /* try the current SO/G1 converter first */\r
2326                             choices[0] = myConverterData.fromU2022State.cs[1];\r
2327                             \r
2328                             /* default to GB2312_1 if none is designated yet */\r
2329                             if (choices[0] == 0) {\r
2330                                 choices[0] = GB2312_1;\r
2331                             }\r
2332                             if (myConverterData.version == 0) {\r
2333                                 /* ISO-2022-CN */\r
2334                                 /* try other SO/G1 converter; a CNS_11643_1 lookup may result in any plane */\r
2335                                 if (choices[0] == GB2312_1) {\r
2336                                     choices[1] = CNS_11643_1;\r
2337                                 } else {\r
2338                                     choices[1] = GB2312_1;\r
2339                                 }\r
2340                                 \r
2341                                 choiceCount = 2;\r
2342                             } else {\r
2343                                 /* ISO-2022-CN-EXT */\r
2344                                 \r
2345                                 /* try one of the other converters */\r
2346                                 switch (choices[0]) {\r
2347                                 case GB2312_1:\r
2348                                     choices[1] = CNS_11643_1;\r
2349                                     choices[2] = ISO_IR_165;\r
2350                                     break;\r
2351                                 case ISO_IR_165:\r
2352                                     choices[1] = GB2312_1;\r
2353                                     choices[2] = CNS_11643_1;\r
2354                                     break;\r
2355                                 default :\r
2356                                     choices[1] = GB2312_1;\r
2357                                     choices[2] = ISO_IR_165;\r
2358                                     break;\r
2359                                 }\r
2360                                 \r
2361                                 choiceCount = 3;\r
2362                             }\r
2363                         }\r
2364                         \r
2365                         cs = g = 0;\r
2366                         /*\r
2367                          * len==0:  no mapping found yet\r
2368                          * len<0:   found a fallback result: continue looking for a roundtrip but no further fallbacks\r
2369                          * len>0:   found a roundtrip result, done\r
2370                          */\r
2371                         len = 0;\r
2372                         /*\r
2373                          * We will turn off usingFallback after finding a fallback,\r
2374                          * but we still get fallbacks from PUA code points as usual.\r
2375                          * Therefore, we will also need to check that we don't overwrite\r
2376                          * an early fallback with a later one.\r
2377                          */\r
2378                         usingFallback = useFallback;\r
2379                         \r
2380                         for (i = 0; i < choiceCount && len <= 0; ++i) {\r
2381                             byte cs0 = choices[i];\r
2382                             if (cs0 > 0) {\r
2383                                 int[] value = new int[1];\r
2384                                 int len2;\r
2385                                 if (cs0 > CNS_11643_0) {\r
2386                                     myConverterData.currentConverter.sharedData = myConverterData.myConverterArray[CNS_11643];\r
2387                                     myConverterData.currentConverter.sharedData.mbcs.outputType = CharsetMBCS.MBCS_OUTPUT_3;\r
2388                                     len2 = myConverterData.currentEncoder.fromUChar32(sourceChar, value, usingFallback);\r
2389                                     //len2 = MBCSFromUChar32_ISO2022(myConverterData.myConverterArray[CNS_11643],\r
2390                                     //        sourceChar, value, usingFallback, CharsetMBCS.MBCS_OUTPUT_3);\r
2391                                     if (len2 == 3 || (len2 == -3 && len == 0)) {\r
2392                                         targetValue = value[0];\r
2393                                         cs = (byte)(CNS_11643_0 + (value[0] >> 16) - 0x80);\r
2394                                         if (len2 >= 0) {\r
2395                                             len = 2;\r
2396                                         } else {\r
2397                                             len = -2;\r
2398                                             usingFallback = false;\r
2399                                         }\r
2400                                         if (cs == CNS_11643_1) {\r
2401                                             g = 1;\r
2402                                         } else if (cs == CNS_11643_2) {\r
2403                                             g = 2;\r
2404                                         } else if (myConverterData.version == 1) { /* plane 3..7 */\r
2405                                             g = 3;\r
2406                                         } else {\r
2407                                             /* ISO-2022-CN (without -EXT) does not support plane 3..7 */\r
2408                                             len = 0;\r
2409                                         }\r
2410                                     }\r
2411                                 } else {\r
2412                                     /* GB2312_1 or ISO-IR-165 */\r
2413                                     myConverterData.currentConverter.sharedData = myConverterData.myConverterArray[cs0];\r
2414                                     myConverterData.currentConverter.sharedData.mbcs.outputType = CharsetMBCS.MBCS_OUTPUT_2;\r
2415                                     len2 = myConverterData.currentEncoder.fromUChar32(sourceChar, value, usingFallback);\r
2416                                     //len2 = MBCSFromUChar32_ISO2022(myConverterData.myConverterArray[cs0],\r
2417                                     //        sourceChar, value, usingFallback, CharsetMBCS.MBCS_OUTPUT_2);\r
2418                                     if (len2 == 2 || (len2 == -2 && len == 0)) {\r
2419                                         targetValue = value[0];\r
2420                                         len = len2;\r
2421                                         cs = cs0;\r
2422                                         g = 1;\r
2423                                         usingFallback = false;\r
2424                                     }\r
2425                                 }\r
2426                             }\r
2427                         }\r
2428                         \r
2429                         if (len != 0) {\r
2430                             len = 0; /* count output bytes; it must have ben abs(len) == 2 */\r
2431                             \r
2432                             /* write the designation sequence if necessary */\r
2433                             if (cs != myConverterData.fromU2022State.cs[g]) {\r
2434                                 if (cs < CNS_11643) {\r
2435                                     for (int n = 0; n < escSeqCharsCN[cs].length; n++) {\r
2436                                         buffer[n] = escSeqCharsCN[cs][n];\r
2437                                     }\r
2438                                 } else {\r
2439                                     for (int n = 0; n < escSeqCharsCN[CNS_11643 + (cs - CNS_11643_1)].length; n++) {\r
2440                                         buffer[n] = escSeqCharsCN[CNS_11643 + (cs - CNS_11643_1)][n];\r
2441                                     }\r
2442                                 }\r
2443                                 len = 4;\r
2444                                 myConverterData.fromU2022State.cs[g] = cs;\r
2445                                 if (g == 1) {\r
2446                                     /* changing the SO/G1 charset invalidates the choices[] */\r
2447                                     choiceCount = 0;\r
2448                                 }\r
2449                             }\r
2450                             \r
2451                             /* write the shift sequence if necessary */\r
2452                             if (g != myConverterData.fromU2022State.g) {\r
2453                                 switch (g) {\r
2454                                 case 1:\r
2455                                     buffer[len++] = UConverterConstants.SO;\r
2456                                     \r
2457                                     /* set the new state only if it is the locking shift SO/G1, not for SS2 or SS3 */\r
2458                                     myConverterData.fromU2022State.g = 1;\r
2459                                     break;\r
2460                                 case 2:\r
2461                                     buffer[len++] = 0x1b;\r
2462                                     buffer[len++] = 0x4e;\r
2463                                     break;\r
2464                                 default: /* case 3 */\r
2465                                     buffer[len++] = 0x1b;\r
2466                                     buffer[len++] = 0x4f;\r
2467                                     break;\r
2468                                 }\r
2469                             }\r
2470                             \r
2471                             /* write the two output bytes */\r
2472                             buffer[len++] = (byte)(targetValue >> 8);\r
2473                             buffer[len++] = (byte)targetValue;\r
2474                         } else {\r
2475                             /* if we cannot find the character after checking all codepages\r
2476                              * then this is an error\r
2477                              */\r
2478                             err = CoderResult.unmappableForLength(source.position()-oldSourcePos);\r
2479                             fromUChar32 = sourceChar;\r
2480                             break;\r
2481                         }\r
2482                     }\r
2483                     /* output len>0 bytes in buffer[] */\r
2484                     if (len == 1) {\r
2485                         target.put(buffer[0]);\r
2486                         if (offsets != null) {\r
2487                             offsets.put(source.position()-1);\r
2488                         }\r
2489                     } else if (len == 2 && (target.remaining() >= 2)) {\r
2490                         target.put(buffer[0]);\r
2491                         target.put(buffer[1]);\r
2492                         if (offsets != null) {\r
2493                             int sourceIndex = source.position();\r
2494                             offsets.put(sourceIndex);\r
2495                             offsets.put(sourceIndex);\r
2496                         }\r
2497                     } else {\r
2498                         err = CharsetEncoderICU.fromUWriteBytes(this, buffer, 0, len, target, offsets, source.position()-1);\r
2499                         if (err.isError()) {\r
2500                             break;\r
2501                         }\r
2502                     }\r
2503                 } else {\r
2504                     err = CoderResult.OVERFLOW;\r
2505                     break;\r
2506                 }\r
2507             } /* end while (source.hasRemaining() */\r
2508             \r
2509             /*\r
2510              * the end of the input stream and detection of truncated input\r
2511              * are handled by the framework, but for ISO-2022-CN conversion\r
2512              * we need to be in ASCII mode at the very end\r
2513              * \r
2514              * conditions:\r
2515              *   successful\r
2516              *   not in ASCII mode\r
2517              *   end of input and no truncated input\r
2518              */\r
2519             if (!err.isError() && myConverterData.fromU2022State.g != 0 && flush && !source.hasRemaining() && fromUChar32 == 0) {\r
2520                 int sourceIndex;\r
2521                 \r
2522                 /* we are switching to ASCII */\r
2523                 myConverterData.fromU2022State.g = 0;\r
2524                 \r
2525                 /* get the source index of the last input character */\r
2526                 sourceIndex = source.position();\r
2527                 if (sourceIndex > 0) {\r
2528                     --sourceIndex;\r
2529                     if (UTF16.isTrailSurrogate(source.get(sourceIndex)) && \r
2530                             (sourceIndex == 0 || UTF16.isLeadSurrogate(source.get(sourceIndex-1)))) {\r
2531                         --sourceIndex;\r
2532                     }\r
2533                 } else {\r
2534                     sourceIndex = -1;\r
2535                 }\r
2536                 \r
2537                 err = CharsetEncoderICU.fromUWriteBytes(this, SHIFT_IN_STR, 0, 1, target, offsets, sourceIndex);\r
2538             }\r
2539             \r
2540             return err;\r
2541         }\r
2542     }\r
2543     /******************************** ISO-2022-KR *****************************/\r
2544     /*\r
2545      *   Rules for ISO-2022-KR encoding\r
2546      *   i) The KSC5601 designator sequence should appear only once in a file,\r
2547      *      at the beginning of a line before any KSC5601 characters. This usually\r
2548      *      means that it appears by itself on the first line of the file\r
2549      *  ii) There are only 2 shifting sequences SO to shift into double byte mode\r
2550      *      and SI to shift into single byte mode\r
2551      */\r
2552     private class CharsetEncoderISO2022KR extends CharsetEncoderICU {\r
2553         public CharsetEncoderISO2022KR(CharsetICU cs) {\r
2554             super(cs, fromUSubstitutionChar[myConverterData.version]);\r
2555         }\r
2556         \r
2557         protected void implReset() {\r
2558             super.implReset();\r
2559             myConverterData.reset();\r
2560             setInitialStateFromUnicodeKR(this);\r
2561         }\r
2562         \r
2563         /* This overrides the cbFromUWriteSub method in CharsetEncoderICU */\r
2564         CoderResult cbFromUWriteSub (CharsetEncoderICU encoder, \r
2565             CharBuffer source, ByteBuffer target, IntBuffer offsets){\r
2566             CoderResult err = CoderResult.UNDERFLOW;\r
2567             byte[] buffer = new byte[8];\r
2568             int length, i = 0;\r
2569             byte[] subchar;\r
2570             \r
2571             subchar = encoder.replacement();\r
2572             length = subchar.length;\r
2573             \r
2574             if (myConverterData.version == 0) {\r
2575                 if (length == 1) {\r
2576                     if (encoder.fromUnicodeStatus != 0) {\r
2577                         /* in DBCS mode: switch to SBCS */\r
2578                         encoder.fromUnicodeStatus = 0;\r
2579                         buffer[i++] = UConverterConstants.SI;\r
2580                     }\r
2581                     buffer[i++] = subchar[0];\r
2582                 } else { /* length == 2 */\r
2583                     if (encoder.fromUnicodeStatus == 0) {\r
2584                         /* in SBCS mode: switch to DBCS */\r
2585                         encoder.fromUnicodeStatus = 1;\r
2586                         buffer[i++] = UConverterConstants.SO;\r
2587                     }\r
2588                     buffer[i++] = subchar[0];\r
2589                     buffer[i++] = subchar[1];\r
2590                 }\r
2591                 err = CharsetEncoderICU.fromUWriteBytes(this, buffer, 0, i, target, offsets, source.position() - 1);\r
2592             } else { \r
2593                 /* save the subvonverter's substitution string */\r
2594                 byte[] currentSubChars = myConverterData.currentEncoder.replacement();\r
2595                 \r
2596                 /* set our substitution string into the subconverter */\r
2597                 myConverterData.currentEncoder.replaceWith(subchar);\r
2598                 myConverterData.currentConverter.subChar1 = fromUSubstitutionChar[0][0];\r
2599                 /* let the subconverter write the subchar, set/retrieve fromUChar32 state */\r
2600                 myConverterData.currentEncoder.fromUChar32 = encoder.fromUChar32;\r
2601                 err = myConverterData.currentEncoder.cbFromUWriteSub(myConverterData.currentEncoder, source, target, offsets);\r
2602                 encoder.fromUChar32 = myConverterData.currentEncoder.fromUChar32;\r
2603                 \r
2604                 /* restore the subconverter's substitution string */\r
2605                 myConverterData.currentEncoder.replaceWith(currentSubChars);\r
2606                 \r
2607                 if (err.isOverflow()) {\r
2608                     if (myConverterData.currentEncoder.errorBufferLength > 0) {\r
2609                         encoder.errorBuffer = (byte[])(myConverterData.currentEncoder.errorBuffer.clone());\r
2610                     }\r
2611                     encoder.errorBufferLength = myConverterData.currentEncoder.errorBufferLength;\r
2612                     myConverterData.currentEncoder.errorBufferLength = 0;\r
2613                 }\r
2614             }\r
2615             \r
2616             return err;\r
2617         }\r
2618         \r
2619         private CoderResult encodeLoopIBM(CharBuffer source, ByteBuffer target, IntBuffer offsets, boolean flush) {\r
2620             CoderResult err = CoderResult.UNDERFLOW;\r
2621 \r
2622             myConverterData.currentEncoder.fromUChar32 = fromUChar32;\r
2623             err = myConverterData.currentEncoder.cnvMBCSFromUnicodeWithOffsets(source, target, offsets, flush);\r
2624             fromUChar32 = myConverterData.currentEncoder.fromUChar32;\r
2625             \r
2626             if (err.isOverflow()) {\r
2627                 if (myConverterData.currentEncoder.errorBufferLength > 0) {\r
2628                     errorBuffer = (byte[])(myConverterData.currentEncoder.errorBuffer.clone());\r
2629                 }\r
2630                 errorBufferLength = myConverterData.currentEncoder.errorBufferLength;\r
2631                 myConverterData.currentEncoder.errorBufferLength = 0;\r
2632             }\r
2633             \r
2634             return err;\r
2635         }\r
2636         \r
2637         protected CoderResult encodeLoop(CharBuffer source, ByteBuffer target, IntBuffer offsets, boolean flush) {\r
2638             CoderResult err = CoderResult.UNDERFLOW;\r
2639             int[] targetByteUnit = { 0x0000 };\r
2640             int sourceChar = 0x0000;\r
2641             boolean isTargetByteDBCS;\r
2642             boolean oldIsTargetByteDBCS;\r
2643             boolean usingFallback;\r
2644             int length = 0;\r
2645             boolean gotoGetTrail = false; // for goto getTrail label call\r
2646             \r
2647             /*\r
2648              * if the version is 1 then the user is requesting\r
2649              * conversion with ibm-25546 pass the argument to\r
2650              * MBCS converter and return\r
2651              */\r
2652             if (myConverterData.version == 1) {\r
2653                 return encodeLoopIBM(source, target, offsets, flush);\r
2654             }\r
2655             \r
2656             usingFallback = useFallback;\r
2657             isTargetByteDBCS = fromUnicodeStatus == 0 ? false : true;\r
2658             if ((sourceChar = fromUChar32) != 0 && target.hasRemaining()) {\r
2659                 gotoGetTrail = true;\r
2660             }\r
2661             \r
2662             while (source.hasRemaining() || gotoGetTrail) {\r
2663                 targetByteUnit[0] = UConverterConstants.missingCharMarker;\r
2664                 \r
2665                 if (target.hasRemaining() || gotoGetTrail) {\r
2666                     if (!gotoGetTrail) {\r
2667                         sourceChar = source.get();\r
2668                     \r
2669                         /* do not convert SO/SI/ESC */\r
2670                         if (IS_2022_CONTROL(sourceChar)) {\r
2671                             /* callback(illegal) */\r
2672                             err = CoderResult.malformedForLength(1);\r
2673                             fromUChar32 = sourceChar;\r
2674                             break;\r
2675                         }\r
2676                         myConverterData.currentConverter.sharedData.mbcs.outputType = CharsetMBCS.MBCS_OUTPUT_2;\r
2677                         length = myConverterData.currentEncoder.fromUChar32(sourceChar, targetByteUnit, usingFallback);\r
2678                         //length = MBCSFromUChar32_ISO2022(myConverterData.currentConverter.sharedData, sourceChar, targetByteUnit, usingFallback, CharsetMBCS.MBCS_OUTPUT_2); \r
2679                         if (length < 0) {\r
2680                             length = -length; /* fallback */\r
2681                         }\r
2682                         /* only DBCS or SBCS characters are expected */\r
2683                         /* DB characters with high bit set to 1 are expected */\r
2684                         if (length > 2 || length == 0 ||\r
2685                                 (length == 1 && targetByteUnit[0] > 0x7f) ||\r
2686                                 (length ==2 &&\r
2687                                         ((char)(targetByteUnit[0] - 0xa1a1) > (0xfefe - 0xa1a1) ||\r
2688                                         ((targetByteUnit[0] - 0xa1) & UConverterConstants.UNSIGNED_BYTE_MASK) > (0xfe - 0xa1)))) {\r
2689                             targetByteUnit[0] = UConverterConstants.missingCharMarker;\r
2690                         }\r
2691                     }\r
2692                     if (!gotoGetTrail && targetByteUnit[0] != UConverterConstants.missingCharMarker) {\r
2693                         oldIsTargetByteDBCS = isTargetByteDBCS;\r
2694                         isTargetByteDBCS = (targetByteUnit[0] > 0x00FF);\r
2695                         /* append the shift sequence */\r
2696                         if (oldIsTargetByteDBCS != isTargetByteDBCS) {\r
2697                             if (isTargetByteDBCS) {\r
2698                                 target.put((byte)UConverterConstants.SO);\r
2699                             } else {\r
2700                                 target.put((byte)UConverterConstants.SI);\r
2701                             }\r
2702                             if (offsets != null) {\r
2703                                 offsets.put(source.position()-1);\r
2704                             }\r
2705                         }\r
2706                         /* write the targetUniChar to target */\r
2707                         if (targetByteUnit[0] <= 0x00FF) {\r
2708                             if (target.hasRemaining()) {\r
2709                                 target.put((byte)targetByteUnit[0]);\r
2710                                 if (offsets != null) {\r
2711                                     offsets.put(source.position()-1);\r
2712                                 }\r
2713                             } else {\r
2714                                 errorBuffer[errorBufferLength++] = (byte)targetByteUnit[0];\r
2715                                 err = CoderResult.OVERFLOW;\r
2716                             }\r
2717                         } else {\r
2718                             if (target.hasRemaining()) {\r
2719                                 target.put((byte)(UConverterConstants.UNSIGNED_BYTE_MASK & ((targetByteUnit[0]>>8) - 0x80)));\r
2720                                 if (offsets != null) {\r
2721                                     offsets.put(source.position()-1);\r
2722                                 }\r
2723                                 if (target.hasRemaining()) {\r
2724                                     target.put((byte)(UConverterConstants.UNSIGNED_BYTE_MASK & (targetByteUnit[0]- 0x80)));\r
2725                                     if (offsets != null) {\r
2726                                         offsets.put(source.position()-1);\r
2727                                     }\r
2728                                 } else {\r
2729                                     errorBuffer[errorBufferLength++] = (byte)(UConverterConstants.UNSIGNED_BYTE_MASK & (targetByteUnit[0] - 0x80));\r
2730                                     err = CoderResult.OVERFLOW;\r
2731                                 }\r
2732                                 \r
2733                             } else {\r
2734                                 errorBuffer[errorBufferLength++] = (byte)(UConverterConstants.UNSIGNED_BYTE_MASK & ((targetByteUnit[0]>>8) - 0x80));\r
2735                                 errorBuffer[errorBufferLength++] = (byte)(UConverterConstants.UNSIGNED_BYTE_MASK & (targetByteUnit[0]- 0x80));\r
2736                                 err = CoderResult.OVERFLOW;\r
2737                             }\r
2738                         }\r
2739                     } else {\r
2740                         /* oops.. the code point is unassigned\r
2741                          * set the error and reason\r
2742                          */\r
2743                         \r
2744                         /* check if the char is a First surrogate */\r
2745                         if (gotoGetTrail || UTF16.isSurrogate((char)sourceChar)) {\r
2746                             if (gotoGetTrail || UTF16.isLeadSurrogate((char)sourceChar)) {\r
2747 // getTrail label\r
2748                                 // reset gotoGetTrail flag\r
2749                                 gotoGetTrail = false;\r
2750                                 \r
2751                                 /* look ahead to find the trail surrogate */\r
2752                                 if (source.hasRemaining()) {\r
2753                                     /* test the following code unit */\r
2754                                     char trail = source.get();\r
2755                                     source.position(source.position()-1);\r
2756                                     if (UTF16.isTrailSurrogate(trail)) {\r
2757                                         source.get();\r
2758                                          sourceChar = UCharacter.getCodePoint((char)sourceChar, trail);\r
2759                                          err = CoderResult.unmappableForLength(2);\r
2760                                          /* convert this surrogate code point */\r
2761                                          /* exit this condition tree */\r
2762                                     } else {\r
2763                                         /* this is an unmatched lead code unit (1st surrogate) */\r
2764                                         /* callback(illegal) */\r
2765                                         err = CoderResult.malformedForLength(1);\r
2766                                     }\r
2767                                 } else {\r
2768                                     /* no more input */\r
2769                                     err = CoderResult.UNDERFLOW;\r
2770                                 }\r
2771                             } else {\r
2772                                 /* this is an unmatched trail code unit (2nd surrogate ) */\r
2773                                 /* callback(illegal) */\r
2774                                 err = CoderResult.malformedForLength(1);\r
2775                             }\r
2776                         } else {\r
2777                             /* callback(unassigned) for a BMP code point */\r
2778                             err = CoderResult.unmappableForLength(1);\r
2779                         }\r
2780                         \r
2781                         fromUChar32 = sourceChar;\r
2782                         break;\r
2783                     }\r
2784                 } else {\r
2785                     err = CoderResult.OVERFLOW;\r
2786                     break;\r
2787                 }\r
2788             }\r
2789             /*\r
2790              * the end of the input stream and detection of truncated input\r
2791              * are handled by the framework, but for ISO-2022-KR conversion\r
2792              * we need to be inASCII mode at the very end\r
2793              * \r
2794              * conditions:\r
2795              *  successful\r
2796              *  not in ASCII mode\r
2797              *  end of  input and no truncated input\r
2798              */\r
2799             if (!err.isError() && isTargetByteDBCS && flush && !source.hasRemaining() && fromUChar32 == 0) {\r
2800                 int sourceIndex;\r
2801                 \r
2802                 /* we are switching to ASCII */\r
2803                 isTargetByteDBCS = false;\r
2804                 \r
2805                 /* get the source index of the last input character */\r
2806                 sourceIndex = source.position();\r
2807                 if (sourceIndex > 0) {\r
2808                     --sourceIndex;\r
2809                     if (UTF16.isTrailSurrogate(source.get(sourceIndex)) && UTF16.isLeadSurrogate(source.get(sourceIndex-1))) {\r
2810                         --sourceIndex;\r
2811                     }\r
2812                 } else {\r
2813                     sourceIndex = -1;\r
2814                 }\r
2815                 \r
2816                 CharsetEncoderICU.fromUWriteBytes(this, SHIFT_IN_STR, 0, 1, target, offsets, sourceIndex);\r
2817             }\r
2818             /*save the state and return */\r
2819             fromUnicodeStatus = isTargetByteDBCS ? 1 : 0;\r
2820             \r
2821             return err;\r
2822         }\r
2823     }\r
2824     \r
2825     public CharsetDecoder newDecoder() {\r
2826         switch (variant) {\r
2827         case ISO_2022_JP:\r
2828             return new CharsetDecoderISO2022JP(this);\r
2829         \r
2830         case ISO_2022_CN:\r
2831             return new CharsetDecoderISO2022CN(this);\r
2832             \r
2833         case ISO_2022_KR:\r
2834             setInitialStateToUnicodeKR();\r
2835             return new CharsetDecoderISO2022KR(this);\r
2836             \r
2837         default: /* should not happen */\r
2838             return null;\r
2839         }\r
2840     }\r
2841     \r
2842     public CharsetEncoder newEncoder() {\r
2843         CharsetEncoderICU cnv;\r
2844         \r
2845         switch (variant) {\r
2846         case ISO_2022_JP:\r
2847             return new CharsetEncoderISO2022JP(this);\r
2848             \r
2849         case ISO_2022_CN:\r
2850             return new CharsetEncoderISO2022CN(this);\r
2851             \r
2852         case ISO_2022_KR:\r
2853             cnv = new CharsetEncoderISO2022KR(this);\r
2854             setInitialStateFromUnicodeKR(cnv);\r
2855             return cnv;\r
2856             \r
2857         default: /* should not happen */\r
2858             return null;\r
2859         }\r
2860     }\r
2861     \r
2862     private void setInitialStateToUnicodeKR() {\r
2863         if (myConverterData.version == 1) {\r
2864             myConverterData.currentDecoder.toUnicodeStatus = 0;     /* offset */\r
2865             myConverterData.currentDecoder.mode = 0;                /* state */\r
2866             myConverterData.currentDecoder.toULength = 0;           /* byteIndex */\r
2867         }\r
2868     }\r
2869     private void setInitialStateFromUnicodeKR(CharsetEncoderICU cnv) {\r
2870         /* ISO-2022-KR the designator sequence appears only once\r
2871          * in a file so we append it only once\r
2872          */\r
2873         if (cnv.errorBufferLength == 0) {\r
2874             cnv.errorBufferLength = 4;\r
2875             cnv.errorBuffer[0] = 0x1b;\r
2876             cnv.errorBuffer[1] = 0x24;\r
2877             cnv.errorBuffer[2] = 0x29;\r
2878             cnv.errorBuffer[3] = 0x43;\r
2879         }\r
2880         if (myConverterData.version == 1) {\r
2881             ((CharsetMBCS)myConverterData.currentEncoder.charset()).subChar1 = 0x1A;\r
2882             myConverterData.currentEncoder.fromUChar32 = 0;\r
2883             myConverterData.currentEncoder.fromUnicodeStatus = 1; /* prevLength */\r
2884         }\r
2885     }\r
2886     \r
2887     void getUnicodeSetImpl(UnicodeSet setFillIn, int which) {\r
2888         int i;\r
2889         /*open a set and initialize it with code points that are algorithmically round-tripped */\r
2890         \r
2891         switch(variant){\r
2892         case ISO_2022_JP:\r
2893            /*include JIS X 0201 which is hardcoded */\r
2894             setFillIn.add(0xa5);\r
2895             setFillIn.add(0x203e);\r
2896             if((jpCharsetMasks[myConverterData.version]&CSM(ISO8859_1))!=0){\r
2897                 /*include Latin-1 some variants of JP */\r
2898                 setFillIn.add(0, 0xff);\r
2899             \r
2900             }\r
2901             else {\r
2902                 /* include ASCII for JP */\r
2903                 setFillIn.add(0, 0x7f);\r
2904              }\r
2905             if(myConverterData.version==3 || myConverterData.version==4 ||which == ROUNDTRIP_AND_FALLBACK_SET){\r
2906             /*\r
2907              * Do not test(jpCharsetMasks[myConverterData.version]&CSM(HWKANA_7BIT))!=0 because the bit\r
2908              * is on for all JP versions although version 3 & 4 (JIS7 and JIS8) use half-width Katakana.\r
2909              * This is because all ISO_2022_JP variant are lenient in that they accept (in toUnicode) half-width\r
2910              * Katakana via ESC.\r
2911              * However, we only emit (fromUnicode) half-width Katakana according to the\r
2912              * definition of each variant.\r
2913              *\r
2914              * When including fallbacks,\r
2915              * we need to include half-width Katakana Unicode code points for all JP variants because\r
2916              * JIS X 0208 has hardcoded fallbacks for them (which map to full-width Katakana).\r
2917              */\r
2918             /* include half-width Katakana for JP */\r
2919                 setFillIn.add(HWKANA_START, HWKANA_END);\r
2920              }\r
2921             break;\r
2922         case ISO_2022_CN:\r
2923             /* Include ASCII for CN */\r
2924             setFillIn.add(0, 0x7f);\r
2925             break;\r
2926         case ISO_2022_KR:\r
2927             /* there is only one converter for KR */\r
2928           myConverterData.currentConverter.getUnicodeSetImpl(setFillIn, which);\r
2929           break;\r
2930         default:\r
2931             break;\r
2932         }\r
2933         \r
2934         //TODO Replaced by ucnv_MBCSGetFilteredUnicodeSetForUnicode() until\r
2935         for(i=0; i<UCNV_2022_MAX_CONVERTERS;i++){\r
2936             int filter;\r
2937             if(myConverterData.myConverterArray[i]!=null){\r
2938                 if(variant==ISO_2022_CN && myConverterData.version==0 && i==CNS_11643){\r
2939                     /*\r
2940                      * \r
2941                      * version -specific for CN:\r
2942                      * CN version 0 does not map CNS planes 3..7 although\r
2943                      * they are all available in the CNS conversion table;\r
2944                      * CN version 1 (-EXT) does map them all.\r
2945                      * The two versions create different Unicode sets.\r
2946                      */\r
2947                     filter=CharsetMBCS.UCNV_SET_FILTER_2022_CN;\r
2948                 } else if(variant==ISO_2022_JP && i == JISX208){\r
2949                     /* \r
2950                      * Only add code points that map to Shift-JIS codes\r
2951                      * corrosponding to JIS X 208\r
2952                      */\r
2953                     filter=CharsetMBCS.UCNV_SET_FILTER_SJIS;\r
2954                 } else if(i==KSC5601){\r
2955                     /*\r
2956                      * Some of the KSC 5601 tables (Convrtrs.txt has this aliases on multiple tables)\r
2957                      * are broader than GR94.\r
2958                      */\r
2959                     filter=CharsetMBCS.UCNV_SET_FILTER_GR94DBCS;\r
2960                 } else {\r
2961                     filter=CharsetMBCS.UCNV_SET_FILTER_NONE;\r
2962                 }\r
2963                 \r
2964                 myConverterData.currentConverter.MBCSGetFilteredUnicodeSetForUnicode(myConverterData.myConverterArray[i],setFillIn, which, filter);\r
2965            }\r
2966         }\r
2967         /*\r
2968          * ISO Converter must not convert SO/SI/ESC despite what sub-converters do by themselves\r
2969          * Remove these characters from the set.\r
2970          */\r
2971         setFillIn.remove(0x0e);\r
2972         setFillIn.remove(0x0f);\r
2973         setFillIn.remove(0x1b);\r
2974         \r
2975         /* ISO 2022 converter do not convert C! controls either */\r
2976         setFillIn.remove(0x80, 0x9f);\r
2977     }\r
2978 }\r
2979 \r
2980 \r
2981 \r
2982 \r
2983 \r
2984 \r
2985 \r
2986 \r
2987 \r