]> gitweb.fperrin.net Git - Dictionary.git/blob - jars/icu4j-4_4_2-src/main/classes/charset/src/com/ibm/icu/charset/CharsetISO2022.java
go
[Dictionary.git] / jars / icu4j-4_4_2-src / main / classes / charset / src / com / ibm / icu / charset / CharsetISO2022.java
1 /*\r
2  *******************************************************************************\r
3  * Copyright (C) 2008-2009, International Business Machines Corporation and         *\r
4  * others. All Rights Reserved.                                                *\r
5  *******************************************************************************\r
6  */\r
7 package com.ibm.icu.charset;\r
8 \r
9 import java.nio.ByteBuffer;\r
10 import java.nio.CharBuffer;\r
11 import java.nio.IntBuffer;\r
12 import java.nio.charset.CharsetDecoder;\r
13 import java.nio.charset.CharsetEncoder;\r
14 import java.nio.charset.CoderResult;\r
15 import java.util.Arrays;\r
16 \r
17 import com.ibm.icu.charset.CharsetMBCS.CharsetDecoderMBCS;\r
18 import com.ibm.icu.charset.CharsetMBCS.CharsetEncoderMBCS;\r
19 import com.ibm.icu.lang.UCharacter;\r
20 import com.ibm.icu.text.UTF16;\r
21 import com.ibm.icu.text.UnicodeSet;\r
22 \r
23 class CharsetISO2022 extends CharsetICU {\r
24     private UConverterDataISO2022 myConverterData;\r
25     private int variant;           // one of enum {ISO_2022_JP, ISO_2022_KR, or ISO_2022_CN}\r
26     \r
27     private static final byte[] SHIFT_IN_STR    = { 0x0f };\r
28 //    private static final byte[] SHIFT_OUT_STR   = { 0x0e };\r
29 \r
30     private static final byte CR    = 0x0D;\r
31     private static final byte LF    = 0x0A;\r
32 /*\r
33     private static final byte H_TAB = 0x09;\r
34     private static final byte SPACE = 0x20;\r
35 */\r
36     private static final char HWKANA_START  = 0xff61;\r
37     private static final char HWKANA_END    = 0xff9f;\r
38     \r
39     /*\r
40      * 94-character sets with native byte values A1..FE are encoded in ISO 2022\r
41      * as bytes 21..7E. (Subtract 0x80.)\r
42      * 96-character sets with native byte values A0..FF are encoded in ISO 2022\r
43      * as bytes 20..7F. (Subtract 0x80.)\r
44      * Do not encode C1 control codes with native bytes 80..9F\r
45      * as bytes 00..1F (C0 control codes).\r
46      */\r
47 /*\r
48     private static final char GR94_START    = 0xa1;\r
49     private static final char GR94_END      = 0xfe;\r
50 */\r
51     private static final char GR96_START    = 0xa0;\r
52     private static final char GR96_END      = 0xff;\r
53     \r
54     /* for ISO-2022-JP and -CN implementations */\r
55     // typedef enum {\r
56         /* shared values */\r
57         private static final byte INVALID_STATE = -1;\r
58         private static final byte ASCII         = 0;\r
59         \r
60         private static final byte SS2_STATE = 0x10;\r
61         private static final byte SS3_STATE = 0x11;\r
62         \r
63         /* JP */\r
64         private static final byte ISO8859_1 = 1;\r
65         private static final byte ISO8859_7 = 2;\r
66         private static final byte JISX201   = 3;\r
67         private static final byte JISX208   = 4;\r
68         private static final byte JISX212   = 5;\r
69         private static final byte GB2312    = 6;\r
70         private static final byte KSC5601   = 7;\r
71         private static final byte HWKANA_7BIT  = 8; /* Halfwidth Katakana 7 bit */\r
72         \r
73         /* CN */\r
74         /* the first few enum constants must keep their values because they correspond to myConverterArray[] */\r
75         private static final byte GB2312_1  = 1;\r
76         private static final byte ISO_IR_165= 2;\r
77         private static final byte CNS_11643 = 3;\r
78         \r
79         /*\r
80          * these are used in StateEnum and ISO2022State variables,\r
81          * but CNS_11643 must be used to index into myConverterArray[]\r
82          */\r
83         private static final byte CNS_11643_0 = 0x20;\r
84         private static final byte CNS_11643_1 = 0x21;\r
85         private static final byte CNS_11643_2 = 0x22;\r
86         private static final byte CNS_11643_3 = 0x23;\r
87         private static final byte CNS_11643_4 = 0x24;\r
88         private static final byte CNS_11643_5 = 0x25;\r
89         private static final byte CNS_11643_6 = 0x26;\r
90         private static final byte CNS_11643_7 = 0x27;\r
91     // } StateEnum;\r
92     \r
93 \r
94     public CharsetISO2022(String icuCanonicalName, String javaCanonicalName, String[] aliases) {\r
95         super(icuCanonicalName, javaCanonicalName, aliases);\r
96         \r
97         myConverterData = new UConverterDataISO2022();\r
98         \r
99         int versionIndex = icuCanonicalName.indexOf("version=");\r
100         int version = Integer.decode(icuCanonicalName.substring(versionIndex+8, versionIndex+9)).intValue();\r
101         \r
102         myConverterData.version = version;\r
103         \r
104         if (icuCanonicalName.indexOf("locale=ja") > 0) {\r
105             ISO2022InitJP(version);\r
106         } else if (icuCanonicalName.indexOf("locale=zh") > 0) {\r
107             ISO2022InitCN(version);\r
108         } else /* if (icuCanonicalName.indexOf("locale=ko") > 0) */ {\r
109             ISO2022InitKR(version);\r
110         }\r
111         \r
112         myConverterData.currentEncoder = (CharsetEncoderMBCS)myConverterData.currentConverter.newEncoder();\r
113         myConverterData.currentDecoder = (CharsetDecoderMBCS)myConverterData.currentConverter.newDecoder();\r
114     }\r
115     \r
116     private void ISO2022InitJP(int version) {\r
117         variant = ISO_2022_JP;\r
118         \r
119         maxBytesPerChar = 6;\r
120         minBytesPerChar = 1;\r
121         maxCharsPerByte = 1;\r
122         // open the required converters and cache them \r
123         if((jpCharsetMasks[version]&CSM(ISO8859_7)) != 0) {\r
124             myConverterData.myConverterArray[ISO8859_7] = ((CharsetMBCS)CharsetICU.forNameICU("ISO8859_7")).sharedData;\r
125         }\r
126         // myConverterData.myConverterArray[JISX201] = ((CharsetMBCS)CharsetICU.forNameICU("jisx-201")).sharedData;\r
127         myConverterData.myConverterArray[JISX208] = ((CharsetMBCS)CharsetICU.forNameICU("Shift-JIS")).sharedData;\r
128         if ((jpCharsetMasks[version]&CSM(JISX212)) != 0) {\r
129             myConverterData.myConverterArray[JISX212] = ((CharsetMBCS)CharsetICU.forNameICU("jisx-212")).sharedData;\r
130         }\r
131         if ((jpCharsetMasks[version]&CSM(GB2312)) != 0) {\r
132             myConverterData.myConverterArray[GB2312] = ((CharsetMBCS)CharsetICU.forNameICU("ibm-5478")).sharedData;\r
133         }\r
134         if ((jpCharsetMasks[version]&CSM(KSC5601)) != 0) {\r
135             myConverterData.myConverterArray[KSC5601] = ((CharsetMBCS)CharsetICU.forNameICU("ksc_5601")).sharedData;\r
136         }\r
137         \r
138         // create a generic CharsetMBCS object\r
139         myConverterData.currentConverter = (CharsetMBCS)CharsetICU.forNameICU("icu-internal-25546");\r
140     }\r
141     \r
142     private void ISO2022InitCN(int version) {\r
143         variant = ISO_2022_CN;\r
144         \r
145         maxBytesPerChar = 8;\r
146         minBytesPerChar = 1;\r
147         maxCharsPerByte = 1;\r
148         // open the required coverters and cache them.\r
149         myConverterData.myConverterArray[GB2312_1] = ((CharsetMBCS)CharsetICU.forNameICU("ibm-5478")).sharedData;\r
150         if (version == 1) {\r
151             myConverterData.myConverterArray[ISO_IR_165] = ((CharsetMBCS)CharsetICU.forNameICU("iso-ir-165")).sharedData;\r
152         } \r
153         myConverterData.myConverterArray[CNS_11643] = ((CharsetMBCS)CharsetICU.forNameICU("cns-11643-1992")).sharedData;\r
154         \r
155         // create a generic CharsetMBCS object\r
156         myConverterData.currentConverter = (CharsetMBCS)CharsetICU.forNameICU("icu-internal-25546");\r
157     }\r
158     \r
159     private void ISO2022InitKR(int version) {\r
160         variant = ISO_2022_KR;\r
161         \r
162         maxBytesPerChar = 3;\r
163         minBytesPerChar = 1;\r
164         maxCharsPerByte = 1;\r
165         \r
166         if (version == 1) {\r
167             myConverterData.currentConverter = (CharsetMBCS)CharsetICU.forNameICU("icu-internal-25546");\r
168             myConverterData.currentConverter.subChar1 = fromUSubstitutionChar[0][0];\r
169         } else {\r
170             myConverterData.currentConverter = (CharsetMBCS)CharsetICU.forNameICU("ibm-949");\r
171         }\r
172         \r
173         myConverterData.currentEncoder = (CharsetEncoderMBCS)myConverterData.currentConverter.newEncoder();\r
174         myConverterData.currentDecoder = (CharsetDecoderMBCS)myConverterData.currentConverter.newDecoder();\r
175     }\r
176     \r
177     /*\r
178      * ISO 2022 control codes must not be converted from Unicode\r
179      * because they would mess up the byte stream.\r
180      * The bit mask 0x0800c000 has bits set at bit positions 0xe, 0xf, 0x1b\r
181      * corresponding to SO, SI, and ESC.\r
182      */\r
183     private static boolean IS_2022_CONTROL(int c) { \r
184         return (c<0x20) && (((1<<c) & 0x0800c000) != 0);\r
185     }\r
186     \r
187     /*\r
188      * Check that the result is a 2-byte value with each byte in the range A1..FE\r
189      * (strict EUC DBCS) before accepting it and subtracting 0x80 from each byte\r
190      * to move it to the ISO 2022 range 21..7E.\r
191      * return 0 if out of range.\r
192      */\r
193     private static int _2022FromGR94DBCS(int value) {\r
194         if ((value <= 0xfefe && value >= 0xa1a1) && \r
195                 ((short)(value&UConverterConstants.UNSIGNED_BYTE_MASK) <= 0xfe && ((short)(value&UConverterConstants.UNSIGNED_BYTE_MASK) >= 0xa1))) {\r
196             return (value - 0x8080); /* shift down to 21..7e byte range */\r
197         } else {\r
198             return 0; /* not valid for ISO 2022 */\r
199         }\r
200     }\r
201     \r
202     /*\r
203      * Commented out because Ticket 5691: Call sites now check for validity. They can just += 0x8080 after that. \r
204      * \r
205      * This method does the reverse of _2022FromGR94DBCS(). Given the 2022 code point, it returns the\r
206      * 2 byte value that is in the range A1..FE for each byte. Otherwise it returns the 2022 code point\r
207      * unchanged. \r
208      * \r
209     private static int _2022ToGR94DBCS(int value) {\r
210         int returnValue = value + 0x8080;\r
211         \r
212         if ((returnValue <= 0xfefe && returnValue >= 0xa1a1) && \r
213                 ((short)(returnValue&UConverterConstants.UNSIGNED_BYTE_MASK) <= 0xfe && ((short)(returnValue&UConverterConstants.UNSIGNED_BYTE_MASK) >= 0xa1))) {\r
214             return returnValue;\r
215         } else {\r
216             return value;\r
217         }\r
218     }*/\r
219     \r
220     /* is the StateEnum charset value for a DBCS charset? */\r
221     private static boolean IS_JP_DBCS(byte cs) {\r
222         return ((JISX208 <= cs) && (cs <= KSC5601));\r
223     }\r
224     \r
225     private static short CSM(short cs) {\r
226         return (short)(1<<cs);\r
227     }\r
228     \r
229     /* This gets the valid index of the end of buffer when decoding. */\r
230     private static int getEndOfBuffer_2022(ByteBuffer source) {\r
231         int sourceIndex = source.position();\r
232         byte mySource = 0;\r
233         mySource = source.get(sourceIndex);\r
234         \r
235         while (source.hasRemaining() && mySource != ESC_2022) {\r
236             mySource = source.get();\r
237             if (mySource == ESC_2022) {\r
238                 break;\r
239             }\r
240             sourceIndex++;\r
241         }\r
242         return sourceIndex;\r
243     }\r
244     \r
245     /*\r
246      * This is a simple version of _MBCSGetNextUChar() calls the method in CharsetDecoderMBCS and returns\r
247      * the value given.\r
248      *\r
249      * Return value:\r
250      * U+fffe   unassigned\r
251      * U+ffff   illegal\r
252      * otherwise the Unicode code point\r
253      */\r
254      private int MBCSSimpleGetNextUChar(UConverterSharedData sharedData,\r
255                                ByteBuffer   source, \r
256                                boolean      useFallback) {\r
257          int returnValue;\r
258          UConverterSharedData tempSharedData = myConverterData.currentConverter.sharedData;\r
259          myConverterData.currentConverter.sharedData = sharedData;\r
260          returnValue = myConverterData.currentDecoder.simpleGetNextUChar(source, useFallback);\r
261          myConverterData.currentConverter.sharedData = tempSharedData;\r
262          \r
263          return returnValue;\r
264     }\r
265 \r
266     /*\r
267      * @param is the the output byte\r
268      * @return 1 roundtrip byte  0 no mapping  -1 fallback byte\r
269      */\r
270     static int MBCSSingleFromUChar32(UConverterSharedData sharedData, int c, int[] retval, boolean useFallback) {\r
271         char[] table;\r
272         int value;\r
273         /* BMP-only codepages are stored without stage 1 entries for supplementary code points */\r
274         if (c >= 0x10000 && (sharedData.mbcs.unicodeMask&UConverterConstants.HAS_SUPPLEMENTARY) == 0) {\r
275             return 0;\r
276         }\r
277         /* convert the Unicode code point in c into codepage bytes */\r
278         table = sharedData.mbcs.fromUnicodeTable;\r
279         /* get the byte for the output */\r
280         value = CharsetMBCS.MBCS_SINGLE_RESULT_FROM_U(table, sharedData.mbcs.fromUnicodeBytes, c);\r
281         /* get the byte for the output */\r
282         retval[0] = value & 0xff;\r
283         if (value >= 0xf00) {\r
284             return 1; /* roundtrip */\r
285         } else if (useFallback ? value>=0x800 : value>=0xc00) {\r
286             return -1; /* fallback taken */\r
287         } else {\r
288             return 0; /* no mapping */\r
289         }\r
290     }\r
291     \r
292     /*\r
293      * Each of these charset masks (with index x) contains a bit for a charset in exact correspondence\r
294      * to whether that charset is used in the corresponding version x of ISO_2022, locale=ja,version=x\r
295      * \r
296      * Note: The converter uses some leniency:\r
297      * - The escape sequence ESC ( I for half-width 7-bit Katakana is recognized in\r
298      *   all versions, not just JIS7 and JIS8.\r
299      * - ICU does not distinguish between different versions of JIS X 0208.\r
300      */\r
301     private static final short jpCharsetMasks[] = {\r
302         (short)(CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)),\r
303         (short)(CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212)),\r
304         (short)(CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212)|CSM(GB2312)|CSM(KSC5601)|CSM(ISO8859_1)|CSM(ISO8859_7)),\r
305         (short)(CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212)|CSM(GB2312)|CSM(KSC5601)|CSM(ISO8859_1)|CSM(ISO8859_7)),\r
306         (short)(CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212)|CSM(GB2312)|CSM(KSC5601)|CSM(ISO8859_1)|CSM(ISO8859_7))\r
307     };\r
308 \r
309 /*\r
310     // typedef enum {\r
311         private static final byte ASCII1 = 0;\r
312         private static final byte LATIN1 = 1;\r
313         private static final byte SBCS   = 2;\r
314         private static final byte DBCS   = 3;\r
315         private static final byte MBCS   = 4;\r
316         private static final byte HWKANA = 5;\r
317     // } Cnv2002Type;\r
318 */\r
319 \r
320     private class ISO2022State {\r
321         private byte []cs;  /* Charset number for SI (G0)/SO (G1)/SS2 (G2)/SS3 (G3) */\r
322         private byte g;     /* 0..3 for G0..G3 (SI/SO/SS2/SS3) */\r
323         private byte prevG; /* g before single shift (SS2 or SS3) */\r
324         \r
325         ISO2022State() {\r
326             cs = new byte[4];\r
327         }\r
328         \r
329         void reset() {\r
330             Arrays.fill(cs, (byte)0);\r
331             g = 0;\r
332             prevG = 0;\r
333         }\r
334     }\r
335     \r
336 //    private static final byte UCNV_OPTIONS_VERSION_MASK = 0xf;\r
337     private static final byte UCNV_2022_MAX_CONVERTERS  = 10;\r
338     \r
339     @SuppressWarnings("unused")\r
340     private class UConverterDataISO2022 {\r
341         UConverterSharedData []myConverterArray;\r
342         CharsetEncoderMBCS currentEncoder;\r
343         CharsetDecoderMBCS currentDecoder;\r
344         CharsetMBCS currentConverter;\r
345         int currentType; // Cnv2022Type;\r
346         ISO2022State toU2022State;\r
347         ISO2022State fromU2022State;\r
348         int key;\r
349         int version;\r
350         boolean isEmptySegment;\r
351         \r
352         UConverterDataISO2022() {\r
353             myConverterArray = new UConverterSharedData[UCNV_2022_MAX_CONVERTERS];\r
354             toU2022State = new ISO2022State();\r
355             fromU2022State = new ISO2022State();\r
356             currentType = 0;\r
357             key = 0;\r
358             version = 0;\r
359             isEmptySegment = false;\r
360         }\r
361         \r
362         void reset() {\r
363             toU2022State.reset();\r
364             fromU2022State.reset();\r
365             isEmptySegment = false;\r
366         }\r
367     }\r
368     \r
369     private static final byte ESC_2022 = 0x1B; /* ESC */\r
370     \r
371     // typedef enum {\r
372         private static final byte INVALID_2022              = -1; /* Doesn't correspond to a valid iso 2022 escape sequence */\r
373         private static final byte VALID_NON_TERMINAL_2022   =  0;  /* so far corresponds to a valid iso 2022 escape sequence */\r
374         private static final byte VALID_TERMINAL_2022       =  1;  /* corresponds to a valid iso 2022 escape sequence */\r
375         private static final byte VALID_MAYBE_TERMINAL_2022 =  2;  /* so far matches one iso 2022 escape sequence, but by adding\r
376                                                                      more characters might match another escape sequence */\r
377     // } UCNV_TableStates_2022;\r
378         \r
379     /*\r
380      * The way these state transition arrays work is:\r
381      * ex : ESC$B is the sequence for JISX208\r
382      *      a) First Iteration: char is ESC\r
383      *          i) Get the value of ESC from normalize_esq_chars_2022[] with int value of ESC as index\r
384      *             int x = normalize_esq_chars_2022[27] which is equal to 1\r
385      *         ii) Search for this value in escSeqStateTable_Key_2022[]\r
386      *             value of x is stored at escSeqStateTable_Key_2022[0]\r
387      *        iii) Save this index as offset\r
388      *         iv) Get state of this sequence from escSeqStateTable_Value_2022[]\r
389      *             escSeqStateTable_value_2022[offset], which is VALID_NON_TERMINAL_2022\r
390      *      b) Switch on this state and continue to next char\r
391      *          i) Get the value of $ from normalize_esq_chars_2022[] with int value of $ as index\r
392      *             which is normalize_esq_chars_2022[36] == 4\r
393      *         ii) x is currently 1(from above)\r
394      *             x<<=5 -- x is now 32\r
395      *             x+=normalize_esq_chars_2022[36]\r
396      *             now x is 36\r
397      *        iii) Search for this value in escSeqStateTable_Key_2022[]\r
398      *             value of x is stored at escSeqStateTable_Key_2022[2], so offset is 2\r
399      *         iv) Get state of this sequence from escSeqStateTable_Value_2022[]\r
400      *             escSeqStateTable_Value_2022[offset], which is VALID_NON_TERMINAL_2022\r
401      *      c) Switch on this state and continue to next char\r
402      *          i) Get the value of B from normalize_esq_chars_2022[] with int value of B as index\r
403      *         ii) x is currently 36 (from above)\r
404      *             x<<=5 -- x is now 1152\r
405      *             x+= normalize_esq_chars_2022[66]\r
406      *             now x is 1161\r
407      *        iii) Search for this value in escSeqStateTable_Key_2022[]\r
408      *             value of x is stored at escSeqStateTable_Key_2022[24], so offset is 24\r
409      *         iv) Get state of this sequence from escSeqStateTable_Value_2022[]\r
410      *             escSeqStateTable_Value_2022[offset], which is VALID_TERMINAL_2022\r
411      *          v) Get the next state from nextStateToUnicodeJP[24], which is JISX208\r
412      */\r
413      /* Below are the 3 arrays depicting a state transition table */\r
414      private static final byte normalize_esq_chars_2022[] = {\r
415          /* 0       1       2       3       4       5       6       7       8       9 */\r
416             0,      0,      0,      0,      0,      0,      0,      0,      0,      0,\r
417             0,      0,      0,      0,      0,      0,      0,      0,      0,      0,\r
418             0,      0,      0,      0,      0,      0,      0,      1,      0,      0,\r
419             0,      0,      0,      0,      0,      0,      4,      7,     29,      0,\r
420             2,     24,     26,     27,      0,      3,     23,      6,      0,      0,\r
421             0,      0,      0,      0,      0,      0,      0,      0,      0,      0,\r
422             0,      0,      0,      0,      5,      8,      9,     10,     11,     12,\r
423            13,     14,     15,     16,     17,     18,     19,     20,     25,     28,\r
424             0,      0,     21,      0,      0,      0,      0,      0,      0,      0,\r
425            22,      0,      0,      0,      0,      0,      0,      0,      0,      0,\r
426             0,      0,      0,      0,      0,      0,      0,      0,      0,      0,\r
427             0,      0,      0,      0,      0,      0,      0,      0,      0,      0,\r
428             0,      0,      0,      0,      0,      0,      0,      0,      0,      0,\r
429             0,      0,      0,      0,      0,      0,      0,      0,      0,      0,\r
430             0,      0,      0,      0,      0,      0,      0,      0,      0,      0,\r
431             0,      0,      0,      0,      0,      0,      0,      0,      0,      0,\r
432             0,      0,      0,      0,      0,      0,      0,      0,      0,      0,\r
433             0,      0,      0,      0,      0,      0,      0,      0,      0,      0,\r
434             0,      0,      0,      0,      0,      0,      0,      0,      0,      0,\r
435             0,      0,      0,      0,      0,      0,      0,      0,      0,      0,\r
436             0,      0,      0,      0,      0,      0,      0,      0,      0,      0,\r
437             0,      0,      0,      0,      0,      0,      0,      0,      0,      0,\r
438             0,      0,      0,      0,      0,      0,      0,      0,      0,      0,\r
439             0,      0,      0,      0,      0,      0,      0,      0,      0,      0,\r
440             0,      0,      0,      0,      0,      0,      0,      0,      0,      0,\r
441             0,      0,      0,      0,      0,      0,      0,      0,      0,      0,\r
442             0,      0,      0,      0,      0,      0\r
443      };\r
444      \r
445      private static final short MAX_STATES_2022 = 74;\r
446      private static final int escSeqStateTable_Key_2022[/* MAX_STATES_2022 */] = {\r
447          /* 0        1          2         3        4          5         6         7         8         9 */\r
448             1,      34,        36,       39,      55,        57,       60,       61,     1093,     1096,\r
449          1097,    1098,      1099,     1100,     1101,     1102,     1103,     1104,     1105,     1106,\r
450          1109,    1154,      1157,     1160,     1161,     1176,     1178,     1179,     1254,     1257,\r
451          1768,    1773,      1957,    35105,    36933,    36936,    36937,    36938,    36939,    36940,\r
452         36942,   36943,     36944,    36945,    36946,    36947,    36948,    37640,    37642,    37644,\r
453         37646,   37711,     37744,    37745,    37746,    37747,    37748,    40133,    40136,    40138,\r
454         40139,   40140,     40141,  1123363, 35947624, 35947625, 35947626, 35947627, 35947629, 35947630,\r
455      35947631, 35947635, 35947636, 35947638\r
456      };\r
457      \r
458      private static final byte escSeqStateTable_Value_2022[/* MAX_STATES_2022 */] = {\r
459          /*         0                           1                           2                           3                       4               */\r
460          VALID_NON_TERMINAL_2022,   VALID_NON_TERMINAL_2022,    VALID_NON_TERMINAL_2022,    VALID_NON_TERMINAL_2022,    VALID_NON_TERMINAL_2022,    \r
461              VALID_TERMINAL_2022,       VALID_TERMINAL_2022,    VALID_NON_TERMINAL_2022,        VALID_TERMINAL_2022,        VALID_TERMINAL_2022,\r
462        VALID_MAYBE_TERMINAL_2022,       VALID_TERMINAL_2022,        VALID_TERMINAL_2022,        VALID_TERMINAL_2022,        VALID_TERMINAL_2022,\r
463              VALID_TERMINAL_2022,       VALID_TERMINAL_2022,        VALID_TERMINAL_2022,        VALID_TERMINAL_2022,        VALID_TERMINAL_2022,\r
464              VALID_TERMINAL_2022,   VALID_NON_TERMINAL_2022,        VALID_TERMINAL_2022,        VALID_TERMINAL_2022,        VALID_TERMINAL_2022,\r
465          VALID_NON_TERMINAL_2022,   VALID_NON_TERMINAL_2022,    VALID_NON_TERMINAL_2022,    VALID_NON_TERMINAL_2022,        VALID_TERMINAL_2022,\r
466              VALID_TERMINAL_2022,       VALID_TERMINAL_2022,        VALID_TERMINAL_2022,    VALID_NON_TERMINAL_2022,        VALID_TERMINAL_2022,\r
467              VALID_TERMINAL_2022,       VALID_TERMINAL_2022,        VALID_TERMINAL_2022,        VALID_TERMINAL_2022,        VALID_TERMINAL_2022,\r
468              VALID_TERMINAL_2022,       VALID_TERMINAL_2022,        VALID_TERMINAL_2022,        VALID_TERMINAL_2022,        VALID_TERMINAL_2022,\r
469              VALID_TERMINAL_2022,       VALID_TERMINAL_2022,        VALID_TERMINAL_2022,        VALID_TERMINAL_2022,        VALID_TERMINAL_2022,\r
470              VALID_TERMINAL_2022,       VALID_TERMINAL_2022,        VALID_TERMINAL_2022,        VALID_TERMINAL_2022,        VALID_TERMINAL_2022,\r
471              VALID_TERMINAL_2022,       VALID_TERMINAL_2022,        VALID_TERMINAL_2022,        VALID_TERMINAL_2022,        VALID_TERMINAL_2022,\r
472              VALID_TERMINAL_2022,       VALID_TERMINAL_2022,        VALID_TERMINAL_2022,    VALID_NON_TERMINAL_2022,        VALID_TERMINAL_2022,\r
473              VALID_TERMINAL_2022,       VALID_TERMINAL_2022,        VALID_TERMINAL_2022,        VALID_TERMINAL_2022,        VALID_TERMINAL_2022,\r
474              VALID_TERMINAL_2022,       VALID_TERMINAL_2022,        VALID_TERMINAL_2022,        VALID_TERMINAL_2022\r
475      };\r
476      \r
477      /* Type def for refactoring changeState_2022 code */\r
478      // typedef enum {\r
479          private static final byte ISO_2022_JP = 1;\r
480          private static final byte ISO_2022_KR = 2;\r
481          private static final byte ISO_2022_CN = 3;\r
482      // } Variant2022;\r
483          \r
484     /* const UConverterSharedData _ISO2022Data; */\r
485     //private UConverterSharedData _ISO2022JPData;\r
486     //private UConverterSharedData _ISO2022KRData;\r
487     //private UConverterSharedData _ISO2022CNData;\r
488     \r
489     /******************** to unicode ********************/\r
490     /****************************************************\r
491      * Recognized escape sequences are\r
492      * <ESC>(B  ASCII\r
493      * <ESC>.A  ISO-8859-1\r
494      * <ESC>.F  ISO-8859-7\r
495      * <ESC>(J  JISX-201\r
496      * <ESC>(I  JISX-201\r
497      * <ESC>$B  JISX-208\r
498      * <ESC>$@  JISX-208\r
499      * <ESC>$(D JISX-212\r
500      * <ESC>$A  GB2312\r
501      * <ESC>$(C KSC5601\r
502      */\r
503     private final static byte nextStateToUnicodeJP[/* MAX_STATES_2022 */] = {\r
504         /*     0               1               2               3               4               5               6               7               8               9    */\r
505         INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,      SS2_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,\r
506                 ASCII,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,        JISX201,    HWKANA_7BIT,        JISX201,  INVALID_STATE,\r
507         INVALID_STATE,  INVALID_STATE,        JISX208,         GB2312,        JISX208,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,\r
508             ISO8859_1,      ISO8859_7,        JISX208,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,        KSC5601,        JISX212,  INVALID_STATE,\r
509         INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,\r
510         INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,\r
511         INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,\r
512         INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE\r
513     };\r
514     \r
515     private final static byte nextStateToUnicodeCN[/* MAX_STATES_2022 */] = {\r
516         /*     0               1               2               3               4               5               6               7               8               9    */\r
517         INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,      SS2_STATE,      SS3_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,\r
518         INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,\r
519         INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,\r
520         INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,\r
521         INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,       GB2312_1,  INVALID_STATE,     ISO_IR_165,\r
522           CNS_11643_1,    CNS_11643_2,    CNS_11643_3,    CNS_11643_4,    CNS_11643_5,    CNS_11643_6,    CNS_11643_7,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,\r
523         INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,\r
524         INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE\r
525     };\r
526     \r
527     /* runs through a state machine to determine the escape sequence - codepage correspondence */\r
528     @SuppressWarnings("fallthrough")\r
529     private CoderResult changeState_2022(CharsetDecoderICU decoder, ByteBuffer source, int var) {\r
530         CoderResult err = CoderResult.UNDERFLOW;\r
531         boolean DONE = false;\r
532         byte value;\r
533         int key[] = {myConverterData.key};\r
534         int offset[] = {0};\r
535         int initialToULength = decoder.toULength;\r
536         byte c;\r
537         int malformLength = 0;\r
538         \r
539         value = VALID_NON_TERMINAL_2022;\r
540         while (source.hasRemaining()) {\r
541             c = source.get();\r
542             malformLength++;\r
543             decoder.toUBytesArray[decoder.toULength++] = c;\r
544             value = getKey_2022(c, key, offset);\r
545             \r
546             switch(value) {\r
547             \r
548             case VALID_NON_TERMINAL_2022:\r
549                 /* continue with the loop */\r
550                 break;\r
551                 \r
552             case VALID_TERMINAL_2022:\r
553                 key[0] = 0;\r
554                 DONE = true;\r
555                 break;\r
556                 \r
557             case INVALID_2022:\r
558                 DONE = true;\r
559                 break;\r
560                 \r
561             case VALID_MAYBE_TERMINAL_2022:\r
562                 /* not ISO_2022 itself, finish here */\r
563                 value = VALID_TERMINAL_2022;\r
564                 key[0] = 0;\r
565                 DONE = true;\r
566                 break;\r
567             }\r
568             if (DONE) {\r
569                 break;\r
570             }\r
571         }\r
572 // DONE:\r
573         myConverterData.key = key[0];\r
574         \r
575         if (value == VALID_NON_TERMINAL_2022) {\r
576             /* indicate that the escape sequence is incomplete: key !=0 */\r
577             return err;\r
578         } else if (value == INVALID_2022) {\r
579             err = CoderResult.malformedForLength(malformLength);\r
580         } else /* value == VALID_TERMINAL_2022 */ {\r
581             switch (var) {\r
582             case ISO_2022_JP: {\r
583                 byte tempState = nextStateToUnicodeJP[offset[0]];\r
584                 switch (tempState) {\r
585                 case INVALID_STATE:\r
586                     err = CoderResult.malformedForLength(malformLength);\r
587                     break;\r
588                 case SS2_STATE:\r
589                     if (myConverterData.toU2022State.cs[2] != 0) {\r
590                         if (myConverterData.toU2022State.g < 2) {\r
591                             myConverterData.toU2022State.prevG = myConverterData.toU2022State.g;\r
592                         }\r
593                         myConverterData.toU2022State.g = 2;\r
594                     } else { \r
595                         /* illegal to have SS2 before a matching designator */\r
596                         err = CoderResult.malformedForLength(malformLength);\r
597                     }\r
598                     break;\r
599                 /* case SS3_STATE: not used in ISO-2022-JP-x */\r
600                 case ISO8859_1:\r
601                 case ISO8859_7:\r
602                     if ((jpCharsetMasks[myConverterData.version] & CSM(tempState)) == 0) {\r
603                         err = CoderResult.unmappableForLength(malformLength);\r
604                     } else {\r
605                         /* G2 charset for SS2 */\r
606                         myConverterData.toU2022State.cs[2] = tempState;\r
607                     }\r
608                     break;\r
609                 default:\r
610                     if ((jpCharsetMasks[myConverterData.version] & CSM(tempState)) == 0) {\r
611                         err = CoderResult.unmappableForLength(source.position() - 1);\r
612                     } else {\r
613                         /* G0 charset */\r
614                         myConverterData.toU2022State.cs[0] = tempState;\r
615                     }\r
616                     break;\r
617                 } // end of switch\r
618                 break;\r
619             }\r
620             case ISO_2022_CN: {\r
621                 byte tempState = nextStateToUnicodeCN[offset[0]];\r
622                 switch (tempState) {\r
623                 case INVALID_STATE:\r
624                     err = CoderResult.unmappableForLength(malformLength);\r
625                     break;\r
626                 case SS2_STATE:\r
627                     if (myConverterData.toU2022State.cs[2] != 0) {\r
628                         if (myConverterData.toU2022State.g < 2) {\r
629                             myConverterData.toU2022State.prevG = myConverterData.toU2022State.g;\r
630                         }\r
631                         myConverterData.toU2022State.g = 2;\r
632                     } else {\r
633                         /* illegal to have SS2 before a matching designator */\r
634                         err = CoderResult.malformedForLength(malformLength);\r
635                     }\r
636                     break;\r
637                 case SS3_STATE:\r
638                     if (myConverterData.toU2022State.cs[3] != 0) {\r
639                         if (myConverterData.toU2022State.g < 2) {\r
640                             myConverterData.toU2022State.prevG = myConverterData.toU2022State.g;\r
641                         }\r
642                         myConverterData.toU2022State.g = 3;\r
643                     } else {\r
644                         /* illegal to have SS3 before a matching designator */\r
645                         err = CoderResult.malformedForLength(malformLength);\r
646                     }\r
647                     break;\r
648                 case ISO_IR_165:\r
649                     if (myConverterData.version == 0) {\r
650                         err = CoderResult.unmappableForLength(malformLength);\r
651                         break;\r
652                     }\r
653                     /* fall through */\r
654                 case GB2312_1:\r
655                     /* fall through */\r
656                 case CNS_11643_1:\r
657                     myConverterData.toU2022State.cs[1] = tempState;\r
658                     break;\r
659                 case CNS_11643_2:\r
660                     myConverterData.toU2022State.cs[2] = tempState;\r
661                     break;\r
662                 default:\r
663                     /* other CNS 11643 planes */\r
664                     if (myConverterData.version == 0) {\r
665                         err = CoderResult.unmappableForLength(source.position() - 1);\r
666                     } else {\r
667                         myConverterData.toU2022State.cs[3] = tempState;\r
668                     }\r
669                     break;\r
670                 } //end of switch\r
671             }\r
672             break;\r
673             case ISO_2022_KR:\r
674                 if (offset[0] == 0x30) {\r
675                     /* nothing to be done, just accept this one escape sequence */\r
676                 } else {\r
677                     err = CoderResult.unmappableForLength(malformLength);\r
678                 }\r
679                 break;\r
680             default:\r
681                 err = CoderResult.malformedForLength(malformLength);\r
682                 break;\r
683             } // end of switch\r
684         }\r
685         if (!err.isError()) {\r
686             decoder.toULength = 0;\r
687         } else if (err.isMalformed()) {\r
688             if (decoder.toULength > 1) {\r
689                 /*\r
690                  * Ticket 5691: consistent illegal sequences:\r
691                  * - We include at least the first byte (ESC) in the illegal sequence.\r
692                  * - If any of the non-initial bytes could be the start of a character,\r
693                  *   we stop the illegal sequece before the first one of those.\r
694                  *   In escape sequences, all following bytes are "printable", that is,\r
695                  *   unless they are completely illegal (>7f in SBCS, outside 21..7e in DBCS),\r
696                  *   they are valid single/lead bytes.\r
697                  *   For simplicity, we always only report the initial ESC byte as the\r
698                  *   illegal sequence and back out all other bytes we looked at.\r
699                  */\r
700                 /* Back out some bytes. */\r
701                 int backOutDistance = decoder.toULength - 1;\r
702                 int bytesFromThisBuffer = decoder.toULength - initialToULength;\r
703                 if (backOutDistance <= bytesFromThisBuffer) {\r
704                     /* same as initialToULength<=1 */\r
705                     source.position(source.position() - backOutDistance);\r
706                 } else {\r
707                     /* Back out bytes from the previous buffer: Need to replay them. */\r
708                     decoder.preToULength = (byte)(bytesFromThisBuffer - backOutDistance);\r
709                     /* same as -(initalToULength-1) */\r
710                     /* preToULength is negative! */\r
711                     for (int i = 0; i < -(decoder.preToULength); i++) {\r
712                         decoder.preToUArray[i] = decoder.toUBytesArray[i+1];\r
713                     }\r
714                     source.position(source.position() - bytesFromThisBuffer);\r
715                 }\r
716                 decoder.toULength = 1;\r
717             }\r
718         }\r
719         \r
720         return err;\r
721     }\r
722     \r
723     private static byte getKey_2022(byte c, int[]key, int[]offset) {\r
724         int togo;\r
725         int low = 0;\r
726         int hi = MAX_STATES_2022;\r
727         int oldmid = 0;\r
728         \r
729         togo = normalize_esq_chars_2022[(short)c&UConverterConstants.UNSIGNED_BYTE_MASK];\r
730         \r
731         if (togo == 0) {\r
732             /* not a valid character anywhere in an escape sequence */\r
733             key[0] = 0;\r
734             offset[0] = 0;\r
735             return INVALID_2022;\r
736         }\r
737         togo = (key[0] << 5) + togo;\r
738         \r
739         while (hi != low) { /* binary search */\r
740             int mid = (hi+low) >> 1; /* Finds median */\r
741         \r
742             if (mid == oldmid) {\r
743                 break;\r
744             }\r
745             \r
746             if (escSeqStateTable_Key_2022[mid] > togo) {\r
747                 hi = mid;\r
748             } else if (escSeqStateTable_Key_2022[mid] < togo) {\r
749                 low = mid;\r
750             } else /* we found it */ {\r
751                 key[0] = togo;\r
752                 offset[0] = mid;\r
753                 return escSeqStateTable_Value_2022[mid];\r
754             }\r
755             oldmid = mid;\r
756         }\r
757         return INVALID_2022;\r
758     }\r
759     \r
760     /*\r
761      * To Unicode Callback helper function\r
762      */\r
763     private static CoderResult toUnicodeCallback(CharsetDecoderICU cnv, int sourceChar, int targetUniChar) {\r
764         CoderResult err = CoderResult.UNDERFLOW;\r
765         if (sourceChar > 0xff) {\r
766             cnv.toUBytesArray[0] = (byte)(sourceChar>>8);\r
767             cnv.toUBytesArray[1] = (byte)sourceChar;\r
768             cnv.toULength = 2;\r
769         } else {\r
770             cnv.toUBytesArray[0] = (byte)sourceChar;\r
771             cnv.toULength = 1;\r
772         }\r
773         \r
774         if (targetUniChar == (UConverterConstants.missingCharMarker-1/* 0xfffe */)) {\r
775             err = CoderResult.unmappableForLength(1);\r
776         } else {\r
777             err = CoderResult.malformedForLength(1);\r
778         }\r
779         \r
780         return err;\r
781     }\r
782     \r
783     /****************************ISO-2022-JP************************************/\r
784     private class CharsetDecoderISO2022JP extends CharsetDecoderICU {\r
785         public CharsetDecoderISO2022JP(CharsetICU cs) {\r
786             super(cs);\r
787         }\r
788         \r
789         protected void implReset() {\r
790             super.implReset();\r
791             myConverterData.reset();\r
792         }\r
793         /* \r
794          * Map 00..7F to Unicode according to JIS X 0201. \r
795          * */\r
796         private int jisx201ToU(int value) {\r
797             if (value < 0x5c) {\r
798                 return value;\r
799             } else if (value == 0x5c) {\r
800                 return 0xa5;\r
801             } else if (value == 0x7e) {\r
802                 return 0x203e;\r
803             } else { /* value <= 0x7f */\r
804                 return value;\r
805             }\r
806         }\r
807         /*\r
808          * Convert a pair of JIS X 208 21..7E bytes to Shift-JIS.\r
809          * If either byte is outside 21..7E make sure that the result is not valid\r
810          * for Shift-JIS so that the converter catches it.\r
811          * Some invalid byte values already turn into equally invalid Shift-JIS\r
812          * byte values and need not be tested explicitly.\r
813          */\r
814         private void _2022ToSJIS(char c1, char c2, byte []bytes) {\r
815             if ((c1&1) > 0) {\r
816                 ++c1;\r
817                 if (c2 <= 0x5f) {\r
818                     c2 += 0x1f;\r
819                 } else if (c2 <= 0x7e) {\r
820                     c2 += 0x20;\r
821                 } else {\r
822                     c2 = 0; /* invalid */\r
823                 }\r
824             } else {\r
825                 if ((c2 >= 0x21) && (c2 <= 0x7e)) {\r
826                     c2 += 0x7e;\r
827                 } else {\r
828                     c2 = 0; /* invalid */\r
829                 }\r
830             }\r
831             \r
832             c1 >>=1;\r
833             if (c1 <= 0x2f) {\r
834                 c1 += 0x70;\r
835             } else if (c1 <= 0x3f) {\r
836                 c1 += 0xb0;\r
837             } else {\r
838                 c1 = 0; /* invalid */\r
839             }\r
840             bytes[0] = (byte)(UConverterConstants.UNSIGNED_BYTE_MASK & c1);\r
841             bytes[1] = (byte)(UConverterConstants.UNSIGNED_BYTE_MASK & c2);\r
842         }\r
843 \r
844         @SuppressWarnings("fallthrough")\r
845         protected CoderResult decodeLoop(ByteBuffer source, CharBuffer target, IntBuffer offsets, boolean flush) {\r
846             boolean gotoGetTrail = false;\r
847             boolean gotoEscape = false;\r
848             CoderResult err = CoderResult.UNDERFLOW;\r
849             byte []tempBuf = new byte[2];\r
850             int targetUniChar = 0x0000;\r
851             int mySourceChar = 0x0000;\r
852             int mySourceCharTemp = 0x0000; // use for getTrail label call.\r
853             byte cs; /* StateEnum */\r
854             byte csTemp= 0; // use for getTrail label call.\r
855             \r
856             if (myConverterData.key != 0) {\r
857                 /* continue with a partial escape sequence */\r
858                 // goto escape;\r
859                 gotoEscape = true;\r
860             } else if (toULength == 1 && source.hasRemaining() && target.hasRemaining()) {\r
861                 /* continue with a partial double-byte character */\r
862                 mySourceChar = (toUBytesArray[0] & UConverterConstants.UNSIGNED_BYTE_MASK);\r
863                 toULength = 0;\r
864                 cs = myConverterData.toU2022State.cs[myConverterData.toU2022State.g];\r
865                 // goto getTrailByte;\r
866                 mySourceCharTemp = 0x99;\r
867                 gotoGetTrail = true;\r
868             }\r
869             \r
870             while (source.hasRemaining() || gotoEscape || gotoGetTrail) {\r
871                 // This code is here for the goto escape label call above.\r
872                 if (gotoEscape) {\r
873                     mySourceCharTemp = ESC_2022;\r
874                 }\r
875                 \r
876                 targetUniChar = UConverterConstants.missingCharMarker;\r
877                 \r
878                 if (gotoEscape || gotoGetTrail || target.hasRemaining()) {\r
879                     if (!gotoEscape && !gotoGetTrail) {\r
880                         mySourceChar = source.get() & UConverterConstants.UNSIGNED_BYTE_MASK;\r
881                         mySourceCharTemp = mySourceChar;\r
882                     }\r
883                     \r
884                     switch (mySourceCharTemp) {\r
885                     case UConverterConstants.SI:\r
886                         if (myConverterData.version == 3) {\r
887                             myConverterData.toU2022State.g = 0;\r
888                             continue;\r
889                         } else {\r
890                             /* only JIS7 uses SI/SO, not ISO-2022-JP-x */\r
891                             myConverterData.isEmptySegment = false;\r
892                             break;\r
893                         }\r
894                         \r
895                     case UConverterConstants.SO:\r
896                         if (myConverterData.version == 3) {\r
897                             /* JIS7: switch to G1 half-width Katakana */\r
898                             myConverterData.toU2022State.cs[1] = HWKANA_7BIT;\r
899                             myConverterData.toU2022State.g = 1;\r
900                             continue; \r
901                         } else {\r
902                             /* only JIS7 uses SI/SO, not ISO-2022-JP-x */\r
903                             myConverterData.isEmptySegment = false; /* reset this, we have a different error */\r
904                             break;\r
905                         }\r
906                         \r
907                     case ESC_2022:\r
908                         if (!gotoEscape) {\r
909                             source.position(source.position() - 1);\r
910                         } else {\r
911                             gotoEscape = false;\r
912                         }\r
913 // escape:\r
914                         {\r
915                             int mySourceBefore = source.position();\r
916                             int toULengthBefore = this.toULength;\r
917                             \r
918                             err = changeState_2022(this, source, variant);\r
919 \r
920                             /* If in ISO-2022-JP only and we successully completed an escape sequence, but previous segment was empty, create an error */\r
921                             if(myConverterData.version == 0 && myConverterData.key == 0 && !err.isError() && myConverterData.isEmptySegment) {\r
922                                 err = CoderResult.malformedForLength(source.position() - mySourceBefore);\r
923                                 this.toULength = toULengthBefore + (source.position() - mySourceBefore);\r
924                             }\r
925                         }\r
926 \r
927                         /* invalid or illegal escape sequence */\r
928                         if(err.isError()){\r
929                             myConverterData.isEmptySegment = false; /* Reset to avoid future spurious errors */\r
930                             return err;\r
931                         }\r
932                         /* If we successfully completed an escape sequence, we begin a new segment, empty so far */\r
933                         if(myConverterData.key == 0) {\r
934                             myConverterData.isEmptySegment = true;\r
935                         }\r
936 \r
937                         continue;\r
938                     /* ISO-2022-JP does not use single-byte (C1) SS2 and SS3 */\r
939                     case CR:\r
940                         /* falls through */\r
941                     case LF:\r
942                         /* automatically reset to single-byte mode */\r
943                         if (myConverterData.toU2022State.cs[0] != ASCII && myConverterData.toU2022State.cs[0] != JISX201) {\r
944                             myConverterData.toU2022State.cs[0] = ASCII;\r
945                         }\r
946                         myConverterData.toU2022State.cs[2] = 0;\r
947                         myConverterData.toU2022State.g = 0;\r
948                         /* falls through */\r
949                     default :\r
950                         /* convert one or two bytes */\r
951                         myConverterData.isEmptySegment = false;\r
952                         cs = myConverterData.toU2022State.cs[myConverterData.toU2022State.g];\r
953                         csTemp = cs;\r
954                         if (gotoGetTrail) {\r
955                             csTemp = (byte)0x99;\r
956                         }\r
957                         if (!gotoGetTrail && ((mySourceChar >= 0xa1) && (mySourceChar <= 0xdf) && myConverterData.version == 4 && !IS_JP_DBCS(cs))) {\r
958                             /* 8-bit halfwidth katakana in any single-byte mode for JIS8 */\r
959                             targetUniChar = mySourceChar + (HWKANA_START - 0xa1);\r
960                             \r
961                             /* return from a single-shift state to the previous one */\r
962                             if (myConverterData.toU2022State.g >= 2) {\r
963                                 myConverterData.toU2022State.g = myConverterData.toU2022State.prevG;\r
964                             }\r
965                         } else {\r
966                             switch(csTemp) {\r
967                             case ASCII:\r
968                                 if (mySourceChar <= 0x7f) {\r
969                                     targetUniChar = mySourceChar;\r
970                                 }\r
971                                 break;\r
972                             case ISO8859_1:\r
973                                 if (mySourceChar <= 0x7f) {\r
974                                     targetUniChar = mySourceChar + 0x80;\r
975                                 }\r
976                                 /* return from a single-shift state to the prevous one */\r
977                                 myConverterData.toU2022State.g = myConverterData.toU2022State.prevG;\r
978                                 break;\r
979                             case ISO8859_7:\r
980                                 if (mySourceChar <= 0x7f) {\r
981                                     /* convert mySourceChar+0x80 to use a normal 8-bit table */\r
982                                     targetUniChar = CharsetMBCS.MBCS_SINGLE_SIMPLE_GET_NEXT_BMP(myConverterData.myConverterArray[cs].mbcs,\r
983                                             mySourceChar+0x80);\r
984                                 }\r
985                                 /* return from a single-shift state to the previous one */\r
986                                 myConverterData.toU2022State.g = myConverterData.toU2022State.prevG;\r
987                                 break;\r
988                             case JISX201:\r
989                                 if (mySourceChar <= 0x7f) {\r
990                                     targetUniChar = jisx201ToU(mySourceChar);\r
991                                 }\r
992                                 break;\r
993                             case HWKANA_7BIT:\r
994                                 if ((mySourceChar >= 0x21) && (mySourceChar <= 0x5f)) {\r
995                                     /* 7-bit halfwidth Katakana */\r
996                                     targetUniChar = mySourceChar + (HWKANA_START - 0x21);\r
997                                     break;\r
998                                 }\r
999                             default :\r
1000                                 /* G0 DBCS */\r
1001                                 if (gotoGetTrail || source.hasRemaining()) {\r
1002 // getTrailByte:\r
1003                                     int tmpSourceChar;\r
1004                                     gotoGetTrail = false;\r
1005                                     short trailByte;\r
1006                                     boolean leadIsOk, trailIsOk;\r
1007                                     \r
1008                                     trailByte = (short)(source.get(source.position()) & UConverterConstants.UNSIGNED_BYTE_MASK);\r
1009                                     /*\r
1010                                      * Ticket 5691: consistent illegal sequences:\r
1011                                      * - We include at least the first byte in the illegal sequence.\r
1012                                      * - If any of the non-initial bytes could be the start of a character,\r
1013                                      *   we stop the illegal sequence before the first one of those.\r
1014                                      * \r
1015                                      * In ISO-2022 DBCS, if the second byte is in the 21..7e range or is\r
1016                                      * an ESC/SO/SI, we report only the first byte as the illegal sequence.\r
1017                                      * Otherwise we convert or report the pair of bytes.\r
1018                                      */\r
1019                                     leadIsOk = (short)(UConverterConstants.UNSIGNED_BYTE_MASK & (mySourceChar - 0x21)) <= (0x7e - 0x21);\r
1020                                     trailIsOk = (short)(UConverterConstants.UNSIGNED_BYTE_MASK & (trailByte - 0x21)) <= (0x7e - 0x21);\r
1021                                     if (leadIsOk && trailIsOk) {\r
1022                                         source.get();\r
1023                                         tmpSourceChar = (mySourceChar << 8) | trailByte;\r
1024                                         if (cs == JISX208) {\r
1025                                             _2022ToSJIS((char)mySourceChar, (char)trailByte, tempBuf);\r
1026                                             mySourceChar = tmpSourceChar;\r
1027                                         } else {\r
1028                                             /* Copy before we modify tmpSourceChar so toUnicodeCallback() sees the correct bytes. */\r
1029                                             mySourceChar = tmpSourceChar;\r
1030                                             if (cs == KSC5601) {\r
1031                                                 tmpSourceChar += 0x8080; /* = _2022ToGR94DBCS(tmpSourceChar) */\r
1032                                             }\r
1033                                             tempBuf[0] = (byte)(UConverterConstants.UNSIGNED_BYTE_MASK & (tmpSourceChar >> 8));\r
1034                                             tempBuf[1] = (byte)(UConverterConstants.UNSIGNED_BYTE_MASK & tmpSourceChar);\r
1035                                         }\r
1036                                         targetUniChar = MBCSSimpleGetNextUChar(myConverterData.myConverterArray[cs], ByteBuffer.wrap(tempBuf), false);\r
1037                                     } else if (!(trailIsOk || IS_2022_CONTROL(trailByte))) {\r
1038                                         /* report a pair of illegal bytes if the second byte is not a DBCS starter */\r
1039                                         source.get();\r
1040                                         /* add another bit so that the code below writes 2 bytes in case of error */\r
1041                                         mySourceChar = 0x10000 | (mySourceChar << 8) | trailByte;\r
1042                                     }\r
1043                                 } else {\r
1044                                     toUBytesArray[0] = (byte)mySourceChar;\r
1045                                     toULength = 1;\r
1046                                     // goto endloop\r
1047                                     return err;\r
1048                                 }\r
1049                             } /* end of inner switch */\r
1050                         }\r
1051                         break;\r
1052                     } /* end of outer switch */\r
1053                     \r
1054                     if (targetUniChar < (UConverterConstants.missingCharMarker-1/*0xfffe*/)) {\r
1055                         if (offsets != null) {\r
1056                             offsets.put(target.remaining(), source.remaining() - (mySourceChar <= 0xff ? 1 : 2));\r
1057                         }\r
1058                         target.put((char)targetUniChar);\r
1059                     } else if (targetUniChar > UConverterConstants.missingCharMarker) {\r
1060                         /* disassemble the surrogate pair and write to output */\r
1061                         targetUniChar -= 0x0010000;\r
1062                         target.put((char)(0xd800 + (char)(targetUniChar>>10)));\r
1063                         target.position(target.position()-1);\r
1064                         if (offsets != null) {\r
1065                             offsets.put(target.remaining(), source.remaining() - (mySourceChar <= 0xff ? 1 : 2));\r
1066                         }\r
1067                         target.get();\r
1068                         if (target.hasRemaining()) {\r
1069                             target.put((char)(0xdc00+(char)(targetUniChar&0x3ff)));\r
1070                             target.position(target.position()-1);\r
1071                             if (offsets != null) {\r
1072                                 offsets.put(target.remaining(), source.remaining() - (mySourceChar <= 0xff ? 1 : 2));\r
1073                             }\r
1074                             target.get();\r
1075                         } else {\r
1076                             charErrorBufferArray[charErrorBufferLength++] = \r
1077                                 (char)(0xdc00+(char)(targetUniChar&0x3ff));\r
1078                         }\r
1079                     } else {\r
1080                         /* Call the callback function */\r
1081                         err = toUnicodeCallback(this, mySourceChar, targetUniChar);\r
1082                         break;\r
1083                     }\r
1084                 } else { /* goes with "if (target.hasRemaining())" way up near the top of the function */\r
1085                     err = CoderResult.OVERFLOW;\r
1086                     break;\r
1087                 }\r
1088             }\r
1089 //endloop:\r
1090             return err;\r
1091         }\r
1092     } // end of class CharsetDecoderISO2022JP\r
1093     \r
1094     /****************************ISO-2022-CN************************************/\r
1095     private class CharsetDecoderISO2022CN extends CharsetDecoderICU {\r
1096         public CharsetDecoderISO2022CN(CharsetICU cs) {\r
1097             super(cs);\r
1098         }\r
1099         \r
1100         protected void implReset() {\r
1101             super.implReset();\r
1102             myConverterData.reset();\r
1103         }\r
1104 \r
1105         @SuppressWarnings("fallthrough")\r
1106         protected CoderResult decodeLoop(ByteBuffer source, CharBuffer target, IntBuffer offsets, boolean flush) {\r
1107             CoderResult err = CoderResult.UNDERFLOW;\r
1108             byte[] tempBuf = new byte[3];\r
1109             int targetUniChar = 0x0000;\r
1110             int mySourceChar = 0x0000;\r
1111             int mySourceCharTemp = 0x0000;\r
1112             boolean gotoEscape = false;\r
1113             boolean gotoGetTrailByte = false;\r
1114             \r
1115             if (myConverterData.key != 0) {\r
1116                 /* continue with a partial escape sequence */\r
1117                 // goto escape;\r
1118                 gotoEscape = true;\r
1119             } else if (toULength == 1 && source.hasRemaining() && target.hasRemaining()) {\r
1120                 /* continue with a partial double-byte character */\r
1121                 mySourceChar = (toUBytesArray[0] & UConverterConstants.UNSIGNED_BYTE_MASK);\r
1122                 toULength = 0;\r
1123                 targetUniChar = UConverterConstants.missingCharMarker;\r
1124                 // goto getTrailByte\r
1125                 gotoGetTrailByte = true;\r
1126             }\r
1127             \r
1128             while (source.hasRemaining() || gotoGetTrailByte || gotoEscape) {\r
1129                 targetUniChar = UConverterConstants.missingCharMarker;\r
1130                 \r
1131                 if (target.hasRemaining() || gotoEscape) {\r
1132                     if (gotoEscape) {\r
1133                         mySourceChar = ESC_2022; // goto escape label\r
1134                         mySourceCharTemp = mySourceChar;\r
1135                     } else if (gotoGetTrailByte) {\r
1136                         mySourceCharTemp = 0xff; // goto getTrailByte; set mySourceCharTemp to go to default\r
1137                     } else {\r
1138                         mySourceChar = UConverterConstants.UNSIGNED_BYTE_MASK & source.get();\r
1139                         mySourceCharTemp = mySourceChar;\r
1140                     }\r
1141                     \r
1142                     switch (mySourceCharTemp) {\r
1143                     case UConverterConstants.SI:\r
1144                         myConverterData.toU2022State.g = 0;\r
1145                         if (myConverterData.isEmptySegment) {\r
1146                             myConverterData.isEmptySegment = false; /* we are handling it, reset to avoid future spurious errors */\r
1147                             err = CoderResult.malformedForLength(1);\r
1148                             this.toUBytesArray[0] = (byte)mySourceChar;\r
1149                             this.toULength = 1;\r
1150                             return err;\r
1151                         }\r
1152                         continue;\r
1153                         \r
1154                     case UConverterConstants.SO:\r
1155                         if (myConverterData.toU2022State.cs[1] != 0) {\r
1156                             myConverterData.toU2022State.g = 1;\r
1157                             myConverterData.isEmptySegment = true;  /* Begin a new segment, empty so far */\r
1158                             continue;\r
1159                         } else {\r
1160                             /* illegal to have SO before a matching designator */\r
1161                             myConverterData.isEmptySegment = false; /* Handling a different error, reset this to avoid future spurious errs */\r
1162                             break;\r
1163                         }\r
1164                         \r
1165                     case ESC_2022:\r
1166                         if (!gotoEscape) {\r
1167                             source.position(source.position()-1);\r
1168                         }\r
1169 // escape label\r
1170                         gotoEscape = false;\r
1171                         {\r
1172                             int mySourceBefore = source.position();\r
1173                             int toULengthBefore = this.toULength;\r
1174 \r
1175                             err = changeState_2022(this, source, ISO_2022_CN);\r
1176 \r
1177                             /* After SO there must be at least one character before a designator (designator error handled separately) */\r
1178                             if(myConverterData.key == 0 && !err.isError() && myConverterData.isEmptySegment) {\r
1179                                 err = CoderResult.malformedForLength(source.position() - mySourceBefore);\r
1180                                 this.toULength = toULengthBefore + (source.position() - mySourceBefore);\r
1181                             }\r
1182                         }\r
1183 \r
1184                         /* invalid or illegal escape sequence */\r
1185                         if(err.isError()){\r
1186                             myConverterData.isEmptySegment = false; /* Reset to avoid future spurious errors */\r
1187                             return err;\r
1188                         }\r
1189                         continue;\r
1190                         \r
1191                     /*ISO-2022-CN does not use single-byte (C1) SS2 and SS3 */\r
1192                     case CR:\r
1193                         /* falls through */\r
1194                     case LF:\r
1195                         myConverterData.toU2022State.reset();\r
1196                         /* falls through */\r
1197                     default:\r
1198                         /* converter one or two bytes */\r
1199                         myConverterData.isEmptySegment = false;\r
1200                         if (myConverterData.toU2022State.g != 0 || gotoGetTrailByte) {\r
1201                             if (source.hasRemaining() || gotoGetTrailByte) {\r
1202                                 UConverterSharedData cnv;\r
1203                                 byte tempState;\r
1204                                 int tempBufLen;\r
1205                                 boolean leadIsOk, trailIsOk;\r
1206                                 short trailByte;\r
1207 // getTrailByte: label\r
1208                                 gotoGetTrailByte = false; // reset gotoGetTrailByte\r
1209                                 \r
1210                                 trailByte = (short)(source.get(source.position()) & UConverterConstants.UNSIGNED_BYTE_MASK);\r
1211                                 /*\r
1212                                  * Ticket 5691: consistent illegal sequences:\r
1213                                  * - We include at least the first byte in the illegal sequence.\r
1214                                  * - If any of the non-initial bytes could be the start of a character,\r
1215                                  *   we stop the illegal sequence before the first one of those.\r
1216                                  * \r
1217                                  * In ISO-2022 DBCS, if the second byte is in the range 21..7e range or is\r
1218                                  * an ESC/SO/SI, we report only the first byte as the illegal sequence.\r
1219                                  * Otherwise we convert or report the pair of bytes.\r
1220                                  */\r
1221                                 leadIsOk = (short)(UConverterConstants.UNSIGNED_BYTE_MASK & (mySourceChar - 0x21)) <= (0x7e - 0x21);\r
1222                                 trailIsOk = (short)(UConverterConstants.UNSIGNED_BYTE_MASK & (trailByte - 0x21)) <= (0x7e - 0x21);\r
1223                                 if (leadIsOk && trailIsOk) {\r
1224                                     source.get();\r
1225                                     tempState = myConverterData.toU2022State.cs[myConverterData.toU2022State.g];\r
1226                                     if (tempState > CNS_11643_0) {\r
1227                                         cnv = myConverterData.myConverterArray[CNS_11643];\r
1228                                         tempBuf[0] = (byte)(0x80 + (tempState - CNS_11643_0));\r
1229                                         tempBuf[1] = (byte)mySourceChar;\r
1230                                         tempBuf[2] = (byte)trailByte;\r
1231                                         tempBufLen = 3;\r
1232                                     } else {\r
1233                                         cnv = myConverterData.myConverterArray[tempState];\r
1234                                         tempBuf[0] = (byte)mySourceChar;\r
1235                                         tempBuf[1] = (byte)trailByte;\r
1236                                         tempBufLen = 2;\r
1237                                     }\r
1238                                     ByteBuffer tempBuffer = ByteBuffer.wrap(tempBuf);\r
1239                                     tempBuffer.limit(tempBufLen);\r
1240                                     targetUniChar = MBCSSimpleGetNextUChar(cnv, tempBuffer, false);\r
1241                                     mySourceChar = (mySourceChar << 8) | trailByte;\r
1242                                     \r
1243                                 } else if (!(trailIsOk || IS_2022_CONTROL(trailByte))) {\r
1244                                     /* report a pair of illegal bytes if the second byte is not a DBCS starter */\r
1245                                     source.get();\r
1246                                     /* add another bit so that the code below writes 2 bytes in case of error */\r
1247                                     mySourceChar = 0x10000 | (mySourceChar << 8) | trailByte;\r
1248                                 }\r
1249                                 if (myConverterData.toU2022State.g >= 2) {\r
1250                                     /* return from a single-shift state to the previous one */\r
1251                                     myConverterData.toU2022State.g = myConverterData.toU2022State.prevG;\r
1252                                 }\r
1253                             } else {\r
1254                                 toUBytesArray[0] = (byte)mySourceChar;\r
1255                                 toULength = 1;\r
1256                                 // goto endloop;\r
1257                                 return err;\r
1258                             }\r
1259                         } else {\r
1260                             if (mySourceChar <= 0x7f) {\r
1261                                 targetUniChar = (char)mySourceChar;\r
1262                             }\r
1263                         }\r
1264                         break;\r
1265                     }\r
1266                     if ((UConverterConstants.UNSIGNED_INT_MASK&targetUniChar) < (UConverterConstants.UNSIGNED_INT_MASK&(UConverterConstants.missingCharMarker-1))) {\r
1267                         if (offsets != null) {\r
1268                             offsets.array()[target.position()] = source.remaining() - (mySourceChar <= 0xff ? 1 : 2);\r
1269                         }\r
1270                         target.put((char)targetUniChar);\r
1271                     } else if ((UConverterConstants.UNSIGNED_INT_MASK&targetUniChar) > (UConverterConstants.UNSIGNED_INT_MASK&(UConverterConstants.missingCharMarker))) {\r
1272                         /* disassemble the surrogate pair and write to output */\r
1273                         targetUniChar -= 0x0010000;\r
1274                         target.put((char)(0xd800+(char)(targetUniChar>>10)));\r
1275                         if (offsets != null) {\r
1276                             offsets.array()[target.position()-1] = source.position() - (mySourceChar <= 0xff ? 1 : 2);\r
1277                         }\r
1278                         if (target.hasRemaining()) {\r
1279                             target.put((char)(0xdc00+(char)(targetUniChar&0x3ff)));\r
1280                             if (offsets != null) {\r
1281                                 offsets.array()[target.position()-1] = source.position() - (mySourceChar <= 0xff ? 1 : 2);\r
1282                             }\r
1283                         } else {\r
1284                             charErrorBufferArray[charErrorBufferLength++] = (char)(0xdc00+(char)(targetUniChar&0x3ff));\r
1285                         }\r
1286                     } else {\r
1287                         /* Call the callback function */ \r
1288                         err = toUnicodeCallback(this, mySourceChar, targetUniChar);\r
1289                         break;\r
1290                     }\r
1291                     \r
1292                 } else {\r
1293                     err = CoderResult.OVERFLOW;\r
1294                     break;\r
1295                 }\r
1296             }\r
1297             \r
1298             return err;\r
1299         }\r
1300         \r
1301     }\r
1302     /************************ ISO-2022-KR ********************/\r
1303     private class CharsetDecoderISO2022KR extends CharsetDecoderICU {\r
1304         public CharsetDecoderISO2022KR(CharsetICU cs) {\r
1305             super(cs);\r
1306         }\r
1307         \r
1308         protected void implReset() {\r
1309             super.implReset();\r
1310             setInitialStateToUnicodeKR();\r
1311             myConverterData.reset();\r
1312         }\r
1313         \r
1314         protected CoderResult decodeLoop(ByteBuffer source, CharBuffer target, IntBuffer offsets, boolean flush) {\r
1315             CoderResult err = CoderResult.UNDERFLOW;\r
1316             int mySourceChar = 0x0000;\r
1317             int targetUniChar = 0x0000;\r
1318             byte[] tempBuf = new byte[2];\r
1319             boolean usingFallback;\r
1320             boolean gotoGetTrailByte = false;\r
1321             boolean gotoEscape = false;\r
1322             \r
1323             if (myConverterData.version == 1) {\r
1324                 return decodeLoopIBM(myConverterData.currentDecoder, source, target, offsets, flush);\r
1325             }\r
1326             \r
1327             /* initialize state */\r
1328             usingFallback = isFallbackUsed();\r
1329             \r
1330             if (myConverterData.key != 0) {\r
1331                 /* continue with a partial escape sequence */\r
1332                 gotoEscape = true;\r
1333             } else if (toULength == 1 && source.hasRemaining() && target.hasRemaining()) {\r
1334                 /* continue with a partial double-byte character */\r
1335                 mySourceChar = (toUBytesArray[0] & UConverterConstants.UNSIGNED_BYTE_MASK);\r
1336                 toULength = 0;\r
1337                 gotoGetTrailByte = true;\r
1338             }\r
1339             \r
1340             while (source.hasRemaining() || gotoGetTrailByte || gotoEscape) {\r
1341                 if (target.hasRemaining() || gotoGetTrailByte || gotoEscape) {\r
1342                     if (!gotoGetTrailByte && !gotoEscape) {\r
1343                         mySourceChar = (char)(source.get() & UConverterConstants.UNSIGNED_BYTE_MASK);\r
1344                     }\r
1345                     \r
1346                     if (!gotoGetTrailByte && !gotoEscape && mySourceChar == UConverterConstants.SI) {\r
1347                         myConverterData.toU2022State.g = 0;\r
1348                         if (myConverterData.isEmptySegment) {\r
1349                             myConverterData.isEmptySegment = false; /* we are handling it, reset to avoid future spurious errors */\r
1350                             err = CoderResult.malformedForLength(1);\r
1351                             this.toUBytesArray[0] = (byte)mySourceChar;\r
1352                             this.toULength = 1;\r
1353                             return err;\r
1354                         }\r
1355                         /* consume the source */\r
1356                         continue;\r
1357                     } else if (!gotoGetTrailByte && !gotoEscape && mySourceChar == UConverterConstants.SO) {\r
1358                         myConverterData.toU2022State.g = 1;\r
1359                         myConverterData.isEmptySegment = true;\r
1360                         /* consume the source */\r
1361                         continue;\r
1362                     } else if (!gotoGetTrailByte && (gotoEscape || mySourceChar == ESC_2022)) {\r
1363                         if (!gotoEscape) {\r
1364                             source.position(source.position()-1);\r
1365                         }\r
1366 // escape label\r
1367                         gotoEscape = false; // reset gotoEscape flag\r
1368                         myConverterData.isEmptySegment = false; /* Any invalid ESC sequences will be detected separately, so just reset this */ \r
1369                         err = changeState_2022(this, source, ISO_2022_KR);\r
1370                         if (err.isError()) {\r
1371                             return err;\r
1372                         }\r
1373                         continue;\r
1374                     }\r
1375                     myConverterData.isEmptySegment = false; /* Any invalid char errors will be detected separately, so just reset this */\r
1376                     if (myConverterData.toU2022State.g == 1 || gotoGetTrailByte) {\r
1377                         if (source.hasRemaining() || gotoGetTrailByte) {\r
1378                             boolean leadIsOk, trailIsOk;\r
1379                             short trailByte;\r
1380 // getTrailByte label\r
1381                             gotoGetTrailByte = false; // reset gotoGetTrailByte flag\r
1382                             \r
1383                             trailByte = (short)(source.get(source.position()) & UConverterConstants.UNSIGNED_BYTE_MASK);\r
1384                             targetUniChar = UConverterConstants.missingCharMarker;\r
1385                             /*\r
1386                              * Ticket 5691: consistent illegal sequences:\r
1387                              * - We include at least the first byte in the illegal sequence.\r
1388                              * - If any of the non-initial bytes could be the start of a character,\r
1389                              *   we stop the illegal sequence before the first one of those.\r
1390                              * \r
1391                              * In ISO-2022 DBCS, if the second byte is in the 21..7e range or is\r
1392                              * an ESC/SO/SI, we report only the first byte as the illegal sequence.\r
1393                              * Otherwise we convert or report the pair of bytes.\r
1394                              */\r
1395                             leadIsOk = (short)(UConverterConstants.UNSIGNED_BYTE_MASK & (mySourceChar - 0x21)) <= (0x7e - 0x21);\r
1396                             trailIsOk = (short)(UConverterConstants.UNSIGNED_BYTE_MASK & (trailByte - 0x21)) <= (0x7e - 0x21);\r
1397                             if (leadIsOk && trailIsOk) {\r
1398                                 source.get();\r
1399                                 tempBuf[0] = (byte)(mySourceChar + 0x80);\r
1400                                 tempBuf[1] = (byte)(trailByte + 0x80);\r
1401                                 targetUniChar = MBCSSimpleGetNextUChar(myConverterData.currentConverter.sharedData, ByteBuffer.wrap(tempBuf), usingFallback);\r
1402                                 mySourceChar = (char)((mySourceChar << 8) | trailByte);\r
1403                             } else if (!(trailIsOk || IS_2022_CONTROL(trailByte))) {\r
1404                                 /* report a pair of illegal bytes if the second byte is not a DBCS starter */\r
1405                                 source.get();\r
1406                                 /* add another bit so that the code below writes 2 bytes in case of error */\r
1407                                 mySourceChar = (char)(0x10000 | (mySourceChar << 8) | trailByte);\r
1408                             }\r
1409                         } else {\r
1410                             toUBytesArray[0] = (byte)mySourceChar;\r
1411                             toULength = 1;\r
1412                             break;\r
1413                         }\r
1414                     } else if (mySourceChar <= 0x7f) {\r
1415                         int savedSourceLimit = source.limit();\r
1416                         int savedSourcePosition = source.position();\r
1417                         source.limit(source.position());\r
1418                         source.position(source.position()-1); \r
1419                         targetUniChar = MBCSSimpleGetNextUChar(myConverterData.currentConverter.sharedData, source, usingFallback);\r
1420                         source.limit(savedSourceLimit);\r
1421                         source.position(savedSourcePosition);\r
1422                     } else {\r
1423                         targetUniChar = 0xffff;\r
1424                     }\r
1425                     if (targetUniChar < 0xfffe) {\r
1426                         target.put((char)targetUniChar);\r
1427                         if (offsets != null) {\r
1428                             offsets.array()[target.position()] = source.position() - (mySourceChar <= 0xff ? 1 : 2);\r
1429                         }\r
1430                     } else {\r
1431                         /* Call the callback function */\r
1432                         err = toUnicodeCallback(this, mySourceChar, targetUniChar);\r
1433                         break;\r
1434                     }\r
1435                 } else {\r
1436                     err = CoderResult.OVERFLOW;\r
1437                     break;\r
1438                 }\r
1439             }\r
1440             \r
1441             return err;\r
1442         }\r
1443         \r
1444         protected CoderResult decodeLoopIBM(CharsetDecoderMBCS cnv, ByteBuffer source, CharBuffer target, IntBuffer offsets, boolean flush) {\r
1445             CoderResult err = CoderResult.UNDERFLOW;\r
1446             int sourceStart;\r
1447             int sourceLimit;\r
1448             int argSource;\r
1449             int argTarget;\r
1450             boolean gotoEscape = false;\r
1451             int oldSourceLimit;\r
1452             \r
1453             /* remember the original start of the input for offsets */\r
1454             sourceStart = argSource = source.position();\r
1455             \r
1456             if (myConverterData.key != 0) {\r
1457                 /* continue with a partial escape sequence */\r
1458                 gotoEscape = true;\r
1459             }\r
1460             \r
1461             while (gotoEscape || (!err.isError() && source.hasRemaining())) {\r
1462                 if (!gotoEscape) {\r
1463                     /* Find the end of the buffer e.g : Next Escape Seq | end of Buffer */\r
1464                     int oldSourcePos = source.position();\r
1465                     sourceLimit = getEndOfBuffer_2022(source);\r
1466                     source.position(oldSourcePos);\r
1467                     if (source.position() != sourceLimit) {\r
1468                         /*\r
1469                          * get the current partial byte sequence\r
1470                          * \r
1471                          * it needs to be moved between the public and the subconverter\r
1472                          * so that the conversion frameword, which only sees the public\r
1473                          * converter, can handle truncated and illegal input etc.\r
1474                          */\r
1475                         if (toULength > 0) {\r
1476                             cnv.toUBytesArray = toUBytesArray.clone();\r
1477                         }\r
1478                         cnv.toULength = toULength;\r
1479                         \r
1480                         /*\r
1481                          * Convert up to the end of the input, or to before the next escape character.\r
1482                          * Does not handle conversion extensions because the preToU[] state etc.\r
1483                          * is not copied.\r
1484                          */\r
1485                         argTarget = target.position();\r
1486                         oldSourceLimit = source.limit(); // save the old source limit change to new one\r
1487                         source.limit(sourceLimit);\r
1488                         err = myConverterData.currentDecoder.cnvMBCSToUnicodeWithOffsets(source, target, offsets, flush);\r
1489                         source.limit(oldSourceLimit); // restore source limit;\r
1490                         if (offsets != null && sourceStart != argSource) {\r
1491                             /* update offsets to base them on the actual start of the input */\r
1492                             int delta = argSource - sourceStart;\r
1493                             while (argTarget < target.position()) {\r
1494                                 int currentOffset = offsets.get();\r
1495                                 offsets.position(offsets.position()-1);\r
1496                                 if (currentOffset >= 0) {\r
1497                                     offsets.put(currentOffset + delta);\r
1498                                     offsets.position(offsets.position()-1);\r
1499                                 }\r
1500                                 offsets.get();\r
1501                                 target.get();\r
1502                             }\r
1503                         }\r
1504                         argSource = source.position();\r
1505                         \r
1506                         /* copy input/error/overflow buffers */\r
1507                         if (cnv.toULength > 0) {\r
1508                             toUBytesArray = cnv.toUBytesArray.clone();\r
1509                         }\r
1510                         toULength = cnv.toULength;\r
1511                         \r
1512                         if (err.isOverflow()) {\r
1513                             if (cnv.charErrorBufferLength > 0) {\r
1514                                 charErrorBufferArray = cnv.charErrorBufferArray.clone();\r
1515                             }\r
1516                             charErrorBufferLength = cnv.charErrorBufferLength;\r
1517                             cnv.charErrorBufferLength = 0;\r
1518                         }\r
1519                     }\r
1520                     \r
1521                     if (err.isError() || err.isOverflow() || (source.position() == source.limit())) {\r
1522                         return err;\r
1523                     }\r
1524                 }\r
1525 // escape label\r
1526                 gotoEscape = false;\r
1527                 err = changeState_2022(this, source, ISO_2022_KR);\r
1528             }\r
1529             return err;\r
1530         }\r
1531     }\r
1532     \r
1533     /******************** from unicode **********************/\r
1534     /* preference order of JP charsets */\r
1535     private final static byte []jpCharsetPref = {\r
1536         ASCII,\r
1537         JISX201,\r
1538         ISO8859_1,\r
1539         ISO8859_7,\r
1540         JISX208,\r
1541         JISX212,\r
1542         GB2312,\r
1543         KSC5601,\r
1544         HWKANA_7BIT\r
1545     };\r
1546     /*\r
1547      * The escape sequences must be in order of the enum constants like JISX201 = 3,\r
1548      * not in order of jpCharsetPref[]!\r
1549      */\r
1550     private final static byte [][]escSeqChars = {\r
1551             { 0x1B, 0x28, 0x42},        /* <ESC>(B  ASCII       */\r
1552             { 0x1B, 0x2E, 0x41},        /* <ESC>.A  ISO-8859-1  */\r
1553             { 0x1B, 0x2E, 0x46},        /* <ESC>.F  ISO-8859-7  */\r
1554             { 0x1B, 0x28, 0x4A},        /* <ESC>(J  JISX-201    */\r
1555             { 0x1B, 0x24, 0x42},        /* <ESC>$B  JISX-208    */\r
1556             { 0x1B, 0x24, 0x28, 0x44},  /* <ESC>$(D JISX-212    */\r
1557             { 0x1B, 0x24, 0x41},        /* <ESC>$A  GB2312      */\r
1558             { 0x1B, 0x24, 0x28, 0x43},  /* <ESC>$(C KSC5601     */\r
1559             { 0x1B, 0x28, 0x49}         /* <ESC>(I  HWKANA_7BIT */\r
1560     };\r
1561     /*\r
1562      * JIS X 0208 has fallbacks from Unicode half-width Katakana to full-width (DBCS)\r
1563      * Katakana.\r
1564      * Now that we use a Shift-JIS table for JIS X 0208 we need to hardcode these fallbacks\r
1565      * because Shift-JIS roundtrips half-width Katakana to single bytes.\r
1566      * These were the only fallbacks in ICU's jisx-208.ucm file.\r
1567      */\r
1568     private final static char []hwkana_fb = {\r
1569         0x2123,  /* U+FF61 */\r
1570         0x2156,\r
1571         0x2157,\r
1572         0x2122,\r
1573         0x2126,\r
1574         0x2572,\r
1575         0x2521,\r
1576         0x2523,\r
1577         0x2525,\r
1578         0x2527,\r
1579         0x2529,\r
1580         0x2563,\r
1581         0x2565,\r
1582         0x2567,\r
1583         0x2543,\r
1584         0x213C,  /* U+FF70 */\r
1585         0x2522,\r
1586         0x2524,\r
1587         0x2526,\r
1588         0x2528,\r
1589         0x252A,\r
1590         0x252B,\r
1591         0x252D,\r
1592         0x252F,\r
1593         0x2531,\r
1594         0x2533,\r
1595         0x2535,\r
1596         0x2537,\r
1597         0x2539,\r
1598         0x253B,\r
1599         0x253D,\r
1600         0x253F,  /* U+FF80 */\r
1601         0x2541,\r
1602         0x2544,\r
1603         0x2546,\r
1604         0x2548,\r
1605         0x254A,\r
1606         0x254B,\r
1607         0x254C,\r
1608         0x254D,\r
1609         0x254E,\r
1610         0x254F,\r
1611         0x2552,\r
1612         0x2555,\r
1613         0x2558,\r
1614         0x255B,\r
1615         0x255E,\r
1616         0x255F,  /* U+FF90 */\r
1617         0x2560,\r
1618         0x2561,\r
1619         0x2562,\r
1620         0x2564,\r
1621         0x2566,\r
1622         0x2568,\r
1623         0x2569,\r
1624         0x256A,\r
1625         0x256B,\r
1626         0x256C,\r
1627         0x256D,\r
1628         0x256F,\r
1629         0x2573,\r
1630         0x212B,\r
1631         0x212C   /* U+FF9F */\r
1632     };\r
1633     \r
1634     protected byte [][]fromUSubstitutionChar = new byte[][]{ { (byte)0x1A }, { (byte)0x2F, (byte)0x7E} };\r
1635     /****************************ISO-2022-JP************************************/\r
1636     private class CharsetEncoderISO2022JP extends CharsetEncoderICU {\r
1637         public CharsetEncoderISO2022JP(CharsetICU cs) {\r
1638             super(cs, fromUSubstitutionChar[0]);\r
1639         }\r
1640         \r
1641         protected void implReset() {\r
1642             super.implReset();\r
1643             myConverterData.reset();\r
1644         }\r
1645         /* Map Unicode to 00..7F according to JIS X 0201. Return U+FFFE if unmappable. */\r
1646         private int jisx201FromU(int value) {\r
1647             if (value <= 0x7f) {\r
1648                 if (value != 0x5c && value != 0x7e) {\r
1649                     return value;\r
1650                 }\r
1651             } else if (value == 0xa5) {\r
1652                 return 0x5c;\r
1653             } else if (value == 0x203e) {\r
1654                 return 0x7e;\r
1655             }\r
1656             return (int)(UConverterConstants.UNSIGNED_INT_MASK & 0xfffe);\r
1657         }\r
1658         \r
1659         /*\r
1660          * Take a valid Shift-JIS byte pair, check that it is in the range corresponding\r
1661          * to JIS X 0208, and convert it to a pair of 21..7E bytes.\r
1662          * Return 0 if the byte pair is out of range.\r
1663          */\r
1664         private int _2022FromSJIS(int value) {\r
1665             short trail;\r
1666             \r
1667             if (value > 0xEFFC) {\r
1668                 return 0; /* beyond JIS X 0208 */\r
1669             }\r
1670             \r
1671             trail = (short)(value & UConverterConstants.UNSIGNED_BYTE_MASK);\r
1672             \r
1673             value &= 0xff00; /* lead byte */\r
1674             if (value <= 0x9f00) {\r
1675                 value -= 0x7000;\r
1676             } else { /* 0xe000 <= value <= 0xef00 */\r
1677                 value -= 0xb000;\r
1678             }\r
1679             \r
1680             value <<= 1;\r
1681             \r
1682             if (trail <= 0x9e) {\r
1683                 value -= 0x100;\r
1684                 if (trail <= 0x7e) {\r
1685                     value |= ((trail - 0x1f) & UConverterConstants.UNSIGNED_BYTE_MASK);\r
1686                 } else {\r
1687                     value |= ((trail - 0x20) & UConverterConstants.UNSIGNED_BYTE_MASK);\r
1688                 }\r
1689             } else { /* trail <= 0xfc */\r
1690                 value |= ((trail - 0x7e) & UConverterConstants.UNSIGNED_BYTE_MASK);\r
1691             }\r
1692             \r
1693             return value;\r
1694         }\r
1695         /* This overrides the cbFromUWriteSub method in CharsetEncoderICU */\r
1696         CoderResult cbFromUWriteSub (CharsetEncoderICU encoder, \r
1697                 CharBuffer source, ByteBuffer target, IntBuffer offsets){\r
1698                 CoderResult err = CoderResult.UNDERFLOW;\r
1699                 byte[] buffer = new byte[8];\r
1700                 int i = 0;\r
1701                 byte[] subchar;\r
1702                 subchar = encoder.replacement();\r
1703                 \r
1704                 byte cs;\r
1705                 if (myConverterData.fromU2022State.g == 1) {\r
1706                     /* JIS7: switch from G1 to G0 */\r
1707                     myConverterData.fromU2022State.g = 0;\r
1708                     buffer[i++] = UConverterConstants.SI;\r
1709                 }\r
1710                 cs = myConverterData.fromU2022State.cs[0];\r
1711                 \r
1712                 if (cs != ASCII && cs != JISX201) {\r
1713                     /* not in ASCII or JIS X 0201: switch to ASCII */\r
1714                     myConverterData.fromU2022State.cs[0] = ASCII;\r
1715                     buffer[i++] = 0x1B;\r
1716                     buffer[i++] = 0x28;\r
1717                     buffer[i++] = 0x42;\r
1718                 }\r
1719                 \r
1720                 buffer[i++] = subchar[0];\r
1721                 \r
1722                 err = CharsetEncoderICU.fromUWriteBytes(this, buffer, 0, i, target, offsets, source.position() - 1);\r
1723 \r
1724                 return err;\r
1725             }\r
1726         \r
1727         protected CoderResult encodeLoop(CharBuffer source, ByteBuffer target, IntBuffer offsets, boolean flush) {\r
1728             CoderResult err = CoderResult.UNDERFLOW;\r
1729             int sourceChar;\r
1730             byte cs, g;\r
1731             int choiceCount;\r
1732             int len, outLen;\r
1733             byte[] choices = new byte[10];\r
1734             int targetValue = 0;\r
1735             boolean usingFallback;\r
1736             byte[] buffer = new byte[8];\r
1737             boolean getTrail = false; // use for getTrail label\r
1738             int oldSourcePos; // for proper error handling\r
1739             \r
1740             choiceCount = 0;\r
1741             \r
1742             /* check if the last codepoint of previous buffer was a lead surrogate */\r
1743             if ((sourceChar = fromUChar32) != 0 && target.hasRemaining()) {\r
1744                 getTrail = true;\r
1745             }\r
1746             \r
1747             while (getTrail || source.hasRemaining()) {\r
1748                 if (getTrail || target.hasRemaining()) {\r
1749                     oldSourcePos = source.position();\r
1750                     if (!getTrail) { /* skip if going to getTrail label */\r
1751                         sourceChar = source.get();\r
1752                     }\r
1753                     /* check if the char is a First surrogate */\r
1754                     if (getTrail || UTF16.isSurrogate((char)sourceChar)) {\r
1755                         if (getTrail || UTF16.isLeadSurrogate((char)sourceChar)) {\r
1756 // getTrail:                 \r
1757                             if (getTrail) {\r
1758                                 getTrail = false;\r
1759                             }\r
1760                             /* look ahead to find the trail surrogate */\r
1761                             if (source.hasRemaining()) {\r
1762                                 /* test the following code unit */\r
1763                                 char trail = source.get();\r
1764                                 /* go back to the previous position */\r
1765                                 source.position(source.position()-1);\r
1766                                 if (UTF16.isTrailSurrogate(trail)) {\r
1767                                     source.get();\r
1768                                     sourceChar = UCharacter.getCodePoint((char)sourceChar, trail);\r
1769                                     fromUChar32 = 0x00;\r
1770                                     /* convert this supplementary code point */\r
1771                                     /* exit this condition tree */\r
1772                                 } else {\r
1773                                     /* this is an unmatched lead code unit (1st surrogate) */\r
1774                                     /* callback(illegal) */\r
1775                                     err = CoderResult.malformedForLength(1);\r
1776                                     fromUChar32 = sourceChar;\r
1777                                     break;\r
1778                                 }\r
1779                             } else {\r
1780                                 /* no more input */\r
1781                                 fromUChar32 = sourceChar;\r
1782                                 break;\r
1783                             }\r
1784                         } else {\r
1785                             /* this is an unmatched trail code unit (2nd surrogate) */\r
1786                             /* callback(illegal) */\r
1787                             err = CoderResult.malformedForLength(1);\r
1788                             fromUChar32 = sourceChar;\r
1789                             break;\r
1790                         }\r
1791                     }\r
1792                     \r
1793                     /* do not convert SO/SI/ESC */\r
1794                     if (IS_2022_CONTROL(sourceChar)) {\r
1795                         /* callback(illegal) */\r
1796                         err = CoderResult.malformedForLength(1);\r
1797                         fromUChar32 = sourceChar;\r
1798                         break;\r
1799                     }\r
1800                     \r
1801                     /* do the conversion */\r
1802                     \r
1803                     if (choiceCount == 0) {\r
1804                         char csm;\r
1805                         /*\r
1806                          * The csm variable keeps track of which charsets are allowed\r
1807                          * and not used yet while building the choices[].\r
1808                          */\r
1809                         csm = (char)jpCharsetMasks[myConverterData.version];\r
1810                         choiceCount = 0;\r
1811                         \r
1812                         /* JIS7/8: try single-byte half-width Katakana before JISX208 */\r
1813                         if (myConverterData.version == 3 || myConverterData.version == 4) {\r
1814                             choices[choiceCount++] = HWKANA_7BIT;\r
1815                         }\r
1816                         /* Do not try single-bit half-width Katakana for other versions. */\r
1817                         csm &= ~CSM(HWKANA_7BIT);\r
1818                         \r
1819                         /* try the current G0 charset */\r
1820                         choices[choiceCount++] = cs = myConverterData.fromU2022State.cs[0];\r
1821                         csm &= ~CSM(cs);\r
1822                         \r
1823                         /* try the current G2 charset */\r
1824                         if ((cs = myConverterData.fromU2022State.cs[2]) != 0) {\r
1825                             choices[choiceCount++] = cs;\r
1826                             csm &= ~CSM(cs);\r
1827                         }\r
1828                         \r
1829                         /* try all the other charsets */\r
1830                         for (int i = 0; i < jpCharsetPref.length; i++) {\r
1831                             cs = jpCharsetPref[i];\r
1832                             if ((CSM(cs) & csm) != 0) {\r
1833                                 choices[choiceCount++] = cs;\r
1834                                 csm &= ~CSM(cs);\r
1835                             }\r
1836                         }\r
1837                     }\r
1838                     \r
1839                     cs = g = 0;\r
1840                     /* \r
1841                      * len==0:  no mapping found yet\r
1842                      * len<0:   found a fallback result:  continue looking for a roundtrip but no further fallbacks\r
1843                      * len>0:   found a roundtrip result, done\r
1844                      */\r
1845                     len = 0;\r
1846                     /*\r
1847                      * We will turn off usingFallBack after finding a fallback,\r
1848                      * but we still get fallbacks from PUA code points as usual.\r
1849                      * Therefore, we will also need to check that we don't overwrite\r
1850                      * an early fallback with a later one.\r
1851                      */\r
1852                     usingFallback = useFallback;\r
1853                     \r
1854                     for (int i = 0; i < choiceCount && len <= 0; i++) {\r
1855                         int[] value = new int[1];\r
1856                         int len2;\r
1857                         byte cs0 = choices[i];\r
1858                         switch (cs0) {\r
1859                         case ASCII:\r
1860                             if (sourceChar <= 0x7f) {\r
1861                                 targetValue = sourceChar;\r
1862                                 len = 1;\r
1863                                 cs = cs0;\r
1864                                 g = 0;\r
1865                             }\r
1866                             break;\r
1867                         case ISO8859_1:\r
1868                             if (GR96_START <= sourceChar && sourceChar <= GR96_END) {\r
1869                                 targetValue = sourceChar - 0x80;\r
1870                                 len = 1;\r
1871                                 cs = cs0;\r
1872                                 g = 2;\r
1873                             }\r
1874                             break;\r
1875                         case HWKANA_7BIT:\r
1876                             if (sourceChar <= HWKANA_END && sourceChar >= HWKANA_START) {\r
1877                                 if (myConverterData.version == 3) {\r
1878                                     /* JIS7: use G1 (SO) */\r
1879                                     /* Shift U+FF61..U+FF9F to bytes 21..5F. */\r
1880                                     targetValue = (int)(UConverterConstants.UNSIGNED_INT_MASK & (sourceChar - (HWKANA_START - 0x21)));\r
1881                                     len = 1;\r
1882                                     myConverterData.fromU2022State.cs[1] = cs = cs0; /* do not output an escape sequence */\r
1883                                     g = 1;\r
1884                                 } else if (myConverterData.version == 4) {\r
1885                                     /* JIS8: use 8-bit bytes with any single-byte charset, see escape sequence output below */\r
1886                                     /* Shift U+FF61..U+FF9F to bytes A1..DF. */\r
1887                                     targetValue = (int)(UConverterConstants.UNSIGNED_INT_MASK & (sourceChar - (HWKANA_START - 0xa1)));\r
1888                                     len = 1;\r
1889                                     \r
1890                                     cs = myConverterData.fromU2022State.cs[0];\r
1891                                     if (IS_JP_DBCS(cs)) {\r
1892                                         /* switch from a DBCS charset to JISX201 */\r
1893                                         cs = JISX201;\r
1894                                     }\r
1895                                     /* else stay in the current G0 charset */\r
1896                                     g = 0;\r
1897                                 }\r
1898                                 /* else do not use HWKANA_7BIT with other versions */\r
1899                             }\r
1900                             break;\r
1901                         case JISX201:\r
1902                             /* G0 SBCS */\r
1903                             value[0] = jisx201FromU(sourceChar);\r
1904                             if (value[0] <= 0x7f) {\r
1905                                 targetValue = value[0];\r
1906                                 len = 1;\r
1907                                 cs = cs0;\r
1908                                 g = 0;\r
1909                                 usingFallback = false;\r
1910                             }\r
1911                             break;\r
1912                         case JISX208:\r
1913                             /* G0 DBCS from JIS table */\r
1914                             myConverterData.currentConverter.sharedData = myConverterData.myConverterArray[cs0];\r
1915                             myConverterData.currentConverter.sharedData.mbcs.outputType = CharsetMBCS.MBCS_OUTPUT_2;\r
1916                             len2 = myConverterData.currentEncoder.fromUChar32(sourceChar, value, usingFallback);\r
1917                             //len2 = MBCSFromUChar32_ISO2022(myConverterData.myConverterArray[cs0], sourceChar, value, usingFallback, CharsetMBCS.MBCS_OUTPUT_2);\r
1918                             if (len2 == 2 || (len2 == -2 && len == 0)) { /* only accept DBCS: abs(len) == 2 */\r
1919                                 value[0] = _2022FromSJIS(value[0]);\r
1920                                 if (value[0] != 0) {\r
1921                                     targetValue = value[0];\r
1922                                     len = len2;\r
1923                                     cs = cs0;\r
1924                                     g = 0;\r
1925                                     usingFallback = false;\r
1926                                 }\r
1927                             } else if (len == 0 && usingFallback  && sourceChar <= HWKANA_END && sourceChar >= HWKANA_START) {\r
1928                                 targetValue = hwkana_fb[sourceChar - HWKANA_START];\r
1929                                 len = -2;\r
1930                                 cs = cs0;\r
1931                                 g = 0;\r
1932                                 usingFallback = false;\r
1933                             }\r
1934                             break;\r
1935                         case ISO8859_7:\r
1936                             /* G0 SBCS forced to 7-bit output */\r
1937                             len2 = MBCSSingleFromUChar32(myConverterData.myConverterArray[cs0], sourceChar, value, usingFallback);\r
1938                             if (len2 != 0 && !(len2 < 0 && len != 0) && GR96_START <= value[0] && value[0] <= GR96_END) {\r
1939                                 targetValue = value[0] - 0x80;\r
1940                                 len = len2;\r
1941                                 cs = cs0;\r
1942                                 g = 2;\r
1943                                 usingFallback = false;\r
1944                             }\r
1945                             break;\r
1946                         default :\r
1947                             /* G0 DBCS */\r
1948                             myConverterData.currentConverter.sharedData = myConverterData.myConverterArray[cs0];\r
1949                             myConverterData.currentConverter.sharedData.mbcs.outputType = CharsetMBCS.MBCS_OUTPUT_2;\r
1950                             len2 = myConverterData.currentEncoder.fromUChar32(sourceChar, value, usingFallback);\r
1951                             //len2 = MBCSFromUChar32_ISO2022(myConverterData.myConverterArray[cs0], sourceChar, value, usingFallback, CharsetMBCS.MBCS_OUTPUT_2);\r
1952                             if (len2 == 2 || (len2 == -2 && len == 0)) { /* only accept DBCS: abs(len)==2 */\r
1953                                 if (cs0 == KSC5601) {\r
1954                                     /*\r
1955                                      * Check for valid bytes for the encoding scheme.\r
1956                                      * This is necessary because the sub-converter (windows-949)\r
1957                                      * has a broader encoding scheme than is valid for 2022.\r
1958                                      */\r
1959                                     value[0] = _2022FromGR94DBCS(value[0]);\r
1960                                     if (value[0] == 0) {\r
1961                                         break;\r
1962                                     }\r
1963                                 }\r
1964                                 targetValue = value[0];\r
1965                                 len = len2;\r
1966                                 cs = cs0;\r
1967                                 g = 0;\r
1968                                 usingFallback = false;\r
1969                             }\r
1970                             break;\r
1971                         }\r
1972                     }\r
1973                     \r
1974                     if (len != 0) {\r
1975                         if (len < 0) {\r
1976                             len = -len; /* fallback */\r
1977                         }\r
1978                         outLen = 0;\r
1979                         \r
1980                         /* write SI if necessary (only for JIS7 */\r
1981                         if (myConverterData.fromU2022State.g == 1 && g == 0) {\r
1982                             buffer[outLen++] = UConverterConstants.SI;\r
1983                             myConverterData.fromU2022State.g = 0;\r
1984                         }\r
1985                         \r
1986                         /* write the designation sequence if necessary */\r
1987                         if (cs != myConverterData.fromU2022State.cs[g]) {\r
1988                             for (int i = 0; i < escSeqChars[cs].length; i++) {\r
1989                                 buffer[outLen++] = escSeqChars[cs][i];\r
1990                             }\r
1991                             myConverterData.fromU2022State.cs[g] = cs;\r
1992                             \r
1993                             /* invalidate the choices[] */\r
1994                             choiceCount = 0;\r
1995                         }\r
1996                         \r
1997                         /* write the shift sequence if necessary */\r
1998                         if (g != myConverterData.fromU2022State.g) {\r
1999                             switch (g) {\r
2000                             /* case 0 handled before writing escapes */\r
2001                             case 1:\r
2002                                 buffer[outLen++] = UConverterConstants.SO;\r
2003                                 myConverterData.fromU2022State.g = 1;\r
2004                                 break;\r
2005                             default : /* case 2 */\r
2006                                 buffer[outLen++] = 0x1b;\r
2007                                 buffer[outLen++] = 0x4e;\r
2008                                 break;\r
2009                             /* case 3: no SS3 in ISO-2022-JP-x */\r
2010                             }\r
2011                         }\r
2012                         \r
2013                         /* write the output bytes */\r
2014                         if (len == 1) {\r
2015                             buffer[outLen++] = (byte)targetValue;\r
2016                         } else { /* len == 2 */\r
2017                             buffer[outLen++] = (byte)(targetValue >> 8);\r
2018                             buffer[outLen++] = (byte)targetValue;\r
2019                         }\r
2020                     }else {\r
2021                         /*\r
2022                          * if we cannot find the character after checking all codepages\r
2023                          * then this is an error.\r
2024                          */\r
2025                         err = CoderResult.unmappableForLength(source.position()-oldSourcePos);\r
2026                         fromUChar32 = sourceChar;\r
2027                         break;\r
2028                     }\r
2029                     \r
2030                     if (sourceChar == CR || sourceChar == LF) {\r
2031                         /* reset the G2 state at the end of a line (conversion got use into ASCII or JISX201 already) */\r
2032                         myConverterData.fromU2022State.cs[2] = 0;\r
2033                         choiceCount = 0;\r
2034                     }\r
2035                     \r
2036                     /* output outLen>0 bytes in buffer[] */\r
2037                     if (outLen == 1) {\r
2038                         target.put(buffer[0]);\r
2039                         if (offsets != null) {\r
2040                             offsets.put(source.remaining() - 1); /* -1 known to be ASCII */\r
2041                         }\r
2042                     } else if (outLen == 2 && (target.position() + 2) <= target.limit()) {\r
2043                         target.put(buffer[0]);\r
2044                         target.put(buffer[1]);\r
2045                         if (offsets != null) {\r
2046                             int sourceIndex = source.position() - 1;\r
2047                             offsets.put(sourceIndex);\r
2048                             offsets.put(sourceIndex);\r
2049                         }\r
2050                     } else {\r
2051                         err = CharsetEncoderICU.fromUWriteBytes(this, buffer, 0, outLen, target, offsets, source.position()-1);\r
2052                     }\r
2053                 } else {\r
2054                     err = CoderResult.OVERFLOW;\r
2055                     break;\r
2056                 }\r
2057             }\r
2058             \r
2059             /*\r
2060              * the end of the input stream and detection of truncated input\r
2061              * are handled by the framework, but for ISO-2022-JP conversion\r
2062              * we need to be in ASCII mode at the very end\r
2063              * \r
2064              * conditions:\r
2065              *  successful\r
2066              *  in SO mode or not in ASCII mode\r
2067              *  end of input and no truncated input\r
2068              */\r
2069             if (!err.isError() &&\r
2070                     (myConverterData.fromU2022State.g != 0 || myConverterData.fromU2022State.cs[0] != ASCII) &&\r
2071                     flush && !source.hasRemaining() && fromUChar32 == 0) {\r
2072                 int sourceIndex;\r
2073                 \r
2074                 outLen = 0;\r
2075                 \r
2076                 if (myConverterData.fromU2022State.g != 0) {\r
2077                     buffer[outLen++] = UConverterConstants.SI;\r
2078                     myConverterData.fromU2022State.g = 0;\r
2079                 }\r
2080                 \r
2081                 if (myConverterData.fromU2022State.cs[0] != ASCII) {\r
2082                     for (int i = 0; i < escSeqChars[ASCII].length; i++) {\r
2083                         buffer[outLen++] = escSeqChars[ASCII][i];\r
2084                     }\r
2085                     myConverterData.fromU2022State.cs[0] = ASCII;\r
2086                 }\r
2087                 \r
2088                 /* get the source index of the last input character */\r
2089                 sourceIndex = source.position();\r
2090                 if (sourceIndex > 0) {\r
2091                     --sourceIndex;\r
2092                     if (UTF16.isTrailSurrogate(source.get(sourceIndex)) &&\r
2093                             (sourceIndex == 0 || UTF16.isLeadSurrogate(source.get(sourceIndex-1)))) {\r
2094                         --sourceIndex;\r
2095                     }\r
2096                 } else {\r
2097                     sourceIndex = -1;\r
2098                 }\r
2099                 \r
2100                 err = CharsetEncoderICU.fromUWriteBytes(this, buffer, 0, outLen, target, offsets, sourceIndex);\r
2101             }\r
2102             return err;\r
2103         }\r
2104     }\r
2105     /****************************ISO-2022-CN************************************/\r
2106     /*\r
2107      * Rules for ISO-2022-CN Encoding:\r
2108      * i)   The designator sequence must appear once on a line before any instance\r
2109      *      of the character set it designates.\r
2110      * ii)  If two lines contain characters from the same character set, both lines\r
2111      *      must include the designator sequence.\r
2112      * iii) Once the designator sequence is known, a shifting sequence has to be found\r
2113      *      to invoke the shifting\r
2114      * iv)  All lines start in ASCII and end in ASCII.\r
2115      * v)   Four shifting sequences are employed for this purpose:\r
2116      *      Sequence    ASCII Eq    Charsets\r
2117      *      ---------   ---------   --------\r
2118      *      SI          <SI>        US-ASCII\r
2119      *      SO          <SO>        CNS-11643-1992 Plane 1, GB2312, ISO-IR-165\r
2120      *      SS2         <ESC>N      CNS-11643-1992 Plane 2\r
2121      *      SS3         <ESC>O      CNS-11643-1992 Planes 3-7\r
2122      * vi)  \r
2123      *      SOdesignator    : ESC "$" ")" finalchar_for_SO\r
2124      *      SS2designator   : ESC "$" "*" finalchar_for_SS2\r
2125      *      SS3designator   : ESC "$" "+" finalchar_for_SS3\r
2126      *      \r
2127      *      ESC $ ) A       Indicates the bytes following SO are Chinese\r
2128      *       characters as defined in GB 2312-80, until\r
2129      *       another SOdesignation appears\r
2130      *      \r
2131      *      ESC $ ) E       Indicates the bytes following SO are as defined\r
2132      *       in ISO-IR-165 (for details, see section 2.1),\r
2133      *       until another SOdesignation appears\r
2134      *       \r
2135      *      ESC $ ) G       Indicates the bytes following SO are as defined\r
2136      *       in CNS 11643-plane-1, until another SOdesignation appears\r
2137      *       \r
2138      *      ESC $ * H       Indicates the two bytes immediately following\r
2139      *       SS2 is a Chinese character as defined in CNS\r
2140      *       11643-plane-2, until another SS2designation\r
2141      *       appears\r
2142      *       (Meaning <ESC>N must precede every 2 byte sequence.)\r
2143      *      \r
2144      *      ESC $ + I       Indicates the immediate two bytes following SS3\r
2145      *       is a Chinese character as defined in CNS\r
2146      *       11643-plane-3, until another SS3designation\r
2147      *       appears\r
2148      *       (Meaning <ESC>O must precede every 2 byte sequence.)\r
2149      *      \r
2150      *      ESC $ + J       Indicates the immediate two bytes following SS3\r
2151      *       is a Chinese character as defined in CNS\r
2152      *       11643-plane-4, until another SS3designation\r
2153      *       appears\r
2154      *       (In English: <ESC>O must precede every 2 byte sequence.)\r
2155      *      \r
2156      *      ESC $ + K       Indicates the immediate two bytes following SS3\r
2157      *       is a Chinese character as defined in CNS\r
2158      *       11643-plane-5, until another SS3designation\r
2159      *       appears\r
2160      *       \r
2161      *      ESC $ + L       Indicates the immediate two bytes following SS3\r
2162      *       is a Chinese character as defined in CNS\r
2163      *       11643-plane-6, until another SS3designation\r
2164      *       appears\r
2165      *       \r
2166      *      ESC $ + M       Indicates the immediate two bytes following SS3\r
2167      *       is a Chinese character as defined in CNS\r
2168      *       11643-plane-7, until another SS3designation\r
2169      *       appears\r
2170      *       \r
2171      *      As in ISO-2022-CN, each line starts in ASCII, and ends in ASCII, and\r
2172      *      has its own designation information before any Chinese characters\r
2173      *      appear\r
2174      */\r
2175     \r
2176     /* The following are defined this way to make strings truely readonly */\r
2177     private final static byte[] GB_2312_80_STR = { 0x1B, 0x24, 0x29, 0x41 };\r
2178     private final static byte[] ISO_IR_165_STR = { 0x1B, 0x24, 0x29, 0x45 };\r
2179     private final static byte[] CNS_11643_1992_Plane_1_STR = { 0x1B, 0x24, 0x29, 0x47 };\r
2180     private final static byte[] CNS_11643_1992_Plane_2_STR = { 0x1B, 0x24, 0x2A, 0x48 };\r
2181     private final static byte[] CNS_11643_1992_Plane_3_STR = { 0x1B, 0x24, 0x2B, 0x49 };\r
2182     private final static byte[] CNS_11643_1992_Plane_4_STR = { 0x1B, 0x24, 0x2B, 0x4A };\r
2183     private final static byte[] CNS_11643_1992_Plane_5_STR = { 0x1B, 0x24, 0x2B, 0x4B };\r
2184     private final static byte[] CNS_11643_1992_Plane_6_STR = { 0x1B, 0x24, 0x2B, 0x4C };\r
2185     private final static byte[] CNS_11643_1992_Plane_7_STR = { 0x1B, 0x24, 0x2B, 0x4D };\r
2186     \r
2187     /************************ ISO2022-CN Data *****************************/\r
2188     private final static byte[][] escSeqCharsCN = {\r
2189         SHIFT_IN_STR,\r
2190         GB_2312_80_STR,\r
2191         ISO_IR_165_STR,\r
2192         CNS_11643_1992_Plane_1_STR,\r
2193         CNS_11643_1992_Plane_2_STR,\r
2194         CNS_11643_1992_Plane_3_STR,\r
2195         CNS_11643_1992_Plane_4_STR,\r
2196         CNS_11643_1992_Plane_5_STR,\r
2197         CNS_11643_1992_Plane_6_STR,\r
2198         CNS_11643_1992_Plane_7_STR,\r
2199     };\r
2200     \r
2201     private class CharsetEncoderISO2022CN extends CharsetEncoderICU {\r
2202         public CharsetEncoderISO2022CN(CharsetICU cs) {\r
2203             super(cs, fromUSubstitutionChar[0]);\r
2204         }\r
2205         \r
2206         protected void implReset() {\r
2207             super.implReset();\r
2208             myConverterData.reset();\r
2209         }\r
2210         \r
2211         /* This overrides the cbFromUWriteSub method in CharsetEncoderICU */\r
2212         CoderResult cbFromUWriteSub (CharsetEncoderICU encoder, \r
2213             CharBuffer source, ByteBuffer target, IntBuffer offsets){\r
2214             CoderResult err = CoderResult.UNDERFLOW;\r
2215             byte[] buffer = new byte[8];\r
2216             int i = 0;\r
2217             byte[] subchar;\r
2218             subchar = encoder.replacement();\r
2219             \r
2220             if (myConverterData.fromU2022State.g != 0) {\r
2221                 /* not in ASCII mode: switch to ASCII */\r
2222                 myConverterData.fromU2022State.g = 0;\r
2223                 buffer[i++] = UConverterConstants.SI;\r
2224             }\r
2225             buffer[i++] = subchar[0];\r
2226             \r
2227             err = CharsetEncoderICU.fromUWriteBytes(this, buffer, 0, i, target, offsets, source.position() - 1);\r
2228 \r
2229             return err;\r
2230         }\r
2231         \r
2232         protected CoderResult encodeLoop(CharBuffer source, ByteBuffer target, IntBuffer offsets, boolean flush) {\r
2233             CoderResult err = CoderResult.UNDERFLOW;\r
2234             int sourceChar;\r
2235             byte[] buffer = new byte[8];\r
2236             int len;\r
2237             byte[] choices = new byte[3];\r
2238             int choiceCount;\r
2239             int targetValue = 0;\r
2240             boolean usingFallback;\r
2241             boolean gotoGetTrail = false;\r
2242             int oldSourcePos; // For proper error handling\r
2243             \r
2244             choiceCount = 0;\r
2245             \r
2246             /* check if the last codepoint of previous buffer was a lead surrogate */\r
2247             if ((sourceChar = fromUChar32) != 0 && target.hasRemaining()) {\r
2248                 // goto getTrail label\r
2249                 gotoGetTrail = true; \r
2250             }\r
2251             \r
2252             while (source.hasRemaining() || gotoGetTrail) {\r
2253                 if (target.hasRemaining() || gotoGetTrail) {\r
2254                     oldSourcePos = source.position();\r
2255                     if (!gotoGetTrail) {\r
2256                         sourceChar = source.get();\r
2257                     }\r
2258                     /* check if the char is a First surrogate */\r
2259                     if (UTF16.isSurrogate((char)sourceChar) || gotoGetTrail) {\r
2260                         if (UTF16.isLeadSurrogate((char)sourceChar) || gotoGetTrail) {\r
2261 // getTrail label\r
2262                             /* reset gotoGetTrail flag*/\r
2263                              gotoGetTrail = false;\r
2264                             \r
2265                             /* look ahead to find the trail surrogate */\r
2266                             if (source.hasRemaining()) {\r
2267                                 /* test the following code unit */\r
2268                                 char trail = source.get();\r
2269                                 source.position(source.position()-1);\r
2270                                 if (UTF16.isTrailSurrogate(trail)) {\r
2271                                     source.get();\r
2272                                     sourceChar = UCharacter.getCodePoint((char)sourceChar, trail);\r
2273                                     fromUChar32 = 0x00;\r
2274                                     /* convert this supplementary code point */\r
2275                                     /* exit this condition tree */\r
2276                                 } else {\r
2277                                     /* this is an unmatched lead code unit (1st surrogate) */\r
2278                                     /* callback(illegal) */\r
2279                                     err = CoderResult.malformedForLength(1);\r
2280                                     fromUChar32 = sourceChar;\r
2281                                     break;\r
2282                                 }\r
2283                             } else {\r
2284                                 /* no more input */\r
2285                                 fromUChar32 = sourceChar;\r
2286                                 break;\r
2287                             }\r
2288                         } else {\r
2289                             /* this is an unmatched trail code unit (2nd surrogate) */\r
2290                             /* callback(illegal) */\r
2291                             err = CoderResult.malformedForLength(1);\r
2292                             fromUChar32 = sourceChar;\r
2293                             break;\r
2294                         }\r
2295                     }\r
2296                     \r
2297                     /* do the conversion */\r
2298                     if (sourceChar <= 0x007f) {\r
2299                         /* do not converter SO/SI/ESC */\r
2300                         if (IS_2022_CONTROL(sourceChar)) {\r
2301                             /* callback(illegal) */\r
2302                             err = CoderResult.malformedForLength(1);\r
2303                             fromUChar32 = sourceChar;\r
2304                             break;\r
2305                         }\r
2306                         \r
2307                         /* US-ASCII */\r
2308                         if (myConverterData.fromU2022State.g == 0) {\r
2309                             buffer[0] = (byte)sourceChar;\r
2310                             len = 1;\r
2311                         } else {\r
2312                             buffer[0] = UConverterConstants.SI;\r
2313                             buffer[1] = (byte)sourceChar;\r
2314                             len = 2;\r
2315                             myConverterData.fromU2022State.g = 0;\r
2316                             choiceCount = 0;\r
2317                         }\r
2318                         \r
2319                         if (sourceChar == CR || sourceChar == LF) {\r
2320                             /* reset the state at the end of a line */\r
2321                             myConverterData.fromU2022State.reset();\r
2322                             choiceCount = 0;\r
2323                         }\r
2324                     } else {\r
2325                         /* convert U+0080..U+10ffff */\r
2326                         int i;\r
2327                         byte cs, g;\r
2328                         \r
2329                         if (choiceCount == 0) {\r
2330                             /* try the current SO/G1 converter first */\r
2331                             choices[0] = myConverterData.fromU2022State.cs[1];\r
2332                             \r
2333                             /* default to GB2312_1 if none is designated yet */\r
2334                             if (choices[0] == 0) {\r
2335                                 choices[0] = GB2312_1;\r
2336                             }\r
2337                             if (myConverterData.version == 0) {\r
2338                                 /* ISO-2022-CN */\r
2339                                 /* try other SO/G1 converter; a CNS_11643_1 lookup may result in any plane */\r
2340                                 if (choices[0] == GB2312_1) {\r
2341                                     choices[1] = CNS_11643_1;\r
2342                                 } else {\r
2343                                     choices[1] = GB2312_1;\r
2344                                 }\r
2345                                 \r
2346                                 choiceCount = 2;\r
2347                             } else {\r
2348                                 /* ISO-2022-CN-EXT */\r
2349                                 \r
2350                                 /* try one of the other converters */\r
2351                                 switch (choices[0]) {\r
2352                                 case GB2312_1:\r
2353                                     choices[1] = CNS_11643_1;\r
2354                                     choices[2] = ISO_IR_165;\r
2355                                     break;\r
2356                                 case ISO_IR_165:\r
2357                                     choices[1] = GB2312_1;\r
2358                                     choices[2] = CNS_11643_1;\r
2359                                     break;\r
2360                                 default :\r
2361                                     choices[1] = GB2312_1;\r
2362                                     choices[2] = ISO_IR_165;\r
2363                                     break;\r
2364                                 }\r
2365                                 \r
2366                                 choiceCount = 3;\r
2367                             }\r
2368                         }\r
2369                         \r
2370                         cs = g = 0;\r
2371                         /*\r
2372                          * len==0:  no mapping found yet\r
2373                          * len<0:   found a fallback result: continue looking for a roundtrip but no further fallbacks\r
2374                          * len>0:   found a roundtrip result, done\r
2375                          */\r
2376                         len = 0;\r
2377                         /*\r
2378                          * We will turn off usingFallback after finding a fallback,\r
2379                          * but we still get fallbacks from PUA code points as usual.\r
2380                          * Therefore, we will also need to check that we don't overwrite\r
2381                          * an early fallback with a later one.\r
2382                          */\r
2383                         usingFallback = useFallback;\r
2384                         \r
2385                         for (i = 0; i < choiceCount && len <= 0; ++i) {\r
2386                             byte cs0 = choices[i];\r
2387                             if (cs0 > 0) {\r
2388                                 int[] value = new int[1];\r
2389                                 int len2;\r
2390                                 if (cs0 > CNS_11643_0) {\r
2391                                     myConverterData.currentConverter.sharedData = myConverterData.myConverterArray[CNS_11643];\r
2392                                     myConverterData.currentConverter.sharedData.mbcs.outputType = CharsetMBCS.MBCS_OUTPUT_3;\r
2393                                     len2 = myConverterData.currentEncoder.fromUChar32(sourceChar, value, usingFallback);\r
2394                                     //len2 = MBCSFromUChar32_ISO2022(myConverterData.myConverterArray[CNS_11643],\r
2395                                     //        sourceChar, value, usingFallback, CharsetMBCS.MBCS_OUTPUT_3);\r
2396                                     if (len2 == 3 || (len2 == -3 && len == 0)) {\r
2397                                         targetValue = value[0];\r
2398                                         cs = (byte)(CNS_11643_0 + (value[0] >> 16) - 0x80);\r
2399                                         if (len2 >= 0) {\r
2400                                             len = 2;\r
2401                                         } else {\r
2402                                             len = -2;\r
2403                                             usingFallback = false;\r
2404                                         }\r
2405                                         if (cs == CNS_11643_1) {\r
2406                                             g = 1;\r
2407                                         } else if (cs == CNS_11643_2) {\r
2408                                             g = 2;\r
2409                                         } else if (myConverterData.version == 1) { /* plane 3..7 */\r
2410                                             g = 3;\r
2411                                         } else {\r
2412                                             /* ISO-2022-CN (without -EXT) does not support plane 3..7 */\r
2413                                             len = 0;\r
2414                                         }\r
2415                                     }\r
2416                                 } else {\r
2417                                     /* GB2312_1 or ISO-IR-165 */\r
2418                                     myConverterData.currentConverter.sharedData = myConverterData.myConverterArray[cs0];\r
2419                                     myConverterData.currentConverter.sharedData.mbcs.outputType = CharsetMBCS.MBCS_OUTPUT_2;\r
2420                                     len2 = myConverterData.currentEncoder.fromUChar32(sourceChar, value, usingFallback);\r
2421                                     //len2 = MBCSFromUChar32_ISO2022(myConverterData.myConverterArray[cs0],\r
2422                                     //        sourceChar, value, usingFallback, CharsetMBCS.MBCS_OUTPUT_2);\r
2423                                     if (len2 == 2 || (len2 == -2 && len == 0)) {\r
2424                                         targetValue = value[0];\r
2425                                         len = len2;\r
2426                                         cs = cs0;\r
2427                                         g = 1;\r
2428                                         usingFallback = false;\r
2429                                     }\r
2430                                 }\r
2431                             }\r
2432                         }\r
2433                         \r
2434                         if (len != 0) {\r
2435                             len = 0; /* count output bytes; it must have ben abs(len) == 2 */\r
2436                             \r
2437                             /* write the designation sequence if necessary */\r
2438                             if (cs != myConverterData.fromU2022State.cs[g]) {\r
2439                                 if (cs < CNS_11643) {\r
2440                                     for (int n = 0; n < escSeqCharsCN[cs].length; n++) {\r
2441                                         buffer[n] = escSeqCharsCN[cs][n];\r
2442                                     }\r
2443                                 } else {\r
2444                                     for (int n = 0; n < escSeqCharsCN[CNS_11643 + (cs - CNS_11643_1)].length; n++) {\r
2445                                         buffer[n] = escSeqCharsCN[CNS_11643 + (cs - CNS_11643_1)][n];\r
2446                                     }\r
2447                                 }\r
2448                                 len = 4;\r
2449                                 myConverterData.fromU2022State.cs[g] = cs;\r
2450                                 if (g == 1) {\r
2451                                     /* changing the SO/G1 charset invalidates the choices[] */\r
2452                                     choiceCount = 0;\r
2453                                 }\r
2454                             }\r
2455                             \r
2456                             /* write the shift sequence if necessary */\r
2457                             if (g != myConverterData.fromU2022State.g) {\r
2458                                 switch (g) {\r
2459                                 case 1:\r
2460                                     buffer[len++] = UConverterConstants.SO;\r
2461                                     \r
2462                                     /* set the new state only if it is the locking shift SO/G1, not for SS2 or SS3 */\r
2463                                     myConverterData.fromU2022State.g = 1;\r
2464                                     break;\r
2465                                 case 2:\r
2466                                     buffer[len++] = 0x1b;\r
2467                                     buffer[len++] = 0x4e;\r
2468                                     break;\r
2469                                 default: /* case 3 */\r
2470                                     buffer[len++] = 0x1b;\r
2471                                     buffer[len++] = 0x4f;\r
2472                                     break;\r
2473                                 }\r
2474                             }\r
2475                             \r
2476                             /* write the two output bytes */\r
2477                             buffer[len++] = (byte)(targetValue >> 8);\r
2478                             buffer[len++] = (byte)targetValue;\r
2479                         } else {\r
2480                             /* if we cannot find the character after checking all codepages\r
2481                              * then this is an error\r
2482                              */\r
2483                             err = CoderResult.unmappableForLength(source.position()-oldSourcePos);\r
2484                             fromUChar32 = sourceChar;\r
2485                             break;\r
2486                         }\r
2487                     }\r
2488                     /* output len>0 bytes in buffer[] */\r
2489                     if (len == 1) {\r
2490                         target.put(buffer[0]);\r
2491                         if (offsets != null) {\r
2492                             offsets.put(source.position()-1);\r
2493                         }\r
2494                     } else if (len == 2 && (target.remaining() >= 2)) {\r
2495                         target.put(buffer[0]);\r
2496                         target.put(buffer[1]);\r
2497                         if (offsets != null) {\r
2498                             int sourceIndex = source.position();\r
2499                             offsets.put(sourceIndex);\r
2500                             offsets.put(sourceIndex);\r
2501                         }\r
2502                     } else {\r
2503                         err = CharsetEncoderICU.fromUWriteBytes(this, buffer, 0, len, target, offsets, source.position()-1);\r
2504                         if (err.isError()) {\r
2505                             break;\r
2506                         }\r
2507                     }\r
2508                 } else {\r
2509                     err = CoderResult.OVERFLOW;\r
2510                     break;\r
2511                 }\r
2512             } /* end while (source.hasRemaining() */\r
2513             \r
2514             /*\r
2515              * the end of the input stream and detection of truncated input\r
2516              * are handled by the framework, but for ISO-2022-CN conversion\r
2517              * we need to be in ASCII mode at the very end\r
2518              * \r
2519              * condtions:\r
2520              *   succesful\r
2521              *   not in ASCII mode\r
2522              *   end of input and no truncated input\r
2523              */\r
2524             if (!err.isError() && myConverterData.fromU2022State.g != 0 && flush && !source.hasRemaining() && fromUChar32 == 0) {\r
2525                 int sourceIndex;\r
2526                 \r
2527                 /* we are switching to ASCII */\r
2528                 myConverterData.fromU2022State.g = 0;\r
2529                 \r
2530                 /* get the source index of the last input character */\r
2531                 sourceIndex = source.position();\r
2532                 if (sourceIndex > 0) {\r
2533                     --sourceIndex;\r
2534                     if (UTF16.isTrailSurrogate(source.get(sourceIndex)) && \r
2535                             (sourceIndex == 0 || UTF16.isLeadSurrogate(source.get(sourceIndex-1)))) {\r
2536                         --sourceIndex;\r
2537                     }\r
2538                 } else {\r
2539                     sourceIndex = -1;\r
2540                 }\r
2541                 \r
2542                 err = CharsetEncoderICU.fromUWriteBytes(this, SHIFT_IN_STR, 0, 1, target, offsets, sourceIndex);\r
2543             }\r
2544             \r
2545             return err;\r
2546         }\r
2547     }\r
    /******************************** ISO-2022-KR *****************************/
    /*
     *   Rules for ISO-2022-KR encoding
     *   i) The KSC5601 designator sequence should appear only once in a file,
     *      at the beginning of a line before any KSC5601 characters. This usually
     *      means that it appears by itself on the first line of the file
     *  ii) There are only 2 shifting sequences: SO to shift into double byte mode
     *      and SI to shift into single byte mode
     */
2557     private class CharsetEncoderISO2022KR extends CharsetEncoderICU {\r
2558         public CharsetEncoderISO2022KR(CharsetICU cs) {\r
2559             super(cs, fromUSubstitutionChar[myConverterData.version]);\r
2560         }\r
2561         \r
2562         protected void implReset() {\r
2563             super.implReset();\r
2564             myConverterData.reset();\r
2565             setInitialStateFromUnicodeKR(this);\r
2566         }\r
2567         \r
2568         /* This overrides the cbFromUWriteSub method in CharsetEncoderICU */\r
2569         CoderResult cbFromUWriteSub (CharsetEncoderICU encoder, \r
2570             CharBuffer source, ByteBuffer target, IntBuffer offsets){\r
2571             CoderResult err = CoderResult.UNDERFLOW;\r
2572             byte[] buffer = new byte[8];\r
2573             int length, i = 0;\r
2574             byte[] subchar;\r
2575             \r
2576             subchar = encoder.replacement();\r
2577             length = subchar.length;\r
2578             \r
2579             if (myConverterData.version == 0) {\r
2580                 if (length == 1) {\r
2581                     if (encoder.fromUnicodeStatus != 0) {\r
2582                         /* in DBCS mode: switch to SBCS */\r
2583                         encoder.fromUnicodeStatus = 0;\r
2584                         buffer[i++] = UConverterConstants.SI;\r
2585                     }\r
2586                     buffer[i++] = subchar[0];\r
2587                 } else { /* length == 2 */\r
2588                     if (encoder.fromUnicodeStatus == 0) {\r
2589                         /* in SBCS mode: switch to DBCS */\r
2590                         encoder.fromUnicodeStatus = 1;\r
2591                         buffer[i++] = UConverterConstants.SO;\r
2592                     }\r
2593                     buffer[i++] = subchar[0];\r
2594                     buffer[i++] = subchar[1];\r
2595                 }\r
2596                 err = CharsetEncoderICU.fromUWriteBytes(this, buffer, 0, i, target, offsets, source.position() - 1);\r
2597             } else { \r
2598                 /* save the subvonverter's substitution string */\r
2599                 byte[] currentSubChars = myConverterData.currentEncoder.replacement();\r
2600                 \r
2601                 /* set our substitution string into the subconverter */\r
2602                 myConverterData.currentEncoder.replaceWith(subchar);\r
2603                 myConverterData.currentConverter.subChar1 = fromUSubstitutionChar[0][0];\r
2604                 /* let the subconverter write the subchar, set/retrieve fromUChar32 state */\r
2605                 myConverterData.currentEncoder.fromUChar32 = encoder.fromUChar32;\r
2606                 err = myConverterData.currentEncoder.cbFromUWriteSub(myConverterData.currentEncoder, source, target, offsets);\r
2607                 encoder.fromUChar32 = myConverterData.currentEncoder.fromUChar32;\r
2608                 \r
2609                 /* restore the subconverter's substitution string */\r
2610                 myConverterData.currentEncoder.replaceWith(currentSubChars);\r
2611                 \r
2612                 if (err.isOverflow()) {\r
2613                     if (myConverterData.currentEncoder.errorBufferLength > 0) {\r
2614                         encoder.errorBuffer = myConverterData.currentEncoder.errorBuffer.clone();\r
2615                     }\r
2616                     encoder.errorBufferLength = myConverterData.currentEncoder.errorBufferLength;\r
2617                     myConverterData.currentEncoder.errorBufferLength = 0;\r
2618                 }\r
2619             }\r
2620             \r
2621             return err;\r
2622         }\r
2623         \r
2624         private CoderResult encodeLoopIBM(CharBuffer source, ByteBuffer target, IntBuffer offsets, boolean flush) {\r
2625             CoderResult err = CoderResult.UNDERFLOW;\r
2626 \r
2627             myConverterData.currentEncoder.fromUChar32 = fromUChar32;\r
2628             err = myConverterData.currentEncoder.cnvMBCSFromUnicodeWithOffsets(source, target, offsets, flush);\r
2629             fromUChar32 = myConverterData.currentEncoder.fromUChar32;\r
2630             \r
2631             if (err.isOverflow()) {\r
2632                 if (myConverterData.currentEncoder.errorBufferLength > 0) {\r
2633                     errorBuffer = myConverterData.currentEncoder.errorBuffer.clone();\r
2634                 }\r
2635                 errorBufferLength = myConverterData.currentEncoder.errorBufferLength;\r
2636                 myConverterData.currentEncoder.errorBufferLength = 0;\r
2637             }\r
2638             \r
2639             return err;\r
2640         }\r
2641         \r
2642         protected CoderResult encodeLoop(CharBuffer source, ByteBuffer target, IntBuffer offsets, boolean flush) {\r
2643             CoderResult err = CoderResult.UNDERFLOW;\r
2644             int[] targetByteUnit = { 0x0000 };\r
2645             int sourceChar = 0x0000;\r
2646             boolean isTargetByteDBCS;\r
2647             boolean oldIsTargetByteDBCS;\r
2648             boolean usingFallback;\r
2649             int length = 0;\r
2650             boolean gotoGetTrail = false; // for goto getTrail label call\r
2651             \r
2652             /*\r
2653              * if the version is 1 then the user is requesting\r
2654              * conversion with ibm-25546 pass the argument to\r
2655              * MBCS converter and return\r
2656              */\r
2657             if (myConverterData.version == 1) {\r
2658                 return encodeLoopIBM(source, target, offsets, flush);\r
2659             }\r
2660             \r
2661             usingFallback = useFallback;\r
2662             isTargetByteDBCS = fromUnicodeStatus == 0 ? false : true;\r
2663             if ((sourceChar = fromUChar32) != 0 && target.hasRemaining()) {\r
2664                 gotoGetTrail = true;\r
2665             }\r
2666             \r
2667             while (source.hasRemaining() || gotoGetTrail) {\r
2668                 targetByteUnit[0] = UConverterConstants.missingCharMarker;\r
2669                 \r
2670                 if (target.hasRemaining() || gotoGetTrail) {\r
2671                     if (!gotoGetTrail) {\r
2672                         sourceChar = source.get();\r
2673                     \r
2674                         /* do not convert SO/SI/ESC */\r
2675                         if (IS_2022_CONTROL(sourceChar)) {\r
2676                             /* callback(illegal) */\r
2677                             err = CoderResult.malformedForLength(1);\r
2678                             fromUChar32 = sourceChar;\r
2679                             break;\r
2680                         }\r
2681                         myConverterData.currentConverter.sharedData.mbcs.outputType = CharsetMBCS.MBCS_OUTPUT_2;\r
2682                         length = myConverterData.currentEncoder.fromUChar32(sourceChar, targetByteUnit, usingFallback);\r
2683                         //length = MBCSFromUChar32_ISO2022(myConverterData.currentConverter.sharedData, sourceChar, targetByteUnit, usingFallback, CharsetMBCS.MBCS_OUTPUT_2); \r
2684                         if (length < 0) {\r
2685                             length = -length; /* fallback */\r
2686                         }\r
2687                         /* only DBCS or SBCS characters are expected */\r
2688                         /* DB characters with high bit set to 1 are expected */\r
2689                         if (length > 2 || length == 0 ||\r
2690                                 (length == 1 && targetByteUnit[0] > 0x7f) ||\r
2691                                 (length ==2 &&\r
2692                                         ((char)(targetByteUnit[0] - 0xa1a1) > (0xfefe - 0xa1a1) ||\r
2693                                         ((targetByteUnit[0] - 0xa1) & UConverterConstants.UNSIGNED_BYTE_MASK) > (0xfe - 0xa1)))) {\r
2694                             targetByteUnit[0] = UConverterConstants.missingCharMarker;\r
2695                         }\r
2696                     }\r
2697                     if (!gotoGetTrail && targetByteUnit[0] != UConverterConstants.missingCharMarker) {\r
2698                         oldIsTargetByteDBCS = isTargetByteDBCS;\r
2699                         isTargetByteDBCS = (targetByteUnit[0] > 0x00FF);\r
2700                         /* append the shift sequence */\r
2701                         if (oldIsTargetByteDBCS != isTargetByteDBCS) {\r
2702                             if (isTargetByteDBCS) {\r
2703                                 target.put((byte)UConverterConstants.SO);\r
2704                             } else {\r
2705                                 target.put((byte)UConverterConstants.SI);\r
2706                             }\r
2707                             if (offsets != null) {\r
2708                                 offsets.put(source.position()-1);\r
2709                             }\r
2710                         }\r
2711                         /* write the targetUniChar to target */\r
2712                         if (targetByteUnit[0] <= 0x00FF) {\r
2713                             if (target.hasRemaining()) {\r
2714                                 target.put((byte)targetByteUnit[0]);\r
2715                                 if (offsets != null) {\r
2716                                     offsets.put(source.position()-1);\r
2717                                 }\r
2718                             } else {\r
2719                                 errorBuffer[errorBufferLength++] = (byte)targetByteUnit[0];\r
2720                                 err = CoderResult.OVERFLOW;\r
2721                             }\r
2722                         } else {\r
2723                             if (target.hasRemaining()) {\r
2724                                 target.put((byte)(UConverterConstants.UNSIGNED_BYTE_MASK & ((targetByteUnit[0]>>8) - 0x80)));\r
2725                                 if (offsets != null) {\r
2726                                     offsets.put(source.position()-1);\r
2727                                 }\r
2728                                 if (target.hasRemaining()) {\r
2729                                     target.put((byte)(UConverterConstants.UNSIGNED_BYTE_MASK & (targetByteUnit[0]- 0x80)));\r
2730                                     if (offsets != null) {\r
2731                                         offsets.put(source.position()-1);\r
2732                                     }\r
2733                                 } else {\r
2734                                     errorBuffer[errorBufferLength++] = (byte)(UConverterConstants.UNSIGNED_BYTE_MASK & (targetByteUnit[0] - 0x80));\r
2735                                     err = CoderResult.OVERFLOW;\r
2736                                 }\r
2737                                 \r
2738                             } else {\r
2739                                 errorBuffer[errorBufferLength++] = (byte)(UConverterConstants.UNSIGNED_BYTE_MASK & ((targetByteUnit[0]>>8) - 0x80));\r
2740                                 errorBuffer[errorBufferLength++] = (byte)(UConverterConstants.UNSIGNED_BYTE_MASK & (targetByteUnit[0]- 0x80));\r
2741                                 err = CoderResult.OVERFLOW;\r
2742                             }\r
2743                         }\r
2744                     } else {\r
2745                         /* oops.. the code point is unassigned\r
2746                          * set the error and reason\r
2747                          */\r
2748                         \r
2749                         /* check if the char is a First surrogate */\r
2750                         if (gotoGetTrail || UTF16.isSurrogate((char)sourceChar)) {\r
2751                             if (gotoGetTrail || UTF16.isLeadSurrogate((char)sourceChar)) {\r
2752 // getTrail label\r
2753                                 // reset gotoGetTrail flag\r
2754                                 gotoGetTrail = false;\r
2755                                 \r
2756                                 /* look ahead to find the trail surrogate */\r
2757                                 if (source.hasRemaining()) {\r
2758                                     /* test the following code unit */\r
2759                                     char trail = source.get();\r
2760                                     source.position(source.position()-1);\r
2761                                     if (UTF16.isTrailSurrogate(trail)) {\r
2762                                         source.get();\r
2763                                          sourceChar = UCharacter.getCodePoint((char)sourceChar, trail);\r
2764                                          err = CoderResult.unmappableForLength(2);\r
2765                                          /* convert this surrogate code point */\r
2766                                          /* exit this condition tree */\r
2767                                     } else {\r
2768                                         /* this is an unmatched lead code unit (1st surrogate) */\r
2769                                         /* callback(illegal) */\r
2770                                         err = CoderResult.malformedForLength(1);\r
2771                                     }\r
2772                                 } else {\r
2773                                     /* no more input */\r
2774                                     err = CoderResult.UNDERFLOW;\r
2775                                 }\r
2776                             } else {\r
2777                                 /* this is an unmatched trail code unit (2nd surrogate ) */\r
2778                                 /* callback(illegal) */\r
2779                                 err = CoderResult.malformedForLength(1);\r
2780                             }\r
2781                         } else {\r
2782                             /* callback(unassigned) for a BMP code point */\r
2783                             err = CoderResult.unmappableForLength(1);\r
2784                         }\r
2785                         \r
2786                         fromUChar32 = sourceChar;\r
2787                         break;\r
2788                     }\r
2789                 } else {\r
2790                     err = CoderResult.OVERFLOW;\r
2791                     break;\r
2792                 }\r
2793             }\r
2794             /*\r
2795              * the end of the input stream and detection of truncated input\r
2796              * are handled by the framework, but for ISO-2022-KR conversion\r
2797              * we need to be inASCII mode at the very end\r
2798              * \r
2799              * conditions:\r
2800              *  successful\r
2801              *  not in ASCII mode\r
2802              *  end of  input and no truncated input\r
2803              */\r
2804             if (!err.isError() && isTargetByteDBCS && flush && !source.hasRemaining() && fromUChar32 == 0) {\r
2805                 int sourceIndex;\r
2806                 \r
2807                 /* we are switching to ASCII */\r
2808                 isTargetByteDBCS = false;\r
2809                 \r
2810                 /* get the source index of the last input character */\r
2811                 sourceIndex = source.position();\r
2812                 if (sourceIndex > 0) {\r
2813                     --sourceIndex;\r
2814                     if (UTF16.isTrailSurrogate(source.get(sourceIndex)) && UTF16.isLeadSurrogate(source.get(sourceIndex-1))) {\r
2815                         --sourceIndex;\r
2816                     }\r
2817                 } else {\r
2818                     sourceIndex = -1;\r
2819                 }\r
2820                 \r
2821                 CharsetEncoderICU.fromUWriteBytes(this, SHIFT_IN_STR, 0, 1, target, offsets, sourceIndex);\r
2822             }\r
2823             /*save the state and return */\r
2824             fromUnicodeStatus = isTargetByteDBCS ? 1 : 0;\r
2825             \r
2826             return err;\r
2827         }\r
2828     }\r
2829     \r
2830     public CharsetDecoder newDecoder() {\r
2831         switch (variant) {\r
2832         case ISO_2022_JP:\r
2833             return new CharsetDecoderISO2022JP(this);\r
2834         \r
2835         case ISO_2022_CN:\r
2836             return new CharsetDecoderISO2022CN(this);\r
2837             \r
2838         case ISO_2022_KR:\r
2839             setInitialStateToUnicodeKR();\r
2840             return new CharsetDecoderISO2022KR(this);\r
2841             \r
2842         default: /* should not happen */\r
2843             return null;\r
2844         }\r
2845     }\r
2846     \r
2847     public CharsetEncoder newEncoder() {\r
2848         CharsetEncoderICU cnv;\r
2849         \r
2850         switch (variant) {\r
2851         case ISO_2022_JP:\r
2852             return new CharsetEncoderISO2022JP(this);\r
2853             \r
2854         case ISO_2022_CN:\r
2855             return new CharsetEncoderISO2022CN(this);\r
2856             \r
2857         case ISO_2022_KR:\r
2858             cnv = new CharsetEncoderISO2022KR(this);\r
2859             setInitialStateFromUnicodeKR(cnv);\r
2860             return cnv;\r
2861             \r
2862         default: /* should not happen */\r
2863             return null;\r
2864         }\r
2865     }\r
2866     \r
2867     private void setInitialStateToUnicodeKR() {\r
2868         if (myConverterData.version == 1) {\r
2869             myConverterData.currentDecoder.toUnicodeStatus = 0;     /* offset */\r
2870             myConverterData.currentDecoder.mode = 0;                /* state */\r
2871             myConverterData.currentDecoder.toULength = 0;           /* byteIndex */\r
2872         }\r
2873     }\r
2874     private void setInitialStateFromUnicodeKR(CharsetEncoderICU cnv) {\r
2875         /* ISO-2022-KR the designator sequence appears only once\r
2876          * in a file so we append it only once\r
2877          */\r
2878         if (cnv.errorBufferLength == 0) {\r
2879             cnv.errorBufferLength = 4;\r
2880             cnv.errorBuffer[0] = 0x1b;\r
2881             cnv.errorBuffer[1] = 0x24;\r
2882             cnv.errorBuffer[2] = 0x29;\r
2883             cnv.errorBuffer[3] = 0x43;\r
2884         }\r
2885         if (myConverterData.version == 1) {\r
2886             ((CharsetMBCS)myConverterData.currentEncoder.charset()).subChar1 = 0x1A;\r
2887             myConverterData.currentEncoder.fromUChar32 = 0;\r
2888             myConverterData.currentEncoder.fromUnicodeStatus = 1; /* prevLength */\r
2889         }\r
2890     }\r
2891     \r
2892     void getUnicodeSetImpl(UnicodeSet setFillIn, int which) {\r
2893         int i;\r
2894         /*open a set and initialize it with code points that are algorithmically round-tripped */\r
2895         \r
2896         switch(variant){\r
2897         case ISO_2022_JP:\r
2898            /*include JIS X 0201 which is hardcoded */\r
2899             setFillIn.add(0xa5);\r
2900             setFillIn.add(0x203e);\r
2901             if((jpCharsetMasks[myConverterData.version]&CSM(ISO8859_1))!=0){\r
2902                 /*include Latin-1 some variants of JP */\r
2903                 setFillIn.add(0, 0xff);\r
2904             \r
2905             }\r
2906             else {\r
2907                 /* include ASCII for JP */\r
2908                 setFillIn.add(0, 0x7f);\r
2909              }\r
2910             if(myConverterData.version==3 || myConverterData.version==4 ||which == ROUNDTRIP_AND_FALLBACK_SET){\r
2911             /*\r
2912              * Do not test(jpCharsetMasks[myConverterData.version]&CSM(HWKANA_7BIT))!=0 because the bit\r
2913              * is on for all JP versions although version 3 & 4 (JIS7 and JIS8) use half-width Katakana.\r
2914              * This is because all ISO_2022_JP variant are lenient in that they accept (in toUnicode) half-width\r
2915              * Katakana via ESC.\r
2916              * However, we only emit (fromUnicode) half-width Katakana according to the\r
2917              * definition of each variant.\r
2918              *\r
2919              * When including fallbacks,\r
2920              * we need to include half-width Katakana Unicode code points for all JP variants because\r
2921              * JIS X 0208 has hardcoded fallbacks for them (which map to full-width Katakana).\r
2922              */\r
2923             /* include half-width Katakana for JP */\r
2924                 setFillIn.add(HWKANA_START, HWKANA_END);\r
2925              }\r
2926             break;\r
2927         case ISO_2022_CN:\r
2928             /* Include ASCII for CN */\r
2929             setFillIn.add(0, 0x7f);\r
2930             break;\r
2931         case ISO_2022_KR:\r
2932             /* there is only one converter for KR */\r
2933           myConverterData.currentConverter.getUnicodeSetImpl(setFillIn, which);\r
2934           break;\r
2935         default:\r
2936             break;\r
2937         }\r
2938         \r
2939         //TODO Replaced by ucnv_MBCSGetFilteredUnicodeSetForUnicode() until\r
2940         for(i=0; i<UCNV_2022_MAX_CONVERTERS;i++){\r
2941             int filter;\r
2942             if(myConverterData.myConverterArray[i]!=null){\r
2943                 if(variant==ISO_2022_CN && myConverterData.version==0 && i==CNS_11643){\r
2944                     /*\r
2945                      * \r
2946                      * version -specific for CN:\r
2947                      * CN version 0 does not map CNS planes 3..7 although\r
2948                      * they are all available in the CNS conversion table;\r
2949                      * CN version 1 (-EXT) does map them all.\r
2950                      * The two versions create different Unicode sets.\r
2951                      */\r
2952                     filter=CharsetMBCS.UCNV_SET_FILTER_2022_CN;\r
2953                 } else if(variant==ISO_2022_JP && i == JISX208){\r
2954                     /* \r
2955                      * Only add code points that map to Shift-JIS codes\r
2956                      * corrosponding to JIS X 208\r
2957                      */\r
2958                     filter=CharsetMBCS.UCNV_SET_FILTER_SJIS;\r
2959                 } else if(i==KSC5601){\r
2960                     /*\r
2961                      * Some of the KSC 5601 tables (Convrtrs.txt has this aliases on multiple tables)\r
2962                      * are broader than GR94.\r
2963                      */\r
2964                     filter=CharsetMBCS.UCNV_SET_FILTER_GR94DBCS;\r
2965                 } else {\r
2966                     filter=CharsetMBCS.UCNV_SET_FILTER_NONE;\r
2967                 }\r
2968                 \r
2969                 myConverterData.currentConverter.MBCSGetFilteredUnicodeSetForUnicode(myConverterData.myConverterArray[i],setFillIn, which, filter);\r
2970            }\r
2971         }\r
2972         /*\r
2973          * ISO Converter must not convert SO/SI/ESC despite what sub-converters do by themselves\r
2974          * Remove these characters from the set.\r
2975          */\r
2976         setFillIn.remove(0x0e);\r
2977         setFillIn.remove(0x0f);\r
2978         setFillIn.remove(0x1b);\r
2979         \r
2980         /* ISO 2022 converter do not convert C! controls either */\r
2981         setFillIn.remove(0x80, 0x9f);\r
2982     }\r
2983 }\r
2984 \r
2985 \r
2986 \r
2987 \r
2988 \r
2989 \r
2990 \r
2991 \r
2992 \r