1// © 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html#License
3/*
4 *******************************************************************************
5 * Copyright (C) 2008-2015, International Business Machines Corporation and
6 * others. All Rights Reserved.
7 *******************************************************************************
8 */
9package com.ibm.icu.charset;
10
11import java.nio.ByteBuffer;
12import java.nio.CharBuffer;
13import java.nio.IntBuffer;
14import java.nio.charset.CharsetDecoder;
15import java.nio.charset.CharsetEncoder;
16import java.nio.charset.CoderResult;
17import java.util.Arrays;
18
19import com.ibm.icu.charset.CharsetMBCS.CharsetDecoderMBCS;
20import com.ibm.icu.charset.CharsetMBCS.CharsetEncoderMBCS;
21import com.ibm.icu.lang.UCharacter;
22import com.ibm.icu.text.UTF16;
23import com.ibm.icu.text.UnicodeSet;
24
25class CharsetISO2022 extends CharsetICU {
    /* Per-charset ISO-2022 data; allocated in the constructor and filled in by the ISO2022Init*() methods. */
    private UConverterDataISO2022 myConverterData;
    /* Set by the ISO2022Init*() methods. */
    private int variant;           // one of enum {ISO_2022_JP, ISO_2022_KR, or ISO_2022_CN}
28
    /* One-byte sequence holding 0x0f; the disabled SHIFT_OUT_STR counterpart below holds 0x0e. */
    private static final byte[] SHIFT_IN_STR    = { 0x0f };
//    private static final byte[] SHIFT_OUT_STR   = { 0x0e };

    /* Byte values 0x0D (carriage return) and 0x0A (line feed). */
    private static final byte CR    = 0x0D;
    private static final byte LF    = 0x0A;
/*
    private static final byte H_TAB = 0x09;
    private static final byte SPACE = 0x20;
*/
    /* Inclusive bounds U+FF61..U+FF9F; going by the names, the halfwidth Katakana range. */
    private static final char HWKANA_START  = 0xff61;
    private static final char HWKANA_END    = 0xff9f;
40
    /*
     * 94-character sets with native byte values A1..FE are encoded in ISO 2022
     * as bytes 21..7E. (Subtract 0x80.)
     * 96-character sets with native byte values A0..FF are encoded in ISO 2022
     * as bytes 20..7F. (Subtract 0x80.)
     * Do not encode C1 control codes with native bytes 80..9F
     * as bytes 00..1F (C0 control codes).
     */
/*
    private static final char GR94_START    = 0xa1;
    private static final char GR94_END      = 0xfe;
*/
    /* Inclusive bounds A0..FF of the native byte range of a 96-character set (see above). */
    private static final char GR96_START    = 0xa0;
    private static final char GR96_END      = 0xff;
55
    /* for ISO-2022-JP and -CN implementations */
    /*
     * Charset/state numbers (a C enum ported as byte constants).
     * The JP and the CN numbering overlap (values 1..3 are used by both), so a
     * value is only meaningful together with the variant it belongs to.
     */
    // typedef enum {
        /* shared values */
        private static final byte INVALID_STATE = -1;
        private static final byte ASCII         = 0;

        /* markers used in the nextStateToUnicode*[] tables for the single-shift sequences */
        private static final byte SS2_STATE = 0x10;
        private static final byte SS3_STATE = 0x11;

        /* JP */
        /* These values are also used as bit positions by CSM() for jpCharsetMasks[]. */
        private static final byte ISO8859_1 = 1;
        private static final byte ISO8859_7 = 2;
        private static final byte JISX201   = 3;
        private static final byte JISX208   = 4;
        private static final byte JISX212   = 5;
        private static final byte GB2312    = 6;
        private static final byte KSC5601   = 7;
        private static final byte HWKANA_7BIT  = 8; /* Halfwidth Katakana 7 bit */

        /* CN */
        /* the first few enum constants must keep their values because they correspond to myConverterArray[] */
        private static final byte GB2312_1  = 1;
        private static final byte ISO_IR_165= 2;
        private static final byte CNS_11643 = 3;

        /*
         * these are used in StateEnum and ISO2022State variables,
         * but CNS_11643 must be used to index into myConverterArray[]
         */
        private static final byte CNS_11643_0 = 0x20;
        private static final byte CNS_11643_1 = 0x21;
        private static final byte CNS_11643_2 = 0x22;
        private static final byte CNS_11643_3 = 0x23;
        private static final byte CNS_11643_4 = 0x24;
        private static final byte CNS_11643_5 = 0x25;
        private static final byte CNS_11643_6 = 0x26;
        private static final byte CNS_11643_7 = 0x27;
    // } StateEnum;
94
95
96    public CharsetISO2022(String icuCanonicalName, String javaCanonicalName, String[] aliases) {
97        super(icuCanonicalName, javaCanonicalName, aliases);
98
99        myConverterData = new UConverterDataISO2022();
100
101        int versionIndex = icuCanonicalName.indexOf("version=");
102        int version = Integer.decode(icuCanonicalName.substring(versionIndex+8, versionIndex+9)).intValue();
103
104        myConverterData.version = version;
105
106        if (icuCanonicalName.indexOf("locale=ja") > 0) {
107            ISO2022InitJP(version);
108        } else if (icuCanonicalName.indexOf("locale=zh") > 0) {
109            ISO2022InitCN(version);
110        } else /* if (icuCanonicalName.indexOf("locale=ko") > 0) */ {
111            ISO2022InitKR(version);
112        }
113
114        myConverterData.currentEncoder = (CharsetEncoderMBCS)myConverterData.currentConverter.newEncoder();
115        myConverterData.currentDecoder = (CharsetDecoderMBCS)myConverterData.currentConverter.newDecoder();
116    }
117
118    private void ISO2022InitJP(int version) {
119        variant = ISO_2022_JP;
120
121        maxBytesPerChar = 6;
122        minBytesPerChar = 1;
123        maxCharsPerByte = 1;
124        // open the required converters and cache them
125        if((jpCharsetMasks[version]&CSM(ISO8859_7)) != 0) {
126            myConverterData.myConverterArray[ISO8859_7] = ((CharsetMBCS)CharsetICU.forNameICU("ISO8859_7")).sharedData;
127        }
128        // myConverterData.myConverterArray[JISX201] = ((CharsetMBCS)CharsetICU.forNameICU("jisx-201")).sharedData;
129        myConverterData.myConverterArray[JISX208] = ((CharsetMBCS)CharsetICU.forNameICU("Shift-JIS")).sharedData;
130        if ((jpCharsetMasks[version]&CSM(JISX212)) != 0) {
131            myConverterData.myConverterArray[JISX212] = ((CharsetMBCS)CharsetICU.forNameICU("jisx-212")).sharedData;
132        }
133        if ((jpCharsetMasks[version]&CSM(GB2312)) != 0) {
134            myConverterData.myConverterArray[GB2312] = ((CharsetMBCS)CharsetICU.forNameICU("ibm-5478")).sharedData;
135        }
136        if ((jpCharsetMasks[version]&CSM(KSC5601)) != 0) {
137            myConverterData.myConverterArray[KSC5601] = ((CharsetMBCS)CharsetICU.forNameICU("ksc_5601")).sharedData;
138        }
139
140        // create a generic CharsetMBCS object
141        myConverterData.currentConverter = (CharsetMBCS)CharsetICU.forNameICU("icu-internal-25546");
142    }
143
144    private void ISO2022InitCN(int version) {
145        variant = ISO_2022_CN;
146
147        maxBytesPerChar = 8;
148        minBytesPerChar = 1;
149        maxCharsPerByte = 1;
150        // open the required coverters and cache them.
151        myConverterData.myConverterArray[GB2312_1] = ((CharsetMBCS)CharsetICU.forNameICU("ibm-5478")).sharedData;
152        if (version == 1) {
153            myConverterData.myConverterArray[ISO_IR_165] = ((CharsetMBCS)CharsetICU.forNameICU("iso-ir-165")).sharedData;
154        }
155        myConverterData.myConverterArray[CNS_11643] = ((CharsetMBCS)CharsetICU.forNameICU("cns-11643-1992")).sharedData;
156
157        // create a generic CharsetMBCS object
158        myConverterData.currentConverter = (CharsetMBCS)CharsetICU.forNameICU("icu-internal-25546");
159    }
160
161    private void ISO2022InitKR(int version) {
162        variant = ISO_2022_KR;
163
164        maxBytesPerChar = 8;
165        minBytesPerChar = 1;
166        maxCharsPerByte = 1;
167
168        if (version == 1) {
169            myConverterData.currentConverter = (CharsetMBCS)CharsetICU.forNameICU("icu-internal-25546");
170            myConverterData.currentConverter.subChar1 = fromUSubstitutionChar[0][0];
171        } else {
172            myConverterData.currentConverter = (CharsetMBCS)CharsetICU.forNameICU("ibm-949");
173        }
174
175        myConverterData.currentEncoder = (CharsetEncoderMBCS)myConverterData.currentConverter.newEncoder();
176        myConverterData.currentDecoder = (CharsetDecoderMBCS)myConverterData.currentConverter.newDecoder();
177    }
178
179    /*
180     * ISO 2022 control codes must not be converted from Unicode
181     * because they would mess up the byte stream.
182     * The bit mask 0x0800c000 has bits set at bit positions 0xe, 0xf, 0x1b
183     * corresponding to SO, SI, and ESC.
184     */
185    private static boolean IS_2022_CONTROL(int c) {
186        return (c<0x20) && (((1<<c) & 0x0800c000) != 0);
187    }
188
189    /*
190     * Check that the result is a 2-byte value with each byte in the range A1..FE
191     * (strict EUC DBCS) before accepting it and subtracting 0x80 from each byte
192     * to move it to the ISO 2022 range 21..7E.
193     * return 0 if out of range.
194     */
195    private static int _2022FromGR94DBCS(int value) {
196        if ((value <= 0xfefe && value >= 0xa1a1) &&
197                ((short)(value&UConverterConstants.UNSIGNED_BYTE_MASK) <= 0xfe && ((short)(value&UConverterConstants.UNSIGNED_BYTE_MASK) >= 0xa1))) {
198            return (value - 0x8080); /* shift down to 21..7e byte range */
199        } else {
200            return 0; /* not valid for ISO 2022 */
201        }
202    }
203
204    /*
205     * Commented out because Ticket 5691: Call sites now check for validity. They can just += 0x8080 after that.
206     *
207     * This method does the reverse of _2022FromGR94DBCS(). Given the 2022 code point, it returns the
208     * 2 byte value that is in the range A1..FE for each byte. Otherwise it returns the 2022 code point
209     * unchanged.
210     *
211    private static int _2022ToGR94DBCS(int value) {
212        int returnValue = value + 0x8080;
213
214        if ((returnValue <= 0xfefe && returnValue >= 0xa1a1) &&
215                ((short)(returnValue&UConverterConstants.UNSIGNED_BYTE_MASK) <= 0xfe && ((short)(returnValue&UConverterConstants.UNSIGNED_BYTE_MASK) >= 0xa1))) {
216            return returnValue;
217        } else {
218            return value;
219        }
220    }*/
221
222    /* is the StateEnum charset value for a DBCS charset? */
223    private static boolean IS_JP_DBCS(byte cs) {
224        return ((JISX208 <= cs) && (cs <= KSC5601));
225    }
226
227    private static short CSM(short cs) {
228        return (short)(1<<cs);
229    }
230
231    /* This gets the valid index of the end of buffer when decoding. */
232    private static int getEndOfBuffer_2022(ByteBuffer source) {
233        int sourceIndex = source.position();
234        byte mySource = 0;
235        mySource = source.get(sourceIndex);
236
237        while (source.hasRemaining() && mySource != ESC_2022) {
238            mySource = source.get();
239            if (mySource == ESC_2022) {
240                break;
241            }
242            sourceIndex++;
243        }
244        return sourceIndex;
245    }
246
247    /*
248     * This is a simple version of _MBCSGetNextUChar() calls the method in CharsetDecoderMBCS and returns
249     * the value given.
250     *
251     * Return value:
252     * U+fffe   unassigned
253     * U+ffff   illegal
254     * otherwise the Unicode code point
255     */
256     private int MBCSSimpleGetNextUChar(UConverterSharedData sharedData,
257                               ByteBuffer   source,
258                               boolean      useFallback) {
259         int returnValue;
260         UConverterSharedData tempSharedData = myConverterData.currentConverter.sharedData;
261         myConverterData.currentConverter.sharedData = sharedData;
262         returnValue = myConverterData.currentDecoder.simpleGetNextUChar(source, useFallback);
263         myConverterData.currentConverter.sharedData = tempSharedData;
264
265         return returnValue;
266    }
267
268    /*
269     * @param is the the output byte
270     * @return 1 roundtrip byte  0 no mapping  -1 fallback byte
271     */
272    static int MBCSSingleFromUChar32(UConverterSharedData sharedData, int c, int[] retval, boolean useFallback) {
273        char[] table;
274        int value;
275        /* BMP-only codepages are stored without stage 1 entries for supplementary code points */
276        if (c >= 0x10000 && !sharedData.mbcs.hasSupplementary()) {
277            return 0;
278        }
279        /* convert the Unicode code point in c into codepage bytes */
280        table = sharedData.mbcs.fromUnicodeTable;
281        /* get the byte for the output */
282        value = CharsetMBCS.MBCS_SINGLE_RESULT_FROM_U(table, sharedData.mbcs.fromUnicodeChars, c);
283        /* get the byte for the output */
284        retval[0] = value & 0xff;
285        if (value >= 0xf00) {
286            return 1; /* roundtrip */
287        } else if (useFallback ? value>=0x800 : value>=0xc00) {
288            return -1; /* fallback taken */
289        } else {
290            return 0; /* no mapping */
291        }
292    }
293
294    /*
295     * Each of these charset masks (with index x) contains a bit for a charset in exact correspondence
296     * to whether that charset is used in the corresponding version x of ISO_2022, locale=ja,version=x
297     *
298     * Note: The converter uses some leniency:
299     * - The escape sequence ESC ( I for half-width 7-bit Katakana is recognized in
300     *   all versions, not just JIS7 and JIS8.
301     * - ICU does not distinguish between different version so of JIS X 0208.
302     */
303    private static final short jpCharsetMasks[] = {
304        (short)(CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)),
305        (short)(CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212)),
306        (short)(CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212)|CSM(GB2312)|CSM(KSC5601)|CSM(ISO8859_1)|CSM(ISO8859_7)),
307        (short)(CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212)|CSM(GB2312)|CSM(KSC5601)|CSM(ISO8859_1)|CSM(ISO8859_7)),
308        (short)(CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212)|CSM(GB2312)|CSM(KSC5601)|CSM(ISO8859_1)|CSM(ISO8859_7))
309    };
310
311/*
312    // typedef enum {
313        private static final byte ASCII1 = 0;
314        private static final byte LATIN1 = 1;
315        private static final byte SBCS   = 2;
316        private static final byte DBCS   = 3;
317        private static final byte MBCS   = 4;
318        private static final byte HWKANA = 5;
319    // } Cnv2002Type;
320*/
321
322    private static class ISO2022State {
323        private byte []cs;  /* Charset number for SI (G0)/SO (G1)/SS2 (G2)/SS3 (G3) */
324        private byte g;     /* 0..3 for G0..G3 (SI/SO/SS2/SS3) */
325        private byte prevG; /* g before single shift (SS2 or SS3) */
326
327        ISO2022State() {
328            cs = new byte[4];
329        }
330
331        void reset() {
332            Arrays.fill(cs, (byte)0);
333            g = 0;
334            prevG = 0;
335        }
336    }
337
//    private static final byte UCNV_OPTIONS_VERSION_MASK = 0xf;
    /* Length of UConverterDataISO2022.myConverterArray, which is indexed by StateEnum charset values. */
    private static final byte UCNV_2022_MAX_CONVERTERS  = 10;
340
341    private static class UConverterDataISO2022 {
342        UConverterSharedData []myConverterArray;
343        CharsetEncoderMBCS currentEncoder;
344        CharsetDecoderMBCS currentDecoder;
345        CharsetMBCS currentConverter;
346        ISO2022State toU2022State;
347        ISO2022State fromU2022State;
348        int key;
349        int version;
350        boolean isEmptySegment;
351
352        UConverterDataISO2022() {
353            myConverterArray = new UConverterSharedData[UCNV_2022_MAX_CONVERTERS];
354            toU2022State = new ISO2022State();
355            fromU2022State = new ISO2022State();
356            key = 0;
357            version = 0;
358            isEmptySegment = false;
359        }
360
361        void reset() {
362            toU2022State.reset();
363            fromU2022State.reset();
364            isEmptySegment = false;
365        }
366    }
367
    private static final byte ESC_2022 = 0x1B; /* ESC */

    /* Classification of a (partial) escape sequence, as stored in escSeqStateTable_Value_2022[]. */
    // typedef enum {
        private static final byte INVALID_2022              = -1; /* Doesn't correspond to a valid iso 2022 escape sequence */
        private static final byte VALID_NON_TERMINAL_2022   =  0;  /* so far corresponds to a valid iso 2022 escape sequence */
        private static final byte VALID_TERMINAL_2022       =  1;  /* corresponds to a valid iso 2022 escape sequence */
        private static final byte VALID_MAYBE_TERMINAL_2022 =  2;  /* so far matches one iso 2022 escape sequence, but by adding
                                                                     more characters might match another escape sequence */
    // } UCNV_TableStates_2022;
377
378    /*
379     * The way these state transition arrays work is:
380     * ex : ESC$B is the sequence for JISX208
381     *      a) First Iteration: char is ESC
382     *          i) Get the value of ESC from normalize_esq_chars_2022[] with int value of ESC as index
383     *             int x = normalize_esq_chars_2022[27] which is equal to 1
384     *         ii) Search for this value in escSeqStateTable_Key_2022[]
385     *             value of x is stored at escSeqStateTable_Key_2022[0]
386     *        iii) Save this index as offset
387     *         iv) Get state of this sequence from escSeqStateTable_Value_2022[]
388     *             escSeqStateTable_value_2022[offset], which is VALID_NON_TERMINAL_2022
389     *      b) Switch on this state and continue to next char
390     *          i) Get the value of $ from normalize_esq_chars_2022[] with int value of $ as index
391     *             which is normalize_esq_chars_2022[36] == 4
392     *         ii) x is currently 1(from above)
393     *             x<<=5 -- x is now 32
394     *             x+=normalize_esq_chars_2022[36]
395     *             now x is 36
396     *        iii) Search for this value in escSeqStateTable_Key_2022[]
397     *             value of x is stored at escSeqStateTable_Key_2022[2], so offset is 2
398     *         iv) Get state of this sequence from escSeqStateTable_Value_2022[]
399     *             escSeqStateTable_Value_2022[offset], which is VALID_NON_TERMINAL_2022
400     *      c) Switch on this state and continue to next char
401     *          i) Get the value of B from normalize_esq_chars_2022[] with int value of B as index
402     *         ii) x is currently 36 (from above)
403     *             x<<=5 -- x is now 1152
404     *             x+= normalize_esq_chars_2022[66]
405     *             now x is 1161
406     *        iii) Search for this value in escSeqStateTable_Key_2022[]
     *             value of x is stored at escSeqStateTable_Key_2022[24], so offset is 24
     *         iv) Get state of this sequence from escSeqStateTable_Value_2022[]
     *             escSeqStateTable_Value_2022[offset], which is VALID_TERMINAL_2022
     *          v) Get the charset state from nextStateToUnicodeJP[24] which is JISX208
411     */
     /* Below are the 3 arrays depicting a state transition table */
     /*
      * Maps a byte of an escape sequence to a small code (1..29) that is folded into
      * the lookup key, five bits per byte; 0 marks a byte that has no code.
      * Reading off the positions below: ESC (27) -> 1, '$' (36) -> 4, '(' (40) -> 2,
      * '@' (64) -> 5, 'A'..'M' (65..77) -> 8..20.
      */
     private static final byte normalize_esq_chars_2022[] = {
         /* 0       1       2       3       4       5       6       7       8       9 */
            0,      0,      0,      0,      0,      0,      0,      0,      0,      0,
            0,      0,      0,      0,      0,      0,      0,      0,      0,      0,
            0,      0,      0,      0,      0,      0,      0,      1,      0,      0,
            0,      0,      0,      0,      0,      0,      4,      7,     29,      0,
            2,     24,     26,     27,      0,      3,     23,      6,      0,      0,
            0,      0,      0,      0,      0,      0,      0,      0,      0,      0,
            0,      0,      0,      0,      5,      8,      9,     10,     11,     12,
           13,     14,     15,     16,     17,     18,     19,     20,     25,     28,
            0,      0,     21,      0,      0,      0,      0,      0,      0,      0,
           22,      0,      0,      0,      0,      0,      0,      0,      0,      0,
            0,      0,      0,      0,      0,      0,      0,      0,      0,      0,
            0,      0,      0,      0,      0,      0,      0,      0,      0,      0,
            0,      0,      0,      0,      0,      0,      0,      0,      0,      0,
            0,      0,      0,      0,      0,      0,      0,      0,      0,      0,
            0,      0,      0,      0,      0,      0,      0,      0,      0,      0,
            0,      0,      0,      0,      0,      0,      0,      0,      0,      0,
            0,      0,      0,      0,      0,      0,      0,      0,      0,      0,
            0,      0,      0,      0,      0,      0,      0,      0,      0,      0,
            0,      0,      0,      0,      0,      0,      0,      0,      0,      0,
            0,      0,      0,      0,      0,      0,      0,      0,      0,      0,
            0,      0,      0,      0,      0,      0,      0,      0,      0,      0,
            0,      0,      0,      0,      0,      0,      0,      0,      0,      0,
            0,      0,      0,      0,      0,      0,      0,      0,      0,      0,
            0,      0,      0,      0,      0,      0,      0,      0,      0,      0,
            0,      0,      0,      0,      0,      0,      0,      0,      0,      0,
            0,      0,      0,      0,      0,      0,      0,      0,      0,      0,
            0,      0,      0,      0,      0,      0
     };
443
     /* Number of entries in each of the escape-sequence state tables below. */
     private static final short MAX_STATES_2022 = 74;
     /*
      * Keys of all known (partial) escape sequences, listed in ascending order.
      * The position of a key in this array is the offset used with
      * escSeqStateTable_Value_2022[] and the nextStateToUnicode*[] tables.
      */
     private static final int escSeqStateTable_Key_2022[/* MAX_STATES_2022 */] = {
         /* 0        1          2         3        4          5         6         7         8         9 */
            1,      34,        36,       39,      55,        57,       60,       61,     1093,     1096,
         1097,    1098,      1099,     1100,     1101,     1102,     1103,     1104,     1105,     1106,
         1109,    1154,      1157,     1160,     1161,     1176,     1178,     1179,     1254,     1257,
         1768,    1773,      1957,    35105,    36933,    36936,    36937,    36938,    36939,    36940,
        36942,   36943,     36944,    36945,    36946,    36947,    36948,    37640,    37642,    37644,
        37646,   37711,     37744,    37745,    37746,    37747,    37748,    40133,    40136,    40138,
        40139,   40140,     40141,  1123363, 35947624, 35947625, 35947626, 35947627, 35947629, 35947630,
     35947631, 35947635, 35947636, 35947638
     };
456
     /*
      * Parallel to escSeqStateTable_Key_2022[]: entry i classifies the escape sequence
      * with key i as terminal, non-terminal or maybe-terminal (five entries per row).
      */
     private static final byte escSeqStateTable_Value_2022[/* MAX_STATES_2022 */] = {
         /*         0                           1                           2                           3                       4               */
         VALID_NON_TERMINAL_2022,   VALID_NON_TERMINAL_2022,    VALID_NON_TERMINAL_2022,    VALID_NON_TERMINAL_2022,    VALID_NON_TERMINAL_2022,
             VALID_TERMINAL_2022,       VALID_TERMINAL_2022,    VALID_NON_TERMINAL_2022,        VALID_TERMINAL_2022,        VALID_TERMINAL_2022,
       VALID_MAYBE_TERMINAL_2022,       VALID_TERMINAL_2022,        VALID_TERMINAL_2022,        VALID_TERMINAL_2022,        VALID_TERMINAL_2022,
             VALID_TERMINAL_2022,       VALID_TERMINAL_2022,        VALID_TERMINAL_2022,        VALID_TERMINAL_2022,        VALID_TERMINAL_2022,
             VALID_TERMINAL_2022,   VALID_NON_TERMINAL_2022,        VALID_TERMINAL_2022,        VALID_TERMINAL_2022,        VALID_TERMINAL_2022,
         VALID_NON_TERMINAL_2022,   VALID_NON_TERMINAL_2022,    VALID_NON_TERMINAL_2022,    VALID_NON_TERMINAL_2022,        VALID_TERMINAL_2022,
             VALID_TERMINAL_2022,       VALID_TERMINAL_2022,        VALID_TERMINAL_2022,    VALID_NON_TERMINAL_2022,        VALID_TERMINAL_2022,
             VALID_TERMINAL_2022,       VALID_TERMINAL_2022,        VALID_TERMINAL_2022,        VALID_TERMINAL_2022,        VALID_TERMINAL_2022,
             VALID_TERMINAL_2022,       VALID_TERMINAL_2022,        VALID_TERMINAL_2022,        VALID_TERMINAL_2022,        VALID_TERMINAL_2022,
             VALID_TERMINAL_2022,       VALID_TERMINAL_2022,        VALID_TERMINAL_2022,        VALID_TERMINAL_2022,        VALID_TERMINAL_2022,
             VALID_TERMINAL_2022,       VALID_TERMINAL_2022,        VALID_TERMINAL_2022,        VALID_TERMINAL_2022,        VALID_TERMINAL_2022,
             VALID_TERMINAL_2022,       VALID_TERMINAL_2022,        VALID_TERMINAL_2022,        VALID_TERMINAL_2022,        VALID_TERMINAL_2022,
             VALID_TERMINAL_2022,       VALID_TERMINAL_2022,        VALID_TERMINAL_2022,    VALID_NON_TERMINAL_2022,        VALID_TERMINAL_2022,
             VALID_TERMINAL_2022,       VALID_TERMINAL_2022,        VALID_TERMINAL_2022,        VALID_TERMINAL_2022,        VALID_TERMINAL_2022,
             VALID_TERMINAL_2022,       VALID_TERMINAL_2022,        VALID_TERMINAL_2022,        VALID_TERMINAL_2022
     };
475
     /* Type def for refactoring changeState_2022 code */
     /* One of these is stored in the variant field by the matching ISO2022Init*() method. */
     // typedef enum {
         private static final byte ISO_2022_JP = 1;
         private static final byte ISO_2022_KR = 2;
         private static final byte ISO_2022_CN = 3;
     // } Variant2022;
482
483    /* const UConverterSharedData _ISO2022Data; */
484    //private UConverterSharedData _ISO2022JPData;
485    //private UConverterSharedData _ISO2022KRData;
486    //private UConverterSharedData _ISO2022CNData;
487
    /******************** to unicode ********************/
    /****************************************************
     * Recognized escape sequences are
     * <ESC>(B  ASCII
     * <ESC>.A  ISO-8859-1
     * <ESC>.F  ISO-8859-7
     * <ESC>(J  JISX-201
     * <ESC>(I  JISX-201
     * <ESC>$B  JISX-208
     * <ESC>$@  JISX-208
     * <ESC>$(D JISX-212
     * <ESC>$A  GB2312
     * <ESC>$(C KSC5601
     */
    /*
     * JP variant: charset state selected by the escape sequence found at the same
     * offset in escSeqStateTable_Key_2022[]; INVALID_STATE marks sequences that
     * this variant does not accept. Ten entries per row.
     */
    private final static byte nextStateToUnicodeJP[/* MAX_STATES_2022 */] = {
        /*     0               1               2               3               4               5               6               7               8               9    */
        INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,      SS2_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,
                ASCII,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,        JISX201,    HWKANA_7BIT,        JISX201,  INVALID_STATE,
        INVALID_STATE,  INVALID_STATE,        JISX208,         GB2312,        JISX208,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,
            ISO8859_1,      ISO8859_7,        JISX208,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,        KSC5601,        JISX212,  INVALID_STATE,
        INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,
        INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,
        INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,
        INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE
    };
513
    /*
     * CN variant: charset state selected by the escape sequence found at the same
     * offset in escSeqStateTable_Key_2022[]; INVALID_STATE marks sequences that
     * this variant does not accept. Ten entries per row.
     */
    private final static byte nextStateToUnicodeCN[/* MAX_STATES_2022 */] = {
        /*     0               1               2               3               4               5               6               7               8               9    */
        INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,      SS2_STATE,      SS3_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,
        INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,
        INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,
        INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,
        INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,       GB2312_1,  INVALID_STATE,     ISO_IR_165,
          CNS_11643_1,    CNS_11643_2,    CNS_11643_3,    CNS_11643_4,    CNS_11643_5,    CNS_11643_6,    CNS_11643_7,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,
        INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,
        INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE
    };
525
526    /* runs through a state machine to determine the escape sequence - codepage correspondence */
527    @SuppressWarnings("fallthrough")
528    private CoderResult changeState_2022(CharsetDecoderICU decoder, ByteBuffer source, int var) {
529        CoderResult err = CoderResult.UNDERFLOW;
530        boolean DONE = false;
531        byte value;
532        int key[] = {myConverterData.key};
533        int offset[] = {0};
534        int initialToULength = decoder.toULength;
535        byte c;
536        int malformLength = 0;
537
538        value = VALID_NON_TERMINAL_2022;
539        while (source.hasRemaining()) {
540            c = source.get();
541            malformLength++;
542            decoder.toUBytesArray[decoder.toULength++] = c;
543            value = getKey_2022(c, key, offset);
544
545            switch(value) {
546
547            case VALID_NON_TERMINAL_2022:
548                /* continue with the loop */
549                break;
550
551            case VALID_TERMINAL_2022:
552                key[0] = 0;
553                DONE = true;
554                break;
555
556            case INVALID_2022:
557                DONE = true;
558                break;
559
560            case VALID_MAYBE_TERMINAL_2022:
561                /* not ISO_2022 itself, finish here */
562                value = VALID_TERMINAL_2022;
563                key[0] = 0;
564                DONE = true;
565                break;
566            }
567            if (DONE) {
568                break;
569            }
570        }
571// DONE:
572        myConverterData.key = key[0];
573
574        if (value == VALID_NON_TERMINAL_2022) {
575            /* indicate that the escape sequence is incomplete: key !=0 */
576            return err;
577        } else if (value == INVALID_2022) {
578            err = CoderResult.malformedForLength(malformLength);
579        } else /* value == VALID_TERMINAL_2022 */ {
580            switch (var) {
581            case ISO_2022_JP: {
582                byte tempState = nextStateToUnicodeJP[offset[0]];
583                switch (tempState) {
584                case INVALID_STATE:
585                    err = CoderResult.malformedForLength(malformLength);
586                    break;
587                case SS2_STATE:
588                    if (myConverterData.toU2022State.cs[2] != 0) {
589                        if (myConverterData.toU2022State.g < 2) {
590                            myConverterData.toU2022State.prevG = myConverterData.toU2022State.g;
591                        }
592                        myConverterData.toU2022State.g = 2;
593                    } else {
594                        /* illegal to have SS2 before a matching designator */
595                        err = CoderResult.malformedForLength(malformLength);
596                    }
597                    break;
598                /* case SS3_STATE: not used in ISO-2022-JP-x */
599                case ISO8859_1:
600                case ISO8859_7:
601                    if ((jpCharsetMasks[myConverterData.version] & CSM(tempState)) == 0) {
602                        err = CoderResult.unmappableForLength(malformLength);
603                    } else {
604                        /* G2 charset for SS2 */
605                        myConverterData.toU2022State.cs[2] = tempState;
606                    }
607                    break;
608                default:
609                    if ((jpCharsetMasks[myConverterData.version] & CSM(tempState)) == 0) {
610                        err = CoderResult.unmappableForLength(source.position() - 1);
611                    } else {
612                        /* G0 charset */
613                        myConverterData.toU2022State.cs[0] = tempState;
614                    }
615                    break;
616                } // end of switch
617                break;
618            }
619            case ISO_2022_CN: {
620                byte tempState = nextStateToUnicodeCN[offset[0]];
621                switch (tempState) {
622                case INVALID_STATE:
623                    err = CoderResult.unmappableForLength(malformLength);
624                    break;
625                case SS2_STATE:
626                    if (myConverterData.toU2022State.cs[2] != 0) {
627                        if (myConverterData.toU2022State.g < 2) {
628                            myConverterData.toU2022State.prevG = myConverterData.toU2022State.g;
629                        }
630                        myConverterData.toU2022State.g = 2;
631                    } else {
632                        /* illegal to have SS2 before a matching designator */
633                        err = CoderResult.malformedForLength(malformLength);
634                    }
635                    break;
636                case SS3_STATE:
637                    if (myConverterData.toU2022State.cs[3] != 0) {
638                        if (myConverterData.toU2022State.g < 2) {
639                            myConverterData.toU2022State.prevG = myConverterData.toU2022State.g;
640                        }
641                        myConverterData.toU2022State.g = 3;
642                    } else {
643                        /* illegal to have SS3 before a matching designator */
644                        err = CoderResult.malformedForLength(malformLength);
645                    }
646                    break;
647                case ISO_IR_165:
648                    if (myConverterData.version == 0) {
649                        err = CoderResult.unmappableForLength(malformLength);
650                        break;
651                    }
652                    /* fall through */
653                case GB2312_1:
654                    /* fall through */
655                case CNS_11643_1:
656                    myConverterData.toU2022State.cs[1] = tempState;
657                    break;
658                case CNS_11643_2:
659                    myConverterData.toU2022State.cs[2] = tempState;
660                    break;
661                default:
662                    /* other CNS 11643 planes */
663                    if (myConverterData.version == 0) {
664                        err = CoderResult.unmappableForLength(source.position() - 1);
665                    } else {
666                        myConverterData.toU2022State.cs[3] = tempState;
667                    }
668                    break;
669                } //end of switch
670            }
671            break;
672            case ISO_2022_KR:
673                if (offset[0] == 0x30) {
674                    /* nothing to be done, just accept this one escape sequence */
675                } else {
676                    err = CoderResult.unmappableForLength(malformLength);
677                }
678                break;
679            default:
680                err = CoderResult.malformedForLength(malformLength);
681                break;
682            } // end of switch
683        }
684        if (!err.isError()) {
685            decoder.toULength = 0;
686        } else if (err.isMalformed()) {
687            if (decoder.toULength > 1) {
688                /*
689                 * Ticket 5691: consistent illegal sequences:
690                 * - We include at least the first byte (ESC) in the illegal sequence.
691                 * - If any of the non-initial bytes could be the start of a character,
692                 *   we stop the illegal sequence before the first one of those.
693                 *   In escape sequences, all following bytes are "printable", that is,
694                 *   unless they are completely illegal (>7f in SBCS, outside 21..7e in DBCS),
695                 *   they are valid single/lead bytes.
696                 *   For simplicity, we always only report the initial ESC byte as the
697                 *   illegal sequence and back out all other bytes we looked at.
698                 */
699                /* Back out some bytes. */
700                int backOutDistance = decoder.toULength - 1;
701                int bytesFromThisBuffer = decoder.toULength - initialToULength;
702                if (backOutDistance <= bytesFromThisBuffer) {
703                    /* same as initialToULength<=1 */
704                    source.position(source.position() - backOutDistance);
705                } else {
706                    /* Back out bytes from the previous buffer: Need to replay them. */
707                    decoder.preToULength = (byte)(bytesFromThisBuffer - backOutDistance);
708                    /* same as -(initialToULength-1) */
709                    /* preToULength is negative! */
710                    for (int i = 0; i < -(decoder.preToULength); i++) {
711                        decoder.preToUArray[i] = decoder.toUBytesArray[i+1];
712                    }
713                    source.position(source.position() - bytesFromThisBuffer);
714                }
715                decoder.toULength = 1;
716            }
717        }
718
719        return err;
720    }
721
722    private static byte getKey_2022(byte c, int[]key, int[]offset) {
723        int togo;
724        int low = 0;
725        int hi = MAX_STATES_2022;
726        int oldmid = 0;
727
728        togo = normalize_esq_chars_2022[c&UConverterConstants.UNSIGNED_BYTE_MASK];
729
730        if (togo == 0) {
731            /* not a valid character anywhere in an escape sequence */
732            key[0] = 0;
733            offset[0] = 0;
734            return INVALID_2022;
735        }
736        togo = (key[0] << 5) + togo;
737
738        while (hi != low) { /* binary search */
739            int mid = (hi+low) >> 1; /* Finds median */
740
741            if (mid == oldmid) {
742                break;
743            }
744
745            if (escSeqStateTable_Key_2022[mid] > togo) {
746                hi = mid;
747            } else if (escSeqStateTable_Key_2022[mid] < togo) {
748                low = mid;
749            } else /* we found it */ {
750                key[0] = togo;
751                offset[0] = mid;
752                return escSeqStateTable_Value_2022[mid];
753            }
754            oldmid = mid;
755        }
756        return INVALID_2022;
757    }
758
759    /*
760     * To Unicode Callback helper function
761     */
762    private static CoderResult toUnicodeCallback(CharsetDecoderICU cnv, int sourceChar, int targetUniChar) {
763        CoderResult err = CoderResult.UNDERFLOW;
764        if (sourceChar > 0xff) {
765            cnv.toUBytesArray[0] = (byte)(sourceChar>>8);
766            cnv.toUBytesArray[1] = (byte)sourceChar;
767            cnv.toULength = 2;
768        } else {
769            cnv.toUBytesArray[0] = (byte)sourceChar;
770            cnv.toULength = 1;
771        }
772
773        if (targetUniChar == (UConverterConstants.missingCharMarker-1/* 0xfffe */)) {
774            err = CoderResult.unmappableForLength(1);
775        } else {
776            err = CoderResult.malformedForLength(1);
777        }
778
779        return err;
780    }
781
782    /****************************ISO-2022-JP************************************/
783    private class CharsetDecoderISO2022JP extends CharsetDecoderICU {
784        public CharsetDecoderISO2022JP(CharsetICU cs) {
            /* Nothing decoder-specific is set up here: the charset is only handed to the CharsetDecoderICU constructor. */
785            super(cs);
786        }
787
788        @Override
789        protected void implReset() {
            /*
             * Reset the base decoder first, then the ISO-2022 converter data
             * (myConverterData, a field of the enclosing CharsetISO2022).
             */
790            super.implReset();
791            myConverterData.reset();
792        }
793        /*
794         * Map 00..7F to Unicode according to JIS X 0201.
795         * */
796        private int jisx201ToU(int value) {
797            if (value < 0x5c) {
798                return value;
799            } else if (value == 0x5c) {
800                return 0xa5;
801            } else if (value == 0x7e) {
802                return 0x203e;
803            } else { /* value <= 0x7f */
804                return value;
805            }
806        }
807        /*
808         * Convert a pair of JIS X 208 21..7E bytes to Shift-JIS.
809         * If either byte is outside 21..7E make sure that the result is not valid
810         * for Shift-JIS so that the converter catches it.
811         * Some invalid byte values already turn into equally invalid Shift-JIS
812         * byte values and need not be tested explicitly.
813         */
814        private void _2022ToSJIS(char c1, char c2, byte []bytes) {
815            if ((c1&1) > 0) {
816                ++c1;
817                if (c2 <= 0x5f) {
818                    c2 += 0x1f;
819                } else if (c2 <= 0x7e) {
820                    c2 += 0x20;
821                } else {
822                    c2 = 0; /* invalid */
823                }
824            } else {
825                if ((c2 >= 0x21) && (c2 <= 0x7e)) {
826                    c2 += 0x7e;
827                } else {
828                    c2 = 0; /* invalid */
829                }
830            }
831
832            c1 >>=1;
833            if (c1 <= 0x2f) {
834                c1 += 0x70;
835            } else if (c1 <= 0x3f) {
836                c1 += 0xb0;
837            } else {
838                c1 = 0; /* invalid */
839            }
840            bytes[0] = (byte)(UConverterConstants.UNSIGNED_BYTE_MASK & c1);
841            bytes[1] = (byte)(UConverterConstants.UNSIGNED_BYTE_MASK & c2);
842        }
843
844        @Override
845        @SuppressWarnings("fallthrough")
846        protected CoderResult decodeLoop(ByteBuffer source, CharBuffer target, IntBuffer offsets, boolean flush) {
            /*
             * Decodes bytes from source into UTF-16 code units in target for this ISO-2022-JP decoder.
             *
             * The commented-out "escape" and "getTrailByte" labels below are emulated with the
             * gotoEscape and gotoGetTrail flags: a flag is set and the value that drives the
             * switch (mySourceCharTemp / csTemp) is forced so that control reaches the labelled code.
             *
             * source  - input bytes; bytes are consumed with source.get(), and the position is
             *           stepped back by one before an ESC is handed to changeState_2022().
             * target  - output chars; a low surrogate that does not fit is stored in
             *           charErrorBufferArray instead.
             * offsets - may be null; when non-null it is written with offsets.put(...) for every
             *           char stored in target.
             * flush   - not read anywhere in this method.
             *
             * Returns err, which starts as UNDERFLOW, becomes OVERFLOW when target has no room for
             * the next unit, or is the error result produced by changeState_2022(), by the
             * empty-segment check, or by toUnicodeCallback(). When a lead byte has no trail byte
             * available yet, the lead byte is parked in toUBytesArray[0] (toULength = 1) and err is
             * returned as it stands.
             */
847            boolean gotoGetTrail = false;
848            boolean gotoEscape = false;
849            CoderResult err = CoderResult.UNDERFLOW;
850            byte []tempBuf = new byte[2];
851            int targetUniChar = 0x0000;
852            int mySourceChar = 0x0000;
853            int mySourceCharTemp = 0x0000; // use for getTrail label call.
854            byte cs; /* StateEnum */
855            byte csTemp= 0; // use for getTrail label call.
856
857            if (myConverterData.key != 0) {
858                /* continue with a partial escape sequence */
859                // goto escape;
860                gotoEscape = true;
861            } else if (toULength == 1 && source.hasRemaining() && target.hasRemaining()) {
862                /* continue with a partial double-byte character */
863                mySourceChar = (toUBytesArray[0] & UConverterConstants.UNSIGNED_BYTE_MASK);
864                toULength = 0;
865                cs = myConverterData.toU2022State.cs[myConverterData.toU2022State.g];
866                // goto getTrailByte;
                // NOTE(review): 0x99 is presumably chosen because it matches none of the outer
                // switch's case labels, so control lands in "default" - confirm against the constants.
867                mySourceCharTemp = 0x99;
868                gotoGetTrail = true;
869            }
870
871            while (source.hasRemaining() || gotoEscape || gotoGetTrail) {
872                // This code is here for the goto escape label call above.
873                if (gotoEscape) {
874                    mySourceCharTemp = ESC_2022;
875                }
876
877                targetUniChar = UConverterConstants.missingCharMarker;
878
879                if (gotoEscape || gotoGetTrail || target.hasRemaining()) {
                    /* a fresh byte is only read when we are not resuming at one of the emulated labels */
880                    if (!gotoEscape && !gotoGetTrail) {
881                        mySourceChar = source.get() & UConverterConstants.UNSIGNED_BYTE_MASK;
882                        mySourceCharTemp = mySourceChar;
883                    }
884
885                    switch (mySourceCharTemp) {
886                    case UConverterConstants.SI:
887                        if (myConverterData.version == 3) {
888                            myConverterData.toU2022State.g = 0;
889                            continue;
890                        } else {
891                            /* only JIS7 uses SI/SO, not ISO-2022-JP-x */
892                            myConverterData.isEmptySegment = false;
893                            break;
894                        }
895
896                    case UConverterConstants.SO:
897                        if (myConverterData.version == 3) {
898                            /* JIS7: switch to G1 half-width Katakana */
899                            myConverterData.toU2022State.cs[1] = HWKANA_7BIT;
900                            myConverterData.toU2022State.g = 1;
901                            continue;
902                        } else {
903                            /* only JIS7 uses SI/SO, not ISO-2022-JP-x */
904                            myConverterData.isEmptySegment = false; /* reset this, we have a different error */
905                            break;
906                        }
907
908                    case ESC_2022:
                        /* un-read the ESC just consumed so that changeState_2022() sees it; nothing was read when resuming */
909                        if (!gotoEscape) {
910                            source.position(source.position() - 1);
911                        } else {
912                            gotoEscape = false;
913                        }
914// escape:
915                        {
916                            int mySourceBefore = source.position();
917                            int toULengthBefore = this.toULength;
918
919                            err = changeState_2022(this, source, variant);
920
921                            /* If in ISO-2022-JP only and we successfully completed an escape sequence, but previous segment was empty, create an error */
922                            if(myConverterData.version == 0 && myConverterData.key == 0 && !err.isError() && myConverterData.isEmptySegment) {
923                                err = CoderResult.malformedForLength(source.position() - mySourceBefore);
924                                this.toULength = toULengthBefore + (source.position() - mySourceBefore);
925                            }
926                        }
927
928                        /* invalid or illegal escape sequence */
929                        if(err.isError()){
930                            myConverterData.isEmptySegment = false; /* Reset to avoid future spurious errors */
931                            return err;
932                        }
933                        /* If we successfully completed an escape sequence, we begin a new segment, empty so far */
934                        if(myConverterData.key == 0) {
935                            myConverterData.isEmptySegment = true;
936                        }
937
938                        continue;
939                    /* ISO-2022-JP does not use single-byte (C1) SS2 and SS3 */
940                    case CR:
941                        /* falls through */
942                    case LF:
943                        /* automatically reset to single-byte mode */
944                        if (myConverterData.toU2022State.cs[0] != ASCII && myConverterData.toU2022State.cs[0] != JISX201) {
945                            myConverterData.toU2022State.cs[0] = ASCII;
946                        }
947                        myConverterData.toU2022State.cs[2] = 0;
948                        myConverterData.toU2022State.g = 0;
949                        /* falls through */
950                    default :
951                        /* convert one or two bytes */
952                        myConverterData.isEmptySegment = false;
953                        cs = myConverterData.toU2022State.cs[myConverterData.toU2022State.g];
954                        csTemp = cs;
955                        if (gotoGetTrail) {
                            // NOTE(review): presumably a value that matches no charset case of the
                            // inner switch, so a resumed trail byte always reaches its "default" - confirm.
956                            csTemp = (byte)0x99;
957                        }
958                        if (!gotoGetTrail && ((mySourceChar >= 0xa1) && (mySourceChar <= 0xdf) && myConverterData.version == 4 && !IS_JP_DBCS(cs))) {
959                            /* 8-bit halfwidth katakana in any single-byte mode for JIS8 */
960                            targetUniChar = mySourceChar + (HWKANA_START - 0xa1);
961
962                            /* return from a single-shift state to the previous one */
963                            if (myConverterData.toU2022State.g >= 2) {
964                                myConverterData.toU2022State.g = myConverterData.toU2022State.prevG;
965                            }
966                        } else {
967                            switch(csTemp) {
968                            case ASCII:
969                                if (mySourceChar <= 0x7f) {
970                                    targetUniChar = mySourceChar;
971                                }
972                                break;
973                            case ISO8859_1:
974                                if (mySourceChar <= 0x7f) {
975                                    targetUniChar = mySourceChar + 0x80;
976                                }
977                                /* return from a single-shift state to the previous one */
978                                myConverterData.toU2022State.g = myConverterData.toU2022State.prevG;
979                                break;
980                            case ISO8859_7:
981                                if (mySourceChar <= 0x7f) {
982                                    /* convert mySourceChar+0x80 to use a normal 8-bit table */
983                                    targetUniChar = CharsetMBCS.MBCS_SINGLE_SIMPLE_GET_NEXT_BMP(myConverterData.myConverterArray[cs].mbcs,
984                                            mySourceChar+0x80);
985                                }
986                                /* return from a single-shift state to the previous one */
987                                myConverterData.toU2022State.g = myConverterData.toU2022State.prevG;
988                                break;
989                            case JISX201:
990                                if (mySourceChar <= 0x7f) {
991                                    targetUniChar = jisx201ToU(mySourceChar);
992                                }
993                                break;
994                            case HWKANA_7BIT:
995                                if ((mySourceChar >= 0x21) && (mySourceChar <= 0x5f)) {
996                                    /* 7-bit halfwidth Katakana */
997                                    targetUniChar = mySourceChar + (HWKANA_START - 0x21);
998                                    break;
999                                }
                                /* a byte outside 21..5f falls through into the DBCS handling below */
1000                            default :
1001                                /* G0 DBCS */
1002                                if (gotoGetTrail || source.hasRemaining()) {
1003// getTrailByte:
1004                                    int tmpSourceChar;
1005                                    gotoGetTrail = false;
1006                                    short trailByte;
1007                                    boolean leadIsOk, trailIsOk;
1008
                                    /* absolute get: peek at the trail byte without consuming it yet */
1009                                    trailByte = (short)(source.get(source.position()) & UConverterConstants.UNSIGNED_BYTE_MASK);
1010                                    /*
1011                                     * Ticket 5691: consistent illegal sequences:
1012                                     * - We include at least the first byte in the illegal sequence.
1013                                     * - If any of the non-initial bytes could be the start of a character,
1014                                     *   we stop the illegal sequence before the first one of those.
1015                                     *
1016                                     * In ISO-2022 DBCS, if the second byte is in the 21..7e range or is
1017                                     * an ESC/SO/SI, we report only the first byte as the illegal sequence.
1018                                     * Otherwise we convert or report the pair of bytes.
1019                                     */
1020                                    leadIsOk = (short)(UConverterConstants.UNSIGNED_BYTE_MASK & (mySourceChar - 0x21)) <= (0x7e - 0x21);
1021                                    trailIsOk = (short)(UConverterConstants.UNSIGNED_BYTE_MASK & (trailByte - 0x21)) <= (0x7e - 0x21);
1022                                    if (leadIsOk && trailIsOk) {
1023                                        source.get();
1024                                        tmpSourceChar = (mySourceChar << 8) | trailByte;
1025                                        if (cs == JISX208) {
1026                                            _2022ToSJIS((char)mySourceChar, (char)trailByte, tempBuf);
1027                                            mySourceChar = tmpSourceChar;
1028                                        } else {
1029                                            /* Copy before we modify tmpSourceChar so toUnicodeCallback() sees the correct bytes. */
1030                                            mySourceChar = tmpSourceChar;
1031                                            if (cs == KSC5601) {
1032                                                tmpSourceChar += 0x8080; /* = _2022ToGR94DBCS(tmpSourceChar) */
1033                                            }
1034                                            tempBuf[0] = (byte)(UConverterConstants.UNSIGNED_BYTE_MASK & (tmpSourceChar >> 8));
1035                                            tempBuf[1] = (byte)(UConverterConstants.UNSIGNED_BYTE_MASK & tmpSourceChar);
1036                                        }
1037                                        targetUniChar = MBCSSimpleGetNextUChar(myConverterData.myConverterArray[cs], ByteBuffer.wrap(tempBuf), false);
1038                                    } else if (!(trailIsOk || IS_2022_CONTROL(trailByte))) {
1039                                        /* report a pair of illegal bytes if the second byte is not a DBCS starter */
1040                                        source.get();
1041                                        /* add another bit so that the code below writes 2 bytes in case of error */
1042                                        mySourceChar = 0x10000 | (mySourceChar << 8) | trailByte;
1043                                    }
1044                                } else {
                                    /* no trail byte available yet: park the lead byte until the next call */
1045                                    toUBytesArray[0] = (byte)mySourceChar;
1046                                    toULength = 1;
1047                                    // goto endloop
1048                                    return err;
1049                                }
1050                            } /* end of inner switch */
1051                        }
1052                        break;
1053                    } /* end of outer switch */
1054
1055                    if (targetUniChar < (UConverterConstants.missingCharMarker-1/*0xfffe*/)) {
1056                        if (offsets != null) {
1057                            offsets.put(target.remaining(), source.remaining() - (mySourceChar <= 0xff ? 1 : 2));
1058                        }
1059                        target.put((char)targetUniChar);
1060                    } else if (targetUniChar > UConverterConstants.missingCharMarker) {
1061                        /* disassemble the surrogate pair and write to output */
1062                        targetUniChar -= 0x0010000;
                        /*
                         * The char is written first, then the position is stepped back so that
                         * target.remaining() in the offsets.put() call is evaluated as it was before
                         * the write; target.get() afterwards moves past the char again.
                         */
1063                        target.put((char)(0xd800 + (char)(targetUniChar>>10)));
1064                        target.position(target.position()-1);
1065                        if (offsets != null) {
1066                            offsets.put(target.remaining(), source.remaining() - (mySourceChar <= 0xff ? 1 : 2));
1067                        }
1068                        target.get();
1069                        if (target.hasRemaining()) {
1070                            target.put((char)(0xdc00+(char)(targetUniChar&0x3ff)));
1071                            target.position(target.position()-1);
1072                            if (offsets != null) {
1073                                offsets.put(target.remaining(), source.remaining() - (mySourceChar <= 0xff ? 1 : 2));
1074                            }
1075                            target.get();
1076                        } else {
                            /* no room left for the low surrogate: keep it in the char error buffer */
1077                            charErrorBufferArray[charErrorBufferLength++] =
1078                                (char)(0xdc00+(char)(targetUniChar&0x3ff));
1079                        }
1080                    } else {
1081                        /* Call the callback function */
1082                        err = toUnicodeCallback(this, mySourceChar, targetUniChar);
1083                        break;
1084                    }
1085                } else { /* goes with "if (target.hasRemaining())" way up near the top of the function */
1086                    err = CoderResult.OVERFLOW;
1087                    break;
1088                }
1089            }
1090//endloop:
1091            return err;
1092        }
1093    } // end of class CharsetDecoderISO2022JP
1094
1095    /****************************ISO-2022-CN************************************/
1096    private class CharsetDecoderISO2022CN extends CharsetDecoderICU {
1097        public CharsetDecoderISO2022CN(CharsetICU cs) {
            /* Nothing decoder-specific is set up here: the charset is only handed to the CharsetDecoderICU constructor. */
1098            super(cs);
1099        }
1100
1101        @Override
1102        protected void implReset() {
            /*
             * Reset the base decoder first, then the ISO-2022 converter data
             * (myConverterData, a field of the enclosing CharsetISO2022).
             */
1103            super.implReset();
1104            myConverterData.reset();
1105        }
1106
1107        @Override
1108        @SuppressWarnings("fallthrough")
1109        protected CoderResult decodeLoop(ByteBuffer source, CharBuffer target, IntBuffer offsets, boolean flush) {
1110            CoderResult err = CoderResult.UNDERFLOW;
1111            byte[] tempBuf = new byte[3];
1112            int targetUniChar = 0x0000;
1113            int mySourceChar = 0x0000;
1114            int mySourceCharTemp = 0x0000;
1115            boolean gotoEscape = false;
1116            boolean gotoGetTrailByte = false;
1117
1118            if (myConverterData.key != 0) {
1119                /* continue with a partial escape sequence */
1120                // goto escape;
1121                gotoEscape = true;
1122            } else if (toULength == 1 && source.hasRemaining() && target.hasRemaining()) {
1123                /* continue with a partial double-byte character */
1124                mySourceChar = (toUBytesArray[0] & UConverterConstants.UNSIGNED_BYTE_MASK);
1125                toULength = 0;
1126                targetUniChar = UConverterConstants.missingCharMarker;
1127                // goto getTrailByte
1128                gotoGetTrailByte = true;
1129            }
1130
1131            while (source.hasRemaining() || gotoGetTrailByte || gotoEscape) {
1132                targetUniChar = UConverterConstants.missingCharMarker;
1133
1134                if (target.hasRemaining() || gotoEscape) {
1135                    if (gotoEscape) {
1136                        mySourceChar = ESC_2022; // goto escape label
1137                        mySourceCharTemp = mySourceChar;
1138                    } else if (gotoGetTrailByte) {
1139                        mySourceCharTemp = 0xff; // goto getTrailByte; set mySourceCharTemp to go to default
1140                    } else {
1141                        mySourceChar = UConverterConstants.UNSIGNED_BYTE_MASK & source.get();
1142                        mySourceCharTemp = mySourceChar;
1143                    }
1144
1145                    switch (mySourceCharTemp) {
1146                    case UConverterConstants.SI:
1147                        myConverterData.toU2022State.g = 0;
1148                        if (myConverterData.isEmptySegment) {
1149                            myConverterData.isEmptySegment = false; /* we are handling it, reset to avoid future spurious errors */
1150                            err = CoderResult.malformedForLength(1);
1151                            this.toUBytesArray[0] = (byte)mySourceChar;
1152                            this.toULength = 1;
1153                            return err;
1154                        }
1155                        continue;
1156
1157                    case UConverterConstants.SO:
1158                        if (myConverterData.toU2022State.cs[1] != 0) {
1159                            myConverterData.toU2022State.g = 1;
1160                            myConverterData.isEmptySegment = true;  /* Begin a new segment, empty so far */
1161                            continue;
1162                        } else {
1163                            /* illegal to have SO before a matching designator */
1164                            myConverterData.isEmptySegment = false; /* Handling a different error, reset this to avoid future spurious errs */
1165                            break;
1166                        }
1167
1168                    case ESC_2022:
1169                        if (!gotoEscape) {
1170                            source.position(source.position()-1);
1171                        }
1172// escape label
1173                        gotoEscape = false;
1174                        {
1175                            int mySourceBefore = source.position();
1176                            int toULengthBefore = this.toULength;
1177
1178                            err = changeState_2022(this, source, ISO_2022_CN);
1179
1180                            /* After SO there must be at least one character before a designator (designator error handled separately) */
1181                            if(myConverterData.key == 0 && !err.isError() && myConverterData.isEmptySegment) {
1182                                err = CoderResult.malformedForLength(source.position() - mySourceBefore);
1183                                this.toULength = toULengthBefore + (source.position() - mySourceBefore);
1184                            }
1185                        }
1186
1187                        /* invalid or illegal escape sequence */
1188                        if(err.isError()){
1189                            myConverterData.isEmptySegment = false; /* Reset to avoid future spurious errors */
1190                            return err;
1191                        }
1192                        continue;
1193
1194                    /*ISO-2022-CN does not use single-byte (C1) SS2 and SS3 */
1195                    case CR:
1196                        /* falls through */
1197                    case LF:
1198                        myConverterData.toU2022State.reset();
1199                        /* falls through */
1200                    default:
1201                        /* convert one or two bytes */
1202                        myConverterData.isEmptySegment = false;
1203                        if (myConverterData.toU2022State.g != 0 || gotoGetTrailByte) {
1204                            if (source.hasRemaining() || gotoGetTrailByte) {
1205                                UConverterSharedData cnv;
1206                                byte tempState;
1207                                int tempBufLen;
1208                                boolean leadIsOk, trailIsOk;
1209                                short trailByte;
1210// getTrailByte: label
1211                                gotoGetTrailByte = false; // reset gotoGetTrailByte
1212
1213                                trailByte = (short)(source.get(source.position()) & UConverterConstants.UNSIGNED_BYTE_MASK);
1214                                /*
1215                                 * Ticket 5691: consistent illegal sequences:
1216                                 * - We include at least the first byte in the illegal sequence.
1217                                 * - If any of the non-initial bytes could be the start of a character,
1218                                 *   we stop the illegal sequence before the first one of those.
1219                                 *
                                 * In ISO-2022 DBCS, if the second byte is in the 21..7e range or is
1221                                 * an ESC/SO/SI, we report only the first byte as the illegal sequence.
1222                                 * Otherwise we convert or report the pair of bytes.
1223                                 */
1224                                leadIsOk = (short)(UConverterConstants.UNSIGNED_BYTE_MASK & (mySourceChar - 0x21)) <= (0x7e - 0x21);
1225                                trailIsOk = (short)(UConverterConstants.UNSIGNED_BYTE_MASK & (trailByte - 0x21)) <= (0x7e - 0x21);
1226                                if (leadIsOk && trailIsOk) {
1227                                    source.get();
1228                                    tempState = myConverterData.toU2022State.cs[myConverterData.toU2022State.g];
1229                                    if (tempState > CNS_11643_0) {
1230                                        cnv = myConverterData.myConverterArray[CNS_11643];
1231                                        tempBuf[0] = (byte)(0x80 + (tempState - CNS_11643_0));
1232                                        tempBuf[1] = (byte)mySourceChar;
1233                                        tempBuf[2] = (byte)trailByte;
1234                                        tempBufLen = 3;
1235                                    } else {
1236                                        cnv = myConverterData.myConverterArray[tempState];
1237                                        tempBuf[0] = (byte)mySourceChar;
1238                                        tempBuf[1] = (byte)trailByte;
1239                                        tempBufLen = 2;
1240                                    }
1241                                    ByteBuffer tempBuffer = ByteBuffer.wrap(tempBuf);
1242                                    tempBuffer.limit(tempBufLen);
1243                                    targetUniChar = MBCSSimpleGetNextUChar(cnv, tempBuffer, false);
1244                                    mySourceChar = (mySourceChar << 8) | trailByte;
1245
1246                                } else if (!(trailIsOk || IS_2022_CONTROL(trailByte))) {
1247                                    /* report a pair of illegal bytes if the second byte is not a DBCS starter */
1248                                    source.get();
1249                                    /* add another bit so that the code below writes 2 bytes in case of error */
1250                                    mySourceChar = 0x10000 | (mySourceChar << 8) | trailByte;
1251                                }
1252                                if (myConverterData.toU2022State.g >= 2) {
1253                                    /* return from a single-shift state to the previous one */
1254                                    myConverterData.toU2022State.g = myConverterData.toU2022State.prevG;
1255                                }
1256                            } else {
1257                                toUBytesArray[0] = (byte)mySourceChar;
1258                                toULength = 1;
1259                                // goto endloop;
1260                                return err;
1261                            }
1262                        } else {
1263                            if (mySourceChar <= 0x7f) {
1264                                targetUniChar = (char)mySourceChar;
1265                            }
1266                        }
1267                        break;
1268                    }
1269                    if ((UConverterConstants.UNSIGNED_INT_MASK&targetUniChar) < (UConverterConstants.UNSIGNED_INT_MASK&(UConverterConstants.missingCharMarker-1))) {
1270                        if (offsets != null) {
1271                            offsets.array()[target.position()] = source.remaining() - (mySourceChar <= 0xff ? 1 : 2);
1272                        }
1273                        target.put((char)targetUniChar);
1274                    } else if ((UConverterConstants.UNSIGNED_INT_MASK&targetUniChar) > (UConverterConstants.UNSIGNED_INT_MASK&(UConverterConstants.missingCharMarker))) {
1275                        /* disassemble the surrogate pair and write to output */
1276                        targetUniChar -= 0x0010000;
1277                        target.put((char)(0xd800+(char)(targetUniChar>>10)));
1278                        if (offsets != null) {
1279                            offsets.array()[target.position()-1] = source.position() - (mySourceChar <= 0xff ? 1 : 2);
1280                        }
1281                        if (target.hasRemaining()) {
1282                            target.put((char)(0xdc00+(char)(targetUniChar&0x3ff)));
1283                            if (offsets != null) {
1284                                offsets.array()[target.position()-1] = source.position() - (mySourceChar <= 0xff ? 1 : 2);
1285                            }
1286                        } else {
1287                            charErrorBufferArray[charErrorBufferLength++] = (char)(0xdc00+(char)(targetUniChar&0x3ff));
1288                        }
1289                    } else {
1290                        /* Call the callback function */
1291                        err = toUnicodeCallback(this, mySourceChar, targetUniChar);
1292                        break;
1293                    }
1294
1295                } else {
1296                    err = CoderResult.OVERFLOW;
1297                    break;
1298                }
1299            }
1300
1301            return err;
1302        }
1303
1304    }
1305    /************************ ISO-2022-KR ********************/
1306    private class CharsetDecoderISO2022KR extends CharsetDecoderICU {
1307        public CharsetDecoderISO2022KR(CharsetICU cs) {
1308            super(cs);
1309        }
1310
1311        @Override
1312        protected void implReset() {
1313            super.implReset();
1314            setInitialStateToUnicodeKR();
1315            myConverterData.reset();
1316        }
1317
1318        @Override
1319        protected CoderResult decodeLoop(ByteBuffer source, CharBuffer target, IntBuffer offsets, boolean flush) {
1320            CoderResult err = CoderResult.UNDERFLOW;
1321            int mySourceChar = 0x0000;
1322            int targetUniChar = 0x0000;
1323            byte[] tempBuf = new byte[2];
1324            boolean usingFallback;
1325            boolean gotoGetTrailByte = false;
1326            boolean gotoEscape = false;
1327
1328            if (myConverterData.version == 1) {
1329                return decodeLoopIBM(myConverterData.currentDecoder, source, target, offsets, flush);
1330            }
1331
1332            /* initialize state */
1333            usingFallback = isFallbackUsed();
1334
1335            if (myConverterData.key != 0) {
1336                /* continue with a partial escape sequence */
1337                gotoEscape = true;
1338            } else if (toULength == 1 && source.hasRemaining() && target.hasRemaining()) {
1339                /* continue with a partial double-byte character */
1340                mySourceChar = (toUBytesArray[0] & UConverterConstants.UNSIGNED_BYTE_MASK);
1341                toULength = 0;
1342                gotoGetTrailByte = true;
1343            }
1344
1345            while (source.hasRemaining() || gotoGetTrailByte || gotoEscape) {
1346                if (target.hasRemaining() || gotoGetTrailByte || gotoEscape) {
1347                    if (!gotoGetTrailByte && !gotoEscape) {
1348                        mySourceChar = (char)(source.get() & UConverterConstants.UNSIGNED_BYTE_MASK);
1349                    }
1350
1351                    if (!gotoGetTrailByte && !gotoEscape && mySourceChar == UConverterConstants.SI) {
1352                        myConverterData.toU2022State.g = 0;
1353                        if (myConverterData.isEmptySegment) {
1354                            myConverterData.isEmptySegment = false; /* we are handling it, reset to avoid future spurious errors */
1355                            err = CoderResult.malformedForLength(1);
1356                            this.toUBytesArray[0] = (byte)mySourceChar;
1357                            this.toULength = 1;
1358                            return err;
1359                        }
1360                        /* consume the source */
1361                        continue;
1362                    } else if (!gotoGetTrailByte && !gotoEscape && mySourceChar == UConverterConstants.SO) {
1363                        myConverterData.toU2022State.g = 1;
1364                        myConverterData.isEmptySegment = true;
1365                        /* consume the source */
1366                        continue;
1367                    } else if (!gotoGetTrailByte && (gotoEscape || mySourceChar == ESC_2022)) {
1368                        if (!gotoEscape) {
1369                            source.position(source.position()-1);
1370                        }
1371// escape label
1372                        gotoEscape = false; // reset gotoEscape flag
1373                        myConverterData.isEmptySegment = false; /* Any invalid ESC sequences will be detected separately, so just reset this */
1374                        err = changeState_2022(this, source, ISO_2022_KR);
1375                        if (err.isError()) {
1376                            return err;
1377                        }
1378                        continue;
1379                    }
1380                    myConverterData.isEmptySegment = false; /* Any invalid char errors will be detected separately, so just reset this */
1381                    if (myConverterData.toU2022State.g == 1 || gotoGetTrailByte) {
1382                        if (source.hasRemaining() || gotoGetTrailByte) {
1383                            boolean leadIsOk, trailIsOk;
1384                            short trailByte;
1385// getTrailByte label
1386                            gotoGetTrailByte = false; // reset gotoGetTrailByte flag
1387
1388                            trailByte = (short)(source.get(source.position()) & UConverterConstants.UNSIGNED_BYTE_MASK);
1389                            targetUniChar = UConverterConstants.missingCharMarker;
1390                            /*
1391                             * Ticket 5691: consistent illegal sequences:
1392                             * - We include at least the first byte in the illegal sequence.
1393                             * - If any of the non-initial bytes could be the start of a character,
1394                             *   we stop the illegal sequence before the first one of those.
1395                             *
1396                             * In ISO-2022 DBCS, if the second byte is in the 21..7e range or is
1397                             * an ESC/SO/SI, we report only the first byte as the illegal sequence.
1398                             * Otherwise we convert or report the pair of bytes.
1399                             */
1400                            leadIsOk = (short)(UConverterConstants.UNSIGNED_BYTE_MASK & (mySourceChar - 0x21)) <= (0x7e - 0x21);
1401                            trailIsOk = (short)(UConverterConstants.UNSIGNED_BYTE_MASK & (trailByte - 0x21)) <= (0x7e - 0x21);
1402                            if (leadIsOk && trailIsOk) {
1403                                source.get();
1404                                tempBuf[0] = (byte)(mySourceChar + 0x80);
1405                                tempBuf[1] = (byte)(trailByte + 0x80);
1406                                targetUniChar = MBCSSimpleGetNextUChar(myConverterData.currentConverter.sharedData, ByteBuffer.wrap(tempBuf), usingFallback);
1407                                mySourceChar = (char)((mySourceChar << 8) | trailByte);
1408                            } else if (!(trailIsOk || IS_2022_CONTROL(trailByte))) {
1409                                /* report a pair of illegal bytes if the second byte is not a DBCS starter */
1410                                source.get();
1411                                /* add another bit so that the code below writes 2 bytes in case of error */
1412                                mySourceChar = (char)(0x10000 | (mySourceChar << 8) | trailByte);
1413                            }
1414                        } else {
1415                            toUBytesArray[0] = (byte)mySourceChar;
1416                            toULength = 1;
1417                            break;
1418                        }
1419                    } else if (mySourceChar <= 0x7f) {
1420                        int savedSourceLimit = source.limit();
1421                        int savedSourcePosition = source.position();
1422                        source.limit(source.position());
1423                        source.position(source.position()-1);
1424                        targetUniChar = MBCSSimpleGetNextUChar(myConverterData.currentConverter.sharedData, source, usingFallback);
1425                        source.limit(savedSourceLimit);
1426                        source.position(savedSourcePosition);
1427                    } else {
1428                        targetUniChar = 0xffff;
1429                    }
1430                    if (targetUniChar < 0xfffe) {
1431                        target.put((char)targetUniChar);
1432                        if (offsets != null) {
1433                            offsets.array()[target.position()] = source.position() - (mySourceChar <= 0xff ? 1 : 2);
1434                        }
1435                    } else {
1436                        /* Call the callback function */
1437                        err = toUnicodeCallback(this, mySourceChar, targetUniChar);
1438                        break;
1439                    }
1440                } else {
1441                    err = CoderResult.OVERFLOW;
1442                    break;
1443                }
1444            }
1445
1446            return err;
1447        }
1448
1449        protected CoderResult decodeLoopIBM(CharsetDecoderMBCS cnv, ByteBuffer source, CharBuffer target, IntBuffer offsets, boolean flush) {
1450            CoderResult err = CoderResult.UNDERFLOW;
1451            int sourceStart;
1452            int sourceLimit;
1453            int argSource;
1454            int argTarget;
1455            boolean gotoEscape = false;
1456            int oldSourceLimit;
1457
1458            /* remember the original start of the input for offsets */
1459            sourceStart = argSource = source.position();
1460
1461            if (myConverterData.key != 0) {
1462                /* continue with a partial escape sequence */
1463                gotoEscape = true;
1464            }
1465
1466            while (gotoEscape || (!err.isError() && source.hasRemaining())) {
1467                if (!gotoEscape) {
1468                    /* Find the end of the buffer e.g : Next Escape Seq | end of Buffer */
1469                    int oldSourcePos = source.position();
1470                    sourceLimit = getEndOfBuffer_2022(source);
1471                    source.position(oldSourcePos);
1472                    if (source.position() != sourceLimit) {
1473                        /*
1474                         * get the current partial byte sequence
1475                         *
1476                         * it needs to be moved between the public and the subconverter
1477                         * so that the conversion frameword, which only sees the public
1478                         * converter, can handle truncated and illegal input etc.
1479                         */
1480                        if (toULength > 0) {
1481                            cnv.toUBytesArray = toUBytesArray.clone();
1482                        }
1483                        cnv.toULength = toULength;
1484
1485                        /*
1486                         * Convert up to the end of the input, or to before the next escape character.
1487                         * Does not handle conversion extensions because the preToU[] state etc.
1488                         * is not copied.
1489                         */
1490                        argTarget = target.position();
1491                        oldSourceLimit = source.limit(); // save the old source limit change to new one
1492                        source.limit(sourceLimit);
1493                        err = myConverterData.currentDecoder.cnvMBCSToUnicodeWithOffsets(source, target, offsets, flush);
1494                        source.limit(oldSourceLimit); // restore source limit;
1495                        if (offsets != null && sourceStart != argSource) {
1496                            /* update offsets to base them on the actual start of the input */
1497                            int delta = argSource - sourceStart;
1498                            while (argTarget < target.position()) {
1499                                int currentOffset = offsets.get();
1500                                offsets.position(offsets.position()-1);
1501                                if (currentOffset >= 0) {
1502                                    offsets.put(currentOffset + delta);
1503                                    offsets.position(offsets.position()-1);
1504                                }
1505                                offsets.get();
1506                                target.get();
1507                            }
1508                        }
1509                        argSource = source.position();
1510
1511                        /* copy input/error/overflow buffers */
1512                        if (cnv.toULength > 0) {
1513                            toUBytesArray = cnv.toUBytesArray.clone();
1514                        }
1515                        toULength = cnv.toULength;
1516
1517                        if (err.isOverflow()) {
1518                            if (cnv.charErrorBufferLength > 0) {
1519                                charErrorBufferArray = cnv.charErrorBufferArray.clone();
1520                            }
1521                            charErrorBufferLength = cnv.charErrorBufferLength;
1522                            cnv.charErrorBufferLength = 0;
1523                        }
1524                    }
1525
1526                    if (err.isError() || err.isOverflow() || (source.position() == source.limit())) {
1527                        return err;
1528                    }
1529                }
1530// escape label
1531                gotoEscape = false;
1532                err = changeState_2022(this, source, ISO_2022_KR);
1533            }
1534            return err;
1535        }
1536    }
1537
1538    /******************** from unicode **********************/
1539    /* preference order of JP charsets */
1540    private final static byte []jpCharsetPref = {
1541        ASCII,
1542        JISX201,
1543        ISO8859_1,
1544        JISX208,
1545        ISO8859_7,
1546        JISX212,
1547        GB2312,
1548        KSC5601,
1549        HWKANA_7BIT
1550    };
1551    /*
1552     * The escape sequences must be in order of the enum constants like JISX201 = 3,
1553     * not in order of jpCharsetPref[]!
1554     */
1555    private final static byte [][]escSeqChars = {
1556            { 0x1B, 0x28, 0x42},        /* <ESC>(B  ASCII       */
1557            { 0x1B, 0x2E, 0x41},        /* <ESC>.A  ISO-8859-1  */
1558            { 0x1B, 0x2E, 0x46},        /* <ESC>.F  ISO-8859-7  */
1559            { 0x1B, 0x28, 0x4A},        /* <ESC>(J  JISX-201    */
1560            { 0x1B, 0x24, 0x42},        /* <ESC>$B  JISX-208    */
1561            { 0x1B, 0x24, 0x28, 0x44},  /* <ESC>$(D JISX-212    */
1562            { 0x1B, 0x24, 0x41},        /* <ESC>$A  GB2312      */
1563            { 0x1B, 0x24, 0x28, 0x43},  /* <ESC>$(C KSC5601     */
1564            { 0x1B, 0x28, 0x49}         /* <ESC>(I  HWKANA_7BIT */
1565    };
1566    /*
1567     * JIS X 0208 has fallbacks from Unicode half-width Katakana to full-width (DBCS)
1568     * Katakana.
1569     * Now that we use a Shift-JIS table for JIS X 0208 we need to hardcode these fallbacks
1570     * because Shift-JIS roundtrips half-width Katakana to single bytes.
1571     * These were the only fallbacks in ICU's jisx-208.ucm file.
1572     */
1573    private final static char []hwkana_fb = {
1574        0x2123,  /* U+FF61 */
1575        0x2156,
1576        0x2157,
1577        0x2122,
1578        0x2126,
1579        0x2572,
1580        0x2521,
1581        0x2523,
1582        0x2525,
1583        0x2527,
1584        0x2529,
1585        0x2563,
1586        0x2565,
1587        0x2567,
1588        0x2543,
1589        0x213C,  /* U+FF70 */
1590        0x2522,
1591        0x2524,
1592        0x2526,
1593        0x2528,
1594        0x252A,
1595        0x252B,
1596        0x252D,
1597        0x252F,
1598        0x2531,
1599        0x2533,
1600        0x2535,
1601        0x2537,
1602        0x2539,
1603        0x253B,
1604        0x253D,
1605        0x253F,  /* U+FF80 */
1606        0x2541,
1607        0x2544,
1608        0x2546,
1609        0x2548,
1610        0x254A,
1611        0x254B,
1612        0x254C,
1613        0x254D,
1614        0x254E,
1615        0x254F,
1616        0x2552,
1617        0x2555,
1618        0x2558,
1619        0x255B,
1620        0x255E,
1621        0x255F,  /* U+FF90 */
1622        0x2560,
1623        0x2561,
1624        0x2562,
1625        0x2564,
1626        0x2566,
1627        0x2568,
1628        0x2569,
1629        0x256A,
1630        0x256B,
1631        0x256C,
1632        0x256D,
1633        0x256F,
1634        0x2573,
1635        0x212B,
1636        0x212C   /* U+FF9F */
1637    };
1638
1639    protected byte [][]fromUSubstitutionChar = new byte[][]{ { (byte)0x1A }, { (byte)0x2F, (byte)0x7E} };
1640    /****************************ISO-2022-JP************************************/
1641    private class CharsetEncoderISO2022JP extends CharsetEncoderICU {
1642        public CharsetEncoderISO2022JP(CharsetICU cs) {
1643            super(cs, fromUSubstitutionChar[0]);
1644        }
1645
1646        @Override
1647        protected void implReset() {
1648            super.implReset();
1649            myConverterData.reset();
1650        }
1651        /* Map Unicode to 00..7F according to JIS X 0201. Return U+FFFE if unmappable. */
1652        private int jisx201FromU(int value) {
1653            if (value <= 0x7f) {
1654                if (value != 0x5c && value != 0x7e) {
1655                    return value;
1656                }
1657            } else if (value == 0xa5) {
1658                return 0x5c;
1659            } else if (value == 0x203e) {
1660                return 0x7e;
1661            }
1662            return (int)(UConverterConstants.UNSIGNED_INT_MASK & 0xfffe);
1663        }
1664
1665        /*
1666         * Take a valid Shift-JIS byte pair, check that it is in the range corresponding
1667         * to JIS X 0208, and convert it to a pair of 21..7E bytes.
1668         * Return 0 if the byte pair is out of range.
1669         */
1670        private int _2022FromSJIS(int value) {
1671            short trail;
1672
1673            if (value > 0xEFFC) {
1674                return 0; /* beyond JIS X 0208 */
1675            }
1676
1677            trail = (short)(value & UConverterConstants.UNSIGNED_BYTE_MASK);
1678
1679            value &= 0xff00; /* lead byte */
1680            if (value <= 0x9f00) {
1681                value -= 0x7000;
1682            } else { /* 0xe000 <= value <= 0xef00 */
1683                value -= 0xb000;
1684            }
1685
1686            value <<= 1;
1687
1688            if (trail <= 0x9e) {
1689                value -= 0x100;
1690                if (trail <= 0x7e) {
1691                    value |= ((trail - 0x1f) & UConverterConstants.UNSIGNED_BYTE_MASK);
1692                } else {
1693                    value |= ((trail - 0x20) & UConverterConstants.UNSIGNED_BYTE_MASK);
1694                }
1695            } else { /* trail <= 0xfc */
1696                value |= ((trail - 0x7e) & UConverterConstants.UNSIGNED_BYTE_MASK);
1697            }
1698
1699            return value;
1700        }
1701        /* This overrides the cbFromUWriteSub method in CharsetEncoderICU */
1702        @Override
1703        CoderResult cbFromUWriteSub (CharsetEncoderICU encoder,
1704                CharBuffer source, ByteBuffer target, IntBuffer offsets){
1705                CoderResult err = CoderResult.UNDERFLOW;
1706                byte[] buffer = new byte[8];
1707                int i = 0;
1708                byte[] subchar;
1709                subchar = encoder.replacement();
1710
1711                byte cs;
1712                if (myConverterData.fromU2022State.g == 1) {
1713                    /* JIS7: switch from G1 to G0 */
1714                    myConverterData.fromU2022State.g = 0;
1715                    buffer[i++] = UConverterConstants.SI;
1716                }
1717                cs = myConverterData.fromU2022State.cs[0];
1718
1719                if (cs != ASCII && cs != JISX201) {
1720                    /* not in ASCII or JIS X 0201: switch to ASCII */
1721                    myConverterData.fromU2022State.cs[0] = ASCII;
1722                    buffer[i++] = 0x1B;
1723                    buffer[i++] = 0x28;
1724                    buffer[i++] = 0x42;
1725                }
1726
1727                buffer[i++] = subchar[0];
1728
1729                err = CharsetEncoderICU.fromUWriteBytes(this, buffer, 0, i, target, offsets, source.position() - 1);
1730
1731                return err;
1732            }
1733
1734        @Override
1735        protected CoderResult encodeLoop(CharBuffer source, ByteBuffer target, IntBuffer offsets, boolean flush) {
1736            CoderResult err = CoderResult.UNDERFLOW;
1737            int sourceChar;
1738            byte cs, g;
1739            int choiceCount;
1740            int len, outLen;
1741            byte[] choices = new byte[10];
1742            int targetValue = 0;
1743            boolean usingFallback;
1744            byte[] buffer = new byte[8];
1745            boolean getTrail = false; // use for getTrail label
1746            int oldSourcePos; // for proper error handling
1747
1748            choiceCount = 0;
1749
1750            /* check if the last codepoint of previous buffer was a lead surrogate */
1751            if ((sourceChar = fromUChar32) != 0 && target.hasRemaining()) {
1752                getTrail = true;
1753            }
1754
1755            while (getTrail || source.hasRemaining()) {
1756                if (getTrail || target.hasRemaining()) {
1757                    oldSourcePos = source.position();
1758                    if (!getTrail) { /* skip if going to getTrail label */
1759                        sourceChar = source.get();
1760                    }
1761                    /* check if the char is a First surrogate */
1762                    if (getTrail || UTF16.isSurrogate((char)sourceChar)) {
1763                        if (getTrail || UTF16.isLeadSurrogate((char)sourceChar)) {
1764// getTrail:
1765                            if (getTrail) {
1766                                getTrail = false;
1767                            }
1768                            /* look ahead to find the trail surrogate */
1769                            if (source.hasRemaining()) {
1770                                /* test the following code unit */
1771                                char trail = source.get();
1772                                /* go back to the previous position */
1773                                source.position(source.position()-1);
1774                                if (UTF16.isTrailSurrogate(trail)) {
1775                                    source.get();
1776                                    sourceChar = UCharacter.getCodePoint((char)sourceChar, trail);
1777                                    fromUChar32 = 0x00;
1778                                    /* convert this supplementary code point */
1779                                    /* exit this condition tree */
1780                                } else {
1781                                    /* this is an unmatched lead code unit (1st surrogate) */
1782                                    /* callback(illegal) */
1783                                    err = CoderResult.malformedForLength(1);
1784                                    fromUChar32 = sourceChar;
1785                                    break;
1786                                }
1787                            } else {
1788                                /* no more input */
1789                                fromUChar32 = sourceChar;
1790                                break;
1791                            }
1792                        } else {
1793                            /* this is an unmatched trail code unit (2nd surrogate) */
1794                            /* callback(illegal) */
1795                            err = CoderResult.malformedForLength(1);
1796                            fromUChar32 = sourceChar;
1797                            break;
1798                        }
1799                    }
1800
1801                    /* do not convert SO/SI/ESC */
1802                    if (IS_2022_CONTROL(sourceChar)) {
1803                        /* callback(illegal) */
1804                        err = CoderResult.malformedForLength(1);
1805                        fromUChar32 = sourceChar;
1806                        break;
1807                    }
1808
1809                    /* do the conversion */
1810
1811                    if (choiceCount == 0) {
1812                        char csm;
1813                        /*
1814                         * The csm variable keeps track of which charsets are allowed
1815                         * and not used yet while building the choices[].
1816                         */
1817                        csm = (char)jpCharsetMasks[myConverterData.version];
1818                        choiceCount = 0;
1819
1820                        /* JIS7/8: try single-byte half-width Katakana before JISX208 */
1821                        if (myConverterData.version == 3 || myConverterData.version == 4) {
1822                            choices[choiceCount++] = HWKANA_7BIT;
1823                        }
                        /* Do not try single-byte half-width Katakana for other versions. */
1825                        csm &= ~CSM(HWKANA_7BIT);
1826
1827                        /* try the current G0 charset */
1828                        choices[choiceCount++] = cs = myConverterData.fromU2022State.cs[0];
1829                        csm &= ~CSM(cs);
1830
1831                        /* try the current G2 charset */
1832                        if ((cs = myConverterData.fromU2022State.cs[2]) != 0) {
1833                            choices[choiceCount++] = cs;
1834                            csm &= ~CSM(cs);
1835                        }
1836
1837                        /* try all the other charsets */
1838                        for (int i = 0; i < jpCharsetPref.length; i++) {
1839                            cs = jpCharsetPref[i];
1840                            if ((CSM(cs) & csm) != 0) {
1841                                choices[choiceCount++] = cs;
1842                                csm &= ~CSM(cs);
1843                            }
1844                        }
1845                    }
1846
1847                    cs = g = 0;
1848                    /*
1849                     * len==0:  no mapping found yet
1850                     * len<0:   found a fallback result:  continue looking for a roundtrip but no further fallbacks
1851                     * len>0:   found a roundtrip result, done
1852                     */
1853                    len = 0;
1854                    /*
1855                     * We will turn off usingFallBack after finding a fallback,
1856                     * but we still get fallbacks from PUA code points as usual.
1857                     * Therefore, we will also need to check that we don't overwrite
1858                     * an early fallback with a later one.
1859                     */
1860                    usingFallback = useFallback;
1861
1862                    for (int i = 0; i < choiceCount && len <= 0; i++) {
1863                        int[] value = new int[1];
1864                        int len2;
1865                        byte cs0 = choices[i];
1866                        switch (cs0) {
1867                        case ASCII:
1868                            if (sourceChar <= 0x7f) {
1869                                targetValue = sourceChar;
1870                                len = 1;
1871                                cs = cs0;
1872                                g = 0;
1873                            }
1874                            break;
1875                        case ISO8859_1:
1876                            if (GR96_START <= sourceChar && sourceChar <= GR96_END) {
1877                                targetValue = sourceChar - 0x80;
1878                                len = 1;
1879                                cs = cs0;
1880                                g = 2;
1881                            }
1882                            break;
1883                        case HWKANA_7BIT:
1884                            if (sourceChar <= HWKANA_END && sourceChar >= HWKANA_START) {
1885                                if (myConverterData.version == 3) {
1886                                    /* JIS7: use G1 (SO) */
1887                                    /* Shift U+FF61..U+FF9F to bytes 21..5F. */
1888                                    targetValue = (int)(UConverterConstants.UNSIGNED_INT_MASK & (sourceChar - (HWKANA_START - 0x21)));
1889                                    len = 1;
1890                                    myConverterData.fromU2022State.cs[1] = cs = cs0; /* do not output an escape sequence */
1891                                    g = 1;
1892                                } else if (myConverterData.version == 4) {
1893                                    /* JIS8: use 8-bit bytes with any single-byte charset, see escape sequence output below */
1894                                    /* Shift U+FF61..U+FF9F to bytes A1..DF. */
1895                                    targetValue = (int)(UConverterConstants.UNSIGNED_INT_MASK & (sourceChar - (HWKANA_START - 0xa1)));
1896                                    len = 1;
1897
1898                                    cs = myConverterData.fromU2022State.cs[0];
1899                                    if (IS_JP_DBCS(cs)) {
1900                                        /* switch from a DBCS charset to JISX201 */
1901                                        cs = JISX201;
1902                                    }
1903                                    /* else stay in the current G0 charset */
1904                                    g = 0;
1905                                }
1906                                /* else do not use HWKANA_7BIT with other versions */
1907                            }
1908                            break;
1909                        case JISX201:
1910                            /* G0 SBCS */
1911                            value[0] = jisx201FromU(sourceChar);
1912                            if (value[0] <= 0x7f) {
1913                                targetValue = value[0];
1914                                len = 1;
1915                                cs = cs0;
1916                                g = 0;
1917                                usingFallback = false;
1918                            }
1919                            break;
1920                        case JISX208:
1921                            /* G0 DBCS from JIS table */
1922                            myConverterData.currentConverter.sharedData = myConverterData.myConverterArray[cs0];
1923                            myConverterData.currentConverter.sharedData.mbcs.outputType = CharsetMBCS.MBCS_OUTPUT_2;
1924                            len2 = myConverterData.currentEncoder.fromUChar32(sourceChar, value, usingFallback);
1925                            //len2 = MBCSFromUChar32_ISO2022(myConverterData.myConverterArray[cs0], sourceChar, value, usingFallback, CharsetMBCS.MBCS_OUTPUT_2);
1926                            if (len2 == 2 || (len2 == -2 && len == 0)) { /* only accept DBCS: abs(len) == 2 */
1927                                value[0] = _2022FromSJIS(value[0]);
1928                                if (value[0] != 0) {
1929                                    targetValue = value[0];
1930                                    len = len2;
1931                                    cs = cs0;
1932                                    g = 0;
1933                                    usingFallback = false;
1934                                }
1935                            } else if (len == 0 && usingFallback  && sourceChar <= HWKANA_END && sourceChar >= HWKANA_START) {
1936                                targetValue = hwkana_fb[sourceChar - HWKANA_START];
1937                                len = -2;
1938                                cs = cs0;
1939                                g = 0;
1940                                usingFallback = false;
1941                            }
1942                            break;
1943                        case ISO8859_7:
1944                            /* G0 SBCS forced to 7-bit output */
1945                            len2 = MBCSSingleFromUChar32(myConverterData.myConverterArray[cs0], sourceChar, value, usingFallback);
1946                            if (len2 != 0 && !(len2 < 0 && len != 0) && GR96_START <= value[0] && value[0] <= GR96_END) {
1947                                targetValue = value[0] - 0x80;
1948                                len = len2;
1949                                cs = cs0;
1950                                g = 2;
1951                                usingFallback = false;
1952                            }
1953                            break;
1954                        default :
1955                            /* G0 DBCS */
1956                            myConverterData.currentConverter.sharedData = myConverterData.myConverterArray[cs0];
1957                            myConverterData.currentConverter.sharedData.mbcs.outputType = CharsetMBCS.MBCS_OUTPUT_2;
1958                            len2 = myConverterData.currentEncoder.fromUChar32(sourceChar, value, usingFallback);
1959                            //len2 = MBCSFromUChar32_ISO2022(myConverterData.myConverterArray[cs0], sourceChar, value, usingFallback, CharsetMBCS.MBCS_OUTPUT_2);
1960                            if (len2 == 2 || (len2 == -2 && len == 0)) { /* only accept DBCS: abs(len)==2 */
1961                                if (cs0 == KSC5601) {
1962                                    /*
1963                                     * Check for valid bytes for the encoding scheme.
1964                                     * This is necessary because the sub-converter (windows-949)
1965                                     * has a broader encoding scheme than is valid for 2022.
1966                                     */
1967                                    value[0] = _2022FromGR94DBCS(value[0]);
1968                                    if (value[0] == 0) {
1969                                        break;
1970                                    }
1971                                }
1972                                targetValue = value[0];
1973                                len = len2;
1974                                cs = cs0;
1975                                g = 0;
1976                                usingFallback = false;
1977                            }
1978                            break;
1979                        }
1980                    }
1981
1982                    if (len != 0) {
1983                        if (len < 0) {
1984                            len = -len; /* fallback */
1985                        }
1986                        outLen = 0;
1987
                        /* write SI if necessary (only for JIS7) */
1989                        if (myConverterData.fromU2022State.g == 1 && g == 0) {
1990                            buffer[outLen++] = UConverterConstants.SI;
1991                            myConverterData.fromU2022State.g = 0;
1992                        }
1993
1994                        /* write the designation sequence if necessary */
1995                        if (cs != myConverterData.fromU2022State.cs[g]) {
1996                            for (int i = 0; i < escSeqChars[cs].length; i++) {
1997                                buffer[outLen++] = escSeqChars[cs][i];
1998                            }
1999                            myConverterData.fromU2022State.cs[g] = cs;
2000
2001                            /* invalidate the choices[] */
2002                            choiceCount = 0;
2003                        }
2004
2005                        /* write the shift sequence if necessary */
2006                        if (g != myConverterData.fromU2022State.g) {
2007                            switch (g) {
2008                            /* case 0 handled before writing escapes */
2009                            case 1:
2010                                buffer[outLen++] = UConverterConstants.SO;
2011                                myConverterData.fromU2022State.g = 1;
2012                                break;
2013                            default : /* case 2 */
2014                                buffer[outLen++] = 0x1b;
2015                                buffer[outLen++] = 0x4e;
2016                                break;
2017                            /* case 3: no SS3 in ISO-2022-JP-x */
2018                            }
2019                        }
2020
2021                        /* write the output bytes */
2022                        if (len == 1) {
2023                            buffer[outLen++] = (byte)targetValue;
2024                        } else { /* len == 2 */
2025                            buffer[outLen++] = (byte)(targetValue >> 8);
2026                            buffer[outLen++] = (byte)targetValue;
2027                        }
2028                    }else {
2029                        /*
2030                         * if we cannot find the character after checking all codepages
2031                         * then this is an error.
2032                         */
2033                        err = CoderResult.unmappableForLength(source.position()-oldSourcePos);
2034                        fromUChar32 = sourceChar;
2035                        break;
2036                    }
2037
2038                    if (sourceChar == CR || sourceChar == LF) {
                        /* reset the G2 state at the end of a line (conversion got us into ASCII or JISX201 already) */
2040                        myConverterData.fromU2022State.cs[2] = 0;
2041                        choiceCount = 0;
2042                    }
2043
2044                    /* output outLen>0 bytes in buffer[] */
2045                    if (outLen == 1) {
2046                        target.put(buffer[0]);
2047                        if (offsets != null) {
2048                            offsets.put(source.remaining() - 1); /* -1 known to be ASCII */
2049                        }
2050                    } else if (outLen == 2 && (target.position() + 2) <= target.limit()) {
2051                        target.put(buffer[0]);
2052                        target.put(buffer[1]);
2053                        if (offsets != null) {
2054                            int sourceIndex = source.position() - 1;
2055                            offsets.put(sourceIndex);
2056                            offsets.put(sourceIndex);
2057                        }
2058                    } else {
2059                        err = CharsetEncoderICU.fromUWriteBytes(this, buffer, 0, outLen, target, offsets, source.position()-1);
2060                    }
2061                } else {
2062                    err = CoderResult.OVERFLOW;
2063                    break;
2064                }
2065            }
2066
2067            /*
2068             * the end of the input stream and detection of truncated input
2069             * are handled by the framework, but for ISO-2022-JP conversion
2070             * we need to be in ASCII mode at the very end
2071             *
2072             * conditions:
2073             *  successful
2074             *  in SO mode or not in ASCII mode
2075             *  end of input and no truncated input
2076             */
2077            if (!err.isError() &&
2078                    (myConverterData.fromU2022State.g != 0 || myConverterData.fromU2022State.cs[0] != ASCII) &&
2079                    flush && !source.hasRemaining() && fromUChar32 == 0) {
2080                int sourceIndex;
2081
2082                outLen = 0;
2083
2084                if (myConverterData.fromU2022State.g != 0) {
2085                    buffer[outLen++] = UConverterConstants.SI;
2086                    myConverterData.fromU2022State.g = 0;
2087                }
2088
2089                if (myConverterData.fromU2022State.cs[0] != ASCII) {
2090                    for (int i = 0; i < escSeqChars[ASCII].length; i++) {
2091                        buffer[outLen++] = escSeqChars[ASCII][i];
2092                    }
2093                    myConverterData.fromU2022State.cs[0] = ASCII;
2094                }
2095
2096                /* get the source index of the last input character */
2097                sourceIndex = source.position();
2098                if (sourceIndex > 0) {
2099                    --sourceIndex;
2100                    if (UTF16.isTrailSurrogate(source.get(sourceIndex)) &&
2101                            (sourceIndex == 0 || UTF16.isLeadSurrogate(source.get(sourceIndex-1)))) {
2102                        --sourceIndex;
2103                    }
2104                } else {
2105                    sourceIndex = -1;
2106                }
2107
2108                err = CharsetEncoderICU.fromUWriteBytes(this, buffer, 0, outLen, target, offsets, sourceIndex);
2109            }
2110            return err;
2111        }
2112    }
2113    /****************************ISO-2022-CN************************************/
2114    /*
2115     * Rules for ISO-2022-CN Encoding:
     * i)   The designator sequence must appear once on a line before any instance
     *      of the character set it designates.
2118     * ii)  If two lines contain characters from the same character set, both lines
2119     *      must include the designator sequence.
2120     * iii) Once the designator sequence is known, a shifting sequence has to be found
2121     *      to invoke the shifting
2122     * iv)  All lines start in ASCII and end in ASCII.
2123     * v)   Four shifting sequences are employed for this purpose:
2124     *      Sequence    ASCII Eq    Charsets
2125     *      ---------   ---------   --------
2126     *      SI          <SI>        US-ASCII
2127     *      SO          <SO>        CNS-11643-1992 Plane 1, GB2312, ISO-IR-165
2128     *      SS2         <ESC>N      CNS-11643-1992 Plane 2
2129     *      SS3         <ESC>O      CNS-11643-1992 Planes 3-7
2130     * vi)
2131     *      SOdesignator    : ESC "$" ")" finalchar_for_SO
2132     *      SS2designator   : ESC "$" "*" finalchar_for_SS2
2133     *      SS3designator   : ESC "$" "+" finalchar_for_SS3
2134     *
2135     *      ESC $ ) A       Indicates the bytes following SO are Chinese
2136     *       characters as defined in GB 2312-80, until
2137     *       another SOdesignation appears
2138     *
2139     *      ESC $ ) E       Indicates the bytes following SO are as defined
2140     *       in ISO-IR-165 (for details, see section 2.1),
2141     *       until another SOdesignation appears
2142     *
2143     *      ESC $ ) G       Indicates the bytes following SO are as defined
2144     *       in CNS 11643-plane-1, until another SOdesignation appears
2145     *
     *      ESC $ * H       Indicates the two bytes immediately following
     *       SS2 is a Chinese character as defined in CNS
     *       11643-plane-2, until another SS2designation
     *       appears
     *       (Meaning <ESC>N must precede every 2 byte sequence.)
2151     *
2152     *      ESC $ + I       Indicates the immediate two bytes following SS3
2153     *       is a Chinese character as defined in CNS
2154     *       11643-plane-3, until another SS3designation
2155     *       appears
     *       (Meaning <ESC>O must precede every 2 byte sequence.)
2157     *
2158     *      ESC $ + J       Indicates the immediate two bytes following SS3
2159     *       is a Chinese character as defined in CNS
2160     *       11643-plane-4, until another SS3designation
2161     *       appears
     *       (In English: <ESC>O must precede every 2 byte sequence.)
2163     *
2164     *      ESC $ + K       Indicates the immediate two bytes following SS3
2165     *       is a Chinese character as defined in CNS
2166     *       11643-plane-5, until another SS3designation
2167     *       appears
2168     *
2169     *      ESC $ + L       Indicates the immediate two bytes following SS3
2170     *       is a Chinese character as defined in CNS
2171     *       11643-plane-6, until another SS3designation
2172     *       appears
2173     *
2174     *      ESC $ + M       Indicates the immediate two bytes following SS3
2175     *       is a Chinese character as defined in CNS
2176     *       11643-plane-7, until another SS3designation
2177     *       appears
2178     *
     *      As in ISO-2022-CN, each line starts in ASCII, and ends in ASCII, and
     *      has its own designation information before any Chinese characters
     *      appear
2182     */
2183
2184    /* The following are defined this way to make strings truely readonly */
2185    private final static byte[] GB_2312_80_STR = { 0x1B, 0x24, 0x29, 0x41 };
2186    private final static byte[] ISO_IR_165_STR = { 0x1B, 0x24, 0x29, 0x45 };
2187    private final static byte[] CNS_11643_1992_Plane_1_STR = { 0x1B, 0x24, 0x29, 0x47 };
2188    private final static byte[] CNS_11643_1992_Plane_2_STR = { 0x1B, 0x24, 0x2A, 0x48 };
2189    private final static byte[] CNS_11643_1992_Plane_3_STR = { 0x1B, 0x24, 0x2B, 0x49 };
2190    private final static byte[] CNS_11643_1992_Plane_4_STR = { 0x1B, 0x24, 0x2B, 0x4A };
2191    private final static byte[] CNS_11643_1992_Plane_5_STR = { 0x1B, 0x24, 0x2B, 0x4B };
2192    private final static byte[] CNS_11643_1992_Plane_6_STR = { 0x1B, 0x24, 0x2B, 0x4C };
2193    private final static byte[] CNS_11643_1992_Plane_7_STR = { 0x1B, 0x24, 0x2B, 0x4D };
2194
2195    /************************ ISO2022-CN Data *****************************/
2196    private final static byte[][] escSeqCharsCN = {
2197        SHIFT_IN_STR,
2198        GB_2312_80_STR,
2199        ISO_IR_165_STR,
2200        CNS_11643_1992_Plane_1_STR,
2201        CNS_11643_1992_Plane_2_STR,
2202        CNS_11643_1992_Plane_3_STR,
2203        CNS_11643_1992_Plane_4_STR,
2204        CNS_11643_1992_Plane_5_STR,
2205        CNS_11643_1992_Plane_6_STR,
2206        CNS_11643_1992_Plane_7_STR,
2207    };
2208
2209    private class CharsetEncoderISO2022CN extends CharsetEncoderICU {
        /**
         * Creates the ISO-2022-CN encoder for the given charset.
         * Forwards {@code cs} together with the first entry of
         * {@code fromUSubstitutionChar} to the {@code CharsetEncoderICU} constructor.
         *
         * @param cs the charset this encoder is created for
         */
        public CharsetEncoderISO2022CN(CharsetICU cs) {
            super(cs, fromUSubstitutionChar[0]);
        }
2213
        /**
         * Resets this encoder: runs the superclass reset first, then calls
         * {@code reset()} on the enclosing charset's {@code myConverterData}
         * (the object whose {@code fromU2022State} this encoder reads and updates).
         */
        @Override
        protected void implReset() {
            super.implReset();
            myConverterData.reset();
        }
2219
2220        /* This overrides the cbFromUWriteSub method in CharsetEncoderICU */
2221        @Override
2222        CoderResult cbFromUWriteSub (CharsetEncoderICU encoder,
2223            CharBuffer source, ByteBuffer target, IntBuffer offsets){
2224            CoderResult err = CoderResult.UNDERFLOW;
2225            byte[] buffer = new byte[8];
2226            int i = 0;
2227            byte[] subchar;
2228            subchar = encoder.replacement();
2229
2230            if (myConverterData.fromU2022State.g != 0) {
2231                /* not in ASCII mode: switch to ASCII */
2232                myConverterData.fromU2022State.g = 0;
2233                buffer[i++] = UConverterConstants.SI;
2234            }
2235            buffer[i++] = subchar[0];
2236
2237            err = CharsetEncoderICU.fromUWriteBytes(this, buffer, 0, i, target, offsets, source.position() - 1);
2238
2239            return err;
2240        }
2241
2242        @Override
2243        protected CoderResult encodeLoop(CharBuffer source, ByteBuffer target, IntBuffer offsets, boolean flush) {
2244            CoderResult err = CoderResult.UNDERFLOW;
2245            int sourceChar;
2246            byte[] buffer = new byte[8];
2247            int len;
2248            byte[] choices = new byte[3];
2249            int choiceCount;
2250            int targetValue = 0;
2251            boolean usingFallback;
2252            boolean gotoGetTrail = false;
2253            int oldSourcePos; // For proper error handling
2254
2255            choiceCount = 0;
2256
2257            /* check if the last codepoint of previous buffer was a lead surrogate */
2258            if ((sourceChar = fromUChar32) != 0 && target.hasRemaining()) {
2259                // goto getTrail label
2260                gotoGetTrail = true;
2261            }
2262
2263            while (source.hasRemaining() || gotoGetTrail) {
2264                if (target.hasRemaining() || gotoGetTrail) {
2265                    oldSourcePos = source.position();
2266                    if (!gotoGetTrail) {
2267                        sourceChar = source.get();
2268                    }
2269                    /* check if the char is a First surrogate */
2270                    if (UTF16.isSurrogate((char)sourceChar) || gotoGetTrail) {
2271                        if (UTF16.isLeadSurrogate((char)sourceChar) || gotoGetTrail) {
2272// getTrail label
2273                            /* reset gotoGetTrail flag*/
2274                             gotoGetTrail = false;
2275
2276                            /* look ahead to find the trail surrogate */
2277                            if (source.hasRemaining()) {
2278                                /* test the following code unit */
2279                                char trail = source.get();
2280                                source.position(source.position()-1);
2281                                if (UTF16.isTrailSurrogate(trail)) {
2282                                    source.get();
2283                                    sourceChar = UCharacter.getCodePoint((char)sourceChar, trail);
2284                                    fromUChar32 = 0x00;
2285                                    /* convert this supplementary code point */
2286                                    /* exit this condition tree */
2287                                } else {
2288                                    /* this is an unmatched lead code unit (1st surrogate) */
2289                                    /* callback(illegal) */
2290                                    err = CoderResult.malformedForLength(1);
2291                                    fromUChar32 = sourceChar;
2292                                    break;
2293                                }
2294                            } else {
2295                                /* no more input */
2296                                fromUChar32 = sourceChar;
2297                                break;
2298                            }
2299                        } else {
2300                            /* this is an unmatched trail code unit (2nd surrogate) */
2301                            /* callback(illegal) */
2302                            err = CoderResult.malformedForLength(1);
2303                            fromUChar32 = sourceChar;
2304                            break;
2305                        }
2306                    }
2307
2308                    /* do the conversion */
2309                    if (sourceChar <= 0x007f) {
                        /* do not convert SO/SI/ESC */
2311                        if (IS_2022_CONTROL(sourceChar)) {
2312                            /* callback(illegal) */
2313                            err = CoderResult.malformedForLength(1);
2314                            fromUChar32 = sourceChar;
2315                            break;
2316                        }
2317
2318                        /* US-ASCII */
2319                        if (myConverterData.fromU2022State.g == 0) {
2320                            buffer[0] = (byte)sourceChar;
2321                            len = 1;
2322                        } else {
2323                            buffer[0] = UConverterConstants.SI;
2324                            buffer[1] = (byte)sourceChar;
2325                            len = 2;
2326                            myConverterData.fromU2022State.g = 0;
2327                            choiceCount = 0;
2328                        }
2329
2330                        if (sourceChar == CR || sourceChar == LF) {
2331                            /* reset the state at the end of a line */
2332                            myConverterData.fromU2022State.reset();
2333                            choiceCount = 0;
2334                        }
2335                    } else {
2336                        /* convert U+0080..U+10ffff */
2337                        int i;
2338                        byte cs, g;
2339
2340                        if (choiceCount == 0) {
2341                            /* try the current SO/G1 converter first */
2342                            choices[0] = myConverterData.fromU2022State.cs[1];
2343
2344                            /* default to GB2312_1 if none is designated yet */
2345                            if (choices[0] == 0) {
2346                                choices[0] = GB2312_1;
2347                            }
2348                            if (myConverterData.version == 0) {
2349                                /* ISO-2022-CN */
2350                                /* try other SO/G1 converter; a CNS_11643_1 lookup may result in any plane */
2351                                if (choices[0] == GB2312_1) {
2352                                    choices[1] = CNS_11643_1;
2353                                } else {
2354                                    choices[1] = GB2312_1;
2355                                }
2356
2357                                choiceCount = 2;
2358                            } else if (myConverterData.version == 1) {
2359                                /* ISO-2022-CN-EXT */
2360
2361                                /* try one of the other converters */
2362                                switch (choices[0]) {
2363                                case GB2312_1:
2364                                    choices[1] = CNS_11643_1;
2365                                    choices[2] = ISO_IR_165;
2366                                    break;
2367                                case ISO_IR_165:
2368                                    choices[1] = GB2312_1;
2369                                    choices[2] = CNS_11643_1;
2370                                    break;
2371                                default :
2372                                    choices[1] = GB2312_1;
2373                                    choices[2] = ISO_IR_165;
2374                                    break;
2375                                }
2376
2377                                choiceCount = 3;
2378                            } else {
2379                                /* ISO-2022-CN-CNS */
2380                                choices[0] = CNS_11643_1;
2381                                choices[1] = GB2312_1;
2382
2383                                choiceCount = 2;
2384                            }
2385                        }
2386
2387                        cs = g = 0;
2388                        /*
2389                         * len==0:  no mapping found yet
2390                         * len<0:   found a fallback result: continue looking for a roundtrip but no further fallbacks
2391                         * len>0:   found a roundtrip result, done
2392                         */
2393                        len = 0;
2394                        /*
2395                         * We will turn off usingFallback after finding a fallback,
2396                         * but we still get fallbacks from PUA code points as usual.
2397                         * Therefore, we will also need to check that we don't overwrite
2398                         * an early fallback with a later one.
2399                         */
2400                        usingFallback = useFallback;
2401
2402                        for (i = 0; i < choiceCount && len <= 0; ++i) {
2403                            byte cs0 = choices[i];
2404                            if (cs0 > 0) {
2405                                int[] value = new int[1];
2406                                int len2;
2407                                if (cs0 > CNS_11643_0) {
2408                                    myConverterData.currentConverter.sharedData = myConverterData.myConverterArray[CNS_11643];
2409                                    myConverterData.currentConverter.sharedData.mbcs.outputType = CharsetMBCS.MBCS_OUTPUT_3;
2410                                    len2 = myConverterData.currentEncoder.fromUChar32(sourceChar, value, usingFallback);
2411                                    //len2 = MBCSFromUChar32_ISO2022(myConverterData.myConverterArray[CNS_11643],
2412                                    //        sourceChar, value, usingFallback, CharsetMBCS.MBCS_OUTPUT_3);
2413                                    if (len2 == 3 || (len2 == -3 && len == 0)) {
2414                                        targetValue = value[0];
2415                                        cs = (byte)(CNS_11643_0 + (value[0] >> 16) - 0x80);
2416                                        if (len2 >= 0) {
2417                                            len = 2;
2418                                        } else {
2419                                            len = -2;
2420                                            usingFallback = false;
2421                                        }
2422                                        if (cs == CNS_11643_1) {
2423                                            g = 1;
2424                                        } else if (cs == CNS_11643_2) {
2425                                            g = 2;
2426                                        } else if (myConverterData.version == 1) { /* plane 3..7 */
2427                                            g = 3;
2428                                        } else {
2429                                            /* ISO-2022-CN (without -EXT) does not support plane 3..7 */
2430                                            len = 0;
2431                                        }
2432                                    }
2433                                } else {
2434                                    /* GB2312_1 or ISO-IR-165 */
2435                                    myConverterData.currentConverter.sharedData = myConverterData.myConverterArray[cs0];
2436                                    myConverterData.currentConverter.sharedData.mbcs.outputType = CharsetMBCS.MBCS_OUTPUT_2;
2437                                    len2 = myConverterData.currentEncoder.fromUChar32(sourceChar, value, usingFallback);
2438                                    //len2 = MBCSFromUChar32_ISO2022(myConverterData.myConverterArray[cs0],
2439                                    //        sourceChar, value, usingFallback, CharsetMBCS.MBCS_OUTPUT_2);
2440                                    if (len2 == 2 || (len2 == -2 && len == 0)) {
2441                                        targetValue = value[0];
2442                                        len = len2;
2443                                        cs = cs0;
2444                                        g = 1;
2445                                        usingFallback = false;
2446                                    }
2447                                }
2448                            }
2449                        }
2450
2451                        if (len != 0) {
2452                            len = 0; /* count output bytes; it must have ben abs(len) == 2 */
2453
2454                            /* write the designation sequence if necessary */
2455                            if (cs != myConverterData.fromU2022State.cs[g]) {
2456                                if (cs < CNS_11643) {
2457                                    for (int n = 0; n < escSeqCharsCN[cs].length; n++) {
2458                                        buffer[n] = escSeqCharsCN[cs][n];
2459                                    }
2460                                } else {
2461                                    for (int n = 0; n < escSeqCharsCN[CNS_11643 + (cs - CNS_11643_1)].length; n++) {
2462                                        buffer[n] = escSeqCharsCN[CNS_11643 + (cs - CNS_11643_1)][n];
2463                                    }
2464                                }
2465                                len = 4;
2466                                myConverterData.fromU2022State.cs[g] = cs;
2467                                if (g == 1) {
2468                                    /* changing the SO/G1 charset invalidates the choices[] */
2469                                    choiceCount = 0;
2470                                }
2471                            }
2472
2473                            /* write the shift sequence if necessary */
2474                            if (g != myConverterData.fromU2022State.g) {
2475                                switch (g) {
2476                                case 1:
2477                                    buffer[len++] = UConverterConstants.SO;
2478
2479                                    /* set the new state only if it is the locking shift SO/G1, not for SS2 or SS3 */
2480                                    myConverterData.fromU2022State.g = 1;
2481                                    break;
2482                                case 2:
2483                                    buffer[len++] = 0x1b;
2484                                    buffer[len++] = 0x4e;
2485                                    break;
2486                                default: /* case 3 */
2487                                    buffer[len++] = 0x1b;
2488                                    buffer[len++] = 0x4f;
2489                                    break;
2490                                }
2491                            }
2492
2493                            /* write the two output bytes */
2494                            buffer[len++] = (byte)(targetValue >> 8);
2495                            buffer[len++] = (byte)targetValue;
2496                        } else {
2497                            /* if we cannot find the character after checking all codepages
2498                             * then this is an error
2499                             */
2500                            err = CoderResult.unmappableForLength(source.position()-oldSourcePos);
2501                            fromUChar32 = sourceChar;
2502                            break;
2503                        }
2504                    }
2505                    /* output len>0 bytes in buffer[] */
2506                    if (len == 1) {
2507                        target.put(buffer[0]);
2508                        if (offsets != null) {
2509                            offsets.put(source.position()-1);
2510                        }
2511                    } else if (len == 2 && (target.remaining() >= 2)) {
2512                        target.put(buffer[0]);
2513                        target.put(buffer[1]);
2514                        if (offsets != null) {
2515                            int sourceIndex = source.position();
2516                            offsets.put(sourceIndex);
2517                            offsets.put(sourceIndex);
2518                        }
2519                    } else {
2520                        err = CharsetEncoderICU.fromUWriteBytes(this, buffer, 0, len, target, offsets, source.position()-1);
2521                        if (err.isError()) {
2522                            break;
2523                        }
2524                    }
2525                } else {
2526                    err = CoderResult.OVERFLOW;
2527                    break;
2528                }
2529            } /* end while (source.hasRemaining() */
2530
2531            /*
2532             * the end of the input stream and detection of truncated input
2533             * are handled by the framework, but for ISO-2022-CN conversion
2534             * we need to be in ASCII mode at the very end
2535             *
             * conditions:
             *   successful
2538             *   not in ASCII mode
2539             *   end of input and no truncated input
2540             */
2541            if (!err.isError() && myConverterData.fromU2022State.g != 0 && flush && !source.hasRemaining() && fromUChar32 == 0) {
2542                int sourceIndex;
2543
2544                /* we are switching to ASCII */
2545                myConverterData.fromU2022State.g = 0;
2546
2547                /* get the source index of the last input character */
2548                sourceIndex = source.position();
2549                if (sourceIndex > 0) {
2550                    --sourceIndex;
2551                    if (UTF16.isTrailSurrogate(source.get(sourceIndex)) &&
2552                            (sourceIndex == 0 || UTF16.isLeadSurrogate(source.get(sourceIndex-1)))) {
2553                        --sourceIndex;
2554                    }
2555                } else {
2556                    sourceIndex = -1;
2557                }
2558
2559                err = CharsetEncoderICU.fromUWriteBytes(this, SHIFT_IN_STR, 0, 1, target, offsets, sourceIndex);
2560            }
2561
2562            return err;
2563        }
2564    }
2565    /******************************** ISO-2022-KR *****************************/
2566    /*
2567     *   Rules for ISO-2022-KR encoding
2568     *   i) The KSC5601 designator sequence should appear only once in a file,
     *      at the beginning of a line before any KSC5601 characters. This usually
2570     *      means that it appears by itself on the first line of the file
2571     *  ii) There are only 2 shifting sequences SO to shift into double byte mode
2572     *      and SI to shift into single byte mode
2573     */
2574    private class CharsetEncoderISO2022KR extends CharsetEncoderICU {
2575        public CharsetEncoderISO2022KR(CharsetICU cs) {
2576            super(cs, fromUSubstitutionChar[myConverterData.version]);
2577        }
2578
2579        @Override
2580        protected void implReset() {
2581            super.implReset();
2582            myConverterData.reset();
2583            setInitialStateFromUnicodeKR(this);
2584        }
2585
2586        /* This overrides the cbFromUWriteSub method in CharsetEncoderICU */
2587        @Override
2588        CoderResult cbFromUWriteSub (CharsetEncoderICU encoder,
2589            CharBuffer source, ByteBuffer target, IntBuffer offsets){
2590            CoderResult err = CoderResult.UNDERFLOW;
2591            byte[] buffer = new byte[8];
2592            int length, i = 0;
2593            byte[] subchar;
2594
2595            subchar = encoder.replacement();
2596            length = subchar.length;
2597
2598            if (myConverterData.version == 0) {
2599                if (length == 1) {
2600                    if (encoder.fromUnicodeStatus != 0) {
2601                        /* in DBCS mode: switch to SBCS */
2602                        encoder.fromUnicodeStatus = 0;
2603                        buffer[i++] = UConverterConstants.SI;
2604                    }
2605                    buffer[i++] = subchar[0];
2606                } else { /* length == 2 */
2607                    if (encoder.fromUnicodeStatus == 0) {
2608                        /* in SBCS mode: switch to DBCS */
2609                        encoder.fromUnicodeStatus = 1;
2610                        buffer[i++] = UConverterConstants.SO;
2611                    }
2612                    buffer[i++] = subchar[0];
2613                    buffer[i++] = subchar[1];
2614                }
2615                err = CharsetEncoderICU.fromUWriteBytes(this, buffer, 0, i, target, offsets, source.position() - 1);
2616            } else {
2617                /* save the subvonverter's substitution string */
2618                byte[] currentSubChars = myConverterData.currentEncoder.replacement();
2619
2620                /* set our substitution string into the subconverter */
2621                myConverterData.currentEncoder.replaceWith(subchar);
2622                myConverterData.currentConverter.subChar1 = fromUSubstitutionChar[0][0];
2623                /* let the subconverter write the subchar, set/retrieve fromUChar32 state */
2624                myConverterData.currentEncoder.fromUChar32 = encoder.fromUChar32;
2625                err = myConverterData.currentEncoder.cbFromUWriteSub(myConverterData.currentEncoder, source, target, offsets);
2626                encoder.fromUChar32 = myConverterData.currentEncoder.fromUChar32;
2627
2628                /* restore the subconverter's substitution string */
2629                myConverterData.currentEncoder.replaceWith(currentSubChars);
2630
2631                if (err.isOverflow()) {
2632                    if (myConverterData.currentEncoder.errorBufferLength > 0) {
2633                        encoder.errorBuffer = myConverterData.currentEncoder.errorBuffer.clone();
2634                    }
2635                    encoder.errorBufferLength = myConverterData.currentEncoder.errorBufferLength;
2636                    myConverterData.currentEncoder.errorBufferLength = 0;
2637                }
2638            }
2639
2640            return err;
2641        }
2642
2643        private CoderResult encodeLoopIBM(CharBuffer source, ByteBuffer target, IntBuffer offsets, boolean flush) {
2644            CoderResult err = CoderResult.UNDERFLOW;
2645
2646            myConverterData.currentEncoder.fromUChar32 = fromUChar32;
2647            err = myConverterData.currentEncoder.cnvMBCSFromUnicodeWithOffsets(source, target, offsets, flush);
2648            fromUChar32 = myConverterData.currentEncoder.fromUChar32;
2649
2650            if (err.isOverflow()) {
2651                if (myConverterData.currentEncoder.errorBufferLength > 0) {
2652                    errorBuffer = myConverterData.currentEncoder.errorBuffer.clone();
2653                }
2654                errorBufferLength = myConverterData.currentEncoder.errorBufferLength;
2655                myConverterData.currentEncoder.errorBufferLength = 0;
2656            }
2657
2658            return err;
2659        }
2660
2661        @Override
2662        protected CoderResult encodeLoop(CharBuffer source, ByteBuffer target, IntBuffer offsets, boolean flush) {
2663            CoderResult err = CoderResult.UNDERFLOW;
2664            int[] targetByteUnit = { 0x0000 };
2665            int sourceChar = 0x0000;
2666            boolean isTargetByteDBCS;
2667            boolean oldIsTargetByteDBCS;
2668            boolean usingFallback;
2669            int length = 0;
2670            boolean gotoGetTrail = false; // for goto getTrail label call
2671
2672            /*
2673             * if the version is 1 then the user is requesting
2674             * conversion with ibm-25546 pass the argument to
2675             * MBCS converter and return
2676             */
2677            if (myConverterData.version == 1) {
2678                return encodeLoopIBM(source, target, offsets, flush);
2679            }
2680
2681            usingFallback = useFallback;
2682            isTargetByteDBCS = fromUnicodeStatus == 0 ? false : true;
2683            if ((sourceChar = fromUChar32) != 0 && target.hasRemaining()) {
2684                gotoGetTrail = true;
2685            }
2686
2687            while (source.hasRemaining() || gotoGetTrail) {
2688                targetByteUnit[0] = UConverterConstants.missingCharMarker;
2689
2690                if (target.hasRemaining() || gotoGetTrail) {
2691                    if (!gotoGetTrail) {
2692                        sourceChar = source.get();
2693
2694                        /* do not convert SO/SI/ESC */
2695                        if (IS_2022_CONTROL(sourceChar)) {
2696                            /* callback(illegal) */
2697                            err = CoderResult.malformedForLength(1);
2698                            fromUChar32 = sourceChar;
2699                            break;
2700                        }
2701                        myConverterData.currentConverter.sharedData.mbcs.outputType = CharsetMBCS.MBCS_OUTPUT_2;
2702                        length = myConverterData.currentEncoder.fromUChar32(sourceChar, targetByteUnit, usingFallback);
2703                        //length = MBCSFromUChar32_ISO2022(myConverterData.currentConverter.sharedData, sourceChar, targetByteUnit, usingFallback, CharsetMBCS.MBCS_OUTPUT_2);
2704                        if (length < 0) {
2705                            length = -length; /* fallback */
2706                        }
2707                        /* only DBCS or SBCS characters are expected */
2708                        /* DB characters with high bit set to 1 are expected */
2709                        if (length > 2 || length == 0 ||
2710                                (length == 1 && targetByteUnit[0] > 0x7f) ||
2711                                (length ==2 &&
2712                                        ((char)(targetByteUnit[0] - 0xa1a1) > (0xfefe - 0xa1a1) ||
2713                                        ((targetByteUnit[0] - 0xa1) & UConverterConstants.UNSIGNED_BYTE_MASK) > (0xfe - 0xa1)))) {
2714                            targetByteUnit[0] = UConverterConstants.missingCharMarker;
2715                        }
2716                    }
2717                    if (!gotoGetTrail && targetByteUnit[0] != UConverterConstants.missingCharMarker) {
2718                        oldIsTargetByteDBCS = isTargetByteDBCS;
2719                        isTargetByteDBCS = (targetByteUnit[0] > 0x00FF);
2720                        /* append the shift sequence */
2721                        if (oldIsTargetByteDBCS != isTargetByteDBCS) {
2722                            if (isTargetByteDBCS) {
2723                                target.put((byte)UConverterConstants.SO);
2724                            } else {
2725                                target.put((byte)UConverterConstants.SI);
2726                            }
2727                            if (offsets != null) {
2728                                offsets.put(source.position()-1);
2729                            }
2730                        }
2731                        /* write the targetUniChar to target */
2732                        if (targetByteUnit[0] <= 0x00FF) {
2733                            if (target.hasRemaining()) {
2734                                target.put((byte)targetByteUnit[0]);
2735                                if (offsets != null) {
2736                                    offsets.put(source.position()-1);
2737                                }
2738                            } else {
2739                                errorBuffer[errorBufferLength++] = (byte)targetByteUnit[0];
2740                                err = CoderResult.OVERFLOW;
2741                            }
2742                        } else {
2743                            if (target.hasRemaining()) {
2744                                target.put((byte)(UConverterConstants.UNSIGNED_BYTE_MASK & ((targetByteUnit[0]>>8) - 0x80)));
2745                                if (offsets != null) {
2746                                    offsets.put(source.position()-1);
2747                                }
2748                                if (target.hasRemaining()) {
2749                                    target.put((byte)(UConverterConstants.UNSIGNED_BYTE_MASK & (targetByteUnit[0]- 0x80)));
2750                                    if (offsets != null) {
2751                                        offsets.put(source.position()-1);
2752                                    }
2753                                } else {
2754                                    errorBuffer[errorBufferLength++] = (byte)(UConverterConstants.UNSIGNED_BYTE_MASK & (targetByteUnit[0] - 0x80));
2755                                    err = CoderResult.OVERFLOW;
2756                                }
2757
2758                            } else {
2759                                errorBuffer[errorBufferLength++] = (byte)(UConverterConstants.UNSIGNED_BYTE_MASK & ((targetByteUnit[0]>>8) - 0x80));
2760                                errorBuffer[errorBufferLength++] = (byte)(UConverterConstants.UNSIGNED_BYTE_MASK & (targetByteUnit[0]- 0x80));
2761                                err = CoderResult.OVERFLOW;
2762                            }
2763                        }
2764                    } else {
2765                        /* oops.. the code point is unassigned
2766                         * set the error and reason
2767                         */
2768
2769                        /* check if the char is a First surrogate */
2770                        if (gotoGetTrail || UTF16.isSurrogate((char)sourceChar)) {
2771                            if (gotoGetTrail || UTF16.isLeadSurrogate((char)sourceChar)) {
2772// getTrail label
2773                                // reset gotoGetTrail flag
2774                                gotoGetTrail = false;
2775
2776                                /* look ahead to find the trail surrogate */
2777                                if (source.hasRemaining()) {
2778                                    /* test the following code unit */
2779                                    char trail = source.get();
2780                                    source.position(source.position()-1);
2781                                    if (UTF16.isTrailSurrogate(trail)) {
2782                                        source.get();
2783                                         sourceChar = UCharacter.getCodePoint((char)sourceChar, trail);
2784                                         err = CoderResult.unmappableForLength(2);
2785                                         /* convert this surrogate code point */
2786                                         /* exit this condition tree */
2787                                    } else {
2788                                        /* this is an unmatched lead code unit (1st surrogate) */
2789                                        /* callback(illegal) */
2790                                        err = CoderResult.malformedForLength(1);
2791                                    }
2792                                } else {
2793                                    /* no more input */
2794                                    err = CoderResult.UNDERFLOW;
2795                                }
2796                            } else {
2797                                /* this is an unmatched trail code unit (2nd surrogate ) */
2798                                /* callback(illegal) */
2799                                err = CoderResult.malformedForLength(1);
2800                            }
2801                        } else {
2802                            /* callback(unassigned) for a BMP code point */
2803                            err = CoderResult.unmappableForLength(1);
2804                        }
2805
2806                        fromUChar32 = sourceChar;
2807                        break;
2808                    }
2809                } else {
2810                    err = CoderResult.OVERFLOW;
2811                    break;
2812                }
2813            }
2814            /*
2815             * the end of the input stream and detection of truncated input
2816             * are handled by the framework, but for ISO-2022-KR conversion
2817             * we need to be inASCII mode at the very end
2818             *
2819             * conditions:
2820             *  successful
2821             *  not in ASCII mode
2822             *  end of  input and no truncated input
2823             */
2824            if (!err.isError() && isTargetByteDBCS && flush && !source.hasRemaining() && fromUChar32 == 0) {
2825                int sourceIndex;
2826
2827                /* we are switching to ASCII */
2828                isTargetByteDBCS = false;
2829
2830                /* get the source index of the last input character */
2831                sourceIndex = source.position();
2832                if (sourceIndex > 0) {
2833                    --sourceIndex;
2834                    if (UTF16.isTrailSurrogate(source.get(sourceIndex)) && UTF16.isLeadSurrogate(source.get(sourceIndex-1))) {
2835                        --sourceIndex;
2836                    }
2837                } else {
2838                    sourceIndex = -1;
2839                }
2840
2841                CharsetEncoderICU.fromUWriteBytes(this, SHIFT_IN_STR, 0, 1, target, offsets, sourceIndex);
2842            }
2843            /*save the state and return */
2844            fromUnicodeStatus = isTargetByteDBCS ? 1 : 0;
2845
2846            return err;
2847        }
2848    }
2849
2850    @Override
2851    public CharsetDecoder newDecoder() {
2852        switch (variant) {
2853        case ISO_2022_JP:
2854            return new CharsetDecoderISO2022JP(this);
2855
2856        case ISO_2022_CN:
2857            return new CharsetDecoderISO2022CN(this);
2858
2859        case ISO_2022_KR:
2860            setInitialStateToUnicodeKR();
2861            return new CharsetDecoderISO2022KR(this);
2862
2863        default: /* should not happen */
2864            return null;
2865        }
2866    }
2867
2868    @Override
2869    public CharsetEncoder newEncoder() {
2870        CharsetEncoderICU cnv;
2871
2872        switch (variant) {
2873        case ISO_2022_JP:
2874            return new CharsetEncoderISO2022JP(this);
2875
2876        case ISO_2022_CN:
2877            return new CharsetEncoderISO2022CN(this);
2878
2879        case ISO_2022_KR:
2880            cnv = new CharsetEncoderISO2022KR(this);
2881            setInitialStateFromUnicodeKR(cnv);
2882            return cnv;
2883
2884        default: /* should not happen */
2885            return null;
2886        }
2887    }
2888
2889    private void setInitialStateToUnicodeKR() {
2890        if (myConverterData.version == 1) {
2891            myConverterData.currentDecoder.toUnicodeStatus = 0;     /* offset */
2892            myConverterData.currentDecoder.mode = 0;                /* state */
2893            myConverterData.currentDecoder.toULength = 0;           /* byteIndex */
2894        }
2895    }
2896    private void setInitialStateFromUnicodeKR(CharsetEncoderICU cnv) {
2897        /* ISO-2022-KR the designator sequence appears only once
2898         * in a file so we append it only once
2899         */
2900        if (cnv.errorBufferLength == 0) {
2901            cnv.errorBufferLength = 4;
2902            cnv.errorBuffer[0] = 0x1b;
2903            cnv.errorBuffer[1] = 0x24;
2904            cnv.errorBuffer[2] = 0x29;
2905            cnv.errorBuffer[3] = 0x43;
2906        }
2907        if (myConverterData.version == 1) {
2908            ((CharsetMBCS)myConverterData.currentEncoder.charset()).subChar1 = 0x1A;
2909            myConverterData.currentEncoder.fromUChar32 = 0;
2910            myConverterData.currentEncoder.fromUnicodeStatus = 1; /* prevLength */
2911        }
2912    }
2913
2914    @Override
2915    void getUnicodeSetImpl(UnicodeSet setFillIn, int which) {
2916        int i;
2917        /*open a set and initialize it with code points that are algorithmically round-tripped */
2918
2919        switch(variant){
2920        case ISO_2022_JP:
2921           /*include JIS X 0201 which is hardcoded */
2922            setFillIn.add(0xa5);
2923            setFillIn.add(0x203e);
2924            if((jpCharsetMasks[myConverterData.version]&CSM(ISO8859_1))!=0){
2925                /*include Latin-1 some variants of JP */
2926                setFillIn.add(0, 0xff);
2927
2928            }
2929            else {
2930                /* include ASCII for JP */
2931                setFillIn.add(0, 0x7f);
2932             }
2933            if(myConverterData.version==3 || myConverterData.version==4 ||which == ROUNDTRIP_AND_FALLBACK_SET){
2934            /*
2935             * Do not test(jpCharsetMasks[myConverterData.version]&CSM(HWKANA_7BIT))!=0 because the bit
2936             * is on for all JP versions although version 3 & 4 (JIS7 and JIS8) use half-width Katakana.
2937             * This is because all ISO_2022_JP variant are lenient in that they accept (in toUnicode) half-width
2938             * Katakana via ESC.
2939             * However, we only emit (fromUnicode) half-width Katakana according to the
2940             * definition of each variant.
2941             *
2942             * When including fallbacks,
2943             * we need to include half-width Katakana Unicode code points for all JP variants because
2944             * JIS X 0208 has hardcoded fallbacks for them (which map to full-width Katakana).
2945             */
2946            /* include half-width Katakana for JP */
2947                setFillIn.add(HWKANA_START, HWKANA_END);
2948             }
2949            break;
2950        case ISO_2022_CN:
2951            /* Include ASCII for CN */
2952            setFillIn.add(0, 0x7f);
2953            break;
2954        case ISO_2022_KR:
2955            /* there is only one converter for KR */
2956          myConverterData.currentConverter.getUnicodeSetImpl(setFillIn, which);
2957          break;
2958        default:
2959            break;
2960        }
2961
2962        //TODO Replaced by ucnv_MBCSGetFilteredUnicodeSetForUnicode() until
2963        for(i=0; i<UCNV_2022_MAX_CONVERTERS;i++){
2964            int filter;
2965            if(myConverterData.myConverterArray[i]!=null){
2966                if(variant==ISO_2022_CN && myConverterData.version==0 && i==CNS_11643){
2967                    /*
2968                     *
2969                     * version -specific for CN:
2970                     * CN version 0 does not map CNS planes 3..7 although
2971                     * they are all available in the CNS conversion table;
2972                     * CN version 1 (-EXT) does map them all.
2973                     * The two versions create different Unicode sets.
2974                     */
2975                    filter=CharsetMBCS.UCNV_SET_FILTER_2022_CN;
2976                } else if(variant==ISO_2022_JP && i == JISX208){
2977                    /*
2978                     * Only add code points that map to Shift-JIS codes
2979                     * corrosponding to JIS X 208
2980                     */
2981                    filter=CharsetMBCS.UCNV_SET_FILTER_SJIS;
2982                } else if(i==KSC5601){
2983                    /*
2984                     * Some of the KSC 5601 tables (Convrtrs.txt has this aliases on multiple tables)
2985                     * are broader than GR94.
2986                     */
2987                    filter=CharsetMBCS.UCNV_SET_FILTER_GR94DBCS;
2988                } else {
2989                    filter=CharsetMBCS.UCNV_SET_FILTER_NONE;
2990                }
2991
2992                myConverterData.currentConverter.MBCSGetFilteredUnicodeSetForUnicode(myConverterData.myConverterArray[i],setFillIn, which, filter);
2993           }
2994        }
2995        /*
2996         * ISO Converter must not convert SO/SI/ESC despite what sub-converters do by themselves
2997         * Remove these characters from the set.
2998         */
2999        setFillIn.remove(0x0e);
3000        setFillIn.remove(0x0f);
3001        setFillIn.remove(0x1b);
3002
3003        /* ISO 2022 converter do not convert C! controls either */
3004        setFillIn.remove(0x80, 0x9f);
3005    }
3006}
3007
3008
3009
3010
3011
3012
3013
3014
3015
3016